summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kvm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-04-30 12:22:28 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-04-30 12:22:28 -0700
commitc70a4be130de333ea079c59da41cc959712bb01c (patch)
treeefa1b9a7aac979dcbf53ce89e2f8ffc61f6d2952 /arch/powerpc/kvm
parent437d1a5b66ca60f209e25f469b395741cc10b731 (diff)
parent5256426247837feb8703625bda7fcfc824af04cf (diff)
downloadlinux-c70a4be130de333ea079c59da41cc959712bb01c.tar.bz2
Merge tag 'powerpc-5.13-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman: - Enable KFENCE for 32-bit. - Implement EBPF for 32-bit. - Convert 32-bit to do interrupt entry/exit in C. - Convert 64-bit BookE to do interrupt entry/exit in C. - Changes to our signal handling code to use user_access_begin/end() more extensively. - Add support for time namespaces (CONFIG_TIME_NS) - A series of fixes that allow us to reenable STRICT_KERNEL_RWX. - Other smaller features, fixes & cleanups. Thanks to Alexey Kardashevskiy, Andreas Schwab, Andrew Donnellan, Aneesh Kumar K.V, Athira Rajeev, Bhaskar Chowdhury, Bixuan Cui, Cédric Le Goater, Chen Huang, Chris Packham, Christophe Leroy, Christopher M. Riedl, Colin Ian King, Dan Carpenter, Daniel Axtens, Daniel Henrique Barboza, David Gibson, Davidlohr Bueso, Denis Efremov, dingsenjie, Dmitry Safonov, Dominic DeMarco, Fabiano Rosas, Ganesh Goudar, Geert Uytterhoeven, Geetika Moolchandani, Greg Kurz, Guenter Roeck, Haren Myneni, He Ying, Jiapeng Chong, Jordan Niethe, Laurent Dufour, Lee Jones, Leonardo Bras, Li Huafei, Madhavan Srinivasan, Mahesh Salgaonkar, Masahiro Yamada, Nathan Chancellor, Nathan Lynch, Nicholas Piggin, Oliver O'Halloran, Paul Menzel, Pu Lehui, Randy Dunlap, Ravi Bangoria, Rosen Penev, Russell Currey, Santosh Sivaraj, Sebastian Andrzej Siewior, Segher Boessenkool, Shivaprasad G Bhat, Srikar Dronamraju, Stephen Rothwell, Thadeu Lima de Souza Cascardo, Thomas Gleixner, Tony Ambardar, Tyrel Datwyler, Vaibhav Jain, Vincenzo Frascino, Xiongwei Song, Yang Li, Yu Kuai, and Zhang Yunkai. * tag 'powerpc-5.13-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (302 commits) powerpc/signal32: Fix erroneous SIGSEGV on RT signal return powerpc: Avoid clang uninitialized warning in __get_user_size_allowed powerpc/papr_scm: Mark nvdimm as unarmed if needed during probe powerpc/kvm: Fix build error when PPC_MEM_KEYS/PPC_PSERIES=n powerpc/kasan: Fix shadow start address with modules powerpc/kernel/iommu: Use largepool as a last resort when !largealloc powerpc/kernel/iommu: Align size for IOMMU_PAGE_SIZE() to save TCEs powerpc/44x: fix spelling mistake in Kconfig "varients" -> "variants" powerpc/iommu: Annotate nested lock for lockdep powerpc/iommu: Do not immediately panic when failed IOMMU table allocation powerpc/iommu: Allocate it_map by vmalloc selftests/powerpc: remove unneeded semicolon powerpc/64s: remove unneeded semicolon powerpc/eeh: remove unneeded semicolon powerpc/selftests: Add selftest to test concurrent perf/ptrace events powerpc/selftests/perf-hwbreak: Add testcases for 2nd DAWR powerpc/selftests/perf-hwbreak: Coalesce event creation code powerpc/selftests/ptrace-hwbreak: Add testcases for 2nd DAWR powerpc/configs: Add IBMVNIC to some 64-bit configs selftests/powerpc: Add uaccess flush test ...
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_host.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv.c85
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c3
-rw-r--r--arch/powerpc/kvm/book3s_hv_nested.c37
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c3
5 files changed, 101 insertions, 29 deletions
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index e452158a18d7..c3e31fef0be1 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -8,6 +8,7 @@
*/
#include <linux/kvm_host.h>
+#include <linux/pkeys.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
@@ -133,6 +134,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
else
kvmppc_mmu_flush_icache(pfn);
+ rflags |= pte_to_hpte_pkey_bits(0, HPTE_USE_KERNEL_KEY);
rflags = (rflags & ~HPTE_R_WIMG) | orig_pte->wimg;
/*
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 13bad6bf4c95..4a532410e128 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -803,7 +803,10 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
vcpu->arch.dawrx1 = value2;
return H_SUCCESS;
case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE:
- /* KVM does not support mflags=2 (AIL=2) */
+ /*
+ * KVM does not support mflags=2 (AIL=2) and AIL=1 is reserved.
+ * Keep this in synch with kvmppc_filter_guest_lpcr_hv.
+ */
if (mflags != 0 && mflags != 3)
return H_UNSUPPORTED_FLAG_START;
return H_TOO_HARD;
@@ -1635,6 +1638,41 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu,
return 0;
}
+/*
+ * Enforce limits on guest LPCR values based on hardware availability,
+ * guest configuration, and possibly hypervisor support and security
+ * concerns.
+ */
+unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm, unsigned long lpcr)
+{
+ /* LPCR_TC only applies to HPT guests */
+ if (kvm_is_radix(kvm))
+ lpcr &= ~LPCR_TC;
+
+ /* On POWER8 and above, userspace can modify AIL */
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ lpcr &= ~LPCR_AIL;
+ if ((lpcr & LPCR_AIL) != LPCR_AIL_3)
+ lpcr &= ~LPCR_AIL; /* LPCR[AIL]=1/2 is disallowed */
+
+ /*
+ * On POWER9, allow userspace to enable large decrementer for the
+ * guest, whether or not the host has it enabled.
+ */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ lpcr &= ~LPCR_LD;
+
+ return lpcr;
+}
+
+static void verify_lpcr(struct kvm *kvm, unsigned long lpcr)
+{
+ if (lpcr != kvmppc_filter_lpcr_hv(kvm, lpcr)) {
+ WARN_ONCE(1, "lpcr 0x%lx differs from filtered 0x%lx\n",
+ lpcr, kvmppc_filter_lpcr_hv(kvm, lpcr));
+ }
+}
+
static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
bool preserve_top32)
{
@@ -1643,6 +1681,23 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
u64 mask;
spin_lock(&vc->lock);
+
+ /*
+ * Userspace can only modify
+ * DPFD (default prefetch depth), ILE (interrupt little-endian),
+ * TC (translation control), AIL (alternate interrupt location),
+ * LD (large decrementer).
+ * These are subject to restrictions from kvmppc_filter_lcpr_hv().
+ */
+ mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD;
+
+ /* Broken 32-bit version of LPCR must not clear top bits */
+ if (preserve_top32)
+ mask &= 0xFFFFFFFF;
+
+ new_lpcr = kvmppc_filter_lpcr_hv(kvm,
+ (vc->lpcr & ~mask) | (new_lpcr & mask));
+
/*
* If ILE (interrupt little-endian) has changed, update the
* MSR_LE bit in the intr_msr for each vcpu in this vcore.
@@ -1661,25 +1716,8 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
}
}
- /*
- * Userspace can only modify DPFD (default prefetch depth),
- * ILE (interrupt little-endian) and TC (translation control).
- * On POWER8 and POWER9 userspace can also modify AIL (alt. interrupt loc.).
- */
- mask = LPCR_DPFD | LPCR_ILE | LPCR_TC;
- if (cpu_has_feature(CPU_FTR_ARCH_207S))
- mask |= LPCR_AIL;
- /*
- * On POWER9, allow userspace to enable large decrementer for the
- * guest, whether or not the host has it enabled.
- */
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- mask |= LPCR_LD;
+ vc->lpcr = new_lpcr;
- /* Broken 32-bit version of LPCR must not clear top bits */
- if (preserve_top32)
- mask &= 0xFFFFFFFF;
- vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask);
spin_unlock(&vc->lock);
}
@@ -3728,7 +3766,10 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
vcpu->arch.dec_expires = dec + tb;
vcpu->cpu = -1;
vcpu->arch.thread_cpu = -1;
+ /* Save guest CTRL register, set runlatch to 1 */
vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
+ if (!(vcpu->arch.ctrl & 1))
+ mtspr(SPRN_CTRLT, vcpu->arch.ctrl | 1);
vcpu->arch.iamr = mfspr(SPRN_IAMR);
vcpu->arch.pspb = mfspr(SPRN_PSPB);
@@ -3749,7 +3790,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
mtspr(SPRN_DSCR, host_dscr);
mtspr(SPRN_TIDR, host_tidr);
mtspr(SPRN_IAMR, host_iamr);
- mtspr(SPRN_PSPB, 0);
if (host_amr != vcpu->arch.amr)
mtspr(SPRN_AMR, host_amr);
@@ -4641,8 +4681,10 @@ void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask)
struct kvmppc_vcore *vc = kvm->arch.vcores[i];
if (!vc)
continue;
+
spin_lock(&vc->lock);
vc->lpcr = (vc->lpcr & ~mask) | lpcr;
+ verify_lpcr(kvm, vc->lpcr);
spin_unlock(&vc->lock);
if (++cores_done >= kvm->arch.online_vcores)
break;
@@ -4970,6 +5012,7 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
kvmppc_setup_partition_table(kvm);
}
+ verify_lpcr(kvm, lpcr);
kvm->arch.lpcr = lpcr;
/* Initialization for future HPT resizes */
@@ -5369,8 +5412,10 @@ static unsigned int default_hcall_list[] = {
H_READ,
H_PROTECT,
H_BULK_REMOVE,
+#ifdef CONFIG_SPAPR_TCE_IOMMU
H_GET_TCE,
H_PUT_TCE,
+#endif
H_SET_DABR,
H_SET_XDABR,
H_CEDE,
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 158d309b42a3..7a0e33a9c980 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -662,6 +662,9 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
{
+ /* Guest must always run with ME enabled, HV disabled. */
+ msr = (msr | MSR_ME) & ~MSR_HV;
+
/*
* Check for illegal transactional state bit combination
* and if we find it, force the TS field to a safe state.
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 0cd0e7aad588..60724f674421 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -132,8 +132,33 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
}
}
+/*
+ * This can result in some L0 HV register state being leaked to an L1
+ * hypervisor when the hv_guest_state is copied back to the guest after
+ * being modified here.
+ *
+ * There is no known problem with such a leak, and in many cases these
+ * register settings could be derived by the guest by observing behaviour
+ * and timing, interrupts, etc., but it is an issue to consider.
+ */
static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ u64 mask;
+
+ /*
+ * Don't let L1 change LPCR bits for the L2 except these:
+ */
+ mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
+ LPCR_LPES | LPCR_MER;
+
+ /*
+ * Additional filtering is required depending on hardware
+ * and configuration.
+ */
+ hr->lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm,
+ (vc->lpcr & ~mask) | (hr->lpcr & mask));
+
/*
* Don't let L1 enable features for L2 which we've disabled for L1,
* but preserve the interrupt cause field.
@@ -271,8 +296,6 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
u64 hv_ptr, regs_ptr;
u64 hdec_exp;
s64 delta_purr, delta_spurr, delta_ic, delta_vtb;
- u64 mask;
- unsigned long lpcr;
if (vcpu->kvm->arch.l1_ptcr == 0)
return H_NOT_AVAILABLE;
@@ -320,10 +343,10 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
vcpu->arch.nested = l2;
vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
vcpu->arch.regs = l2_regs;
- vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
- mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
- LPCR_LPES | LPCR_MER;
- lpcr = (vc->lpcr & ~mask) | (l2_hv.lpcr & mask);
+
+ /* Guest must always run with ME enabled, HV disabled. */
+ vcpu->arch.shregs.msr = (vcpu->arch.regs.msr | MSR_ME) & ~MSR_HV;
+
sanitise_hv_regs(vcpu, &l2_hv);
restore_hv_regs(vcpu, &l2_hv);
@@ -335,7 +358,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
r = RESUME_HOST;
break;
}
- r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr);
+ r = kvmhv_run_single_vcpu(vcpu, hdec_exp, l2_hv.lpcr);
} while (is_kvmppc_resume_guest(r));
/* save L2 state for return */
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 88da2764c1bb..7af7c70f1468 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -673,8 +673,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
}
long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
- unsigned long pte_index, unsigned long avpn,
- unsigned long va)
+ unsigned long pte_index, unsigned long avpn)
{
struct kvm *kvm = vcpu->kvm;
__be64 *hpte;