From 297b8603e356ad82c1345cc75fad4d89310a3c34 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 11 Mar 2021 11:52:38 +0000 Subject: KVM: arm64: Provide KVM's own save/restore SVE primitives as we are about to change the way KVM deals with SVE, provide KVM with its own save/restore SVE primitives. No functional change intended. Acked-by: Will Deacon Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/fpsimdmacros.h | 2 ++ arch/arm64/include/asm/kvm_hyp.h | 2 ++ arch/arm64/kvm/hyp/fpsimd.S | 10 ++++++++++ arch/arm64/kvm/hyp/include/hyp/switch.h | 10 +++++----- 4 files changed, 19 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index af43367534c7..e9b72d35b867 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -6,6 +6,8 @@ * Author: Catalin Marinas */ +#include + .macro fpsimd_save state, tmpnr stp q0, q1, [\state, #16 * 0] stp q2, q3, [\state, #16 * 2] diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index c0450828378b..e8b0f7fcd86b 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -85,6 +85,8 @@ void __debug_switch_to_host(struct kvm_vcpu *vcpu); void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); +void __sve_save_state(void *sve_pffr, u32 *fpsr); +void __sve_restore_state(void *sve_pffr, u32 *fpsr, unsigned int vqminus1); #ifndef __KVM_NVHE_HYPERVISOR__ void activate_traps_vhe_load(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S index 01f114aa47b0..95b22e10996c 100644 --- a/arch/arm64/kvm/hyp/fpsimd.S +++ b/arch/arm64/kvm/hyp/fpsimd.S @@ -19,3 +19,13 @@ SYM_FUNC_START(__fpsimd_restore_state) fpsimd_restore x0, 1 ret SYM_FUNC_END(__fpsimd_restore_state) + +SYM_FUNC_START(__sve_restore_state) + sve_load 0, x1, x2, 3, x4 + ret +SYM_FUNC_END(__sve_restore_state) + +SYM_FUNC_START(__sve_save_state) + sve_save 0, x1, 2 + ret +SYM_FUNC_END(__sve_save_state) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 54f4860cd87c..807bc4734828 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -256,8 +256,8 @@ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) vcpu->arch.host_fpsimd_state, struct thread_struct, uw.fpsimd_state); - sve_save_state(sve_pffr(thread), - &vcpu->arch.host_fpsimd_state->fpsr); + __sve_save_state(sve_pffr(thread), + &vcpu->arch.host_fpsimd_state->fpsr); } else { __fpsimd_save_state(vcpu->arch.host_fpsimd_state); } @@ -266,9 +266,9 @@ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) } if (sve_guest) { - sve_load_state(vcpu_sve_pffr(vcpu), - &vcpu->arch.ctxt.fp_regs.fpsr, - sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1); + __sve_restore_state(vcpu_sve_pffr(vcpu), + &vcpu->arch.ctxt.fp_regs.fpsr, + sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1); write_sysreg_s(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR_EL12); } else { __fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs); -- cgit v1.2.3 From 83857371d4cbeff8551fa770e045be9c6b04715c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 11 Mar 2021 13:51:44 +0000 Subject: KVM: arm64: Use {read,write}_sysreg_el1 to access ZCR_EL1 Switch to the unified EL1 accessors for ZCR_EL1, which will make things easier for nVHE support. Acked-by: Will Deacon Signed-off-by: Marc Zyngier --- arch/arm64/kvm/fpsimd.c | 3 ++- arch/arm64/kvm/hyp/include/hyp/switch.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 3e081d556e81..b7e36a506d3d 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -112,7 +113,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) fpsimd_save_and_flush_cpu_state(); if (guest_has_sve) - __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_s(SYS_ZCR_EL12); + __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); } else if (host_has_sve) { /* * The FPSIMD/SVE state in the CPU has not been touched, and we diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 807bc4734828..d762d5bdc2d5 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -269,7 +269,7 @@ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) __sve_restore_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr, sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1); - write_sysreg_s(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR_EL12); + write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR); } else { __fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs); } -- cgit v1.2.3 From 985d3a1beab543875e0c857ce263cad8233923bb Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 11 Mar 2021 19:18:42 +0000 Subject: KVM: arm64: Let vcpu_sve_pffr() handle HYP VAs The vcpu_sve_pffr() returns a pointer, which can be an interesting thing to do on nVHE. Wrap the pointer with kern_hyp_va(), and take this opportunity to remove the unnecessary casts (sve_state being a void *). Acked-by: Will Deacon Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 3d10e6527f7d..fb1d78299ba0 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -372,8 +372,8 @@ struct kvm_vcpu_arch { }; /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */ -#define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \ - sve_ffr_offset((vcpu)->arch.sve_max_vl))) +#define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) + \ + sve_ffr_offset((vcpu)->arch.sve_max_vl)) #define vcpu_sve_state_size(vcpu) ({ \ size_t __size_ret; \ -- cgit v1.2.3 From 468f3477ef8bda1beeb91dd7f423c9bc248ac39d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 12 Mar 2021 14:38:43 +0000 Subject: KVM: arm64: Introduce vcpu_sve_vq() helper The KVM code contains a number of "sve_vq_from_vl(vcpu->arch.sve_max_vl)" instances, and we are about to add more. Introduce vcpu_sve_vq() as a shorthand for this expression. Acked-by: Will Deacon Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 4 +++- arch/arm64/kvm/guest.c | 6 +++--- arch/arm64/kvm/hyp/include/hyp/switch.h | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index fb1d78299ba0..e59b16008868 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -375,6 +375,8 @@ struct kvm_vcpu_arch { #define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) + \ sve_ffr_offset((vcpu)->arch.sve_max_vl)) +#define vcpu_sve_max_vq(vcpu) sve_vq_from_vl((vcpu)->arch.sve_max_vl) + #define vcpu_sve_state_size(vcpu) ({ \ size_t __size_ret; \ unsigned int __vcpu_vq; \ @@ -382,7 +384,7 @@ struct kvm_vcpu_arch { if (WARN_ON(!sve_vl_valid((vcpu)->arch.sve_max_vl))) { \ __size_ret = 0; \ } else { \ - __vcpu_vq = sve_vq_from_vl((vcpu)->arch.sve_max_vl); \ + __vcpu_vq = vcpu_sve_max_vq(vcpu); \ __size_ret = SVE_SIG_REGS_SIZE(__vcpu_vq); \ } \ \ diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 9bbd30e62799..c763808cacdf 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -299,7 +299,7 @@ static int get_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) memset(vqs, 0, sizeof(vqs)); - max_vq = sve_vq_from_vl(vcpu->arch.sve_max_vl); + max_vq = vcpu_sve_max_vq(vcpu); for (vq = SVE_VQ_MIN; vq <= max_vq; ++vq) if (sve_vq_available(vq)) vqs[vq_word(vq)] |= vq_mask(vq); @@ -427,7 +427,7 @@ static int sve_reg_to_region(struct sve_state_reg_region *region, if (!vcpu_has_sve(vcpu) || (reg->id & SVE_REG_SLICE_MASK) > 0) return -ENOENT; - vq = sve_vq_from_vl(vcpu->arch.sve_max_vl); + vq = vcpu_sve_max_vq(vcpu); reqoffset = SVE_SIG_ZREG_OFFSET(vq, reg_num) - SVE_SIG_REGS_OFFSET; @@ -437,7 +437,7 @@ static int sve_reg_to_region(struct sve_state_reg_region *region, if (!vcpu_has_sve(vcpu) || (reg->id & SVE_REG_SLICE_MASK) > 0) return -ENOENT; - vq = sve_vq_from_vl(vcpu->arch.sve_max_vl); + vq = vcpu_sve_max_vq(vcpu); reqoffset = SVE_SIG_PREG_OFFSET(vq, reg_num) - SVE_SIG_REGS_OFFSET; diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index d762d5bdc2d5..fb68271c1a0f 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -268,7 +268,7 @@ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) if (sve_guest) { __sve_restore_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr, - sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1); + vcpu_sve_vq(vcpu) - 1); write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR); } else { __fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs); -- cgit v1.2.3 From 71ce1ae56e4d43a0c568e2d4bfb154cd15306a82 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 18 Mar 2021 09:43:03 +0000 Subject: arm64: sve: Provide a conditional update accessor for ZCR_ELx A common pattern is to conditionally update ZCR_ELx in order to avoid the "self-synchronizing" effect that writing to this register has. Let's provide an accessor that does exactly this. Acked-by: Will Deacon Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/fpsimd.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index bec5f14b622a..05c9c55768b8 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -130,6 +130,15 @@ static inline void sve_user_enable(void) sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_ZEN_EL0EN); } +#define sve_cond_update_zcr_vq(val, reg) \ + do { \ + u64 __zcr = read_sysreg_s((reg)); \ + u64 __new = __zcr & ~ZCR_ELx_LEN_MASK; \ + __new |= (val) & ZCR_ELx_LEN_MASK; \ + if (__zcr != __new) \ + write_sysreg_s(__new, (reg)); \ + } while (0) + /* * Probing and setup functions. * Calls to these functions must be serialised with one another. -- cgit v1.2.3 From 52029198c1cec1e21513d74f87363a0408f28650 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 16 Mar 2021 08:52:40 +0000 Subject: KVM: arm64: Rework SVE host-save/guest-restore In order to keep the code readable, move the host-save/guest-restore sequences in their own functions, with the following changes: - the hypervisor ZCR is now set from C code - ZCR_EL2 is always used as the EL2 accessor This results in some minor assembler macro rework. No functional change intended. Acked-by: Will Deacon Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/fpsimdmacros.h | 8 +++++-- arch/arm64/include/asm/kvm_hyp.h | 2 +- arch/arm64/kvm/hyp/fpsimd.S | 2 +- arch/arm64/kvm/hyp/include/hyp/switch.h | 40 ++++++++++++++++++++------------- 4 files changed, 32 insertions(+), 20 deletions(-) diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index e9b72d35b867..a2563992d2dc 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -232,8 +232,7 @@ str w\nxtmp, [\xpfpsr, #4] .endm -.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2 - sve_load_vq \xvqminus1, x\nxtmp, \xtmp2 +.macro __sve_load nxbase, xpfpsr, nxtmp _for n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34 _sve_ldr_p 0, \nxbase _sve_wrffr 0 @@ -244,3 +243,8 @@ ldr w\nxtmp, [\xpfpsr, #4] msr fpcr, x\nxtmp .endm + +.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2 + sve_load_vq \xvqminus1, x\nxtmp, \xtmp2 + __sve_load \nxbase, \xpfpsr, \nxtmp +.endm diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index e8b0f7fcd86b..a3e89424ae63 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -86,7 +86,7 @@ void __debug_switch_to_host(struct kvm_vcpu *vcpu); void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); void __sve_save_state(void *sve_pffr, u32 *fpsr); -void __sve_restore_state(void *sve_pffr, u32 *fpsr, unsigned int vqminus1); +void __sve_restore_state(void *sve_pffr, u32 *fpsr); #ifndef __KVM_NVHE_HYPERVISOR__ void activate_traps_vhe_load(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S index 95b22e10996c..3c635929771a 100644 --- a/arch/arm64/kvm/hyp/fpsimd.S +++ b/arch/arm64/kvm/hyp/fpsimd.S @@ -21,7 +21,7 @@ SYM_FUNC_START(__fpsimd_restore_state) SYM_FUNC_END(__fpsimd_restore_state) SYM_FUNC_START(__sve_restore_state) - sve_load 0, x1, x2, 3, x4 + __sve_load 0, x1, 2 ret SYM_FUNC_END(__sve_restore_state) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index fb68271c1a0f..8071e1cad289 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -196,6 +196,24 @@ static inline bool __populate_fault_info(struct kvm_vcpu *vcpu) return true; } +static inline void __hyp_sve_save_host(struct kvm_vcpu *vcpu) +{ + struct thread_struct *thread; + + thread = container_of(vcpu->arch.host_fpsimd_state, struct thread_struct, + uw.fpsimd_state); + + __sve_save_state(sve_pffr(thread), &vcpu->arch.host_fpsimd_state->fpsr); +} + +static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu) +{ + sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2); + __sve_restore_state(vcpu_sve_pffr(vcpu), + &vcpu->arch.ctxt.fp_regs.fpsr); + write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR); +} + /* Check for an FPSIMD/SVE trap and handle as appropriate */ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) { @@ -251,28 +269,18 @@ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) * In the SVE case, VHE is assumed: it is enforced by * Kconfig and kvm_arch_init(). */ - if (sve_host) { - struct thread_struct *thread = container_of( - vcpu->arch.host_fpsimd_state, - struct thread_struct, uw.fpsimd_state); - - __sve_save_state(sve_pffr(thread), - &vcpu->arch.host_fpsimd_state->fpsr); - } else { + if (sve_host) + __hyp_sve_save_host(vcpu); + else __fpsimd_save_state(vcpu->arch.host_fpsimd_state); - } vcpu->arch.flags &= ~KVM_ARM64_FP_HOST; } - if (sve_guest) { - __sve_restore_state(vcpu_sve_pffr(vcpu), - &vcpu->arch.ctxt.fp_regs.fpsr, - vcpu_sve_vq(vcpu) - 1); - write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR); - } else { + if (sve_guest) + __hyp_sve_restore_guest(vcpu); + else __fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs); - } /* Skip restoring fpexc32 for AArch64 guests */ if (!(read_sysreg(hcr_el2) & HCR_RW)) -- cgit v1.2.3 From 0a9a98fda3a24b0775ace4be096290b221f2f6a5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 11 Mar 2021 15:07:14 +0000 Subject: KVM: arm64: Map SVE context at EL2 when available When running on nVHE, and that the vcpu supports SVE, map the SVE state at EL2 so that KVM can access it. Acked-by: Will Deacon Signed-off-by: Marc Zyngier --- arch/arm64/kvm/fpsimd.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index b7e36a506d3d..3c37a419fa82 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -43,6 +43,17 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu) if (ret) goto error; + if (vcpu->arch.sve_state) { + void *sve_end; + + sve_end = vcpu->arch.sve_state + vcpu_sve_state_size(vcpu); + + ret = create_hyp_mappings(vcpu->arch.sve_state, sve_end, + PAGE_HYP); + if (ret) + goto error; + } + vcpu->arch.host_thread_info = kern_hyp_va(ti); vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd); error: -- cgit v1.2.3 From b145a8437aab2799969f6ad8e384b557872333c2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 12 Mar 2021 14:30:52 +0000 Subject: KVM: arm64: Save guest's ZCR_EL1 before saving the FPSIMD state Make sure the guest's ZCR_EL1 is saved before we save/flush the state. This will be useful in later patches. Acked-by: Will Deacon Signed-off-by: Marc Zyngier --- arch/arm64/kvm/fpsimd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 3c37a419fa82..14ea05c5134a 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -121,10 +121,10 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) local_irq_save(flags); if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { - fpsimd_save_and_flush_cpu_state(); - if (guest_has_sve) __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); + + fpsimd_save_and_flush_cpu_state(); } else if (host_has_sve) { /* * The FPSIMD/SVE state in the CPU has not been touched, and we -- cgit v1.2.3 From beed09067b428a7e84a53b05c1de1f93c8460e91 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 17 Mar 2021 16:50:39 +0000 Subject: KVM: arm64: Trap host SVE accesses when the FPSIMD state is dirty ZCR_EL2 controls the upper bound for ZCR_EL1, and is set to a potentially lower limit when the guest uses SVE. In order to restore the SVE state on the EL1 host, we must first reset ZCR_EL2 to its original value. To make it as lazy as possible on the EL1 host side, set the SVE trapping in place when exiting from the guest. On the first EL1 access to SVE, ZCR_EL2 will be restored to its full glory. Suggested-by: Andrew Scull Acked-by: Will Deacon Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 4 ++++ arch/arm64/kvm/hyp/nvhe/switch.c | 9 +++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index f012f8665ecc..8d04d69edd15 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -177,6 +177,10 @@ void handle_trap(struct kvm_cpu_context *host_ctxt) case ESR_ELx_EC_SMC64: handle_host_smc(host_ctxt); break; + case ESR_ELx_EC_SVE: + sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2); + sysreg_clear_set(cptr_el2, CPTR_EL2_TZ, 0); + break; default: hyp_panic(); } diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index f3d0e9eca56c..60adc7ff4caa 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -68,7 +68,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu) static void __deactivate_traps(struct kvm_vcpu *vcpu) { extern char __kvm_hyp_host_vector[]; - u64 mdcr_el2; + u64 mdcr_el2, cptr; ___deactivate_traps(vcpu); @@ -101,7 +101,12 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) write_sysreg(HCR_HOST_NVHE_PROTECTED_FLAGS, hcr_el2); else write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2); - write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); + + cptr = CPTR_EL2_DEFAULT; + if (vcpu_has_sve(vcpu) && (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)) + cptr |= CPTR_EL2_TZ; + + write_sysreg(cptr, cptr_el2); write_sysreg(__kvm_hyp_host_vector, vbar_el2); } -- cgit v1.2.3 From 8c8010d69c1322734a272eb95dbbf42b5190e565 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 11 Mar 2021 18:29:55 +0000 Subject: KVM: arm64: Save/restore SVE state for nVHE Implement the SVE save/restore for nVHE, following a similar logic to that of the VHE implementation: - the SVE state is switched on trap from EL1 to EL2 - no further changes to ZCR_EL2 occur as long as the guest isn't preempted or exit to userspace - ZCR_EL2 is reset to its default value on the first SVE access from the host EL1, and ZCR_EL1 restored to the default guest value in vcpu_put() Acked-by: Will Deacon Signed-off-by: Marc Zyngier --- arch/arm64/kvm/fpsimd.c | 10 +++++++-- arch/arm64/kvm/hyp/include/hyp/switch.h | 37 ++++++++++++--------------------- arch/arm64/kvm/hyp/nvhe/switch.c | 4 ++-- 3 files changed, 23 insertions(+), 28 deletions(-) diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 14ea05c5134a..5621020b28de 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -121,11 +121,17 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) local_irq_save(flags); if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { - if (guest_has_sve) + if (guest_has_sve) { __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); + /* Restore the VL that was saved when bound to the CPU */ + if (!has_vhe()) + sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, + SYS_ZCR_EL1); + } + fpsimd_save_and_flush_cpu_state(); - } else if (host_has_sve) { + } else if (has_vhe() && host_has_sve) { /* * The FPSIMD/SVE state in the CPU has not been touched, and we * have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 8071e1cad289..8a5c57e93e40 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -217,25 +217,19 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu) /* Check for an FPSIMD/SVE trap and handle as appropriate */ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) { - bool vhe, sve_guest, sve_host; + bool sve_guest, sve_host; u8 esr_ec; + u64 reg; if (!system_supports_fpsimd()) return false; - /* - * Currently system_supports_sve() currently implies has_vhe(), - * so the check is redundant. However, has_vhe() can be determined - * statically and helps the compiler remove dead code. - */ - if (has_vhe() && system_supports_sve()) { + if (system_supports_sve()) { sve_guest = vcpu_has_sve(vcpu); sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE; - vhe = true; } else { sve_guest = false; sve_host = false; - vhe = has_vhe(); } esr_ec = kvm_vcpu_trap_get_class(vcpu); @@ -244,31 +238,26 @@ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) return false; /* Don't handle SVE traps for non-SVE vcpus here: */ - if (!sve_guest) - if (esr_ec != ESR_ELx_EC_FP_ASIMD) - return false; + if (!sve_guest && esr_ec != ESR_ELx_EC_FP_ASIMD) + return false; /* Valid trap. Switch the context: */ - - if (vhe) { - u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN; - + if (has_vhe()) { + reg = CPACR_EL1_FPEN; if (sve_guest) reg |= CPACR_EL1_ZEN; - write_sysreg(reg, cpacr_el1); + sysreg_clear_set(cpacr_el1, 0, reg); } else { - write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP, - cptr_el2); - } + reg = CPTR_EL2_TFP; + if (sve_guest) + reg |= CPTR_EL2_TZ; + sysreg_clear_set(cptr_el2, reg, 0); + } isb(); if (vcpu->arch.flags & KVM_ARM64_FP_HOST) { - /* - * In the SVE case, VHE is assumed: it is enforced by - * Kconfig and kvm_arch_init(). - */ if (sve_host) __hyp_sve_save_host(vcpu); else diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 60adc7ff4caa..b3fc0169268f 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -41,9 +41,9 @@ static void __activate_traps(struct kvm_vcpu *vcpu) __activate_traps_common(vcpu); val = CPTR_EL2_DEFAULT; - val |= CPTR_EL2_TTA | CPTR_EL2_TZ | CPTR_EL2_TAM; + val |= CPTR_EL2_TTA | CPTR_EL2_TAM; if (!update_fp_enabled(vcpu)) { - val |= CPTR_EL2_TFP; + val |= CPTR_EL2_TFP | CPTR_EL2_TZ; __activate_traps_fpsimd32(vcpu); } -- cgit v1.2.3 From 6e94095c5566c946a487fa1f7212b60699fb52c5 Mon Sep 17 00:00:00 2001 From: Daniel Kiss Date: Thu, 11 Mar 2021 19:23:07 +0000 Subject: KVM: arm64: Enable SVE support for nVHE Now that KVM is equipped to deal with SVE on nVHE, remove the code preventing it from being used as well as the bits of documentation that were mentioning the incompatibility. Acked-by: Will Deacon Signed-off-by: Daniel Kiss Signed-off-by: Marc Zyngier --- arch/arm64/Kconfig | 7 ------- arch/arm64/include/asm/kvm_host.h | 13 ------------- arch/arm64/kvm/arm.c | 5 ----- arch/arm64/kvm/reset.c | 4 ---- 4 files changed, 29 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1f212b47a48a..2690543799fb 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1686,7 +1686,6 @@ endmenu config ARM64_SVE bool "ARM Scalable Vector Extension support" default y - depends on !KVM || ARM64_VHE help The Scalable Vector Extension (SVE) is an extension to the AArch64 execution state which complements and extends the SIMD functionality @@ -1715,12 +1714,6 @@ config ARM64_SVE booting the kernel. If unsure and you are not observing these symptoms, you should assume that it is safe to say Y. - CPUs that support SVE are architecturally required to support the - Virtualization Host Extensions (VHE), so the kernel makes no - provision for supporting SVE alongside KVM without VHE enabled. - Thus, you will need to enable CONFIG_ARM64_VHE if you want to support - KVM in the same kernel image. - config ARM64_MODULE_PLTS bool "Use PLTs to allow module memory to spill over into vmalloc area" depends on MODULES diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e59b16008868..08f500b2551a 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -694,19 +694,6 @@ static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) ctxt_sys_reg(cpu_ctxt, MPIDR_EL1) = read_cpuid_mpidr(); } -static inline bool kvm_arch_requires_vhe(void) -{ - /* - * The Arm architecture specifies that implementation of SVE - * requires VHE also to be implemented. The KVM code for arm64 - * relies on this when SVE is present: - */ - if (system_supports_sve()) - return true; - - return false; -} - void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu); static inline void kvm_arch_hardware_unsetup(void) {} diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index fc4c95dd2d26..ef92b7d32ebd 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1889,11 +1889,6 @@ int kvm_arch_init(void *opaque) in_hyp_mode = is_kernel_in_hyp_mode(); - if (!in_hyp_mode && kvm_arch_requires_vhe()) { - kvm_pr_unimpl("CPU unsupported in non-VHE mode, not initializing\n"); - return -ENODEV; - } - if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) || cpus_have_final_cap(ARM64_WORKAROUND_1508412)) kvm_info("Guests without required CPU erratum workarounds can deadlock system!\n" \ diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 47f3f035f3ea..f08b1e7ebf68 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -74,10 +74,6 @@ static int kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu) if (!system_supports_sve()) return -EINVAL; - /* Verify that KVM startup enforced this when SVE was detected: */ - if (WARN_ON(!has_vhe())) - return -EINVAL; - vcpu->arch.sve_max_vl = kvm_sve_max_vl; /* -- cgit v1.2.3 From 5b08709313718e95ba06ef49aa82f964a605bd9c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 18 Mar 2021 18:30:26 +0000 Subject: KVM: arm64: Fix host's ZCR_EL2 restore on nVHE We re-enter the EL1 host with CPTR_EL2.TZ set in order to be able to lazily restore ZCR_EL2 when required. However, the same CPTR_EL2 configuration also leads to trapping when ZCR_EL2 is accessed from EL2. Duh! Clear CPTR_EL2.TZ *before* writing to ZCR_EL2. Fixes: beed09067b42 ("KVM: arm64: Trap host SVE accesses when the FPSIMD state is dirty") Reported-by: Will Deacon Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 8d04d69edd15..84a702dc4a92 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -178,8 +178,9 @@ void handle_trap(struct kvm_cpu_context *host_ctxt) handle_host_smc(host_ctxt); break; case ESR_ELx_EC_SVE: - sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2); sysreg_clear_set(cptr_el2, CPTR_EL2_TZ, 0); + isb(); + sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2); break; default: hyp_panic(); -- cgit v1.2.3 From 8d9902055c57548bb342dc3ca78caa21e9643024 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 19 Mar 2021 10:01:09 +0000 Subject: arm64: lib: Annotate {clear, copy}_page() as position-independent clear_page() and copy_page() are suitable for use outside of the kernel address space, so annotate them as position-independent code. Signed-off-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-2-qperret@google.com --- arch/arm64/lib/clear_page.S | 4 ++-- arch/arm64/lib/copy_page.S | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S index 073acbf02a7c..b84b179edba3 100644 --- a/arch/arm64/lib/clear_page.S +++ b/arch/arm64/lib/clear_page.S @@ -14,7 +14,7 @@ * Parameters: * x0 - dest */ -SYM_FUNC_START(clear_page) +SYM_FUNC_START_PI(clear_page) mrs x1, dczid_el0 and w1, w1, #0xf mov x2, #4 @@ -25,5 +25,5 @@ SYM_FUNC_START(clear_page) tst x0, #(PAGE_SIZE - 1) b.ne 1b ret -SYM_FUNC_END(clear_page) +SYM_FUNC_END_PI(clear_page) EXPORT_SYMBOL(clear_page) diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S index e7a793961408..29144f4cd449 100644 --- a/arch/arm64/lib/copy_page.S +++ b/arch/arm64/lib/copy_page.S @@ -17,7 +17,7 @@ * x0 - dest * x1 - src */ -SYM_FUNC_START(copy_page) +SYM_FUNC_START_PI(copy_page) alternative_if ARM64_HAS_NO_HW_PREFETCH // Prefetch three cache lines ahead. prfm pldl1strm, [x1, #128] @@ -75,5 +75,5 @@ alternative_else_nop_endif stnp x16, x17, [x0, #112 - 256] ret -SYM_FUNC_END(copy_page) +SYM_FUNC_END_PI(copy_page) EXPORT_SYMBOL(copy_page) -- cgit v1.2.3 From 7b4a7b5e6fefd15f708f959dd43e188444e252ec Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 19 Mar 2021 10:01:10 +0000 Subject: KVM: arm64: Link position-independent string routines into .hyp.text Pull clear_page(), copy_page(), memcpy() and memset() into the nVHE hyp code and ensure that we always execute the '__pi_' entry point on the offchance that it changes in future. [ qperret: Commit title nits and added linker script alias ] Signed-off-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-3-qperret@google.com --- arch/arm64/include/asm/hyp_image.h | 3 +++ arch/arm64/kernel/image-vars.h | 11 +++++++++++ arch/arm64/kvm/hyp/nvhe/Makefile | 4 ++++ 3 files changed, 18 insertions(+) diff --git a/arch/arm64/include/asm/hyp_image.h b/arch/arm64/include/asm/hyp_image.h index 737ded6b6d0d..78cd77990c9c 100644 --- a/arch/arm64/include/asm/hyp_image.h +++ b/arch/arm64/include/asm/hyp_image.h @@ -56,6 +56,9 @@ */ #define KVM_NVHE_ALIAS(sym) kvm_nvhe_sym(sym) = sym; +/* Defines a linker script alias for KVM nVHE hyp symbols */ +#define KVM_NVHE_ALIAS_HYP(first, sec) kvm_nvhe_sym(first) = kvm_nvhe_sym(sec); + #endif /* LINKER_SCRIPT */ #endif /* __ARM64_HYP_IMAGE_H__ */ diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 5aa9ed1e9ec6..4eb7a15c8b60 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -104,6 +104,17 @@ KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base); /* PMU available static key */ KVM_NVHE_ALIAS(kvm_arm_pmu_available); +/* Position-independent library routines */ +KVM_NVHE_ALIAS_HYP(clear_page, __pi_clear_page); +KVM_NVHE_ALIAS_HYP(copy_page, __pi_copy_page); +KVM_NVHE_ALIAS_HYP(memcpy, __pi_memcpy); +KVM_NVHE_ALIAS_HYP(memset, __pi_memset); + +#ifdef CONFIG_KASAN +KVM_NVHE_ALIAS_HYP(__memcpy, __pi_memcpy); +KVM_NVHE_ALIAS_HYP(__memset, __pi_memset); +#endif + #endif /* CONFIG_KVM */ #endif /* __ARM64_KERNEL_IMAGE_VARS_H */ diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index a6707df4f6c0..bc98f8e3d1da 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -9,10 +9,14 @@ ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS hostprogs := gen-hyprel HOST_EXTRACFLAGS += -I$(objtree)/include +lib-objs := clear_page.o copy_page.o memcpy.o memset.o +lib-objs := $(addprefix ../../../lib/, $(lib-objs)) + obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \ hyp-main.o hyp-smp.o psci-relay.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o ../exception.o +obj-y += $(lib-objs) ## ## Build rules for compiling nVHE hyp code -- cgit v1.2.3 From 67c2d326332ee28079348e43cf4f17bbfe63b260 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 19 Mar 2021 10:01:11 +0000 Subject: arm64: kvm: Add standalone ticket spinlock implementation for use at hyp We will soon need to synchronise multiple CPUs in the hyp text at EL2. The qspinlock-based locking used by the host is overkill for this purpose and relies on the kernel's "percpu" implementation for the MCS nodes. Implement a simple ticket locking scheme based heavily on the code removed by commit c11090474d70 ("arm64: locking: Replace ticket lock implementation with qspinlock"). Signed-off-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-4-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/spinlock.h | 92 ++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 arch/arm64/kvm/hyp/include/nvhe/spinlock.h diff --git a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h new file mode 100644 index 000000000000..76b537f8d1c6 --- /dev/null +++ b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * A stand-alone ticket spinlock implementation for use by the non-VHE + * KVM hypervisor code running at EL2. + * + * Copyright (C) 2020 Google LLC + * Author: Will Deacon + * + * Heavily based on the implementation removed by c11090474d70 which was: + * Copyright (C) 2012 ARM Ltd. + */ + +#ifndef __ARM64_KVM_NVHE_SPINLOCK_H__ +#define __ARM64_KVM_NVHE_SPINLOCK_H__ + +#include +#include + +typedef union hyp_spinlock { + u32 __val; + struct { +#ifdef __AARCH64EB__ + u16 next, owner; +#else + u16 owner, next; +#endif + }; +} hyp_spinlock_t; + +#define hyp_spin_lock_init(l) \ +do { \ + *(l) = (hyp_spinlock_t){ .__val = 0 }; \ +} while (0) + +static inline void hyp_spin_lock(hyp_spinlock_t *lock) +{ + u32 tmp; + hyp_spinlock_t lockval, newval; + + asm volatile( + /* Atomically increment the next ticket. */ + ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ +" prfm pstl1strm, %3\n" +"1: ldaxr %w0, %3\n" +" add %w1, %w0, #(1 << 16)\n" +" stxr %w2, %w1, %3\n" +" cbnz %w2, 1b\n", + /* LSE atomics */ +" mov %w2, #(1 << 16)\n" +" ldadda %w2, %w0, %3\n" + __nops(3)) + + /* Did we get the lock? */ +" eor %w1, %w0, %w0, ror #16\n" +" cbz %w1, 3f\n" + /* + * No: spin on the owner. Send a local event to avoid missing an + * unlock before the exclusive load. + */ +" sevl\n" +"2: wfe\n" +" ldaxrh %w2, %4\n" +" eor %w1, %w2, %w0, lsr #16\n" +" cbnz %w1, 2b\n" + /* We got the lock. Critical section starts here. */ +"3:" + : "=&r" (lockval), "=&r" (newval), "=&r" (tmp), "+Q" (*lock) + : "Q" (lock->owner) + : "memory"); +} + +static inline void hyp_spin_unlock(hyp_spinlock_t *lock) +{ + u64 tmp; + + asm volatile( + ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ + " ldrh %w1, %0\n" + " add %w1, %w1, #1\n" + " stlrh %w1, %0", + /* LSE atomics */ + " mov %w1, #1\n" + " staddlh %w1, %0\n" + __nops(1)) + : "=Q" (lock->owner), "=&r" (tmp) + : + : "memory"); +} + +#endif /* __ARM64_KVM_NVHE_SPINLOCK_H__ */ -- cgit v1.2.3 From 9cc7758145fd24b17cff0734b7cfd80de30be052 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:12 +0000 Subject: KVM: arm64: Initialize kvm_nvhe_init_params early Move the initialization of kvm_nvhe_init_params in a dedicated function that is run early, and only once during KVM init, rather than every time the KVM vectors are set and reset. This also opens the opportunity for the hypervisor to change the init structs during boot, hence simplifying the replacement of host-provided page-table by the one the hypervisor will create for itself. Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-5-qperret@google.com --- arch/arm64/kvm/arm.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index c2df58be5b0c..2adb8d878bb9 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1388,22 +1388,18 @@ static int kvm_init_vector_slots(void) return 0; } -static void cpu_init_hyp_mode(void) +static void cpu_prepare_hyp_mode(int cpu) { - struct kvm_nvhe_init_params *params = this_cpu_ptr_nvhe_sym(kvm_init_params); - struct arm_smccc_res res; + struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu); unsigned long tcr; - /* Switch from the HYP stub to our own HYP init vector */ - __hyp_set_vectors(kvm_get_idmap_vector()); - /* * Calculate the raw per-cpu offset without a translation from the * kernel's mapping to the linear mapping, and store it in tpidr_el2 * so that we can use adr_l to access per-cpu variables in EL2. * Also drop the KASAN tag which gets in the way... */ - params->tpidr_el2 = (unsigned long)kasan_reset_tag(this_cpu_ptr_nvhe_sym(__per_cpu_start)) - + params->tpidr_el2 = (unsigned long)kasan_reset_tag(per_cpu_ptr_nvhe_sym(__per_cpu_start, cpu)) - (unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start)); params->mair_el2 = read_sysreg(mair_el1); @@ -1427,7 +1423,7 @@ static void cpu_init_hyp_mode(void) tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET; params->tcr_el2 = tcr; - params->stack_hyp_va = kern_hyp_va(__this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE); + params->stack_hyp_va = kern_hyp_va(per_cpu(kvm_arm_hyp_stack_page, cpu) + PAGE_SIZE); params->pgd_pa = kvm_mmu_get_httbr(); /* @@ -1435,6 +1431,15 @@ static void cpu_init_hyp_mode(void) * be read while the MMU is off. */ kvm_flush_dcache_to_poc(params, sizeof(*params)); +} + +static void cpu_init_hyp_mode(void) +{ + struct kvm_nvhe_init_params *params; + struct arm_smccc_res res; + + /* Switch from the HYP stub to our own HYP init vector */ + __hyp_set_vectors(kvm_get_idmap_vector()); /* * Call initialization code, and switch to the full blown HYP code. @@ -1443,6 +1448,7 @@ static void cpu_init_hyp_mode(void) * cpus_have_const_cap() wrapper. */ BUG_ON(!system_capabilities_finalized()); + params = this_cpu_ptr_nvhe_sym(kvm_init_params); arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init), virt_to_phys(params), &res); WARN_ON(res.a0 != SMCCC_RET_SUCCESS); @@ -1790,19 +1796,19 @@ static int init_hyp_mode(void) } } - /* - * Map Hyp percpu pages - */ for_each_possible_cpu(cpu) { char *percpu_begin = (char *)kvm_arm_hyp_percpu_base[cpu]; char *percpu_end = percpu_begin + nvhe_percpu_size(); + /* Map Hyp percpu pages */ err = create_hyp_mappings(percpu_begin, percpu_end, PAGE_HYP); - if (err) { kvm_err("Cannot map hyp percpu region\n"); goto out_err; } + + /* Prepare the CPU initialization parameters */ + cpu_prepare_hyp_mode(cpu); } if (is_protected_kvm_enabled()) { -- cgit v1.2.3 From cc706a63894fdcc25d226378898921e1ab7dd64e Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:13 +0000 Subject: KVM: arm64: Avoid free_page() in page-table allocator Currently, the KVM page-table allocator uses a mix of put_page() and free_page() calls depending on the context even though page-allocation is always achieved using variants of __get_free_page(). Make the code consistent by using put_page() throughout, and reduce the memory management API surface used by the page-table code. This will ease factoring out page-allocation from pgtable.c, which is a pre-requisite to creating page-tables at EL2. Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-6-qperret@google.com --- arch/arm64/kvm/hyp/pgtable.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 926fc07074f5..0990fda19198 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -414,7 +414,7 @@ int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits) static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, enum kvm_pgtable_walk_flags flag, void * const arg) { - free_page((unsigned long)kvm_pte_follow(*ptep)); + put_page(virt_to_page(kvm_pte_follow(*ptep))); return 0; } @@ -426,7 +426,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt) }; WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); - free_page((unsigned long)pgt->pgd); + put_page(virt_to_page(pgt->pgd)); pgt->pgd = NULL; } @@ -578,7 +578,7 @@ static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level, if (!data->anchor) return 0; - free_page((unsigned long)kvm_pte_follow(*ptep)); + put_page(virt_to_page(kvm_pte_follow(*ptep))); put_page(virt_to_page(ptep)); if (data->anchor == ptep) { @@ -701,7 +701,7 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, } if (childp) - free_page((unsigned long)childp); + put_page(virt_to_page(childp)); return 0; } @@ -898,7 +898,7 @@ static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, put_page(virt_to_page(ptep)); if (kvm_pte_table(pte, level)) - free_page((unsigned long)kvm_pte_follow(pte)); + put_page(virt_to_page(kvm_pte_follow(pte))); return 0; } -- cgit v1.2.3 From 7aef0cbcdcd0995efde9957b3eda9f31a219613d Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:14 +0000 Subject: KVM: arm64: Factor memory allocation out of pgtable.c In preparation for enabling the creation of page-tables at EL2, factor all memory allocation out of the page-table code, hence making it re-usable with any compatible memory allocator. No functional changes intended. Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-7-qperret@google.com --- arch/arm64/include/asm/kvm_pgtable.h | 41 ++++++++++++++- arch/arm64/kvm/hyp/pgtable.c | 98 ++++++++++++++++++++++-------------- arch/arm64/kvm/mmu.c | 66 +++++++++++++++++++++++- 3 files changed, 163 insertions(+), 42 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 8886d43cfb11..bbe840e430cb 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -13,17 +13,50 @@ typedef u64 kvm_pte_t; +/** + * struct kvm_pgtable_mm_ops - Memory management callbacks. + * @zalloc_page: Allocate a single zeroed memory page. The @arg parameter + * can be used by the walker to pass a memcache. The + * initial refcount of the page is 1. + * @zalloc_pages_exact: Allocate an exact number of zeroed memory pages. The + * @size parameter is in bytes, and is rounded-up to the + * next page boundary. The resulting allocation is + * physically contiguous. + * @free_pages_exact: Free an exact number of memory pages previously + * allocated by zalloc_pages_exact. + * @get_page: Increment the refcount on a page. + * @put_page: Decrement the refcount on a page. When the refcount + * reaches 0 the page is automatically freed. + * @page_count: Return the refcount of a page. + * @phys_to_virt: Convert a physical address into a virtual address mapped + * in the current context. + * @virt_to_phys: Convert a virtual address mapped in the current context + * into a physical address. + */ +struct kvm_pgtable_mm_ops { + void* (*zalloc_page)(void *arg); + void* (*zalloc_pages_exact)(size_t size); + void (*free_pages_exact)(void *addr, size_t size); + void (*get_page)(void *addr); + void (*put_page)(void *addr); + int (*page_count)(void *addr); + void* (*phys_to_virt)(phys_addr_t phys); + phys_addr_t (*virt_to_phys)(void *addr); +}; + /** * struct kvm_pgtable - KVM page-table. * @ia_bits: Maximum input address size, in bits. * @start_level: Level at which the page-table walk starts. * @pgd: Pointer to the first top-level entry of the page-table. + * @mm_ops: Memory management callbacks. * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables. */ struct kvm_pgtable { u32 ia_bits; u32 start_level; kvm_pte_t *pgd; + struct kvm_pgtable_mm_ops *mm_ops; /* Stage-2 only */ struct kvm_s2_mmu *mmu; @@ -86,10 +119,12 @@ struct kvm_pgtable_walker { * kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table. * @pgt: Uninitialised page-table structure to initialise. * @va_bits: Maximum virtual address bits. + * @mm_ops: Memory management callbacks. * * Return: 0 on success, negative error code on failure. */ -int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits); +int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits, + struct kvm_pgtable_mm_ops *mm_ops); /** * kvm_pgtable_hyp_destroy() - Destroy an unused hypervisor stage-1 page-table. @@ -126,10 +161,12 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, * kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table. * @pgt: Uninitialised page-table structure to initialise. * @kvm: KVM structure representing the guest virtual machine. + * @mm_ops: Memory management callbacks. * * Return: 0 on success, negative error code on failure. */ -int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm); +int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm, + struct kvm_pgtable_mm_ops *mm_ops); /** * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table. diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 0990fda19198..ff478a576f4d 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -152,9 +152,9 @@ static kvm_pte_t kvm_phys_to_pte(u64 pa) return pte; } -static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte) +static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte, struct kvm_pgtable_mm_ops *mm_ops) { - return __va(kvm_pte_to_phys(pte)); + return mm_ops->phys_to_virt(kvm_pte_to_phys(pte)); } static void kvm_set_invalid_pte(kvm_pte_t *ptep) @@ -163,9 +163,10 @@ static void kvm_set_invalid_pte(kvm_pte_t *ptep) WRITE_ONCE(*ptep, pte & ~KVM_PTE_VALID); } -static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp) +static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp, + struct kvm_pgtable_mm_ops *mm_ops) { - kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(__pa(childp)); + kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(mm_ops->virt_to_phys(childp)); pte |= FIELD_PREP(KVM_PTE_TYPE, KVM_PTE_TYPE_TABLE); pte |= KVM_PTE_VALID; @@ -228,7 +229,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, goto out; } - childp = kvm_pte_follow(pte); + childp = kvm_pte_follow(pte, data->pgt->mm_ops); ret = __kvm_pgtable_walk(data, childp, level + 1); if (ret) goto out; @@ -303,8 +304,9 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, } struct hyp_map_data { - u64 phys; - kvm_pte_t attr; + u64 phys; + kvm_pte_t attr; + struct kvm_pgtable_mm_ops *mm_ops; }; static int hyp_map_set_prot_attr(enum kvm_pgtable_prot prot, @@ -359,6 +361,8 @@ static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, enum kvm_pgtable_walk_flags flag, void * const arg) { kvm_pte_t *childp; + struct hyp_map_data *data = arg; + struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; if (hyp_map_walker_try_leaf(addr, end, level, ptep, arg)) return 0; @@ -366,11 +370,11 @@ static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1)) return -EINVAL; - childp = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL); + childp = (kvm_pte_t *)mm_ops->zalloc_page(NULL); if (!childp) return -ENOMEM; - kvm_set_table_pte(ptep, childp); + kvm_set_table_pte(ptep, childp, mm_ops); return 0; } @@ -380,6 +384,7 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, int ret; struct hyp_map_data map_data = { .phys = ALIGN_DOWN(phys, PAGE_SIZE), + .mm_ops = pgt->mm_ops, }; struct kvm_pgtable_walker walker = { .cb = hyp_map_walker, @@ -397,16 +402,18 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, return ret; } -int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits) +int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits, + struct kvm_pgtable_mm_ops *mm_ops) { u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits); - pgt->pgd = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL); + pgt->pgd = (kvm_pte_t *)mm_ops->zalloc_page(NULL); if (!pgt->pgd) return -ENOMEM; pgt->ia_bits = va_bits; pgt->start_level = KVM_PGTABLE_MAX_LEVELS - levels; + pgt->mm_ops = mm_ops; pgt->mmu = NULL; return 0; } @@ -414,7 +421,9 @@ int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits) static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, enum kvm_pgtable_walk_flags flag, void * const arg) { - put_page(virt_to_page(kvm_pte_follow(*ptep))); + struct kvm_pgtable_mm_ops *mm_ops = arg; + + mm_ops->put_page((void *)kvm_pte_follow(*ptep, mm_ops)); return 0; } @@ -423,10 +432,11 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt) struct kvm_pgtable_walker walker = { .cb = hyp_free_walker, .flags = KVM_PGTABLE_WALK_TABLE_POST, + .arg = pgt->mm_ops, }; WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); - put_page(virt_to_page(pgt->pgd)); + pgt->mm_ops->put_page(pgt->pgd); pgt->pgd = NULL; } @@ -438,6 +448,8 @@ struct stage2_map_data { struct kvm_s2_mmu *mmu; struct kvm_mmu_memory_cache *memcache; + + struct kvm_pgtable_mm_ops *mm_ops; }; static int stage2_map_set_prot_attr(enum kvm_pgtable_prot prot, @@ -471,7 +483,7 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, { kvm_pte_t new, old = *ptep; u64 granule = kvm_granule_size(level), phys = data->phys; - struct page *page = virt_to_page(ptep); + struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; if (!kvm_block_mapping_supported(addr, end, phys, level)) return -E2BIG; @@ -493,11 +505,11 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, */ kvm_set_invalid_pte(ptep); kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level); - put_page(page); + mm_ops->put_page(ptep); } smp_store_release(ptep, new); - get_page(page); + mm_ops->get_page(ptep); data->phys += granule; return 0; } @@ -527,13 +539,13 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level, static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct stage2_map_data *data) { - int ret; + struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; kvm_pte_t *childp, pte = *ptep; - struct page *page = virt_to_page(ptep); + int ret; if (data->anchor) { if (kvm_pte_valid(pte)) - put_page(page); + mm_ops->put_page(ptep); return 0; } @@ -548,7 +560,7 @@ static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, if (!data->memcache) return -ENOMEM; - childp = kvm_mmu_memory_cache_alloc(data->memcache); + childp = mm_ops->zalloc_page(data->memcache); if (!childp) return -ENOMEM; @@ -560,11 +572,11 @@ static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, if (kvm_pte_valid(pte)) { kvm_set_invalid_pte(ptep); kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level); - put_page(page); + mm_ops->put_page(ptep); } - kvm_set_table_pte(ptep, childp); - get_page(page); + kvm_set_table_pte(ptep, childp, mm_ops); + mm_ops->get_page(ptep); return 0; } @@ -573,13 +585,14 @@ static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct stage2_map_data *data) { + struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; int ret = 0; if (!data->anchor) return 0; - put_page(virt_to_page(kvm_pte_follow(*ptep))); - put_page(virt_to_page(ptep)); + mm_ops->put_page(kvm_pte_follow(*ptep, mm_ops)); + mm_ops->put_page(ptep); if (data->anchor == ptep) { data->anchor = NULL; @@ -634,6 +647,7 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, .phys = ALIGN_DOWN(phys, PAGE_SIZE), .mmu = pgt->mmu, .memcache = mc, + .mm_ops = pgt->mm_ops, }; struct kvm_pgtable_walker walker = { .cb = stage2_map_walker, @@ -670,7 +684,9 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, enum kvm_pgtable_walk_flags flag, void * const arg) { - struct kvm_s2_mmu *mmu = arg; + struct kvm_pgtable *pgt = arg; + struct kvm_s2_mmu *mmu = pgt->mmu; + struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; kvm_pte_t pte = *ptep, *childp = NULL; bool need_flush = false; @@ -678,9 +694,9 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, return 0; if (kvm_pte_table(pte, level)) { - childp = kvm_pte_follow(pte); + childp = kvm_pte_follow(pte, mm_ops); - if (page_count(virt_to_page(childp)) != 1) + if (mm_ops->page_count(childp) != 1) return 0; } else if (stage2_pte_cacheable(pte)) { need_flush = true; @@ -693,15 +709,15 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, */ kvm_set_invalid_pte(ptep); kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, addr, level); - put_page(virt_to_page(ptep)); + mm_ops->put_page(ptep); if (need_flush) { - stage2_flush_dcache(kvm_pte_follow(pte), + stage2_flush_dcache(kvm_pte_follow(pte, mm_ops), kvm_granule_size(level)); } if (childp) - put_page(virt_to_page(childp)); + mm_ops->put_page(childp); return 0; } @@ -710,7 +726,7 @@ int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) { struct kvm_pgtable_walker walker = { .cb = stage2_unmap_walker, - .arg = pgt->mmu, + .arg = pgt, .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, }; @@ -842,12 +858,13 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, enum kvm_pgtable_walk_flags flag, void * const arg) { + struct kvm_pgtable_mm_ops *mm_ops = arg; kvm_pte_t pte = *ptep; if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pte)) return 0; - stage2_flush_dcache(kvm_pte_follow(pte), kvm_granule_size(level)); + stage2_flush_dcache(kvm_pte_follow(pte, mm_ops), kvm_granule_size(level)); return 0; } @@ -856,6 +873,7 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size) struct kvm_pgtable_walker walker = { .cb = stage2_flush_walker, .flags = KVM_PGTABLE_WALK_LEAF, + .arg = pgt->mm_ops, }; if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) @@ -864,7 +882,8 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size) return kvm_pgtable_walk(pgt, addr, size, &walker); } -int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm) +int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm, + struct kvm_pgtable_mm_ops *mm_ops) { size_t pgd_sz; u64 vtcr = kvm->arch.vtcr; @@ -873,12 +892,13 @@ int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm) u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE; - pgt->pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT | __GFP_ZERO); + pgt->pgd = mm_ops->zalloc_pages_exact(pgd_sz); if (!pgt->pgd) return -ENOMEM; pgt->ia_bits = ia_bits; pgt->start_level = start_level; + pgt->mm_ops = mm_ops; pgt->mmu = &kvm->arch.mmu; /* Ensure zeroed PGD pages are visible to the hardware walker */ @@ -890,15 +910,16 @@ static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, enum kvm_pgtable_walk_flags flag, void * const arg) { + struct kvm_pgtable_mm_ops *mm_ops = arg; kvm_pte_t pte = *ptep; if (!kvm_pte_valid(pte)) return 0; - put_page(virt_to_page(ptep)); + mm_ops->put_page(ptep); if (kvm_pte_table(pte, level)) - put_page(virt_to_page(kvm_pte_follow(pte))); + mm_ops->put_page(kvm_pte_follow(pte, mm_ops)); return 0; } @@ -910,10 +931,11 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) .cb = stage2_free_walker, .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, + .arg = pgt->mm_ops, }; WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE; - free_pages_exact(pgt->pgd, pgd_sz); + pgt->mm_ops->free_pages_exact(pgt->pgd, pgd_sz); pgt->pgd = NULL; } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 8711894db8c2..e583f7fb3620 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -88,6 +88,44 @@ static bool kvm_is_device_pfn(unsigned long pfn) return !pfn_valid(pfn); } +static void *stage2_memcache_zalloc_page(void *arg) +{ + struct kvm_mmu_memory_cache *mc = arg; + + /* Allocated with __GFP_ZERO, so no need to zero */ + return kvm_mmu_memory_cache_alloc(mc); +} + +static void *kvm_host_zalloc_pages_exact(size_t size) +{ + return alloc_pages_exact(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO); +} + +static void kvm_host_get_page(void *addr) +{ + get_page(virt_to_page(addr)); +} + +static void kvm_host_put_page(void *addr) +{ + put_page(virt_to_page(addr)); +} + +static int kvm_host_page_count(void *addr) +{ + return page_count(virt_to_page(addr)); +} + +static phys_addr_t kvm_host_pa(void *addr) +{ + return __pa(addr); +} + +static void *kvm_host_va(phys_addr_t phys) +{ + return __va(phys); +} + /* * Unmapping vs dcache management: * @@ -351,6 +389,17 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, return 0; } +static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = { + .zalloc_page = stage2_memcache_zalloc_page, + .zalloc_pages_exact = kvm_host_zalloc_pages_exact, + .free_pages_exact = free_pages_exact, + .get_page = kvm_host_get_page, + .put_page = kvm_host_put_page, + .page_count = kvm_host_page_count, + .phys_to_virt = kvm_host_va, + .virt_to_phys = kvm_host_pa, +}; + /** * kvm_init_stage2_mmu - Initialise a S2 MMU strucrure * @kvm: The pointer to the KVM structure @@ -374,7 +423,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) if (!pgt) return -ENOMEM; - err = kvm_pgtable_stage2_init(pgt, kvm); + err = kvm_pgtable_stage2_init(pgt, kvm, &kvm_s2_mm_ops); if (err) goto out_free_pgtable; @@ -1208,6 +1257,19 @@ static int kvm_map_idmap_text(void) return err; } +static void *kvm_hyp_zalloc_page(void *arg) +{ + return (void *)get_zeroed_page(GFP_KERNEL); +} + +static struct kvm_pgtable_mm_ops kvm_hyp_mm_ops = { + .zalloc_page = kvm_hyp_zalloc_page, + .get_page = kvm_host_get_page, + .put_page = kvm_host_put_page, + .phys_to_virt = kvm_host_va, + .virt_to_phys = kvm_host_pa, +}; + int kvm_mmu_init(void) { int err; @@ -1251,7 +1313,7 @@ int kvm_mmu_init(void) goto out; } - err = kvm_pgtable_hyp_init(hyp_pgtable, hyp_va_bits); + err = kvm_pgtable_hyp_init(hyp_pgtable, hyp_va_bits, &kvm_hyp_mm_ops); if (err) goto out_free_pgtable; -- cgit v1.2.3 From 380e18ade4a51334e8806160e6f0fdfaca0b4428 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:15 +0000 Subject: KVM: arm64: Introduce a BSS section for use at Hyp Currently, the hyp code cannot make full use of a bss, as the kernel section is mapped read-only. While this mapping could simply be changed to read-write, it would intermingle even more the hyp and kernel state than they currently are. Instead, introduce a __hyp_bss section, that uses reserved pages, and create the appropriate RW hyp mappings during KVM init. Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-8-qperret@google.com --- arch/arm64/include/asm/sections.h | 1 + arch/arm64/kernel/vmlinux.lds.S | 52 +++++++++++++++++++++++++-------------- arch/arm64/kvm/arm.c | 14 ++++++++++- arch/arm64/kvm/hyp/nvhe/hyp.lds.S | 1 + 4 files changed, 49 insertions(+), 19 deletions(-) diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h index 2f36b16a5b5d..e4ad9db53af1 100644 --- a/arch/arm64/include/asm/sections.h +++ b/arch/arm64/include/asm/sections.h @@ -13,6 +13,7 @@ extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; extern char __hyp_text_start[], __hyp_text_end[]; extern char __hyp_rodata_start[], __hyp_rodata_end[]; extern char __hyp_reloc_begin[], __hyp_reloc_end[]; +extern char __hyp_bss_start[], __hyp_bss_end[]; extern char __idmap_text_start[], __idmap_text_end[]; extern char __initdata_begin[], __initdata_end[]; extern char __inittext_begin[], __inittext_end[]; diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 7eea7888bb02..e96173ce211b 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -5,24 +5,7 @@ * Written by Martin Mares */ -#define RO_EXCEPTION_TABLE_ALIGN 8 -#define RUNTIME_DISCARD_EXIT - -#include -#include #include -#include -#include -#include - -#include "image.h" - -OUTPUT_ARCH(aarch64) -ENTRY(_text) - -jiffies = jiffies_64; - - #ifdef CONFIG_KVM #define HYPERVISOR_EXTABLE \ . = ALIGN(SZ_8); \ @@ -51,13 +34,43 @@ jiffies = jiffies_64; __hyp_reloc_end = .; \ } +#define BSS_FIRST_SECTIONS \ + __hyp_bss_start = .; \ + *(HYP_SECTION_NAME(.bss)) \ + . = ALIGN(PAGE_SIZE); \ + __hyp_bss_end = .; + +/* + * We require that __hyp_bss_start and __bss_start are aligned, and enforce it + * with an assertion. But the BSS_SECTION macro places an empty .sbss section + * between them, which can in some cases cause the linker to misalign them. To + * work around the issue, force a page alignment for __bss_start. + */ +#define SBSS_ALIGN PAGE_SIZE #else /* CONFIG_KVM */ #define HYPERVISOR_EXTABLE #define HYPERVISOR_DATA_SECTIONS #define HYPERVISOR_PERCPU_SECTION #define HYPERVISOR_RELOC_SECTION +#define SBSS_ALIGN 0 #endif +#define RO_EXCEPTION_TABLE_ALIGN 8 +#define RUNTIME_DISCARD_EXIT + +#include +#include +#include +#include +#include + +#include "image.h" + +OUTPUT_ARCH(aarch64) +ENTRY(_text) + +jiffies = jiffies_64; + #define HYPERVISOR_TEXT \ /* \ * Align to 4 KB so that \ @@ -276,7 +289,7 @@ SECTIONS __pecoff_data_rawsize = ABSOLUTE(. - __initdata_begin); _edata = .; - BSS_SECTION(0, 0, 0) + BSS_SECTION(SBSS_ALIGN, 0, 0) . = ALIGN(PAGE_SIZE); init_pg_dir = .; @@ -324,6 +337,9 @@ ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1)) ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE, "Entry trampoline text too big") #endif +#ifdef CONFIG_KVM +ASSERT(__hyp_bss_start == __bss_start, "HYP and Host BSS are misaligned") +#endif /* * If padding is applied before .head.text, virt<->phys conversions will fail. */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 2adb8d878bb9..22d6df525254 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1775,7 +1775,19 @@ static int init_hyp_mode(void) goto out_err; } - err = create_hyp_mappings(kvm_ksym_ref(__bss_start), + /* + * .hyp.bss is guaranteed to be placed at the beginning of the .bss + * section thanks to an assertion in the linker script. Map it RW and + * the rest of .bss RO. + */ + err = create_hyp_mappings(kvm_ksym_ref(__hyp_bss_start), + kvm_ksym_ref(__hyp_bss_end), PAGE_HYP); + if (err) { + kvm_err("Cannot map hyp bss section: %d\n", err); + goto out_err; + } + + err = create_hyp_mappings(kvm_ksym_ref(__hyp_bss_end), kvm_ksym_ref(__bss_stop), PAGE_HYP_RO); if (err) { kvm_err("Cannot map bss section\n"); diff --git a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S index cd119d82d8e3..f4562f417d3f 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S @@ -25,4 +25,5 @@ SECTIONS { BEGIN_HYP_SECTION(.data..percpu) PERCPU_INPUT(L1_CACHE_BYTES) END_HYP_SECTION + HYP_SECTION(.bss) } -- cgit v1.2.3 From 40a50853d37af3fd2e98b769e1a79839ad16b107 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:16 +0000 Subject: KVM: arm64: Make kvm_call_hyp() a function call at Hyp kvm_call_hyp() has some logic to issue a function call or a hypercall depending on the EL at which the kernel is running. However, all the code compiled under __KVM_NVHE_HYPERVISOR__ is guaranteed to only run at EL2 which allows us to simplify. Add ifdefery to kvm_host.h to simplify kvm_call_hyp() in .hyp.text. Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-9-qperret@google.com --- arch/arm64/include/asm/kvm_host.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 08f500b2551a..6a2031af9562 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -593,6 +593,7 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); void kvm_arm_halt_guest(struct kvm *kvm); void kvm_arm_resume_guest(struct kvm *kvm); +#ifndef __KVM_NVHE_HYPERVISOR__ #define kvm_call_hyp_nvhe(f, ...) \ ({ \ struct arm_smccc_res res; \ @@ -632,6 +633,11 @@ void kvm_arm_resume_guest(struct kvm *kvm); \ ret; \ }) +#else /* __KVM_NVHE_HYPERVISOR__ */ +#define kvm_call_hyp(f, ...) f(__VA_ARGS__) +#define kvm_call_hyp_ret(f, ...) f(__VA_ARGS__) +#define kvm_call_hyp_nvhe(f, ...) f(__VA_ARGS__) +#endif /* __KVM_NVHE_HYPERVISOR__ */ void force_vm_exit(const cpumask_t *mask); void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); -- cgit v1.2.3 From fa21472a316af8ad7af3114049db89678444c7ed Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:17 +0000 Subject: KVM: arm64: Allow using kvm_nvhe_sym() in hyp code In order to allow the usage of code shared by the host and the hyp in static inline library functions, allow the usage of kvm_nvhe_sym() at EL2 by defaulting to the raw symbol name. Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-10-qperret@google.com --- arch/arm64/include/asm/hyp_image.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/include/asm/hyp_image.h b/arch/arm64/include/asm/hyp_image.h index 78cd77990c9c..b4b3076a76fb 100644 --- a/arch/arm64/include/asm/hyp_image.h +++ b/arch/arm64/include/asm/hyp_image.h @@ -10,11 +10,15 @@ #define __HYP_CONCAT(a, b) a ## b #define HYP_CONCAT(a, b) __HYP_CONCAT(a, b) +#ifndef __KVM_NVHE_HYPERVISOR__ /* * KVM nVHE code has its own symbol namespace prefixed with __kvm_nvhe_, * to separate it from the kernel proper. */ #define kvm_nvhe_sym(sym) __kvm_nvhe_##sym +#else +#define kvm_nvhe_sym(sym) sym +#endif #ifdef LINKER_SCRIPT -- cgit v1.2.3 From e759604087231c672f91564cc805336e70d333a0 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:18 +0000 Subject: KVM: arm64: Introduce an early Hyp page allocator With nVHE, the host currently creates all stage 1 hypervisor mappings at EL1 during boot, installs them at EL2, and extends them as required (e.g. when creating a new VM). But in a world where the host is no longer trusted, it cannot have full control over the code mapped in the hypervisor. In preparation for enabling the hypervisor to create its own stage 1 mappings during boot, introduce an early page allocator, with minimal functionality. This allocator is designed to be used only during early bootstrap of the hyp code when memory protection is enabled, which will then switch to using a full-fledged page allocator after init. Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-11-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/early_alloc.h | 14 +++++++ arch/arm64/kvm/hyp/include/nvhe/memory.h | 24 ++++++++++++ arch/arm64/kvm/hyp/nvhe/Makefile | 2 +- arch/arm64/kvm/hyp/nvhe/early_alloc.c | 54 +++++++++++++++++++++++++++ arch/arm64/kvm/hyp/nvhe/psci-relay.c | 4 +- 5 files changed, 94 insertions(+), 4 deletions(-) create mode 100644 arch/arm64/kvm/hyp/include/nvhe/early_alloc.h create mode 100644 arch/arm64/kvm/hyp/include/nvhe/memory.h create mode 100644 arch/arm64/kvm/hyp/nvhe/early_alloc.c diff --git a/arch/arm64/kvm/hyp/include/nvhe/early_alloc.h b/arch/arm64/kvm/hyp/include/nvhe/early_alloc.h new file mode 100644 index 000000000000..dc61aaa56f31 --- /dev/null +++ b/arch/arm64/kvm/hyp/include/nvhe/early_alloc.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __KVM_HYP_EARLY_ALLOC_H +#define __KVM_HYP_EARLY_ALLOC_H + +#include + +void hyp_early_alloc_init(void *virt, unsigned long size); +unsigned long hyp_early_alloc_nr_used_pages(void); +void *hyp_early_alloc_page(void *arg); +void *hyp_early_alloc_contig(unsigned int nr_pages); + +extern struct kvm_pgtable_mm_ops hyp_early_alloc_mm_ops; + +#endif /* __KVM_HYP_EARLY_ALLOC_H */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h new file mode 100644 index 000000000000..3e49eaa7e682 --- /dev/null +++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __KVM_HYP_MEMORY_H +#define __KVM_HYP_MEMORY_H + +#include + +#include + +extern s64 hyp_physvirt_offset; + +#define __hyp_pa(virt) ((phys_addr_t)(virt) + hyp_physvirt_offset) +#define __hyp_va(phys) ((void *)((phys_addr_t)(phys) - hyp_physvirt_offset)) + +static inline void *hyp_phys_to_virt(phys_addr_t phys) +{ + return __hyp_va(phys); +} + +static inline phys_addr_t hyp_virt_to_phys(void *addr) +{ + return __hyp_pa(addr); +} + +#endif /* __KVM_HYP_MEMORY_H */ diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index bc98f8e3d1da..24ff99e2eac5 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -13,7 +13,7 @@ lib-objs := clear_page.o copy_page.o memcpy.o memset.o lib-objs := $(addprefix ../../../lib/, $(lib-objs)) obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \ - hyp-main.o hyp-smp.o psci-relay.o + hyp-main.o hyp-smp.o psci-relay.o early_alloc.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o ../exception.o obj-y += $(lib-objs) diff --git a/arch/arm64/kvm/hyp/nvhe/early_alloc.c b/arch/arm64/kvm/hyp/nvhe/early_alloc.c new file mode 100644 index 000000000000..1306c430ab87 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/early_alloc.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Google LLC + * Author: Quentin Perret + */ + +#include + +#include +#include + +struct kvm_pgtable_mm_ops hyp_early_alloc_mm_ops; +s64 __ro_after_init hyp_physvirt_offset; + +static unsigned long base; +static unsigned long end; +static unsigned long cur; + +unsigned long hyp_early_alloc_nr_used_pages(void) +{ + return (cur - base) >> PAGE_SHIFT; +} + +void *hyp_early_alloc_contig(unsigned int nr_pages) +{ + unsigned long size = (nr_pages << PAGE_SHIFT); + void *ret = (void *)cur; + + if (!nr_pages) + return NULL; + + if (end - cur < size) + return NULL; + + cur += size; + memset(ret, 0, size); + + return ret; +} + +void *hyp_early_alloc_page(void *arg) +{ + return hyp_early_alloc_contig(1); +} + +void hyp_early_alloc_init(void *virt, unsigned long size) +{ + base = cur = (unsigned long)virt; + end = base + size; + + hyp_early_alloc_mm_ops.zalloc_page = hyp_early_alloc_page; + hyp_early_alloc_mm_ops.phys_to_virt = hyp_phys_to_virt; + hyp_early_alloc_mm_ops.virt_to_phys = hyp_virt_to_phys; +} diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 63de71c0481e..08508783ec3d 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -11,6 +11,7 @@ #include #include +#include #include void kvm_hyp_cpu_entry(unsigned long r0); @@ -20,9 +21,6 @@ void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt); /* Config options set by the host. */ struct kvm_host_psci_config __ro_after_init kvm_host_psci_config; -s64 __ro_after_init hyp_physvirt_offset; - -#define __hyp_pa(x) ((phys_addr_t)((x)) + hyp_physvirt_offset) #define INVALID_CPU_ID UINT_MAX -- cgit v1.2.3 From 40d9e41e525c13d07bc72d49968926f4502e5b33 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:19 +0000 Subject: KVM: arm64: Stub CONFIG_DEBUG_LIST at Hyp In order to use the kernel list library at EL2, introduce stubs for the CONFIG_DEBUG_LIST out-of-lines calls. Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-12-qperret@google.com --- arch/arm64/kvm/hyp/nvhe/Makefile | 2 +- arch/arm64/kvm/hyp/nvhe/stub.c | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/kvm/hyp/nvhe/stub.c diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 24ff99e2eac5..144da72ad510 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -13,7 +13,7 @@ lib-objs := clear_page.o copy_page.o memcpy.o memset.o lib-objs := $(addprefix ../../../lib/, $(lib-objs)) obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \ - hyp-main.o hyp-smp.o psci-relay.o early_alloc.o + hyp-main.o hyp-smp.o psci-relay.o early_alloc.o stub.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o ../exception.o obj-y += $(lib-objs) diff --git a/arch/arm64/kvm/hyp/nvhe/stub.c b/arch/arm64/kvm/hyp/nvhe/stub.c new file mode 100644 index 000000000000..c0aa6bbfd79d --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/stub.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Stubs for out-of-line function calls caused by re-using kernel + * infrastructure at EL2. + * + * Copyright (C) 2020 - Google LLC + */ + +#include + +#ifdef CONFIG_DEBUG_LIST +bool __list_add_valid(struct list_head *new, struct list_head *prev, + struct list_head *next) +{ + return true; +} + +bool __list_del_entry_valid(struct list_head *entry) +{ + return true; +} +#endif -- cgit v1.2.3 From 8e17c66249e9ea08b44879c7af0315e70a83316c Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:20 +0000 Subject: KVM: arm64: Introduce a Hyp buddy page allocator When memory protection is enabled, the hyp code will require a basic form of memory management in order to allocate and free memory pages at EL2. This is needed for various use-cases, including the creation of hyp mappings or the allocation of stage 2 page tables. To address these use-case, introduce a simple memory allocator in the hyp code. The allocator is designed as a conventional 'buddy allocator', working with a page granularity. It allows to allocate and free physically contiguous pages from memory 'pools', with a guaranteed order alignment in the PA space. Each page in a memory pool is associated with a struct hyp_page which holds the page's metadata, including its refcount, as well as its current order, hence mimicking the kernel's buddy system in the GFP infrastructure. The hyp_page metadata are made accessible through a hyp_vmemmap, following the concept of SPARSE_VMEMMAP in the kernel. Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-13-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/gfp.h | 68 +++++++++++ arch/arm64/kvm/hyp/include/nvhe/memory.h | 28 +++++ arch/arm64/kvm/hyp/nvhe/Makefile | 2 +- arch/arm64/kvm/hyp/nvhe/page_alloc.c | 195 +++++++++++++++++++++++++++++++ 4 files changed, 292 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/kvm/hyp/include/nvhe/gfp.h create mode 100644 arch/arm64/kvm/hyp/nvhe/page_alloc.c diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h new file mode 100644 index 000000000000..55b3f0ce5bc8 --- /dev/null +++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __KVM_HYP_GFP_H +#define __KVM_HYP_GFP_H + +#include + +#include +#include + +#define HYP_NO_ORDER UINT_MAX + +struct hyp_pool { + /* + * Spinlock protecting concurrent changes to the memory pool as well as + * the struct hyp_page of the pool's pages until we have a proper atomic + * API at EL2. + */ + hyp_spinlock_t lock; + struct list_head free_area[MAX_ORDER]; + phys_addr_t range_start; + phys_addr_t range_end; + unsigned int max_order; +}; + +static inline void hyp_page_ref_inc(struct hyp_page *p) +{ + struct hyp_pool *pool = hyp_page_to_pool(p); + + hyp_spin_lock(&pool->lock); + p->refcount++; + hyp_spin_unlock(&pool->lock); +} + +static inline int hyp_page_ref_dec_and_test(struct hyp_page *p) +{ + struct hyp_pool *pool = hyp_page_to_pool(p); + int ret; + + hyp_spin_lock(&pool->lock); + p->refcount--; + ret = (p->refcount == 0); + hyp_spin_unlock(&pool->lock); + + return ret; +} + +static inline void hyp_set_page_refcounted(struct hyp_page *p) +{ + struct hyp_pool *pool = hyp_page_to_pool(p); + + hyp_spin_lock(&pool->lock); + if (p->refcount) { + hyp_spin_unlock(&pool->lock); + hyp_panic(); + } + p->refcount = 1; + hyp_spin_unlock(&pool->lock); +} + +/* Allocation */ +void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order); +void hyp_get_page(void *addr); +void hyp_put_page(void *addr); + +/* Used pages cannot be freed */ +int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages, + unsigned int reserved_pages); +#endif /* __KVM_HYP_GFP_H */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h index 3e49eaa7e682..d2fb307c5952 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/memory.h +++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h @@ -6,7 +6,17 @@ #include +struct hyp_pool; +struct hyp_page { + unsigned int refcount; + unsigned int order; + struct hyp_pool *pool; + struct list_head node; +}; + extern s64 hyp_physvirt_offset; +extern u64 __hyp_vmemmap; +#define hyp_vmemmap ((struct hyp_page *)__hyp_vmemmap) #define __hyp_pa(virt) ((phys_addr_t)(virt) + hyp_physvirt_offset) #define __hyp_va(phys) ((void *)((phys_addr_t)(phys) - hyp_physvirt_offset)) @@ -21,4 +31,22 @@ static inline phys_addr_t hyp_virt_to_phys(void *addr) return __hyp_pa(addr); } +#define hyp_phys_to_pfn(phys) ((phys) >> PAGE_SHIFT) +#define hyp_pfn_to_phys(pfn) ((phys_addr_t)((pfn) << PAGE_SHIFT)) +#define hyp_phys_to_page(phys) (&hyp_vmemmap[hyp_phys_to_pfn(phys)]) +#define hyp_virt_to_page(virt) hyp_phys_to_page(__hyp_pa(virt)) +#define hyp_virt_to_pfn(virt) hyp_phys_to_pfn(__hyp_pa(virt)) + +#define hyp_page_to_pfn(page) ((struct hyp_page *)(page) - hyp_vmemmap) +#define hyp_page_to_phys(page) hyp_pfn_to_phys((hyp_page_to_pfn(page))) +#define hyp_page_to_virt(page) __hyp_va(hyp_page_to_phys(page)) +#define hyp_page_to_pool(page) (((struct hyp_page *)page)->pool) + +static inline int hyp_page_count(void *addr) +{ + struct hyp_page *p = hyp_virt_to_page(addr); + + return p->refcount; +} + #endif /* __KVM_HYP_MEMORY_H */ diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 144da72ad510..6894a917f290 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -13,7 +13,7 @@ lib-objs := clear_page.o copy_page.o memcpy.o memset.o lib-objs := $(addprefix ../../../lib/, $(lib-objs)) obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \ - hyp-main.o hyp-smp.o psci-relay.o early_alloc.o stub.o + hyp-main.o hyp-smp.o psci-relay.o early_alloc.o stub.o page_alloc.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o ../exception.o obj-y += $(lib-objs) diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c new file mode 100644 index 000000000000..237e03bf0cb1 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -0,0 +1,195 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Google LLC + * Author: Quentin Perret + */ + +#include +#include + +u64 __hyp_vmemmap; + +/* + * Index the hyp_vmemmap to find a potential buddy page, but make no assumption + * about its current state. + * + * Example buddy-tree for a 4-pages physically contiguous pool: + * + * o : Page 3 + * / + * o-o : Page 2 + * / + * / o : Page 1 + * / / + * o---o-o : Page 0 + * Order 2 1 0 + * + * Example of requests on this pool: + * __find_buddy_nocheck(pool, page 0, order 0) => page 1 + * __find_buddy_nocheck(pool, page 0, order 1) => page 2 + * __find_buddy_nocheck(pool, page 1, order 0) => page 0 + * __find_buddy_nocheck(pool, page 2, order 0) => page 3 + */ +static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool, + struct hyp_page *p, + unsigned int order) +{ + phys_addr_t addr = hyp_page_to_phys(p); + + addr ^= (PAGE_SIZE << order); + + /* + * Don't return a page outside the pool range -- it belongs to + * something else and may not be mapped in hyp_vmemmap. + */ + if (addr < pool->range_start || addr >= pool->range_end) + return NULL; + + return hyp_phys_to_page(addr); +} + +/* Find a buddy page currently available for allocation */ +static struct hyp_page *__find_buddy_avail(struct hyp_pool *pool, + struct hyp_page *p, + unsigned int order) +{ + struct hyp_page *buddy = __find_buddy_nocheck(pool, p, order); + + if (!buddy || buddy->order != order || list_empty(&buddy->node)) + return NULL; + + return buddy; + +} + +static void __hyp_attach_page(struct hyp_pool *pool, + struct hyp_page *p) +{ + unsigned int order = p->order; + struct hyp_page *buddy; + + memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order); + + /* + * Only the first struct hyp_page of a high-order page (otherwise known + * as the 'head') should have p->order set. The non-head pages should + * have p->order = HYP_NO_ORDER. Here @p may no longer be the head + * after coallescing, so make sure to mark it HYP_NO_ORDER proactively. + */ + p->order = HYP_NO_ORDER; + for (; (order + 1) < pool->max_order; order++) { + buddy = __find_buddy_avail(pool, p, order); + if (!buddy) + break; + + /* Take the buddy out of its list, and coallesce with @p */ + list_del_init(&buddy->node); + buddy->order = HYP_NO_ORDER; + p = min(p, buddy); + } + + /* Mark the new head, and insert it */ + p->order = order; + list_add_tail(&p->node, &pool->free_area[order]); +} + +static void hyp_attach_page(struct hyp_page *p) +{ + struct hyp_pool *pool = hyp_page_to_pool(p); + + hyp_spin_lock(&pool->lock); + __hyp_attach_page(pool, p); + hyp_spin_unlock(&pool->lock); +} + +static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, + struct hyp_page *p, + unsigned int order) +{ + struct hyp_page *buddy; + + list_del_init(&p->node); + while (p->order > order) { + /* + * The buddy of order n - 1 currently has HYP_NO_ORDER as it + * is covered by a higher-level page (whose head is @p). Use + * __find_buddy_nocheck() to find it and inject it in the + * free_list[n - 1], effectively splitting @p in half. + */ + p->order--; + buddy = __find_buddy_nocheck(pool, p, p->order); + buddy->order = p->order; + list_add_tail(&buddy->node, &pool->free_area[buddy->order]); + } + + return p; +} + +void hyp_put_page(void *addr) +{ + struct hyp_page *p = hyp_virt_to_page(addr); + + if (hyp_page_ref_dec_and_test(p)) + hyp_attach_page(p); +} + +void hyp_get_page(void *addr) +{ + struct hyp_page *p = hyp_virt_to_page(addr); + + hyp_page_ref_inc(p); +} + +void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order) +{ + unsigned int i = order; + struct hyp_page *p; + + hyp_spin_lock(&pool->lock); + + /* Look for a high-enough-order page */ + while (i < pool->max_order && list_empty(&pool->free_area[i])) + i++; + if (i >= pool->max_order) { + hyp_spin_unlock(&pool->lock); + return NULL; + } + + /* Extract it from the tree at the right order */ + p = list_first_entry(&pool->free_area[i], struct hyp_page, node); + p = __hyp_extract_page(pool, p, order); + + hyp_spin_unlock(&pool->lock); + hyp_set_page_refcounted(p); + + return hyp_page_to_virt(p); +} + +int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages, + unsigned int reserved_pages) +{ + phys_addr_t phys = hyp_pfn_to_phys(pfn); + struct hyp_page *p; + int i; + + hyp_spin_lock_init(&pool->lock); + pool->max_order = min(MAX_ORDER, get_order(nr_pages << PAGE_SHIFT)); + for (i = 0; i < pool->max_order; i++) + INIT_LIST_HEAD(&pool->free_area[i]); + pool->range_start = phys; + pool->range_end = phys + (nr_pages << PAGE_SHIFT); + + /* Init the vmemmap portion */ + p = hyp_phys_to_page(phys); + memset(p, 0, sizeof(*p) * nr_pages); + for (i = 0; i < nr_pages; i++) { + p[i].pool = pool; + INIT_LIST_HEAD(&p[i].node); + } + + /* Attach the unused pages to the buddy tree */ + for (i = reserved_pages; i < nr_pages; i++) + __hyp_attach_page(pool, &p[i]); + + return 0; +} -- cgit v1.2.3 From 7a440cc78392c3caf805ef0afc7ead031e4d0830 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:21 +0000 Subject: KVM: arm64: Enable access to sanitized CPU features at EL2 Introduce the infrastructure in KVM enabling to copy CPU feature registers into EL2-owned data-structures, to allow reading sanitised values directly at EL2 in nVHE. Given that only a subset of these features are being read by the hypervisor, the ones that need to be copied are to be listed under together with the name of the nVHE variable that will hold the copy. This introduces only the infrastructure enabling this copy. The first users will follow shortly. Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-14-qperret@google.com --- arch/arm64/include/asm/cpufeature.h | 1 + arch/arm64/include/asm/kvm_cpufeature.h | 22 ++++++++++++++++++++++ arch/arm64/include/asm/kvm_host.h | 4 ++++ arch/arm64/kernel/cpufeature.c | 13 +++++++++++++ arch/arm64/kvm/sys_regs.c | 19 +++++++++++++++++++ 5 files changed, 59 insertions(+) create mode 100644 arch/arm64/include/asm/kvm_cpufeature.h diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 61177bac49fa..a85cea2cac57 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -607,6 +607,7 @@ void check_local_cpu_capabilities(void); u64 read_sanitised_ftr_reg(u32 id); u64 __read_sysreg_by_encoding(u32 sys_id); +int copy_ftr_reg(u32 id, struct arm64_ftr_reg *dst); static inline bool cpu_supports_mixed_endian_el0(void) { diff --git a/arch/arm64/include/asm/kvm_cpufeature.h b/arch/arm64/include/asm/kvm_cpufeature.h new file mode 100644 index 000000000000..3d245f96a9fe --- /dev/null +++ b/arch/arm64/include/asm/kvm_cpufeature.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 - Google LLC + * Author: Quentin Perret + */ + +#ifndef __ARM64_KVM_CPUFEATURE_H__ +#define __ARM64_KVM_CPUFEATURE_H__ + +#include + +#include + +#if defined(__KVM_NVHE_HYPERVISOR__) +#define DECLARE_KVM_HYP_CPU_FTR_REG(name) extern struct arm64_ftr_reg name +#define DEFINE_KVM_HYP_CPU_FTR_REG(name) struct arm64_ftr_reg name +#else +#define DECLARE_KVM_HYP_CPU_FTR_REG(name) extern struct arm64_ftr_reg kvm_nvhe_sym(name) +#define DEFINE_KVM_HYP_CPU_FTR_REG(name) BUILD_BUG() +#endif + +#endif diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 6a2031af9562..02e172dc5087 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -740,9 +740,13 @@ void kvm_clr_pmu_events(u32 clr); void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu); void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu); + +void setup_kvm_el2_caps(void); #else static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {} static inline void kvm_clr_pmu_events(u32 clr) {} + +static inline void setup_kvm_el2_caps(void) {} #endif void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 066030717a4c..6252476e4e73 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1154,6 +1154,18 @@ u64 read_sanitised_ftr_reg(u32 id) } EXPORT_SYMBOL_GPL(read_sanitised_ftr_reg); +int copy_ftr_reg(u32 id, struct arm64_ftr_reg *dst) +{ + struct arm64_ftr_reg *regp = get_arm64_ftr_reg(id); + + if (!regp) + return -EINVAL; + + *dst = *regp; + + return 0; +} + #define read_sysreg_case(r) \ case r: val = read_sysreg_s(r); break; @@ -2773,6 +2785,7 @@ void __init setup_cpu_features(void) setup_system_capabilities(); setup_elf_hwcaps(arm64_elf_hwcaps); + setup_kvm_el2_caps(); if (system_supports_32bit_el0()) setup_elf_hwcaps(compat_elf_hwcaps); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 4f2f1e3145de..6c5d133689ae 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -2775,3 +2776,21 @@ void kvm_sys_reg_table_init(void) /* Clear all higher bits. */ cache_levels &= (1 << (i*3))-1; } + +#define CPU_FTR_REG_HYP_COPY(id, name) \ + { .sys_id = id, .dst = (struct arm64_ftr_reg *)&kvm_nvhe_sym(name) } +struct __ftr_reg_copy_entry { + u32 sys_id; + struct arm64_ftr_reg *dst; +} hyp_ftr_regs[] __initdata = { +}; + +void __init setup_kvm_el2_caps(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(hyp_ftr_regs); i++) { + WARN(copy_ftr_reg(hyp_ftr_regs[i].sys_id, hyp_ftr_regs[i].dst), + "%u feature register not found\n", hyp_ftr_regs[i].sys_id); + } +} -- cgit v1.2.3 From d460df12926825a3926da91f054f9f11f88bb33e Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:22 +0000 Subject: KVM: arm64: Provide __flush_dcache_area at EL2 We will need to do cache maintenance at EL2 soon, so compile a copy of __flush_dcache_area at EL2, and provide a copy of arm64_ftr_reg_ctrel0 as it is needed by the read_ctr macro. Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-15-qperret@google.com --- arch/arm64/include/asm/kvm_cpufeature.h | 2 ++ arch/arm64/kvm/hyp/nvhe/Makefile | 3 ++- arch/arm64/kvm/hyp/nvhe/cache.S | 13 +++++++++++++ arch/arm64/kvm/hyp/nvhe/hyp-smp.c | 6 ++++++ arch/arm64/kvm/sys_regs.c | 1 + 5 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/kvm/hyp/nvhe/cache.S diff --git a/arch/arm64/include/asm/kvm_cpufeature.h b/arch/arm64/include/asm/kvm_cpufeature.h index 3d245f96a9fe..c2e7735f502b 100644 --- a/arch/arm64/include/asm/kvm_cpufeature.h +++ b/arch/arm64/include/asm/kvm_cpufeature.h @@ -19,4 +19,6 @@ #define DEFINE_KVM_HYP_CPU_FTR_REG(name) BUILD_BUG() #endif +DECLARE_KVM_HYP_CPU_FTR_REG(arm64_ftr_reg_ctrel0); + #endif diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 6894a917f290..42dde4bb80b1 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -13,7 +13,8 @@ lib-objs := clear_page.o copy_page.o memcpy.o memset.o lib-objs := $(addprefix ../../../lib/, $(lib-objs)) obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \ - hyp-main.o hyp-smp.o psci-relay.o early_alloc.o stub.o page_alloc.o + hyp-main.o hyp-smp.o psci-relay.o early_alloc.o stub.o page_alloc.o \ + cache.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o ../exception.o obj-y += $(lib-objs) diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S new file mode 100644 index 000000000000..36cef6915428 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/cache.S @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Code copied from arch/arm64/mm/cache.S. + */ + +#include +#include +#include + +SYM_FUNC_START_PI(__flush_dcache_area) + dcache_by_line_op civac, sy, x0, x1, x2, x3 + ret +SYM_FUNC_END_PI(__flush_dcache_area) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c index 879559057dee..71f00aca90e7 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c @@ -5,9 +5,15 @@ */ #include +#include #include #include +/* + * Copies of the host's CPU features registers holding sanitized values. + */ +DEFINE_KVM_HYP_CPU_FTR_REG(arm64_ftr_reg_ctrel0); + /* * nVHE copy of data structures tracking available CPU cores. * Only entries for CPUs that were online at KVM init are populated. diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 6c5d133689ae..3ec34c25e877 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2783,6 +2783,7 @@ struct __ftr_reg_copy_entry { u32 sys_id; struct arm64_ftr_reg *dst; } hyp_ftr_regs[] __initdata = { + CPU_FTR_REG_HYP_COPY(SYS_CTR_EL0, arm64_ftr_reg_ctrel0), }; void __init setup_kvm_el2_caps(void) -- cgit v1.2.3 From bc1d2892e9aa6dcf6cd83adbd3616051cbd4c429 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:23 +0000 Subject: KVM: arm64: Factor out vector address calculation In order to re-map the guest vectors at EL2 when pKVM is enabled, refactor __kvm_vector_slot2idx() and kvm_init_vector_slot() to move all the address calculation logic in a static inline function. Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-16-qperret@google.com --- arch/arm64/include/asm/kvm_mmu.h | 8 ++++++++ arch/arm64/kvm/arm.c | 9 +-------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 90873851f677..5c42ec023cc7 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -168,6 +168,14 @@ phys_addr_t kvm_mmu_get_httbr(void); phys_addr_t kvm_get_idmap_vector(void); int kvm_mmu_init(void); +static inline void *__kvm_vector_slot2addr(void *base, + enum arm64_hyp_spectre_vector slot) +{ + int idx = slot - (slot != HYP_VECTOR_DIRECT); + + return base + (idx * SZ_2K); +} + struct kvm; #define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 22d6df525254..e2c471117bff 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1350,16 +1350,9 @@ static unsigned long nvhe_percpu_order(void) /* A lookup table holding the hypervisor VA for each vector slot */ static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS]; -static int __kvm_vector_slot2idx(enum arm64_hyp_spectre_vector slot) -{ - return slot - (slot != HYP_VECTOR_DIRECT); -} - static void kvm_init_vector_slot(void *base, enum arm64_hyp_spectre_vector slot) { - int idx = __kvm_vector_slot2idx(slot); - - hyp_spectre_vector_selector[slot] = base + (idx * SZ_2K); + hyp_spectre_vector_selector[slot] = __kvm_vector_slot2addr(base, slot); } static int kvm_init_vector_slots(void) -- cgit v1.2.3 From 8f4de66e247b805e1b3d1c15367ee0ef4cbb6003 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:24 +0000 Subject: arm64: asm: Provide set_sctlr_el2 macro We will soon need to turn the EL2 stage 1 MMU on and off in nVHE protected mode, so refactor the set_sctlr_el1 macro to make it usable for that purpose. Acked-by: Will Deacon Suggested-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-17-qperret@google.com --- arch/arm64/include/asm/assembler.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index ca31594d3d6c..fb651c1f26e9 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -676,11 +676,11 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU .endm /* - * Set SCTLR_EL1 to the passed value, and invalidate the local icache + * Set SCTLR_ELx to the @reg value, and invalidate the local icache * in the process. This is called when setting the MMU on. */ -.macro set_sctlr_el1, reg - msr sctlr_el1, \reg +.macro set_sctlr, sreg, reg + msr \sreg, \reg isb /* * Invalidate the local I-cache so that any instructions fetched @@ -692,6 +692,14 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU isb .endm +.macro set_sctlr_el1, reg + set_sctlr sctlr_el1, \reg +.endm + +.macro set_sctlr_el2, reg + set_sctlr sctlr_el2, \reg +.endm + /* * Check whether to yield to another runnable task from kernel mode NEON code * (which runs with preemption disabled). -- cgit v1.2.3 From f320bc742bc23c1d43567712fe2814bf04b19ebc Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:25 +0000 Subject: KVM: arm64: Prepare the creation of s1 mappings at EL2 When memory protection is enabled, the EL2 code needs the ability to create and manage its own page-table. To do so, introduce a new set of hypercalls to bootstrap a memory management system at EL2. This leads to the following boot flow in nVHE Protected mode: 1. the host allocates memory for the hypervisor very early on, using the memblock API; 2. the host creates a set of stage 1 page-table for EL2, installs the EL2 vectors, and issues the __pkvm_init hypercall; 3. during __pkvm_init, the hypervisor re-creates its stage 1 page-table and stores it in the memory pool provided by the host; 4. the hypervisor then extends its stage 1 mappings to include a vmemmap in the EL2 VA space, hence allowing to use the buddy allocator introduced in a previous patch; 5. the hypervisor jumps back in the idmap page, switches from the host-provided page-table to the new one, and wraps up its initialization by enabling the new allocator, before returning to the host. 6. the host can free the now unused page-table created for EL2, and will now need to issue hypercalls to make changes to the EL2 stage 1 mappings instead of modifying them directly. Note that for the sake of simplifying the review, this patch focuses on the hypervisor side of things. In other words, this only implements the new hypercalls, but does not make use of them from the host yet. The host-side changes will follow in a subsequent patch. Credits to Will for __pkvm_init_switch_pgd. Acked-by: Will Deacon Co-authored-by: Will Deacon Signed-off-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-18-qperret@google.com --- arch/arm64/include/asm/kvm_asm.h | 4 + arch/arm64/include/asm/kvm_host.h | 7 ++ arch/arm64/include/asm/kvm_hyp.h | 8 ++ arch/arm64/include/asm/kvm_pgtable.h | 2 + arch/arm64/kernel/image-vars.h | 16 +++ arch/arm64/kvm/hyp/Makefile | 2 +- arch/arm64/kvm/hyp/include/nvhe/mm.h | 71 +++++++++++++ arch/arm64/kvm/hyp/nvhe/Makefile | 4 +- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 27 +++++ arch/arm64/kvm/hyp/nvhe/hyp-main.c | 49 +++++++++ arch/arm64/kvm/hyp/nvhe/mm.c | 173 ++++++++++++++++++++++++++++++ arch/arm64/kvm/hyp/nvhe/setup.c | 197 +++++++++++++++++++++++++++++++++++ arch/arm64/kvm/hyp/pgtable.c | 2 - arch/arm64/kvm/hyp/reserved_mem.c | 92 ++++++++++++++++ arch/arm64/mm/init.c | 3 + 15 files changed, 652 insertions(+), 5 deletions(-) create mode 100644 arch/arm64/kvm/hyp/include/nvhe/mm.h create mode 100644 arch/arm64/kvm/hyp/nvhe/mm.c create mode 100644 arch/arm64/kvm/hyp/nvhe/setup.c create mode 100644 arch/arm64/kvm/hyp/reserved_mem.c diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index a7ab84f781f7..ebe7007b820b 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -57,6 +57,10 @@ #define __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2 12 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs 13 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs 14 +#define __KVM_HOST_SMCCC_FUNC___pkvm_init 15 +#define __KVM_HOST_SMCCC_FUNC___pkvm_create_mappings 16 +#define __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping 17 +#define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector 18 #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 02e172dc5087..f813e1191027 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -770,5 +770,12 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features)) int kvm_trng_call(struct kvm_vcpu *vcpu); +#ifdef CONFIG_KVM +extern phys_addr_t hyp_mem_base; +extern phys_addr_t hyp_mem_size; +void __init kvm_hyp_reserve(void); +#else +static inline void kvm_hyp_reserve(void) { } +#endif #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 8b6c3a7aac51..de40a565d7e5 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -108,4 +108,12 @@ void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr, u64 elr, u64 par); #endif +#ifdef __KVM_NVHE_HYPERVISOR__ +void __pkvm_init_switch_pgd(phys_addr_t phys, unsigned long size, + phys_addr_t pgd, void *sp, void *cont_fn); +int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus, + unsigned long *per_cpu_base, u32 hyp_va_bits); +void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt); +#endif + #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index bbe840e430cb..bf7a3cc49420 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -11,6 +11,8 @@ #include #include +#define KVM_PGTABLE_MAX_LEVELS 4U + typedef u64 kvm_pte_t; /** diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 4eb7a15c8b60..940c378fa837 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -115,6 +115,22 @@ KVM_NVHE_ALIAS_HYP(__memcpy, __pi_memcpy); KVM_NVHE_ALIAS_HYP(__memset, __pi_memset); #endif +/* Kernel memory sections */ +KVM_NVHE_ALIAS(__start_rodata); +KVM_NVHE_ALIAS(__end_rodata); +KVM_NVHE_ALIAS(__bss_start); +KVM_NVHE_ALIAS(__bss_stop); + +/* Hyp memory sections */ +KVM_NVHE_ALIAS(__hyp_idmap_text_start); +KVM_NVHE_ALIAS(__hyp_idmap_text_end); +KVM_NVHE_ALIAS(__hyp_text_start); +KVM_NVHE_ALIAS(__hyp_text_end); +KVM_NVHE_ALIAS(__hyp_bss_start); +KVM_NVHE_ALIAS(__hyp_bss_end); +KVM_NVHE_ALIAS(__hyp_rodata_start); +KVM_NVHE_ALIAS(__hyp_rodata_end); + #endif /* CONFIG_KVM */ #endif /* __ARM64_KERNEL_IMAGE_VARS_H */ diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 687598e41b21..b726332eec49 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -10,4 +10,4 @@ subdir-ccflags-y := -I$(incdir) \ -DDISABLE_BRANCH_PROFILING \ $(DISABLE_STACKLEAK_PLUGIN) -obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o +obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o reserved_mem.o diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h new file mode 100644 index 000000000000..ac0f7fcffd08 --- /dev/null +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __KVM_HYP_MM_H +#define __KVM_HYP_MM_H + +#include +#include +#include +#include + +#include +#include + +#define HYP_MEMBLOCK_REGIONS 128 +extern struct memblock_region kvm_nvhe_sym(hyp_memory)[]; +extern unsigned int kvm_nvhe_sym(hyp_memblock_nr); +extern struct kvm_pgtable pkvm_pgtable; +extern hyp_spinlock_t pkvm_pgd_lock; +extern struct hyp_pool hpool; +extern u64 __io_map_base; + +int hyp_create_idmap(u32 hyp_va_bits); +int hyp_map_vectors(void); +int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back); +int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot); +int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot); +int __pkvm_create_mappings(unsigned long start, unsigned long size, + unsigned long phys, enum kvm_pgtable_prot prot); +unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size, + enum kvm_pgtable_prot prot); + +static inline void hyp_vmemmap_range(phys_addr_t phys, unsigned long size, + unsigned long *start, unsigned long *end) +{ + unsigned long nr_pages = size >> PAGE_SHIFT; + struct hyp_page *p = hyp_phys_to_page(phys); + + *start = (unsigned long)p; + *end = *start + nr_pages * sizeof(struct hyp_page); + *start = ALIGN_DOWN(*start, PAGE_SIZE); + *end = ALIGN(*end, PAGE_SIZE); +} + +static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages) +{ + unsigned long total = 0, i; + + /* Provision the worst case scenario */ + for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) { + nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE); + total += nr_pages; + } + + return total; +} + +static inline unsigned long hyp_s1_pgtable_pages(void) +{ + unsigned long res = 0, i; + + /* Cover all of memory with page-granularity */ + for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) { + struct memblock_region *reg = &kvm_nvhe_sym(hyp_memory)[i]; + res += __hyp_pgtable_max_pages(reg->size >> PAGE_SHIFT); + } + + /* Allow 1 GiB for private mappings */ + res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT); + + return res; +} +#endif /* __KVM_HYP_MM_H */ diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 42dde4bb80b1..b334354b8dd0 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -14,9 +14,9 @@ lib-objs := $(addprefix ../../../lib/, $(lib-objs)) obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \ hyp-main.o hyp-smp.o psci-relay.o early_alloc.o stub.o page_alloc.o \ - cache.o + cache.o setup.o mm.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ - ../fpsimd.o ../hyp-entry.o ../exception.o + ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o obj-y += $(lib-objs) ## diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index c631e29fb001..a2b8b6a84cbd 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -244,4 +244,31 @@ alternative_else_nop_endif SYM_CODE_END(__kvm_handle_stub_hvc) +SYM_FUNC_START(__pkvm_init_switch_pgd) + /* Turn the MMU off */ + pre_disable_mmu_workaround + mrs x2, sctlr_el2 + bic x3, x2, #SCTLR_ELx_M + msr sctlr_el2, x3 + isb + + tlbi alle2 + + /* Install the new pgtables */ + ldr x3, [x0, #NVHE_INIT_PGD_PA] + phys_to_ttbr x4, x3 +alternative_if ARM64_HAS_CNP + orr x4, x4, #TTBR_CNP_BIT +alternative_else_nop_endif + msr ttbr0_el2, x4 + + /* Set the new stack pointer */ + ldr x0, [x0, #NVHE_INIT_STACK_HYP_VA] + mov sp, x0 + + /* And turn the MMU back on! */ + set_sctlr_el2 x2 + ret x1 +SYM_FUNC_END(__pkvm_init_switch_pgd) + .popsection diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 4a67850702c8..a571cee99a5c 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -6,12 +6,14 @@ #include +#include #include #include #include #include #include +#include #include DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); @@ -106,6 +108,49 @@ static void handle___vgic_v3_restore_aprs(struct kvm_cpu_context *host_ctxt) __vgic_v3_restore_aprs(kern_hyp_va(cpu_if)); } +static void handle___pkvm_init(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(phys_addr_t, phys, host_ctxt, 1); + DECLARE_REG(unsigned long, size, host_ctxt, 2); + DECLARE_REG(unsigned long, nr_cpus, host_ctxt, 3); + DECLARE_REG(unsigned long *, per_cpu_base, host_ctxt, 4); + DECLARE_REG(u32, hyp_va_bits, host_ctxt, 5); + + /* + * __pkvm_init() will return only if an error occurred, otherwise it + * will tail-call in __pkvm_init_finalise() which will have to deal + * with the host context directly. + */ + cpu_reg(host_ctxt, 1) = __pkvm_init(phys, size, nr_cpus, per_cpu_base, + hyp_va_bits); +} + +static void handle___pkvm_cpu_set_vector(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(enum arm64_hyp_spectre_vector, slot, host_ctxt, 1); + + cpu_reg(host_ctxt, 1) = pkvm_cpu_set_vector(slot); +} + +static void handle___pkvm_create_mappings(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(unsigned long, start, host_ctxt, 1); + DECLARE_REG(unsigned long, size, host_ctxt, 2); + DECLARE_REG(unsigned long, phys, host_ctxt, 3); + DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 4); + + cpu_reg(host_ctxt, 1) = __pkvm_create_mappings(start, size, phys, prot); +} + +static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(phys_addr_t, phys, host_ctxt, 1); + DECLARE_REG(size_t, size, host_ctxt, 2); + DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 3); + + cpu_reg(host_ctxt, 1) = __pkvm_create_private_mapping(phys, size, prot); +} + typedef void (*hcall_t)(struct kvm_cpu_context *); #define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x @@ -125,6 +170,10 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__kvm_get_mdcr_el2), HANDLE_FUNC(__vgic_v3_save_aprs), HANDLE_FUNC(__vgic_v3_restore_aprs), + HANDLE_FUNC(__pkvm_init), + HANDLE_FUNC(__pkvm_cpu_set_vector), + HANDLE_FUNC(__pkvm_create_mappings), + HANDLE_FUNC(__pkvm_create_private_mapping), }; static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c new file mode 100644 index 000000000000..a8efdf0f9003 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Google LLC + * Author: Quentin Perret + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +struct kvm_pgtable pkvm_pgtable; +hyp_spinlock_t pkvm_pgd_lock; +u64 __io_map_base; + +struct memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS]; +unsigned int hyp_memblock_nr; + +int __pkvm_create_mappings(unsigned long start, unsigned long size, + unsigned long phys, enum kvm_pgtable_prot prot) +{ + int err; + + hyp_spin_lock(&pkvm_pgd_lock); + err = kvm_pgtable_hyp_map(&pkvm_pgtable, start, size, phys, prot); + hyp_spin_unlock(&pkvm_pgd_lock); + + return err; +} + +unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size, + enum kvm_pgtable_prot prot) +{ + unsigned long addr; + int err; + + hyp_spin_lock(&pkvm_pgd_lock); + + size = PAGE_ALIGN(size + offset_in_page(phys)); + addr = __io_map_base; + __io_map_base += size; + + /* Are we overflowing on the vmemmap ? */ + if (__io_map_base > __hyp_vmemmap) { + __io_map_base -= size; + addr = (unsigned long)ERR_PTR(-ENOMEM); + goto out; + } + + err = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, size, phys, prot); + if (err) { + addr = (unsigned long)ERR_PTR(err); + goto out; + } + + addr = addr + offset_in_page(phys); +out: + hyp_spin_unlock(&pkvm_pgd_lock); + + return addr; +} + +int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot) +{ + unsigned long start = (unsigned long)from; + unsigned long end = (unsigned long)to; + unsigned long virt_addr; + phys_addr_t phys; + + start = start & PAGE_MASK; + end = PAGE_ALIGN(end); + + for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) { + int err; + + phys = hyp_virt_to_phys((void *)virt_addr); + err = __pkvm_create_mappings(virt_addr, PAGE_SIZE, phys, prot); + if (err) + return err; + } + + return 0; +} + +int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back) +{ + unsigned long start, end; + + hyp_vmemmap_range(phys, size, &start, &end); + + return __pkvm_create_mappings(start, end - start, back, PAGE_HYP); +} + +static void *__hyp_bp_vect_base; +int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot) +{ + void *vector; + + switch (slot) { + case HYP_VECTOR_DIRECT: { + vector = __kvm_hyp_vector; + break; + } + case HYP_VECTOR_SPECTRE_DIRECT: { + vector = __bp_harden_hyp_vecs; + break; + } + case HYP_VECTOR_INDIRECT: + case HYP_VECTOR_SPECTRE_INDIRECT: { + vector = (void *)__hyp_bp_vect_base; + break; + } + default: + return -EINVAL; + } + + vector = __kvm_vector_slot2addr(vector, slot); + *this_cpu_ptr(&kvm_hyp_vector) = (unsigned long)vector; + + return 0; +} + +int hyp_map_vectors(void) +{ + phys_addr_t phys; + void *bp_base; + + if (!cpus_have_const_cap(ARM64_SPECTRE_V3A)) + return 0; + + phys = __hyp_pa(__bp_harden_hyp_vecs); + bp_base = (void *)__pkvm_create_private_mapping(phys, + __BP_HARDEN_HYP_VECS_SZ, + PAGE_HYP_EXEC); + if (IS_ERR_OR_NULL(bp_base)) + return PTR_ERR(bp_base); + + __hyp_bp_vect_base = bp_base; + + return 0; +} + +int hyp_create_idmap(u32 hyp_va_bits) +{ + unsigned long start, end; + + start = hyp_virt_to_phys((void *)__hyp_idmap_text_start); + start = ALIGN_DOWN(start, PAGE_SIZE); + + end = hyp_virt_to_phys((void *)__hyp_idmap_text_end); + end = ALIGN(end, PAGE_SIZE); + + /* + * One half of the VA space is reserved to linearly map portions of + * memory -- see va_layout.c for more details. The other half of the VA + * space contains the trampoline page, and needs some care. Split that + * second half in two and find the quarter of VA space not conflicting + * with the idmap to place the IOs and the vmemmap. IOs use the lower + * half of the quarter and the vmemmap the upper half. + */ + __io_map_base = start & BIT(hyp_va_bits - 2); + __io_map_base ^= BIT(hyp_va_bits - 2); + __hyp_vmemmap = __io_map_base | BIT(hyp_va_bits - 3); + + return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC); +} diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c new file mode 100644 index 000000000000..1e8bcd8b0299 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Google LLC + * Author: Quentin Perret + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +struct hyp_pool hpool; +struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops; +unsigned long hyp_nr_cpus; + +#define hyp_percpu_size ((unsigned long)__per_cpu_end - \ + (unsigned long)__per_cpu_start) + +static void *vmemmap_base; +static void *hyp_pgt_base; + +static int divide_memory_pool(void *virt, unsigned long size) +{ + unsigned long vstart, vend, nr_pages; + + hyp_early_alloc_init(virt, size); + + hyp_vmemmap_range(__hyp_pa(virt), size, &vstart, &vend); + nr_pages = (vend - vstart) >> PAGE_SHIFT; + vmemmap_base = hyp_early_alloc_contig(nr_pages); + if (!vmemmap_base) + return -ENOMEM; + + nr_pages = hyp_s1_pgtable_pages(); + hyp_pgt_base = hyp_early_alloc_contig(nr_pages); + if (!hyp_pgt_base) + return -ENOMEM; + + return 0; +} + +static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, + unsigned long *per_cpu_base, + u32 hyp_va_bits) +{ + void *start, *end, *virt = hyp_phys_to_virt(phys); + unsigned long pgt_size = hyp_s1_pgtable_pages() << PAGE_SHIFT; + int ret, i; + + /* Recreate the hyp page-table using the early page allocator */ + hyp_early_alloc_init(hyp_pgt_base, pgt_size); + ret = kvm_pgtable_hyp_init(&pkvm_pgtable, hyp_va_bits, + &hyp_early_alloc_mm_ops); + if (ret) + return ret; + + ret = hyp_create_idmap(hyp_va_bits); + if (ret) + return ret; + + ret = hyp_map_vectors(); + if (ret) + return ret; + + ret = hyp_back_vmemmap(phys, size, hyp_virt_to_phys(vmemmap_base)); + if (ret) + return ret; + + ret = pkvm_create_mappings(__hyp_text_start, __hyp_text_end, PAGE_HYP_EXEC); + if (ret) + return ret; + + ret = pkvm_create_mappings(__start_rodata, __end_rodata, PAGE_HYP_RO); + if (ret) + return ret; + + ret = pkvm_create_mappings(__hyp_rodata_start, __hyp_rodata_end, PAGE_HYP_RO); + if (ret) + return ret; + + ret = pkvm_create_mappings(__hyp_bss_start, __hyp_bss_end, PAGE_HYP); + if (ret) + return ret; + + ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, PAGE_HYP_RO); + if (ret) + return ret; + + ret = pkvm_create_mappings(virt, virt + size, PAGE_HYP); + if (ret) + return ret; + + for (i = 0; i < hyp_nr_cpus; i++) { + start = (void *)kern_hyp_va(per_cpu_base[i]); + end = start + PAGE_ALIGN(hyp_percpu_size); + ret = pkvm_create_mappings(start, end, PAGE_HYP); + if (ret) + return ret; + + end = (void *)per_cpu_ptr(&kvm_init_params, i)->stack_hyp_va; + start = end - PAGE_SIZE; + ret = pkvm_create_mappings(start, end, PAGE_HYP); + if (ret) + return ret; + } + + return 0; +} + +static void update_nvhe_init_params(void) +{ + struct kvm_nvhe_init_params *params; + unsigned long i; + + for (i = 0; i < hyp_nr_cpus; i++) { + params = per_cpu_ptr(&kvm_init_params, i); + params->pgd_pa = __hyp_pa(pkvm_pgtable.pgd); + __flush_dcache_area(params, sizeof(*params)); + } +} + +static void *hyp_zalloc_hyp_page(void *arg) +{ + return hyp_alloc_pages(&hpool, 0); +} + +void __noreturn __pkvm_init_finalise(void) +{ + struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data); + struct kvm_cpu_context *host_ctxt = &host_data->host_ctxt; + unsigned long nr_pages, reserved_pages, pfn; + int ret; + + /* Now that the vmemmap is backed, install the full-fledged allocator */ + pfn = hyp_virt_to_pfn(hyp_pgt_base); + nr_pages = hyp_s1_pgtable_pages(); + reserved_pages = hyp_early_alloc_nr_used_pages(); + ret = hyp_pool_init(&hpool, pfn, nr_pages, reserved_pages); + if (ret) + goto out; + + pkvm_pgtable_mm_ops = (struct kvm_pgtable_mm_ops) { + .zalloc_page = hyp_zalloc_hyp_page, + .phys_to_virt = hyp_phys_to_virt, + .virt_to_phys = hyp_virt_to_phys, + .get_page = hyp_get_page, + .put_page = hyp_put_page, + }; + pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops; + +out: + /* + * We tail-called to here from handle___pkvm_init() and will not return, + * so make sure to propagate the return value to the host. + */ + cpu_reg(host_ctxt, 1) = ret; + + __host_enter(host_ctxt); +} + +int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus, + unsigned long *per_cpu_base, u32 hyp_va_bits) +{ + struct kvm_nvhe_init_params *params; + void *virt = hyp_phys_to_virt(phys); + void (*fn)(phys_addr_t params_pa, void *finalize_fn_va); + int ret; + + if (!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size)) + return -EINVAL; + + hyp_spin_lock_init(&pkvm_pgd_lock); + hyp_nr_cpus = nr_cpus; + + ret = divide_memory_pool(virt, size); + if (ret) + return ret; + + ret = recreate_hyp_mappings(phys, size, per_cpu_base, hyp_va_bits); + if (ret) + return ret; + + update_nvhe_init_params(); + + /* Jump in the idmap page to switch to the new page-tables */ + params = this_cpu_ptr(&kvm_init_params); + fn = (typeof(fn))__hyp_pa(__pkvm_init_switch_pgd); + fn(__hyp_pa(params), __pkvm_init_finalise); + + unreachable(); +} diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index ff478a576f4d..82aca35a22f6 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -10,8 +10,6 @@ #include #include -#define KVM_PGTABLE_MAX_LEVELS 4U - #define KVM_PTE_VALID BIT(0) #define KVM_PTE_TYPE BIT(1) diff --git a/arch/arm64/kvm/hyp/reserved_mem.c b/arch/arm64/kvm/hyp/reserved_mem.c new file mode 100644 index 000000000000..9bc6a6d27904 --- /dev/null +++ b/arch/arm64/kvm/hyp/reserved_mem.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 - Google LLC + * Author: Quentin Perret + */ + +#include +#include + +#include + +#include +#include + +static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory); +static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr); + +phys_addr_t hyp_mem_base; +phys_addr_t hyp_mem_size; + +static int __init register_memblock_regions(void) +{ + struct memblock_region *reg; + + for_each_mem_region(reg) { + if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS) + return -ENOMEM; + + hyp_memory[*hyp_memblock_nr_ptr] = *reg; + (*hyp_memblock_nr_ptr)++; + } + + return 0; +} + +void __init kvm_hyp_reserve(void) +{ + u64 nr_pages, prev, hyp_mem_pages = 0; + int ret; + + if (!is_hyp_mode_available() || is_kernel_in_hyp_mode()) + return; + + if (kvm_get_mode() != KVM_MODE_PROTECTED) + return; + + ret = register_memblock_regions(); + if (ret) { + *hyp_memblock_nr_ptr = 0; + kvm_err("Failed to register hyp memblocks: %d\n", ret); + return; + } + + hyp_mem_pages += hyp_s1_pgtable_pages(); + + /* + * The hyp_vmemmap needs to be backed by pages, but these pages + * themselves need to be present in the vmemmap, so compute the number + * of pages needed by looking for a fixed point. + */ + nr_pages = 0; + do { + prev = nr_pages; + nr_pages = hyp_mem_pages + prev; + nr_pages = DIV_ROUND_UP(nr_pages * sizeof(struct hyp_page), PAGE_SIZE); + nr_pages += __hyp_pgtable_max_pages(nr_pages); + } while (nr_pages != prev); + hyp_mem_pages += nr_pages; + + /* + * Try to allocate a PMD-aligned region to reduce TLB pressure once + * this is unmapped from the host stage-2, and fallback to PAGE_SIZE. + */ + hyp_mem_size = hyp_mem_pages << PAGE_SHIFT; + hyp_mem_base = memblock_find_in_range(0, memblock_end_of_DRAM(), + ALIGN(hyp_mem_size, PMD_SIZE), + PMD_SIZE); + if (!hyp_mem_base) + hyp_mem_base = memblock_find_in_range(0, memblock_end_of_DRAM(), + hyp_mem_size, PAGE_SIZE); + else + hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE); + + if (!hyp_mem_base) { + kvm_err("Failed to reserve hyp memory\n"); + return; + } + memblock_reserve(hyp_mem_base, hyp_mem_size); + + kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20, + hyp_mem_base); +} diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 3685e12aba9b..6cb22da2e226 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -452,6 +453,8 @@ void __init bootmem_init(void) dma_pernuma_cma_reserve(); + kvm_hyp_reserve(); + /* * sparse_init() tries to allocate memory from memblock, so must be * done after the fixed reservations -- cgit v1.2.3 From bfa79a805454f768b8d76ab683659d9e219a037a Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:26 +0000 Subject: KVM: arm64: Elevate hypervisor mappings creation at EL2 Previous commits have introduced infrastructure to enable the EL2 code to manage its own stage 1 mappings. However, this was preliminary work, and none of it is currently in use. Put all of this together by elevating the mapping creation at EL2 when memory protection is enabled. In this case, the host kernel running at EL1 still creates _temporary_ EL2 mappings, only used while initializing the hypervisor, but frees them right after. As such, all calls to create_hyp_mappings() after kvm init has finished turn into hypercalls, as the host now has no 'legal' way to modify the hypevisor page tables directly. Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-19-qperret@google.com --- arch/arm64/include/asm/kvm_mmu.h | 2 +- arch/arm64/kvm/arm.c | 87 +++++++++++++++++++++++++++++++++++++--- arch/arm64/kvm/mmu.c | 43 +++++++++++++++++--- 3 files changed, 120 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 5c42ec023cc7..ce02a4052dcf 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -166,7 +166,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu); phys_addr_t kvm_mmu_get_httbr(void); phys_addr_t kvm_get_idmap_vector(void); -int kvm_mmu_init(void); +int kvm_mmu_init(u32 *hyp_va_bits); static inline void *__kvm_vector_slot2addr(void *base, enum arm64_hyp_spectre_vector slot) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e2c471117bff..d93ea0b82491 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1426,7 +1426,7 @@ static void cpu_prepare_hyp_mode(int cpu) kvm_flush_dcache_to_poc(params, sizeof(*params)); } -static void cpu_init_hyp_mode(void) +static void hyp_install_host_vector(void) { struct kvm_nvhe_init_params *params; struct arm_smccc_res res; @@ -1444,6 +1444,11 @@ static void cpu_init_hyp_mode(void) params = this_cpu_ptr_nvhe_sym(kvm_init_params); arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init), virt_to_phys(params), &res); WARN_ON(res.a0 != SMCCC_RET_SUCCESS); +} + +static void cpu_init_hyp_mode(void) +{ + hyp_install_host_vector(); /* * Disabling SSBD on a non-VHE system requires us to enable SSBS @@ -1486,7 +1491,10 @@ static void cpu_set_hyp_vector(void) struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data); void *vector = hyp_spectre_vector_selector[data->slot]; - *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)vector; + if (!is_protected_kvm_enabled()) + *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)vector; + else + kvm_call_hyp_nvhe(__pkvm_cpu_set_vector, data->slot); } static void cpu_hyp_reinit(void) @@ -1494,13 +1502,14 @@ static void cpu_hyp_reinit(void) kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt); cpu_hyp_reset(); - cpu_set_hyp_vector(); if (is_kernel_in_hyp_mode()) kvm_timer_init_vhe(); else cpu_init_hyp_mode(); + cpu_set_hyp_vector(); + kvm_arm_init_debug(); if (vgic_present) @@ -1696,18 +1705,59 @@ static void teardown_hyp_mode(void) } } +static int do_pkvm_init(u32 hyp_va_bits) +{ + void *per_cpu_base = kvm_ksym_ref(kvm_arm_hyp_percpu_base); + int ret; + + preempt_disable(); + hyp_install_host_vector(); + ret = kvm_call_hyp_nvhe(__pkvm_init, hyp_mem_base, hyp_mem_size, + num_possible_cpus(), kern_hyp_va(per_cpu_base), + hyp_va_bits); + preempt_enable(); + + return ret; +} + +static int kvm_hyp_init_protection(u32 hyp_va_bits) +{ + void *addr = phys_to_virt(hyp_mem_base); + int ret; + + ret = create_hyp_mappings(addr, addr + hyp_mem_size, PAGE_HYP); + if (ret) + return ret; + + ret = do_pkvm_init(hyp_va_bits); + if (ret) + return ret; + + free_hyp_pgds(); + + return 0; +} + /** * Inits Hyp-mode on all online CPUs */ static int init_hyp_mode(void) { + u32 hyp_va_bits; int cpu; - int err = 0; + int err = -ENOMEM; + + /* + * The protected Hyp-mode cannot be initialized if the memory pool + * allocation has failed. + */ + if (is_protected_kvm_enabled() && !hyp_mem_base) + goto out_err; /* * Allocate Hyp PGD and setup Hyp identity mapping */ - err = kvm_mmu_init(); + err = kvm_mmu_init(&hyp_va_bits); if (err) goto out_err; @@ -1823,6 +1873,14 @@ static int init_hyp_mode(void) goto out_err; } + if (is_protected_kvm_enabled()) { + err = kvm_hyp_init_protection(hyp_va_bits); + if (err) { + kvm_err("Failed to init hyp memory protection\n"); + goto out_err; + } + } + return 0; out_err: @@ -1831,6 +1889,16 @@ out_err: return err; } +static int finalize_hyp_mode(void) +{ + if (!is_protected_kvm_enabled()) + return 0; + + static_branch_enable(&kvm_protected_mode_initialized); + + return 0; +} + static void check_kvm_target_cpu(void *ret) { *(int *)ret = kvm_target_cpu(); @@ -1942,8 +2010,15 @@ int kvm_arch_init(void *opaque) if (err) goto out_hyp; + if (!in_hyp_mode) { + err = finalize_hyp_mode(); + if (err) { + kvm_err("Failed to finalize Hyp protection\n"); + goto out_hyp; + } + } + if (is_protected_kvm_enabled()) { - static_branch_enable(&kvm_protected_mode_initialized); kvm_info("Protected nVHE mode initialized successfully\n"); } else if (in_hyp_mode) { kvm_info("VHE mode initialized successfully\n"); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index e583f7fb3620..de0ad79d2c90 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -221,15 +221,39 @@ void free_hyp_pgds(void) if (hyp_pgtable) { kvm_pgtable_hyp_destroy(hyp_pgtable); kfree(hyp_pgtable); + hyp_pgtable = NULL; } mutex_unlock(&kvm_hyp_pgd_mutex); } +static bool kvm_host_owns_hyp_mappings(void) +{ + if (static_branch_likely(&kvm_protected_mode_initialized)) + return false; + + /* + * This can happen at boot time when __create_hyp_mappings() is called + * after the hyp protection has been enabled, but the static key has + * not been flipped yet. + */ + if (!hyp_pgtable && is_protected_kvm_enabled()) + return false; + + WARN_ON(!hyp_pgtable); + + return true; +} + static int __create_hyp_mappings(unsigned long start, unsigned long size, unsigned long phys, enum kvm_pgtable_prot prot) { int err; + if (!kvm_host_owns_hyp_mappings()) { + return kvm_call_hyp_nvhe(__pkvm_create_mappings, + start, size, phys, prot); + } + mutex_lock(&kvm_hyp_pgd_mutex); err = kvm_pgtable_hyp_map(hyp_pgtable, start, size, phys, prot); mutex_unlock(&kvm_hyp_pgd_mutex); @@ -291,6 +315,16 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size, unsigned long base; int ret = 0; + if (!kvm_host_owns_hyp_mappings()) { + base = kvm_call_hyp_nvhe(__pkvm_create_private_mapping, + phys_addr, size, prot); + if (IS_ERR_OR_NULL((void *)base)) + return PTR_ERR((void *)base); + *haddr = base; + + return 0; + } + mutex_lock(&kvm_hyp_pgd_mutex); /* @@ -1270,10 +1304,9 @@ static struct kvm_pgtable_mm_ops kvm_hyp_mm_ops = { .virt_to_phys = kvm_host_pa, }; -int kvm_mmu_init(void) +int kvm_mmu_init(u32 *hyp_va_bits) { int err; - u32 hyp_va_bits; hyp_idmap_start = __pa_symbol(__hyp_idmap_text_start); hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE); @@ -1287,8 +1320,8 @@ int kvm_mmu_init(void) */ BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK); - hyp_va_bits = 64 - ((idmap_t0sz & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET); - kvm_debug("Using %u-bit virtual addresses at EL2\n", hyp_va_bits); + *hyp_va_bits = 64 - ((idmap_t0sz & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET); + kvm_debug("Using %u-bit virtual addresses at EL2\n", *hyp_va_bits); kvm_debug("IDMAP page: %lx\n", hyp_idmap_start); kvm_debug("HYP VA range: %lx:%lx\n", kern_hyp_va(PAGE_OFFSET), @@ -1313,7 +1346,7 @@ int kvm_mmu_init(void) goto out; } - err = kvm_pgtable_hyp_init(hyp_pgtable, hyp_va_bits, &kvm_hyp_mm_ops); + err = kvm_pgtable_hyp_init(hyp_pgtable, *hyp_va_bits, &kvm_hyp_mm_ops); if (err) goto out_free_pgtable; -- cgit v1.2.3 From 834cd93deb75f3a43420e479f133dd02fba95aa6 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:27 +0000 Subject: KVM: arm64: Use kvm_arch for stage 2 pgtable In order to make use of the stage 2 pgtable code for the host stage 2, use struct kvm_arch in lieu of struct kvm as the host will have the former but not the latter. Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-20-qperret@google.com --- arch/arm64/include/asm/kvm_pgtable.h | 5 +++-- arch/arm64/kvm/hyp/pgtable.c | 6 +++--- arch/arm64/kvm/mmu.c | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index bf7a3cc49420..7945ec87eaec 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -162,12 +162,13 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, /** * kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table. * @pgt: Uninitialised page-table structure to initialise. - * @kvm: KVM structure representing the guest virtual machine. + * @arch: Arch-specific KVM structure representing the guest virtual + * machine. * @mm_ops: Memory management callbacks. * * Return: 0 on success, negative error code on failure. */ -int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm, +int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch, struct kvm_pgtable_mm_ops *mm_ops); /** diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 82aca35a22f6..ea95bbc6ba80 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -880,11 +880,11 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size) return kvm_pgtable_walk(pgt, addr, size, &walker); } -int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm, +int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch, struct kvm_pgtable_mm_ops *mm_ops) { size_t pgd_sz; - u64 vtcr = kvm->arch.vtcr; + u64 vtcr = arch->vtcr; u32 ia_bits = VTCR_EL2_IPA(vtcr); u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr); u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; @@ -897,7 +897,7 @@ int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm, pgt->ia_bits = ia_bits; pgt->start_level = start_level; pgt->mm_ops = mm_ops; - pgt->mmu = &kvm->arch.mmu; + pgt->mmu = &arch->mmu; /* Ensure zeroed PGD pages are visible to the hardware walker */ dsb(ishst); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index de0ad79d2c90..d6eb1fb21232 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -457,7 +457,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) if (!pgt) return -ENOMEM; - err = kvm_pgtable_stage2_init(pgt, kvm, &kvm_s2_mm_ops); + err = kvm_pgtable_stage2_init(pgt, &kvm->arch, &kvm_s2_mm_ops); if (err) goto out_free_pgtable; -- cgit v1.2.3 From cfb1a98de7a9aa51931ff5b336fc5c3c201d01cc Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:28 +0000 Subject: KVM: arm64: Use kvm_arch in kvm_s2_mmu In order to make use of the stage 2 pgtable code for the host stage 2, change kvm_s2_mmu to use a kvm_arch pointer in lieu of the kvm pointer, as the host will have the former but not the latter. Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-21-qperret@google.com --- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/include/asm/kvm_mmu.h | 6 +++++- arch/arm64/kvm/mmu.c | 8 ++++---- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f813e1191027..4859c9de75d7 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -94,7 +94,7 @@ struct kvm_s2_mmu { /* The last vcpu id that ran on each physical CPU */ int __percpu *last_vcpu_ran; - struct kvm *kvm; + struct kvm_arch *arch; }; struct kvm_arch_memory_slot { diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index ce02a4052dcf..6f743e20cb06 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -272,7 +272,7 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) */ static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu) { - write_sysreg(kern_hyp_va(mmu->kvm)->arch.vtcr, vtcr_el2); + write_sysreg(kern_hyp_va(mmu->arch)->vtcr, vtcr_el2); write_sysreg(kvm_get_vttbr(mmu), vttbr_el2); /* @@ -283,5 +283,9 @@ static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu) asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); } +static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu) +{ + return container_of(mmu->arch, struct kvm, arch); +} #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index d6eb1fb21232..0f16b70befa8 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -165,7 +165,7 @@ static void *kvm_host_va(phys_addr_t phys) static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size, bool may_block) { - struct kvm *kvm = mmu->kvm; + struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu); phys_addr_t end = start + size; assert_spin_locked(&kvm->mmu_lock); @@ -470,7 +470,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) for_each_possible_cpu(cpu) *per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1; - mmu->kvm = kvm; + mmu->arch = &kvm->arch; mmu->pgt = pgt; mmu->pgd_phys = __pa(pgt->pgd); mmu->vmid.vmid_gen = 0; @@ -552,7 +552,7 @@ void stage2_unmap_vm(struct kvm *kvm) void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) { - struct kvm *kvm = mmu->kvm; + struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu); struct kvm_pgtable *pgt = NULL; spin_lock(&kvm->mmu_lock); @@ -621,7 +621,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, */ static void stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end) { - struct kvm *kvm = mmu->kvm; + struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu); stage2_apply_range_resched(kvm, addr, end, kvm_pgtable_stage2_wrprotect); } -- cgit v1.2.3 From 734864c177bca5148adfe7a96744993d61513430 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:29 +0000 Subject: KVM: arm64: Set host stage 2 using kvm_nvhe_init_params Move the registers relevant to host stage 2 enablement to kvm_nvhe_init_params to prepare the ground for enabling it in later patches. Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-22-qperret@google.com --- arch/arm64/include/asm/kvm_asm.h | 3 +++ arch/arm64/kernel/asm-offsets.c | 3 +++ arch/arm64/kvm/arm.c | 5 +++++ arch/arm64/kvm/hyp/nvhe/hyp-init.S | 14 +++++++++----- arch/arm64/kvm/hyp/nvhe/switch.c | 5 +---- 5 files changed, 21 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index ebe7007b820b..08f63c09cd11 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -158,6 +158,9 @@ struct kvm_nvhe_init_params { unsigned long tpidr_el2; unsigned long stack_hyp_va; phys_addr_t pgd_pa; + unsigned long hcr_el2; + unsigned long vttbr; + unsigned long vtcr; }; /* Translate a kernel address @ptr into its equivalent linear mapping */ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index a36e2fc330d4..8930b42f6418 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -120,6 +120,9 @@ int main(void) DEFINE(NVHE_INIT_TPIDR_EL2, offsetof(struct kvm_nvhe_init_params, tpidr_el2)); DEFINE(NVHE_INIT_STACK_HYP_VA, offsetof(struct kvm_nvhe_init_params, stack_hyp_va)); DEFINE(NVHE_INIT_PGD_PA, offsetof(struct kvm_nvhe_init_params, pgd_pa)); + DEFINE(NVHE_INIT_HCR_EL2, offsetof(struct kvm_nvhe_init_params, hcr_el2)); + DEFINE(NVHE_INIT_VTTBR, offsetof(struct kvm_nvhe_init_params, vttbr)); + DEFINE(NVHE_INIT_VTCR, offsetof(struct kvm_nvhe_init_params, vtcr)); #endif #ifdef CONFIG_CPU_PM DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp)); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index d93ea0b82491..a6b5ba195ca9 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1418,6 +1418,11 @@ static void cpu_prepare_hyp_mode(int cpu) params->stack_hyp_va = kern_hyp_va(per_cpu(kvm_arm_hyp_stack_page, cpu) + PAGE_SIZE); params->pgd_pa = kvm_mmu_get_httbr(); + if (is_protected_kvm_enabled()) + params->hcr_el2 = HCR_HOST_NVHE_PROTECTED_FLAGS; + else + params->hcr_el2 = HCR_HOST_NVHE_FLAGS; + params->vttbr = params->vtcr = 0; /* * Flush the init params from the data cache because the struct will diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index a2b8b6a84cbd..a50ad9e9fc05 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -83,11 +83,6 @@ SYM_CODE_END(__kvm_hyp_init) * x0: struct kvm_nvhe_init_params PA */ SYM_CODE_START_LOCAL(___kvm_hyp_init) -alternative_if ARM64_KVM_PROTECTED_MODE - mov_q x1, HCR_HOST_NVHE_PROTECTED_FLAGS - msr hcr_el2, x1 -alternative_else_nop_endif - ldr x1, [x0, #NVHE_INIT_TPIDR_EL2] msr tpidr_el2, x1 @@ -97,6 +92,15 @@ alternative_else_nop_endif ldr x1, [x0, #NVHE_INIT_MAIR_EL2] msr mair_el2, x1 + ldr x1, [x0, #NVHE_INIT_HCR_EL2] + msr hcr_el2, x1 + + ldr x1, [x0, #NVHE_INIT_VTTBR] + msr vttbr_el2, x1 + + ldr x1, [x0, #NVHE_INIT_VTCR] + msr vtcr_el2, x1 + ldr x1, [x0, #NVHE_INIT_PGD_PA] phys_to_ttbr x2, x1 alternative_if ARM64_HAS_CNP diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index f6d542ecf6a7..99323563022a 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -97,10 +97,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; write_sysreg(mdcr_el2, mdcr_el2); - if (is_protected_kvm_enabled()) - write_sysreg(HCR_HOST_NVHE_PROTECTED_FLAGS, hcr_el2); - else - write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2); + write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2); cptr = CPTR_EL2_DEFAULT; if (vcpu_has_sve(vcpu) && (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)) -- cgit v1.2.3 From bcb25a2b86b4b96385ffbcc54d51c400793b7393 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:30 +0000 Subject: KVM: arm64: Refactor kvm_arm_setup_stage2() In order to re-use some of the stage 2 setup code at EL2, factor parts of kvm_arm_setup_stage2() out into separate functions. No functional change intended. Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-23-qperret@google.com --- arch/arm64/include/asm/kvm_pgtable.h | 26 ++++++++++++++++++++++ arch/arm64/kvm/hyp/pgtable.c | 32 +++++++++++++++++++++++++++ arch/arm64/kvm/reset.c | 42 ++++-------------------------------- 3 files changed, 62 insertions(+), 38 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 7945ec87eaec..9cdc198ea6b4 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -13,6 +13,16 @@ #define KVM_PGTABLE_MAX_LEVELS 4U +static inline u64 kvm_get_parange(u64 mmfr0) +{ + u64 parange = cpuid_feature_extract_unsigned_field(mmfr0, + ID_AA64MMFR0_PARANGE_SHIFT); + if (parange > ID_AA64MMFR0_PARANGE_MAX) + parange = ID_AA64MMFR0_PARANGE_MAX; + + return parange; +} + typedef u64 kvm_pte_t; /** @@ -159,6 +169,22 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt); int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, enum kvm_pgtable_prot prot); +/** + * kvm_get_vtcr() - Helper to construct VTCR_EL2 + * @mmfr0: Sanitized value of SYS_ID_AA64MMFR0_EL1 register. + * @mmfr1: Sanitized value of SYS_ID_AA64MMFR1_EL1 register. + * @phys_shfit: Value to set in VTCR_EL2.T0SZ. + * + * The VTCR value is common across all the physical CPUs on the system. + * We use system wide sanitised values to fill in different fields, + * except for Hardware Management of Access Flags. HA Flag is set + * unconditionally on all CPUs, as it is safe to run with or without + * the feature and the bit is RES0 on CPUs that don't support it. + * + * Return: VTCR_EL2 value + */ +u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift); + /** * kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table. * @pgt: Uninitialised page-table structure to initialise. diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index ea95bbc6ba80..4e15ccafd640 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -9,6 +9,7 @@ #include #include +#include #define KVM_PTE_VALID BIT(0) @@ -450,6 +451,37 @@ struct stage2_map_data { struct kvm_pgtable_mm_ops *mm_ops; }; +u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) +{ + u64 vtcr = VTCR_EL2_FLAGS; + u8 lvls; + + vtcr |= kvm_get_parange(mmfr0) << VTCR_EL2_PS_SHIFT; + vtcr |= VTCR_EL2_T0SZ(phys_shift); + /* + * Use a minimum 2 level page table to prevent splitting + * host PMD huge pages at stage2. + */ + lvls = stage2_pgtable_levels(phys_shift); + if (lvls < 2) + lvls = 2; + vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls); + + /* + * Enable the Hardware Access Flag management, unconditionally + * on all CPUs. The features is RES0 on CPUs without the support + * and must be ignored by the CPUs. + */ + vtcr |= VTCR_EL2_HA; + + /* Set the vmid bits */ + vtcr |= (get_vmid_bits(mmfr1) == 16) ? + VTCR_EL2_VS_16BIT : + VTCR_EL2_VS_8BIT; + + return vtcr; +} + static int stage2_map_set_prot_attr(enum kvm_pgtable_prot prot, struct stage2_map_data *data) { diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 67f30953d6d0..86d94f616a1e 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -329,19 +329,10 @@ int kvm_set_ipa_limit(void) return 0; } -/* - * Configure the VTCR_EL2 for this VM. The VTCR value is common - * across all the physical CPUs on the system. We use system wide - * sanitised values to fill in different fields, except for Hardware - * Management of Access Flags. HA Flag is set unconditionally on - * all CPUs, as it is safe to run with or without the feature and - * the bit is RES0 on CPUs that don't support it. - */ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) { - u64 vtcr = VTCR_EL2_FLAGS, mmfr0; - u32 parange, phys_shift; - u8 lvls; + u64 mmfr0, mmfr1; + u32 phys_shift; if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK) return -EINVAL; @@ -361,33 +352,8 @@ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) } mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); - parange = cpuid_feature_extract_unsigned_field(mmfr0, - ID_AA64MMFR0_PARANGE_SHIFT); - if (parange > ID_AA64MMFR0_PARANGE_MAX) - parange = ID_AA64MMFR0_PARANGE_MAX; - vtcr |= parange << VTCR_EL2_PS_SHIFT; - - vtcr |= VTCR_EL2_T0SZ(phys_shift); - /* - * Use a minimum 2 level page table to prevent splitting - * host PMD huge pages at stage2. - */ - lvls = stage2_pgtable_levels(phys_shift); - if (lvls < 2) - lvls = 2; - vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls); - - /* - * Enable the Hardware Access Flag management, unconditionally - * on all CPUs. The features is RES0 on CPUs without the support - * and must be ignored by the CPUs. - */ - vtcr |= VTCR_EL2_HA; + mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); + kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift); - /* Set the vmid bits */ - vtcr |= (kvm_get_vmid_bits() == 16) ? - VTCR_EL2_VS_16BIT : - VTCR_EL2_VS_8BIT; - kvm->arch.vtcr = vtcr; return 0; } -- cgit v1.2.3 From 6ec7e56d3265f6e7673d0788bfa3a76820c9bdfe Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:31 +0000 Subject: KVM: arm64: Refactor __load_guest_stage2() Refactor __load_guest_stage2() to introduce __load_stage2() which will be re-used when loading the host stage 2. Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-24-qperret@google.com --- arch/arm64/include/asm/kvm_mmu.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 6f743e20cb06..9d64fa73ee67 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -270,9 +270,9 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) * Must be called from hyp code running at EL2 with an updated VTTBR * and interrupts disabled. */ -static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu) +static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, unsigned long vtcr) { - write_sysreg(kern_hyp_va(mmu->arch)->vtcr, vtcr_el2); + write_sysreg(vtcr, vtcr_el2); write_sysreg(kvm_get_vttbr(mmu), vttbr_el2); /* @@ -283,6 +283,11 @@ static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu) asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); } +static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu) +{ + __load_stage2(mmu, kern_hyp_va(mmu->arch)->vtcr); +} + static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu) { return container_of(mmu->arch, struct kvm, arch); -- cgit v1.2.3 From 159b859beed76836a2c7cfa6303c312a40bb9dc7 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:32 +0000 Subject: KVM: arm64: Refactor __populate_fault_info() Refactor __populate_fault_info() to introduce __get_fault_info() which will be used once the host is wrapped in a stage 2. Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-25-qperret@google.com --- arch/arm64/kvm/hyp/include/hyp/switch.h | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 1073f176e92c..cdf42e347d3f 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -160,18 +160,10 @@ static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) return true; } -static inline bool __populate_fault_info(struct kvm_vcpu *vcpu) +static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault) { - u8 ec; - u64 esr; u64 hpfar, far; - esr = vcpu->arch.fault.esr_el2; - ec = ESR_ELx_EC(esr); - - if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW) - return true; - far = read_sysreg_el2(SYS_FAR); /* @@ -194,11 +186,25 @@ static inline bool __populate_fault_info(struct kvm_vcpu *vcpu) hpfar = read_sysreg(hpfar_el2); } - vcpu->arch.fault.far_el2 = far; - vcpu->arch.fault.hpfar_el2 = hpfar; + fault->far_el2 = far; + fault->hpfar_el2 = hpfar; return true; } +static inline bool __populate_fault_info(struct kvm_vcpu *vcpu) +{ + u8 ec; + u64 esr; + + esr = vcpu->arch.fault.esr_el2; + ec = ESR_ELx_EC(esr); + + if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW) + return true; + + return __get_fault_info(esr, &vcpu->arch.fault); +} + static inline void __hyp_sve_save_host(struct kvm_vcpu *vcpu) { struct thread_struct *thread; -- cgit v1.2.3 From e37f37a0e780f23210b2a5cb314dab39fea7086a Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:33 +0000 Subject: KVM: arm64: Make memcache anonymous in pgtable allocator The current stage2 page-table allocator uses a memcache to get pre-allocated pages when it needs any. To allow re-using this code at EL2 which uses a concept of memory pools, make the memcache argument of kvm_pgtable_stage2_map() anonymous, and let the mm_ops zalloc_page() callbacks use it the way they need to. Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-26-qperret@google.com --- arch/arm64/include/asm/kvm_pgtable.h | 6 +++--- arch/arm64/kvm/hyp/pgtable.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 9cdc198ea6b4..4ae19247837b 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -213,8 +213,8 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); * @size: Size of the mapping. * @phys: Physical address of the memory to map. * @prot: Permissions and attributes for the mapping. - * @mc: Cache of pre-allocated GFP_PGTABLE_USER memory from which to - * allocate page-table pages. + * @mc: Cache of pre-allocated and zeroed memory from which to allocate + * page-table pages. * * The offset of @addr within a page is ignored, @size is rounded-up to * the next page boundary and @phys is rounded-down to the previous page @@ -236,7 +236,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); */ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, enum kvm_pgtable_prot prot, - struct kvm_mmu_memory_cache *mc); + void *mc); /** * kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 page-table. diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 4e15ccafd640..15de1708cfcd 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -446,7 +446,7 @@ struct stage2_map_data { kvm_pte_t *anchor; struct kvm_s2_mmu *mmu; - struct kvm_mmu_memory_cache *memcache; + void *memcache; struct kvm_pgtable_mm_ops *mm_ops; }; @@ -670,7 +670,7 @@ static int stage2_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, enum kvm_pgtable_prot prot, - struct kvm_mmu_memory_cache *mc) + void *mc) { int ret; struct stage2_map_data map_data = { -- cgit v1.2.3 From 04e5de03093f669ccc233e56b7838bfa7a7af6e1 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:34 +0000 Subject: KVM: arm64: Reserve memory for host stage 2 Extend the memory pool allocated for the hypervisor to include enough pages to map all of memory at page granularity for the host stage 2. While at it, also reserve some memory for device mappings. Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-27-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/mm.h | 27 ++++++++++++++++++++++++++- arch/arm64/kvm/hyp/nvhe/setup.c | 12 ++++++++++++ arch/arm64/kvm/hyp/reserved_mem.c | 2 ++ 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index ac0f7fcffd08..0095f6289742 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -53,7 +53,7 @@ static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages) return total; } -static inline unsigned long hyp_s1_pgtable_pages(void) +static inline unsigned long __hyp_pgtable_total_pages(void) { unsigned long res = 0, i; @@ -63,9 +63,34 @@ static inline unsigned long hyp_s1_pgtable_pages(void) res += __hyp_pgtable_max_pages(reg->size >> PAGE_SHIFT); } + return res; +} + +static inline unsigned long hyp_s1_pgtable_pages(void) +{ + unsigned long res; + + res = __hyp_pgtable_total_pages(); + /* Allow 1 GiB for private mappings */ res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT); return res; } + +static inline unsigned long host_s2_mem_pgtable_pages(void) +{ + /* + * Include an extra 16 pages to safely upper-bound the worst case of + * concatenated pgds. + */ + return __hyp_pgtable_total_pages() + 16; +} + +static inline unsigned long host_s2_dev_pgtable_pages(void) +{ + /* Allow 1 GiB for MMIO mappings */ + return __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT); +} + #endif /* __KVM_HYP_MM_H */ diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 1e8bcd8b0299..c1a3e7e0ebbc 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -24,6 +24,8 @@ unsigned long hyp_nr_cpus; static void *vmemmap_base; static void *hyp_pgt_base; +static void *host_s2_mem_pgt_base; +static void *host_s2_dev_pgt_base; static int divide_memory_pool(void *virt, unsigned long size) { @@ -42,6 +44,16 @@ static int divide_memory_pool(void *virt, unsigned long size) if (!hyp_pgt_base) return -ENOMEM; + nr_pages = host_s2_mem_pgtable_pages(); + host_s2_mem_pgt_base = hyp_early_alloc_contig(nr_pages); + if (!host_s2_mem_pgt_base) + return -ENOMEM; + + nr_pages = host_s2_dev_pgtable_pages(); + host_s2_dev_pgt_base = hyp_early_alloc_contig(nr_pages); + if (!host_s2_dev_pgt_base) + return -ENOMEM; + return 0; } diff --git a/arch/arm64/kvm/hyp/reserved_mem.c b/arch/arm64/kvm/hyp/reserved_mem.c index 9bc6a6d27904..fd42705a3c26 100644 --- a/arch/arm64/kvm/hyp/reserved_mem.c +++ b/arch/arm64/kvm/hyp/reserved_mem.c @@ -52,6 +52,8 @@ void __init kvm_hyp_reserve(void) } hyp_mem_pages += hyp_s1_pgtable_pages(); + hyp_mem_pages += host_s2_mem_pgtable_pages(); + hyp_mem_pages += host_s2_dev_pgtable_pages(); /* * The hyp_vmemmap needs to be backed by pages, but these pages -- cgit v1.2.3 From a14307f5310c737744641ff8da7a8d491c3c85cd Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:35 +0000 Subject: KVM: arm64: Sort the hypervisor memblocks We will soon need to check if a Physical Address belongs to a memblock at EL2, so make sure to sort them so this can be done efficiently. Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-28-qperret@google.com --- arch/arm64/kvm/hyp/reserved_mem.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/arm64/kvm/hyp/reserved_mem.c b/arch/arm64/kvm/hyp/reserved_mem.c index fd42705a3c26..83ca23ac259b 100644 --- a/arch/arm64/kvm/hyp/reserved_mem.c +++ b/arch/arm64/kvm/hyp/reserved_mem.c @@ -6,6 +6,7 @@ #include #include +#include #include @@ -18,6 +19,23 @@ static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr); phys_addr_t hyp_mem_base; phys_addr_t hyp_mem_size; +static int cmp_hyp_memblock(const void *p1, const void *p2) +{ + const struct memblock_region *r1 = p1; + const struct memblock_region *r2 = p2; + + return r1->base < r2->base ? -1 : (r1->base > r2->base); +} + +static void __init sort_memblock_regions(void) +{ + sort(hyp_memory, + *hyp_memblock_nr_ptr, + sizeof(struct memblock_region), + cmp_hyp_memblock, + NULL); +} + static int __init register_memblock_regions(void) { struct memblock_region *reg; @@ -29,6 +47,7 @@ static int __init register_memblock_regions(void) hyp_memory[*hyp_memblock_nr_ptr] = *reg; (*hyp_memblock_nr_ptr)++; } + sort_memblock_regions(); return 0; } -- cgit v1.2.3 From f60ca2f9321a71ee3d2a7bd620c1827b82ce05f2 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:36 +0000 Subject: KVM: arm64: Always zero invalid PTEs kvm_set_invalid_pte() currently only clears bit 0 from a PTE because stage2_map_walk_table_post() needs to be able to follow the anchor. In preparation for re-using bits 63-01 from invalid PTEs, make sure to zero it entirely by ensuring to cache the anchor's child upfront. Acked-by: Will Deacon Suggested-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-29-qperret@google.com --- arch/arm64/kvm/hyp/pgtable.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 15de1708cfcd..0a674010afb6 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -156,10 +156,9 @@ static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte, struct kvm_pgtable_mm_ops *mm_op return mm_ops->phys_to_virt(kvm_pte_to_phys(pte)); } -static void kvm_set_invalid_pte(kvm_pte_t *ptep) +static void kvm_clear_pte(kvm_pte_t *ptep) { - kvm_pte_t pte = *ptep; - WRITE_ONCE(*ptep, pte & ~KVM_PTE_VALID); + WRITE_ONCE(*ptep, 0); } static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp, @@ -444,6 +443,7 @@ struct stage2_map_data { kvm_pte_t attr; kvm_pte_t *anchor; + kvm_pte_t *childp; struct kvm_s2_mmu *mmu; void *memcache; @@ -533,7 +533,7 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, * There's an existing different valid leaf entry, so perform * break-before-make. */ - kvm_set_invalid_pte(ptep); + kvm_clear_pte(ptep); kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level); mm_ops->put_page(ptep); } @@ -554,7 +554,8 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level, if (!kvm_block_mapping_supported(addr, end, data->phys, level)) return 0; - kvm_set_invalid_pte(ptep); + data->childp = kvm_pte_follow(*ptep, data->mm_ops); + kvm_clear_pte(ptep); /* * Invalidate the whole stage-2, as we may have numerous leaf @@ -600,7 +601,7 @@ static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, * will be mapped lazily. */ if (kvm_pte_valid(pte)) { - kvm_set_invalid_pte(ptep); + kvm_clear_pte(ptep); kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level); mm_ops->put_page(ptep); } @@ -616,19 +617,24 @@ static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level, struct stage2_map_data *data) { struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; + kvm_pte_t *childp; int ret = 0; if (!data->anchor) return 0; - mm_ops->put_page(kvm_pte_follow(*ptep, mm_ops)); - mm_ops->put_page(ptep); - if (data->anchor == ptep) { + childp = data->childp; data->anchor = NULL; + data->childp = NULL; ret = stage2_map_walk_leaf(addr, end, level, ptep, data); + } else { + childp = kvm_pte_follow(*ptep, mm_ops); } + mm_ops->put_page(childp); + mm_ops->put_page(ptep); + return ret; } @@ -737,7 +743,7 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, * block entry and rely on the remaining portions being faulted * back lazily. */ - kvm_set_invalid_pte(ptep); + kvm_clear_pte(ptep); kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, addr, level); mm_ops->put_page(ptep); -- cgit v1.2.3 From 807923e04a0f5c6c34dc2eb52ae544cb0e4e4e66 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:37 +0000 Subject: KVM: arm64: Use page-table to track page ownership As the host stage 2 will be identity mapped, all the .hyp memory regions and/or memory pages donated to protected guestis will have to marked invalid in the host stage 2 page-table. At the same time, the hypervisor will need a way to track the ownership of each physical page to ensure memory sharing or donation between entities (host, guests, hypervisor) is legal. In order to enable this tracking at EL2, let's use the host stage 2 page-table itself. The idea is to use the top bits of invalid mappings to store the unique identifier of the page owner. The page-table owner (the host) gets identifier 0 such that, at boot time, it owns the entire IPA space as the pgd starts zeroed. Provide kvm_pgtable_stage2_set_owner() which allows to modify the ownership of pages in the host stage 2. It re-uses most of the map() logic, but ends up creating invalid mappings instead. This impacts how we do refcount as we now need to count invalid mappings when they are used for ownership tracking. Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-30-qperret@google.com --- arch/arm64/include/asm/kvm_pgtable.h | 20 ++++++ arch/arm64/kvm/hyp/pgtable.c | 126 ++++++++++++++++++++++++++++------- 2 files changed, 122 insertions(+), 24 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 4ae19247837b..eea2e2b0acaa 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -238,6 +238,26 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, enum kvm_pgtable_prot prot, void *mc); +/** + * kvm_pgtable_stage2_set_owner() - Unmap and annotate pages in the IPA space to + * track ownership. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @addr: Base intermediate physical address to annotate. + * @size: Size of the annotated range. + * @mc: Cache of pre-allocated and zeroed memory from which to allocate + * page-table pages. + * @owner_id: Unique identifier for the owner of the page. + * + * By default, all page-tables are owned by identifier 0. This function can be + * used to mark portions of the IPA space as owned by other entities. When a + * stage 2 is used with identity-mappings, these annotations allow to use the + * page-table data structure as a simple rmap. + * + * Return: 0 on success, negative error code on failure. + */ +int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, + void *mc, u8 owner_id); + /** * kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 page-table. * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 0a674010afb6..eefb226e1d1e 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -48,6 +48,9 @@ KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \ KVM_PTE_LEAF_ATTR_HI_S2_XN) +#define KVM_INVALID_PTE_OWNER_MASK GENMASK(63, 56) +#define KVM_MAX_OWNER_ID 1 + struct kvm_pgtable_walk_data { struct kvm_pgtable *pgt; struct kvm_pgtable_walker *walker; @@ -67,6 +70,13 @@ static u64 kvm_granule_size(u32 level) return BIT(kvm_granule_shift(level)); } +#define KVM_PHYS_INVALID (-1ULL) + +static bool kvm_phys_is_valid(u64 phys) +{ + return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_PARANGE_MAX)); +} + static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level) { u64 granule = kvm_granule_size(level); @@ -81,7 +91,10 @@ static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level) if (granule > (end - addr)) return false; - return IS_ALIGNED(addr, granule) && IS_ALIGNED(phys, granule); + if (kvm_phys_is_valid(phys) && !IS_ALIGNED(phys, granule)) + return false; + + return IS_ALIGNED(addr, granule); } static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level) @@ -186,6 +199,11 @@ static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, u32 level) return pte; } +static kvm_pte_t kvm_init_invalid_leaf_owner(u8 owner_id) +{ + return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id); +} + static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr, u32 level, kvm_pte_t *ptep, enum kvm_pgtable_walk_flags flag) @@ -441,6 +459,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt) struct stage2_map_data { u64 phys; kvm_pte_t attr; + u8 owner_id; kvm_pte_t *anchor; kvm_pte_t *childp; @@ -507,6 +526,39 @@ static int stage2_map_set_prot_attr(enum kvm_pgtable_prot prot, return 0; } +static bool stage2_pte_needs_update(kvm_pte_t old, kvm_pte_t new) +{ + if (!kvm_pte_valid(old) || !kvm_pte_valid(new)) + return true; + + return ((old ^ new) & (~KVM_PTE_LEAF_ATTR_S2_PERMS)); +} + +static bool stage2_pte_is_counted(kvm_pte_t pte) +{ + /* + * The refcount tracks valid entries as well as invalid entries if they + * encode ownership of a page to another entity than the page-table + * owner, whose id is 0. + */ + return !!pte; +} + +static void stage2_put_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr, + u32 level, struct kvm_pgtable_mm_ops *mm_ops) +{ + /* + * Clear the existing PTE, and perform break-before-make with + * TLB maintenance if it was valid. + */ + if (kvm_pte_valid(*ptep)) { + kvm_clear_pte(ptep); + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, addr, level); + } + + mm_ops->put_page(ptep); +} + static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct stage2_map_data *data) @@ -518,29 +570,29 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, if (!kvm_block_mapping_supported(addr, end, phys, level)) return -E2BIG; - new = kvm_init_valid_leaf_pte(phys, data->attr, level); - if (kvm_pte_valid(old)) { + if (kvm_phys_is_valid(phys)) + new = kvm_init_valid_leaf_pte(phys, data->attr, level); + else + new = kvm_init_invalid_leaf_owner(data->owner_id); + + if (stage2_pte_is_counted(old)) { /* * Skip updating the PTE if we are trying to recreate the exact * same mapping or only change the access permissions. Instead, * the vCPU will exit one more time from guest if still needed * and then go through the path of relaxing permissions. */ - if (!((old ^ new) & (~KVM_PTE_LEAF_ATTR_S2_PERMS))) + if (!stage2_pte_needs_update(old, new)) return -EAGAIN; - /* - * There's an existing different valid leaf entry, so perform - * break-before-make. - */ - kvm_clear_pte(ptep); - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level); - mm_ops->put_page(ptep); + stage2_put_pte(ptep, data->mmu, addr, level, mm_ops); } smp_store_release(ptep, new); - mm_ops->get_page(ptep); - data->phys += granule; + if (stage2_pte_is_counted(new)) + mm_ops->get_page(ptep); + if (kvm_phys_is_valid(phys)) + data->phys += granule; return 0; } @@ -575,7 +627,7 @@ static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, int ret; if (data->anchor) { - if (kvm_pte_valid(pte)) + if (stage2_pte_is_counted(pte)) mm_ops->put_page(ptep); return 0; @@ -600,11 +652,8 @@ static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, * a table. Accesses beyond 'end' that fall within the new table * will be mapped lazily. */ - if (kvm_pte_valid(pte)) { - kvm_clear_pte(ptep); - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level); - mm_ops->put_page(ptep); - } + if (stage2_pte_is_counted(pte)) + stage2_put_pte(ptep, data->mmu, addr, level, mm_ops); kvm_set_table_pte(ptep, childp, mm_ops); mm_ops->get_page(ptep); @@ -702,6 +751,32 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, return ret; } +int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, + void *mc, u8 owner_id) +{ + int ret; + struct stage2_map_data map_data = { + .phys = KVM_PHYS_INVALID, + .mmu = pgt->mmu, + .memcache = mc, + .mm_ops = pgt->mm_ops, + .owner_id = owner_id, + }; + struct kvm_pgtable_walker walker = { + .cb = stage2_map_walker, + .flags = KVM_PGTABLE_WALK_TABLE_PRE | + KVM_PGTABLE_WALK_LEAF | + KVM_PGTABLE_WALK_TABLE_POST, + .arg = &map_data, + }; + + if (owner_id > KVM_MAX_OWNER_ID) + return -EINVAL; + + ret = kvm_pgtable_walk(pgt, addr, size, &walker); + return ret; +} + static void stage2_flush_dcache(void *addr, u64 size) { if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) @@ -726,8 +801,13 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, kvm_pte_t pte = *ptep, *childp = NULL; bool need_flush = false; - if (!kvm_pte_valid(pte)) + if (!kvm_pte_valid(pte)) { + if (stage2_pte_is_counted(pte)) { + kvm_clear_pte(ptep); + mm_ops->put_page(ptep); + } return 0; + } if (kvm_pte_table(pte, level)) { childp = kvm_pte_follow(pte, mm_ops); @@ -743,9 +823,7 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, * block entry and rely on the remaining portions being faulted * back lazily. */ - kvm_clear_pte(ptep); - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, addr, level); - mm_ops->put_page(ptep); + stage2_put_pte(ptep, mmu, addr, level, mm_ops); if (need_flush) { stage2_flush_dcache(kvm_pte_follow(pte, mm_ops), @@ -949,7 +1027,7 @@ static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct kvm_pgtable_mm_ops *mm_ops = arg; kvm_pte_t pte = *ptep; - if (!kvm_pte_valid(pte)) + if (!stage2_pte_is_counted(pte)) return 0; mm_ops->put_page(ptep); -- cgit v1.2.3 From 3fab82347ffb36c8b7b38dabc8e79276eeb1a81c Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:38 +0000 Subject: KVM: arm64: Refactor the *_map_set_prot_attr() helpers In order to ease their re-use in other code paths, refactor the *_map_set_prot_attr() helpers to not depend on a map_data struct. No functional change intended. Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-31-qperret@google.com --- arch/arm64/kvm/hyp/pgtable.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index eefb226e1d1e..f4a514a2e7ae 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -325,8 +325,7 @@ struct hyp_map_data { struct kvm_pgtable_mm_ops *mm_ops; }; -static int hyp_map_set_prot_attr(enum kvm_pgtable_prot prot, - struct hyp_map_data *data) +static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) { bool device = prot & KVM_PGTABLE_PROT_DEVICE; u32 mtype = device ? MT_DEVICE_nGnRE : MT_NORMAL; @@ -351,7 +350,8 @@ static int hyp_map_set_prot_attr(enum kvm_pgtable_prot prot, attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap); attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh); attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF; - data->attr = attr; + *ptep = attr; + return 0; } @@ -408,7 +408,7 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, .arg = &map_data, }; - ret = hyp_map_set_prot_attr(prot, &map_data); + ret = hyp_set_prot_attr(prot, &map_data.attr); if (ret) return ret; @@ -501,8 +501,7 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) return vtcr; } -static int stage2_map_set_prot_attr(enum kvm_pgtable_prot prot, - struct stage2_map_data *data) +static int stage2_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) { bool device = prot & KVM_PGTABLE_PROT_DEVICE; kvm_pte_t attr = device ? PAGE_S2_MEMATTR(DEVICE_nGnRE) : @@ -522,7 +521,8 @@ static int stage2_map_set_prot_attr(enum kvm_pgtable_prot prot, attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh); attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF; - data->attr = attr; + *ptep = attr; + return 0; } @@ -742,7 +742,7 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, .arg = &map_data, }; - ret = stage2_map_set_prot_attr(prot, &map_data); + ret = stage2_set_prot_attr(prot, &map_data.attr); if (ret) return ret; -- cgit v1.2.3 From 2fcb3a59401d2d12b5337b62c799eeb22cf40a2c Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:39 +0000 Subject: KVM: arm64: Add kvm_pgtable_stage2_find_range() Since the host stage 2 will be identity mapped, and since it will own most of memory, it would preferable for performance to try and use large block mappings whenever that is possible. To ease this, introduce a new helper in the KVM page-table code which allows to search for large ranges of available IPA space. This will be used in the host memory abort path to greedily idmap large portion of the PA space. Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-32-qperret@google.com --- arch/arm64/include/asm/kvm_pgtable.h | 29 ++++++++++++ arch/arm64/kvm/hyp/pgtable.c | 89 ++++++++++++++++++++++++++++++++++-- 2 files changed, 114 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index eea2e2b0acaa..e1fed14aee17 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -94,6 +94,16 @@ enum kvm_pgtable_prot { #define PAGE_HYP_RO (KVM_PGTABLE_PROT_R) #define PAGE_HYP_DEVICE (PAGE_HYP | KVM_PGTABLE_PROT_DEVICE) +/** + * struct kvm_mem_range - Range of Intermediate Physical Addresses + * @start: Start of the range. + * @end: End of the range. + */ +struct kvm_mem_range { + u64 start; + u64 end; +}; + /** * enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk. * @KVM_PGTABLE_WALK_LEAF: Visit leaf entries, including invalid @@ -397,4 +407,23 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size); int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, struct kvm_pgtable_walker *walker); +/** + * kvm_pgtable_stage2_find_range() - Find a range of Intermediate Physical + * Addresses with compatible permission + * attributes. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @addr: Address that must be covered by the range. + * @prot: Protection attributes that the range must be compatible with. + * @range: Range structure used to limit the search space at call time and + * that will hold the result. + * + * The offset of @addr within a page is ignored. An IPA is compatible with @prot + * iff its corresponding stage-2 page-table entry has default ownership and, if + * valid, is mapped with protection attributes identical to @prot. + * + * Return: 0 on success, negative error code on failure. + */ +int kvm_pgtable_stage2_find_range(struct kvm_pgtable *pgt, u64 addr, + enum kvm_pgtable_prot prot, + struct kvm_mem_range *range); #endif /* __ARM64_KVM_PGTABLE_H__ */ diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index f4a514a2e7ae..dc6ef2cfe3eb 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -48,6 +48,8 @@ KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \ KVM_PTE_LEAF_ATTR_HI_S2_XN) +#define KVM_PTE_LEAF_ATTR_S2_IGNORED GENMASK(58, 55) + #define KVM_INVALID_PTE_OWNER_MASK GENMASK(63, 56) #define KVM_MAX_OWNER_ID 1 @@ -77,15 +79,20 @@ static bool kvm_phys_is_valid(u64 phys) return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_PARANGE_MAX)); } -static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level) +static bool kvm_level_supports_block_mapping(u32 level) { - u64 granule = kvm_granule_size(level); - /* * Reject invalid block mappings and don't bother with 4TB mappings for * 52-bit PAs. */ - if (level == 0 || (PAGE_SIZE != SZ_4K && level == 1)) + return !(level == 0 || (PAGE_SIZE != SZ_4K && level == 1)); +} + +static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level) +{ + u64 granule = kvm_granule_size(level); + + if (!kvm_level_supports_block_mapping(level)) return false; if (granule > (end - addr)) @@ -1053,3 +1060,77 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) pgt->mm_ops->free_pages_exact(pgt->pgd, pgd_sz); pgt->pgd = NULL; } + +#define KVM_PTE_LEAF_S2_COMPAT_MASK (KVM_PTE_LEAF_ATTR_S2_PERMS | \ + KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR | \ + KVM_PTE_LEAF_ATTR_S2_IGNORED) + +static int stage2_check_permission_walker(u64 addr, u64 end, u32 level, + kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, + void * const arg) +{ + kvm_pte_t old_attr, pte = *ptep, *new_attr = arg; + + /* + * Compatible mappings are either invalid and owned by the page-table + * owner (whose id is 0), or valid with matching permission attributes. + */ + if (kvm_pte_valid(pte)) { + old_attr = pte & KVM_PTE_LEAF_S2_COMPAT_MASK; + if (old_attr != *new_attr) + return -EEXIST; + } else if (pte) { + return -EEXIST; + } + + return 0; +} + +int kvm_pgtable_stage2_find_range(struct kvm_pgtable *pgt, u64 addr, + enum kvm_pgtable_prot prot, + struct kvm_mem_range *range) +{ + kvm_pte_t attr; + struct kvm_pgtable_walker check_perm_walker = { + .cb = stage2_check_permission_walker, + .flags = KVM_PGTABLE_WALK_LEAF, + .arg = &attr, + }; + u64 granule, start, end; + u32 level; + int ret; + + ret = stage2_set_prot_attr(prot, &attr); + if (ret) + return ret; + attr &= KVM_PTE_LEAF_S2_COMPAT_MASK; + + for (level = pgt->start_level; level < KVM_PGTABLE_MAX_LEVELS; level++) { + granule = kvm_granule_size(level); + start = ALIGN_DOWN(addr, granule); + end = start + granule; + + if (!kvm_level_supports_block_mapping(level)) + continue; + + if (start < range->start || range->end < end) + continue; + + /* + * Check the presence of existing mappings with incompatible + * permissions within the current block range, and try one level + * deeper if one is found. + */ + ret = kvm_pgtable_walk(pgt, start, granule, &check_perm_walker); + if (ret != -EEXIST) + break; + } + + if (!ret) { + range->start = start; + range->end = end; + } + + return ret; +} -- cgit v1.2.3 From bc224df155c466178128a2950af16cba37b6f218 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:40 +0000 Subject: KVM: arm64: Introduce KVM_PGTABLE_S2_NOFWB stage 2 flag In order to further configure stage 2 page-tables, pass flags to the init function using a new enum. The first of these flags allows to disable FWB even if the hardware supports it as we will need to do so for the host stage 2. Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-33-qperret@google.com --- arch/arm64/include/asm/kvm_pgtable.h | 43 ++++++++++++++++++--------- arch/arm64/include/asm/pgtable-prot.h | 4 +-- arch/arm64/kvm/hyp/pgtable.c | 56 +++++++++++++++++++---------------- 3 files changed, 62 insertions(+), 41 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index e1fed14aee17..55452f4831d2 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -56,6 +56,15 @@ struct kvm_pgtable_mm_ops { phys_addr_t (*virt_to_phys)(void *addr); }; +/** + * enum kvm_pgtable_stage2_flags - Stage-2 page-table flags. + * @KVM_PGTABLE_S2_NOFWB: Don't enforce Normal-WB even if the CPUs have + * ARM64_HAS_STAGE2_FWB. + */ +enum kvm_pgtable_stage2_flags { + KVM_PGTABLE_S2_NOFWB = BIT(0), +}; + /** * struct kvm_pgtable - KVM page-table. * @ia_bits: Maximum input address size, in bits. @@ -72,6 +81,7 @@ struct kvm_pgtable { /* Stage-2 only */ struct kvm_s2_mmu *mmu; + enum kvm_pgtable_stage2_flags flags; }; /** @@ -196,20 +206,25 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift); /** - * kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table. + * kvm_pgtable_stage2_init_flags() - Initialise a guest stage-2 page-table. * @pgt: Uninitialised page-table structure to initialise. * @arch: Arch-specific KVM structure representing the guest virtual * machine. * @mm_ops: Memory management callbacks. + * @flags: Stage-2 configuration flags. * * Return: 0 on success, negative error code on failure. */ -int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch, - struct kvm_pgtable_mm_ops *mm_ops); +int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch, + struct kvm_pgtable_mm_ops *mm_ops, + enum kvm_pgtable_stage2_flags flags); + +#define kvm_pgtable_stage2_init(pgt, arch, mm_ops) \ + kvm_pgtable_stage2_init_flags(pgt, arch, mm_ops, 0) /** * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table. - * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). * * The page-table is assumed to be unreachable by any hardware walkers prior * to freeing and therefore no TLB invalidation is performed. @@ -218,7 +233,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); /** * kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table. - * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). * @addr: Intermediate physical address at which to place the mapping. * @size: Size of the mapping. * @phys: Physical address of the memory to map. @@ -251,7 +266,7 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, /** * kvm_pgtable_stage2_set_owner() - Unmap and annotate pages in the IPA space to * track ownership. - * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). * @addr: Base intermediate physical address to annotate. * @size: Size of the annotated range. * @mc: Cache of pre-allocated and zeroed memory from which to allocate @@ -270,7 +285,7 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, /** * kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 page-table. - * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). * @addr: Intermediate physical address from which to remove the mapping. * @size: Size of the mapping. * @@ -290,7 +305,7 @@ int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size); /** * kvm_pgtable_stage2_wrprotect() - Write-protect guest stage-2 address range * without TLB invalidation. - * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). * @addr: Intermediate physical address from which to write-protect, * @size: Size of the range. * @@ -307,7 +322,7 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size); /** * kvm_pgtable_stage2_mkyoung() - Set the access flag in a page-table entry. - * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). * @addr: Intermediate physical address to identify the page-table entry. * * The offset of @addr within a page is ignored. @@ -321,7 +336,7 @@ kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr); /** * kvm_pgtable_stage2_mkold() - Clear the access flag in a page-table entry. - * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). * @addr: Intermediate physical address to identify the page-table entry. * * The offset of @addr within a page is ignored. @@ -340,7 +355,7 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr); /** * kvm_pgtable_stage2_relax_perms() - Relax the permissions enforced by a * page-table entry. - * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). * @addr: Intermediate physical address to identify the page-table entry. * @prot: Additional permissions to grant for the mapping. * @@ -359,7 +374,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, /** * kvm_pgtable_stage2_is_young() - Test whether a page-table entry has the * access flag set. - * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). * @addr: Intermediate physical address to identify the page-table entry. * * The offset of @addr within a page is ignored. @@ -372,7 +387,7 @@ bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr); * kvm_pgtable_stage2_flush_range() - Clean and invalidate data cache to Point * of Coherency for guest stage-2 address * range. - * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). * @addr: Intermediate physical address from which to flush. * @size: Size of the range. * @@ -411,7 +426,7 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, * kvm_pgtable_stage2_find_range() - Find a range of Intermediate Physical * Addresses with compatible permission * attributes. - * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). * @addr: Address that must be covered by the range. * @prot: Protection attributes that the range must be compatible with. * @range: Range structure used to limit the search space at call time and diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 9a65fb528110..079f4e9a3e84 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -71,10 +71,10 @@ extern bool arm64_use_ng_mappings; #define PAGE_KERNEL_EXEC __pgprot(PROT_NORMAL & ~PTE_PXN) #define PAGE_KERNEL_EXEC_CONT __pgprot((PROT_NORMAL & ~PTE_PXN) | PTE_CONT) -#define PAGE_S2_MEMATTR(attr) \ +#define PAGE_S2_MEMATTR(attr, has_fwb) \ ({ \ u64 __val; \ - if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) \ + if (has_fwb) \ __val = PTE_S2_MEMATTR(MT_S2_FWB_ ## attr); \ else \ __val = PTE_S2_MEMATTR(MT_S2_ ## attr); \ diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index dc6ef2cfe3eb..b22b4860630c 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -508,11 +508,22 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) return vtcr; } -static int stage2_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) +static bool stage2_has_fwb(struct kvm_pgtable *pgt) +{ + if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) + return false; + + return !(pgt->flags & KVM_PGTABLE_S2_NOFWB); +} + +#define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt)) + +static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot, + kvm_pte_t *ptep) { bool device = prot & KVM_PGTABLE_PROT_DEVICE; - kvm_pte_t attr = device ? PAGE_S2_MEMATTR(DEVICE_nGnRE) : - PAGE_S2_MEMATTR(NORMAL); + kvm_pte_t attr = device ? KVM_S2_MEMATTR(pgt, DEVICE_nGnRE) : + KVM_S2_MEMATTR(pgt, NORMAL); u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS; if (!(prot & KVM_PGTABLE_PROT_X)) @@ -749,7 +760,7 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, .arg = &map_data, }; - ret = stage2_set_prot_attr(prot, &map_data.attr); + ret = stage2_set_prot_attr(pgt, prot, &map_data.attr); if (ret) return ret; @@ -784,18 +795,10 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, return ret; } -static void stage2_flush_dcache(void *addr, u64 size) -{ - if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) - return; - - __flush_dcache_area(addr, size); -} - -static bool stage2_pte_cacheable(kvm_pte_t pte) +static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte) { u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR; - return memattr == PAGE_S2_MEMATTR(NORMAL); + return memattr == KVM_S2_MEMATTR(pgt, NORMAL); } static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, @@ -821,8 +824,8 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, if (mm_ops->page_count(childp) != 1) return 0; - } else if (stage2_pte_cacheable(pte)) { - need_flush = true; + } else if (stage2_pte_cacheable(pgt, pte)) { + need_flush = !stage2_has_fwb(pgt); } /* @@ -833,7 +836,7 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, stage2_put_pte(ptep, mmu, addr, level, mm_ops); if (need_flush) { - stage2_flush_dcache(kvm_pte_follow(pte, mm_ops), + __flush_dcache_area(kvm_pte_follow(pte, mm_ops), kvm_granule_size(level)); } @@ -979,13 +982,14 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, enum kvm_pgtable_walk_flags flag, void * const arg) { - struct kvm_pgtable_mm_ops *mm_ops = arg; + struct kvm_pgtable *pgt = arg; + struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; kvm_pte_t pte = *ptep; - if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pte)) + if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte)) return 0; - stage2_flush_dcache(kvm_pte_follow(pte, mm_ops), kvm_granule_size(level)); + __flush_dcache_area(kvm_pte_follow(pte, mm_ops), kvm_granule_size(level)); return 0; } @@ -994,17 +998,18 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size) struct kvm_pgtable_walker walker = { .cb = stage2_flush_walker, .flags = KVM_PGTABLE_WALK_LEAF, - .arg = pgt->mm_ops, + .arg = pgt, }; - if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) + if (stage2_has_fwb(pgt)) return 0; return kvm_pgtable_walk(pgt, addr, size, &walker); } -int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch, - struct kvm_pgtable_mm_ops *mm_ops) +int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch, + struct kvm_pgtable_mm_ops *mm_ops, + enum kvm_pgtable_stage2_flags flags) { size_t pgd_sz; u64 vtcr = arch->vtcr; @@ -1021,6 +1026,7 @@ int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch, pgt->start_level = start_level; pgt->mm_ops = mm_ops; pgt->mmu = &arch->mmu; + pgt->flags = flags; /* Ensure zeroed PGD pages are visible to the hardware walker */ dsb(ishst); @@ -1101,7 +1107,7 @@ int kvm_pgtable_stage2_find_range(struct kvm_pgtable *pgt, u64 addr, u32 level; int ret; - ret = stage2_set_prot_attr(prot, &attr); + ret = stage2_set_prot_attr(pgt, prot, &attr); if (ret) return ret; attr &= KVM_PTE_LEAF_S2_COMPAT_MASK; -- cgit v1.2.3 From 8942a237c771b65f8bc1232536e4b4b829c7701f Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:41 +0000 Subject: KVM: arm64: Introduce KVM_PGTABLE_S2_IDMAP stage 2 flag Introduce a new stage 2 configuration flag to specify that all mappings in a given page-table will be identity-mapped, as will be the case for the host. This allows to introduce sanity checks in the map path and to avoid programming errors. Suggested-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-34-qperret@google.com --- arch/arm64/include/asm/kvm_pgtable.h | 2 ++ arch/arm64/kvm/hyp/pgtable.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 55452f4831d2..c3674c47d48c 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -60,9 +60,11 @@ struct kvm_pgtable_mm_ops { * enum kvm_pgtable_stage2_flags - Stage-2 page-table flags. * @KVM_PGTABLE_S2_NOFWB: Don't enforce Normal-WB even if the CPUs have * ARM64_HAS_STAGE2_FWB. + * @KVM_PGTABLE_S2_IDMAP: Only use identity mappings. */ enum kvm_pgtable_stage2_flags { KVM_PGTABLE_S2_NOFWB = BIT(0), + KVM_PGTABLE_S2_IDMAP = BIT(1), }; /** diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index b22b4860630c..c37c1dc4feaf 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -760,6 +760,9 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, .arg = &map_data, }; + if (WARN_ON((pgt->flags & KVM_PGTABLE_S2_IDMAP) && (addr != phys))) + return -EINVAL; + ret = stage2_set_prot_attr(pgt, prot, &map_data.attr); if (ret) return ret; -- cgit v1.2.3 From def1aaf9e0bc6987bb4b417aac37226e994a1a74 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:42 +0000 Subject: KVM: arm64: Provide sanitized mmfr* registers at EL2 We will need to read sanitized values of mmfr{0,1}_el1 at EL2 soon, so add them to the list of copied variables. Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-35-qperret@google.com --- arch/arm64/include/asm/kvm_cpufeature.h | 2 ++ arch/arm64/kvm/hyp/nvhe/hyp-smp.c | 2 ++ arch/arm64/kvm/sys_regs.c | 2 ++ 3 files changed, 6 insertions(+) diff --git a/arch/arm64/include/asm/kvm_cpufeature.h b/arch/arm64/include/asm/kvm_cpufeature.h index c2e7735f502b..ff302d15e840 100644 --- a/arch/arm64/include/asm/kvm_cpufeature.h +++ b/arch/arm64/include/asm/kvm_cpufeature.h @@ -20,5 +20,7 @@ #endif DECLARE_KVM_HYP_CPU_FTR_REG(arm64_ftr_reg_ctrel0); +DECLARE_KVM_HYP_CPU_FTR_REG(arm64_ftr_reg_id_aa64mmfr0_el1); +DECLARE_KVM_HYP_CPU_FTR_REG(arm64_ftr_reg_id_aa64mmfr1_el1); #endif diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c index 71f00aca90e7..17ad1b3a9530 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c @@ -13,6 +13,8 @@ * Copies of the host's CPU features registers holding sanitized values. */ DEFINE_KVM_HYP_CPU_FTR_REG(arm64_ftr_reg_ctrel0); +DEFINE_KVM_HYP_CPU_FTR_REG(arm64_ftr_reg_id_aa64mmfr0_el1); +DEFINE_KVM_HYP_CPU_FTR_REG(arm64_ftr_reg_id_aa64mmfr1_el1); /* * nVHE copy of data structures tracking available CPU cores. diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 3ec34c25e877..dfb3b4f9ca84 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2784,6 +2784,8 @@ struct __ftr_reg_copy_entry { struct arm64_ftr_reg *dst; } hyp_ftr_regs[] __initdata = { CPU_FTR_REG_HYP_COPY(SYS_CTR_EL0, arm64_ftr_reg_ctrel0), + CPU_FTR_REG_HYP_COPY(SYS_ID_AA64MMFR0_EL1, arm64_ftr_reg_id_aa64mmfr0_el1), + CPU_FTR_REG_HYP_COPY(SYS_ID_AA64MMFR1_EL1, arm64_ftr_reg_id_aa64mmfr1_el1), }; void __init setup_kvm_el2_caps(void) -- cgit v1.2.3 From 1025c8c0c6accfcbdc8f52ca1940160f65cd87d6 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:43 +0000 Subject: KVM: arm64: Wrap the host with a stage 2 When KVM runs in protected nVHE mode, make use of a stage 2 page-table to give the hypervisor some control over the host memory accesses. The host stage 2 is created lazily using large block mappings if possible, and will default to page mappings in absence of a better solution. >From this point on, memory accesses from the host to protected memory regions (e.g. not 'owned' by the host) are fatal and lead to hyp_panic(). Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-36-qperret@google.com --- arch/arm64/include/asm/kvm_asm.h | 1 + arch/arm64/kernel/image-vars.h | 3 + arch/arm64/kvm/arm.c | 10 ++ arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 34 ++++ arch/arm64/kvm/hyp/nvhe/Makefile | 2 +- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 1 + arch/arm64/kvm/hyp/nvhe/hyp-main.c | 10 ++ arch/arm64/kvm/hyp/nvhe/mem_protect.c | 246 ++++++++++++++++++++++++++ arch/arm64/kvm/hyp/nvhe/setup.c | 5 + arch/arm64/kvm/hyp/nvhe/switch.c | 7 +- arch/arm64/kvm/hyp/nvhe/tlb.c | 4 +- 11 files changed, 316 insertions(+), 7 deletions(-) create mode 100644 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h create mode 100644 arch/arm64/kvm/hyp/nvhe/mem_protect.c diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 08f63c09cd11..4149283b4cd1 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -61,6 +61,7 @@ #define __KVM_HOST_SMCCC_FUNC___pkvm_create_mappings 16 #define __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping 17 #define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector 18 +#define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize 19 #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 940c378fa837..d5dc2b792651 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -131,6 +131,9 @@ KVM_NVHE_ALIAS(__hyp_bss_end); KVM_NVHE_ALIAS(__hyp_rodata_start); KVM_NVHE_ALIAS(__hyp_rodata_end); +/* pKVM static key */ +KVM_NVHE_ALIAS(kvm_protected_mode_initialized); + #endif /* CONFIG_KVM */ #endif /* __ARM64_KERNEL_IMAGE_VARS_H */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index a6b5ba195ca9..d237c378e6fb 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1894,12 +1894,22 @@ out_err: return err; } +void _kvm_host_prot_finalize(void *discard) +{ + WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)); +} + static int finalize_hyp_mode(void) { if (!is_protected_kvm_enabled()) return 0; + /* + * Flip the static key upfront as that may no longer be possible + * once the host stage 2 is installed. + */ static_branch_enable(&kvm_protected_mode_initialized); + on_each_cpu(_kvm_host_prot_finalize, NULL, 1); return 0; } diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h new file mode 100644 index 000000000000..d293cb328cc4 --- /dev/null +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 Google LLC + * Author: Quentin Perret + */ + +#ifndef __KVM_NVHE_MEM_PROTECT__ +#define __KVM_NVHE_MEM_PROTECT__ +#include +#include +#include +#include +#include + +struct host_kvm { + struct kvm_arch arch; + struct kvm_pgtable pgt; + struct kvm_pgtable_mm_ops mm_ops; + hyp_spinlock_t lock; +}; +extern struct host_kvm host_kvm; + +int __pkvm_prot_finalize(void); +int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool); +void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); + +static __always_inline void __load_host_stage2(void) +{ + if (static_branch_likely(&kvm_protected_mode_initialized)) + __load_stage2(&host_kvm.arch.mmu, host_kvm.arch.vtcr); + else + write_sysreg(0, vttbr_el2); +} +#endif /* __KVM_NVHE_MEM_PROTECT__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index b334354b8dd0..f55201a7ff33 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -14,7 +14,7 @@ lib-objs := $(addprefix ../../../lib/, $(lib-objs)) obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \ hyp-main.o hyp-smp.o psci-relay.o early_alloc.o stub.o page_alloc.o \ - cache.o setup.o mm.o + cache.o setup.o mm.o mem_protect.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o obj-y += $(lib-objs) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index a50ad9e9fc05..c164045af238 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -119,6 +119,7 @@ alternative_else_nop_endif /* Invalidate the stale TLBs from Bootloader */ tlbi alle2 + tlbi vmalls12e1 dsb sy /* diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index a571cee99a5c..69163f2cbb63 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -13,6 +13,7 @@ #include #include +#include #include #include @@ -151,6 +152,10 @@ static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ct cpu_reg(host_ctxt, 1) = __pkvm_create_private_mapping(phys, size, prot); } +static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt) +{ + cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize(); +} typedef void (*hcall_t)(struct kvm_cpu_context *); #define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x @@ -174,6 +179,7 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__pkvm_cpu_set_vector), HANDLE_FUNC(__pkvm_create_mappings), HANDLE_FUNC(__pkvm_create_private_mapping), + HANDLE_FUNC(__pkvm_prot_finalize), }; static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) @@ -231,6 +237,10 @@ void handle_trap(struct kvm_cpu_context *host_ctxt) isb(); sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2); break; + case ESR_ELx_EC_IABT_LOW: + case ESR_ELx_EC_DABT_LOW: + handle_host_mem_abort(host_ctxt); + break; default: hyp_panic(); } diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c new file mode 100644 index 000000000000..77b48c47344d --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -0,0 +1,246 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Google LLC + * Author: Quentin Perret + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP) + +extern unsigned long hyp_nr_cpus; +struct host_kvm host_kvm; + +struct hyp_pool host_s2_mem; +struct hyp_pool host_s2_dev; + +static void *host_s2_zalloc_pages_exact(size_t size) +{ + return hyp_alloc_pages(&host_s2_mem, get_order(size)); +} + +static void *host_s2_zalloc_page(void *pool) +{ + return hyp_alloc_pages(pool, 0); +} + +static int prepare_s2_pools(void *mem_pgt_pool, void *dev_pgt_pool) +{ + unsigned long nr_pages, pfn; + int ret; + + pfn = hyp_virt_to_pfn(mem_pgt_pool); + nr_pages = host_s2_mem_pgtable_pages(); + ret = hyp_pool_init(&host_s2_mem, pfn, nr_pages, 0); + if (ret) + return ret; + + pfn = hyp_virt_to_pfn(dev_pgt_pool); + nr_pages = host_s2_dev_pgtable_pages(); + ret = hyp_pool_init(&host_s2_dev, pfn, nr_pages, 0); + if (ret) + return ret; + + host_kvm.mm_ops = (struct kvm_pgtable_mm_ops) { + .zalloc_pages_exact = host_s2_zalloc_pages_exact, + .zalloc_page = host_s2_zalloc_page, + .phys_to_virt = hyp_phys_to_virt, + .virt_to_phys = hyp_virt_to_phys, + .page_count = hyp_page_count, + .get_page = hyp_get_page, + .put_page = hyp_put_page, + }; + + return 0; +} + +static void prepare_host_vtcr(void) +{ + u32 parange, phys_shift; + u64 mmfr0, mmfr1; + + mmfr0 = arm64_ftr_reg_id_aa64mmfr0_el1.sys_val; + mmfr1 = arm64_ftr_reg_id_aa64mmfr1_el1.sys_val; + + /* The host stage 2 is id-mapped, so use parange for T0SZ */ + parange = kvm_get_parange(mmfr0); + phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange); + + host_kvm.arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift); +} + +int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool) +{ + struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu; + int ret; + + prepare_host_vtcr(); + hyp_spin_lock_init(&host_kvm.lock); + + ret = prepare_s2_pools(mem_pgt_pool, dev_pgt_pool); + if (ret) + return ret; + + ret = kvm_pgtable_stage2_init_flags(&host_kvm.pgt, &host_kvm.arch, + &host_kvm.mm_ops, KVM_HOST_S2_FLAGS); + if (ret) + return ret; + + mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd); + mmu->arch = &host_kvm.arch; + mmu->pgt = &host_kvm.pgt; + mmu->vmid.vmid_gen = 0; + mmu->vmid.vmid = 0; + + return 0; +} + +int __pkvm_prot_finalize(void) +{ + struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu; + struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params); + + params->vttbr = kvm_get_vttbr(mmu); + params->vtcr = host_kvm.arch.vtcr; + params->hcr_el2 |= HCR_VM; + kvm_flush_dcache_to_poc(params, sizeof(*params)); + + write_sysreg(params->hcr_el2, hcr_el2); + __load_stage2(&host_kvm.arch.mmu, host_kvm.arch.vtcr); + + /* + * Make sure to have an ISB before the TLB maintenance below but only + * when __load_stage2() doesn't include one already. + */ + asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT)); + + /* Invalidate stale HCR bits that may be cached in TLBs */ + __tlbi(vmalls12e1); + dsb(nsh); + isb(); + + return 0; +} + +static int host_stage2_unmap_dev_all(void) +{ + struct kvm_pgtable *pgt = &host_kvm.pgt; + struct memblock_region *reg; + u64 addr = 0; + int i, ret; + + /* Unmap all non-memory regions to recycle the pages */ + for (i = 0; i < hyp_memblock_nr; i++, addr = reg->base + reg->size) { + reg = &hyp_memory[i]; + ret = kvm_pgtable_stage2_unmap(pgt, addr, reg->base - addr); + if (ret) + return ret; + } + return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr); +} + +static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range) +{ + int cur, left = 0, right = hyp_memblock_nr; + struct memblock_region *reg; + phys_addr_t end; + + range->start = 0; + range->end = ULONG_MAX; + + /* The list of memblock regions is sorted, binary search it */ + while (left < right) { + cur = (left + right) >> 1; + reg = &hyp_memory[cur]; + end = reg->base + reg->size; + if (addr < reg->base) { + right = cur; + range->end = reg->base; + } else if (addr >= end) { + left = cur + 1; + range->start = end; + } else { + range->start = reg->base; + range->end = end; + return true; + } + } + + return false; +} + +static inline int __host_stage2_idmap(u64 start, u64 end, + enum kvm_pgtable_prot prot, + struct hyp_pool *pool) +{ + return kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start, + prot, pool); +} + +static int host_stage2_idmap(u64 addr) +{ + enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W; + struct kvm_mem_range range; + bool is_memory = find_mem_range(addr, &range); + struct hyp_pool *pool = is_memory ? &host_s2_mem : &host_s2_dev; + int ret; + + if (is_memory) + prot |= KVM_PGTABLE_PROT_X; + + hyp_spin_lock(&host_kvm.lock); + ret = kvm_pgtable_stage2_find_range(&host_kvm.pgt, addr, prot, &range); + if (ret) + goto unlock; + + ret = __host_stage2_idmap(range.start, range.end, prot, pool); + if (is_memory || ret != -ENOMEM) + goto unlock; + + /* + * host_s2_mem has been provided with enough pages to cover all of + * memory with page granularity, so we should never hit the ENOMEM case. + * However, it is difficult to know how much of the MMIO range we will + * need to cover upfront, so we may need to 'recycle' the pages if we + * run out. + */ + ret = host_stage2_unmap_dev_all(); + if (ret) + goto unlock; + + ret = __host_stage2_idmap(range.start, range.end, prot, pool); + +unlock: + hyp_spin_unlock(&host_kvm.lock); + + return ret; +} + +void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) +{ + struct kvm_vcpu_fault_info fault; + u64 esr, addr; + int ret = 0; + + esr = read_sysreg_el2(SYS_ESR); + if (!__get_fault_info(esr, &fault)) + hyp_panic(); + + addr = (fault.hpfar_el2 & HPFAR_MASK) << 8; + ret = host_stage2_idmap(addr); + if (ret && ret != -EAGAIN) + hyp_panic(); +} diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index c1a3e7e0ebbc..7488f53b0aa2 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -157,6 +158,10 @@ void __noreturn __pkvm_init_finalise(void) if (ret) goto out; + ret = kvm_host_prepare_stage2(host_s2_mem_pgt_base, host_s2_dev_pgt_base); + if (ret) + goto out; + pkvm_pgtable_mm_ops = (struct kvm_pgtable_mm_ops) { .zalloc_page = hyp_zalloc_hyp_page, .phys_to_virt = hyp_phys_to_virt, diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 99323563022a..5fb570e68831 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -28,6 +28,8 @@ #include #include +#include + /* Non-VHE specific context */ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data); DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt); @@ -107,11 +109,6 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) write_sysreg(__kvm_hyp_host_vector, vbar_el2); } -static void __load_host_stage2(void) -{ - write_sysreg(0, vttbr_el2); -} - /* Save VGICv3 state on non-VHE systems */ static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu) { diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index 229b06748c20..83dc3b271bc5 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -8,6 +8,8 @@ #include #include +#include + struct tlb_inv_context { u64 tcr; }; @@ -43,7 +45,7 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, static void __tlb_switch_to_host(struct tlb_inv_context *cxt) { - write_sysreg(0, vttbr_el2); + __load_host_stage2(); if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { /* Ensure write of the host VMID */ -- cgit v1.2.3 From b83042f0f143a5e9e899924987b542b2ac766e53 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:44 +0000 Subject: KVM: arm64: Page-align the .hyp sections We will soon unmap the .hyp sections from the host stage 2 in Protected nVHE mode, which obviously works with at least page granularity, so make sure to align them correctly. Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-37-qperret@google.com --- arch/arm64/kernel/vmlinux.lds.S | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index e96173ce211b..709d2c433c5e 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -15,9 +15,11 @@ #define HYPERVISOR_DATA_SECTIONS \ HYP_SECTION_NAME(.rodata) : { \ + . = ALIGN(PAGE_SIZE); \ __hyp_rodata_start = .; \ *(HYP_SECTION_NAME(.data..ro_after_init)) \ *(HYP_SECTION_NAME(.rodata)) \ + . = ALIGN(PAGE_SIZE); \ __hyp_rodata_end = .; \ } @@ -72,21 +74,14 @@ ENTRY(_text) jiffies = jiffies_64; #define HYPERVISOR_TEXT \ - /* \ - * Align to 4 KB so that \ - * a) the HYP vector table is at its minimum \ - * alignment of 2048 bytes \ - * b) the HYP init code will not cross a page \ - * boundary if its size does not exceed \ - * 4 KB (see related ASSERT() below) \ - */ \ - . = ALIGN(SZ_4K); \ + . = ALIGN(PAGE_SIZE); \ __hyp_idmap_text_start = .; \ *(.hyp.idmap.text) \ __hyp_idmap_text_end = .; \ __hyp_text_start = .; \ *(.hyp.text) \ HYPERVISOR_EXTABLE \ + . = ALIGN(PAGE_SIZE); \ __hyp_text_end = .; #define IDMAP_TEXT \ @@ -322,11 +317,12 @@ SECTIONS #include "image-vars.h" /* - * The HYP init code and ID map text can't be longer than a page each, - * and should not cross a page boundary. + * The HYP init code and ID map text can't be longer than a page each. The + * former is page-aligned, but the latter may not be with 16K or 64K pages, so + * it should also not cross a page boundary. */ -ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K, - "HYP init code too big or misaligned") +ASSERT(__hyp_idmap_text_end - __hyp_idmap_text_start <= PAGE_SIZE, + "HYP init code too big") ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K, "ID map text too big or misaligned") #ifdef CONFIG_HIBERNATION -- cgit v1.2.3 From 9589a38cdfeba0889590e6ef4627b439034d456c Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:45 +0000 Subject: KVM: arm64: Disable PMU support in protected mode The host currently writes directly in EL2 per-CPU data sections from the PMU code when running in nVHE. In preparation for unmapping the EL2 sections from the host stage 2, disable PMU support in protected mode as we currently do not have a use-case for it. Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-38-qperret@google.com --- arch/arm64/kvm/perf.c | 3 ++- arch/arm64/kvm/pmu.c | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kvm/perf.c b/arch/arm64/kvm/perf.c index 739164324afe..8f860ae56bb7 100644 --- a/arch/arm64/kvm/perf.c +++ b/arch/arm64/kvm/perf.c @@ -55,7 +55,8 @@ int kvm_perf_init(void) * hardware performance counters. This could ensure the presence of * a physical PMU and CONFIG_PERF_EVENT is selected. */ - if (IS_ENABLED(CONFIG_ARM_PMU) && perf_num_counters() > 0) + if (IS_ENABLED(CONFIG_ARM_PMU) && perf_num_counters() > 0 + && !is_protected_kvm_enabled()) static_branch_enable(&kvm_arm_pmu_available); return perf_register_guest_info_callbacks(&kvm_guest_cbs); diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c index faf32a44ba04..03a6c1f4a09a 100644 --- a/arch/arm64/kvm/pmu.c +++ b/arch/arm64/kvm/pmu.c @@ -33,7 +33,7 @@ void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) { struct kvm_host_data *ctx = this_cpu_ptr_hyp_sym(kvm_host_data); - if (!ctx || !kvm_pmu_switch_needed(attr)) + if (!kvm_arm_support_pmu_v3() || !ctx || !kvm_pmu_switch_needed(attr)) return; if (!attr->exclude_host) @@ -49,7 +49,7 @@ void kvm_clr_pmu_events(u32 clr) { struct kvm_host_data *ctx = this_cpu_ptr_hyp_sym(kvm_host_data); - if (!ctx) + if (!kvm_arm_support_pmu_v3() || !ctx) return; ctx->pmu_events.events_host &= ~clr; @@ -172,7 +172,7 @@ void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) struct kvm_host_data *host; u32 events_guest, events_host; - if (!has_vhe()) + if (!kvm_arm_support_pmu_v3() || !has_vhe()) return; preempt_disable(); @@ -193,7 +193,7 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) struct kvm_host_data *host; u32 events_guest, events_host; - if (!has_vhe()) + if (!kvm_arm_support_pmu_v3() || !has_vhe()) return; host = this_cpu_ptr_hyp_sym(kvm_host_data); -- cgit v1.2.3 From 90134ac9cabb69972d0a509bf08e108a73442184 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 19 Mar 2021 10:01:46 +0000 Subject: KVM: arm64: Protect the .hyp sections from the host When KVM runs in nVHE protected mode, use the host stage 2 to unmap the hypervisor sections by marking them as owned by the hypervisor itself. The long-term goal is to ensure the EL2 code can remain robust regardless of the host's state, so this starts by making sure the host cannot e.g. write to the .hyp sections directly. Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319100146.1149909-39-qperret@google.com --- arch/arm64/include/asm/kvm_asm.h | 1 + arch/arm64/kvm/arm.c | 46 +++++++++++++++++++++++++++ arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 2 ++ arch/arm64/kvm/hyp/nvhe/hyp-main.c | 9 ++++++ arch/arm64/kvm/hyp/nvhe/mem_protect.c | 33 +++++++++++++++++++ 5 files changed, 91 insertions(+) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 4149283b4cd1..cf8df032b9c3 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -62,6 +62,7 @@ #define __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping 17 #define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector 18 #define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize 19 +#define __KVM_HOST_SMCCC_FUNC___pkvm_mark_hyp 20 #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index d237c378e6fb..368159021dee 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1899,11 +1899,57 @@ void _kvm_host_prot_finalize(void *discard) WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)); } +static inline int pkvm_mark_hyp(phys_addr_t start, phys_addr_t end) +{ + return kvm_call_hyp_nvhe(__pkvm_mark_hyp, start, end); +} + +#define pkvm_mark_hyp_section(__section) \ + pkvm_mark_hyp(__pa_symbol(__section##_start), \ + __pa_symbol(__section##_end)) + static int finalize_hyp_mode(void) { + int cpu, ret; + if (!is_protected_kvm_enabled()) return 0; + ret = pkvm_mark_hyp_section(__hyp_idmap_text); + if (ret) + return ret; + + ret = pkvm_mark_hyp_section(__hyp_text); + if (ret) + return ret; + + ret = pkvm_mark_hyp_section(__hyp_rodata); + if (ret) + return ret; + + ret = pkvm_mark_hyp_section(__hyp_bss); + if (ret) + return ret; + + ret = pkvm_mark_hyp(hyp_mem_base, hyp_mem_base + hyp_mem_size); + if (ret) + return ret; + + for_each_possible_cpu(cpu) { + phys_addr_t start = virt_to_phys((void *)kvm_arm_hyp_percpu_base[cpu]); + phys_addr_t end = start + (PAGE_SIZE << nvhe_percpu_order()); + + ret = pkvm_mark_hyp(start, end); + if (ret) + return ret; + + start = virt_to_phys((void *)per_cpu(kvm_arm_hyp_stack_page, cpu)); + end = start + PAGE_SIZE; + ret = pkvm_mark_hyp(start, end); + if (ret) + return ret; + } + /* * Flip the static key upfront as that may no longer be possible * once the host stage 2 is installed. diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index d293cb328cc4..42d81ec739fa 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -21,6 +21,8 @@ struct host_kvm { extern struct host_kvm host_kvm; int __pkvm_prot_finalize(void); +int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end); + int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool); void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 69163f2cbb63..b4eaa7ef13e0 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -156,6 +156,14 @@ static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt) { cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize(); } + +static void handle___pkvm_mark_hyp(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(phys_addr_t, start, host_ctxt, 1); + DECLARE_REG(phys_addr_t, end, host_ctxt, 2); + + cpu_reg(host_ctxt, 1) = __pkvm_mark_hyp(start, end); +} typedef void (*hcall_t)(struct kvm_cpu_context *); #define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x @@ -180,6 +188,7 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__pkvm_create_mappings), HANDLE_FUNC(__pkvm_create_private_mapping), HANDLE_FUNC(__pkvm_prot_finalize), + HANDLE_FUNC(__pkvm_mark_hyp), }; static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 77b48c47344d..808e2471091b 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -27,6 +27,8 @@ struct host_kvm host_kvm; struct hyp_pool host_s2_mem; struct hyp_pool host_s2_dev; +static const u8 pkvm_hyp_id = 1; + static void *host_s2_zalloc_pages_exact(size_t size) { return hyp_alloc_pages(&host_s2_mem, get_order(size)); @@ -182,6 +184,18 @@ static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range) return false; } +static bool range_is_memory(u64 start, u64 end) +{ + struct kvm_mem_range r1, r2; + + if (!find_mem_range(start, &r1) || !find_mem_range(end, &r2)) + return false; + if (r1.start != r2.start) + return false; + + return true; +} + static inline int __host_stage2_idmap(u64 start, u64 end, enum kvm_pgtable_prot prot, struct hyp_pool *pool) @@ -229,6 +243,25 @@ unlock: return ret; } +int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end) +{ + int ret; + + /* + * host_stage2_unmap_dev_all() currently relies on MMIO mappings being + * non-persistent, so don't allow changing page ownership in MMIO range. + */ + if (!range_is_memory(start, end)) + return -EINVAL; + + hyp_spin_lock(&host_kvm.lock); + ret = kvm_pgtable_stage2_set_owner(&host_kvm.pgt, start, end - start, + &host_s2_mem, pkvm_hyp_id); + hyp_spin_unlock(&host_kvm.lock); + + return ret != -EAGAIN ? ret : 0; +} + void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) { struct kvm_vcpu_fault_info fault; -- cgit v1.2.3 From 755db23420a1ce4b740186543432983e9bbe713e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 22 Mar 2021 12:09:51 +0000 Subject: KVM: arm64: Generate final CTR_EL0 value when running in Protected mode In protected mode, late CPUs are not allowed to boot (enforced by the PSCI relay). We can thus specialise the read_ctr macro to always return a pre-computed, sanitised value. Special care is taken to prevent the use of this custome version outside of the protected mode. Reviewed-by: Quentin Perret Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/assembler.h | 13 +++++++++++++ arch/arm64/kernel/image-vars.h | 1 + arch/arm64/kvm/va_layout.c | 7 +++++++ 3 files changed, 21 insertions(+) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index fb651c1f26e9..34ddd8a0f3dd 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -270,12 +271,24 @@ alternative_endif * provide the system wide safe value from arm64_ftr_reg_ctrel0.sys_val */ .macro read_ctr, reg +#ifndef __KVM_NVHE_HYPERVISOR__ alternative_if_not ARM64_MISMATCHED_CACHE_TYPE mrs \reg, ctr_el0 // read CTR nop alternative_else ldr_l \reg, arm64_ftr_reg_ctrel0 + ARM64_FTR_SYSVAL alternative_endif +#else +alternative_if_not ARM64_KVM_PROTECTED_MODE + ASM_BUG() +alternative_else_nop_endif +alternative_cb kvm_compute_final_ctr_el0 + movz \reg, #0 + movk \reg, #0, lsl #16 + movk \reg, #0, lsl #32 + movk \reg, #0, lsl #48 +alternative_cb_end +#endif .endm diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index d5dc2b792651..fdd60cd1d7e8 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -65,6 +65,7 @@ __efistub__ctype = _ctype; KVM_NVHE_ALIAS(kvm_patch_vector_branch); KVM_NVHE_ALIAS(kvm_update_va_mask); KVM_NVHE_ALIAS(kvm_get_kimage_voffset); +KVM_NVHE_ALIAS(kvm_compute_final_ctr_el0); /* Global kernel state accessed by nVHE hyp code. */ KVM_NVHE_ALIAS(kvm_vgic_global_state); diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index 978301392d67..acdb7b3cc97d 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -288,3 +288,10 @@ void kvm_get_kimage_voffset(struct alt_instr *alt, { generate_mov_q(kimage_voffset, origptr, updptr, nr_inst); } + +void kvm_compute_final_ctr_el0(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + generate_mov_q(read_sanitised_ftr_reg(SYS_CTR_EL0), + origptr, updptr, nr_inst); +} -- cgit v1.2.3 From 7c4199375ae347449fbde43cc8bf174ae6383d8e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 22 Mar 2021 13:32:34 +0000 Subject: KVM: arm64: Drop the CPU_FTR_REG_HYP_COPY infrastructure Now that the read_ctr macro has been specialised for nVHE, the whole CPU_FTR_REG_HYP_COPY infrastrcture looks completely overengineered. Simplify it by populating the two u64 quantities (MMFR0 and 1) that the hypervisor need. Reviewed-by: Quentin Perret Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/cpufeature.h | 1 - arch/arm64/include/asm/kvm_cpufeature.h | 26 -------------------------- arch/arm64/include/asm/kvm_host.h | 4 ---- arch/arm64/include/asm/kvm_hyp.h | 3 +++ arch/arm64/kernel/cpufeature.c | 13 ------------- arch/arm64/kvm/arm.c | 3 +++ arch/arm64/kvm/hyp/nvhe/hyp-smp.c | 8 -------- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 16 +++++++++------- arch/arm64/kvm/sys_regs.c | 22 ---------------------- 9 files changed, 15 insertions(+), 81 deletions(-) delete mode 100644 arch/arm64/include/asm/kvm_cpufeature.h diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index a85cea2cac57..61177bac49fa 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -607,7 +607,6 @@ void check_local_cpu_capabilities(void); u64 read_sanitised_ftr_reg(u32 id); u64 __read_sysreg_by_encoding(u32 sys_id); -int copy_ftr_reg(u32 id, struct arm64_ftr_reg *dst); static inline bool cpu_supports_mixed_endian_el0(void) { diff --git a/arch/arm64/include/asm/kvm_cpufeature.h b/arch/arm64/include/asm/kvm_cpufeature.h deleted file mode 100644 index ff302d15e840..000000000000 --- a/arch/arm64/include/asm/kvm_cpufeature.h +++ /dev/null @@ -1,26 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2020 - Google LLC - * Author: Quentin Perret - */ - -#ifndef __ARM64_KVM_CPUFEATURE_H__ -#define __ARM64_KVM_CPUFEATURE_H__ - -#include - -#include - -#if defined(__KVM_NVHE_HYPERVISOR__) -#define DECLARE_KVM_HYP_CPU_FTR_REG(name) extern struct arm64_ftr_reg name -#define DEFINE_KVM_HYP_CPU_FTR_REG(name) struct arm64_ftr_reg name -#else -#define DECLARE_KVM_HYP_CPU_FTR_REG(name) extern struct arm64_ftr_reg kvm_nvhe_sym(name) -#define DEFINE_KVM_HYP_CPU_FTR_REG(name) BUILD_BUG() -#endif - -DECLARE_KVM_HYP_CPU_FTR_REG(arm64_ftr_reg_ctrel0); -DECLARE_KVM_HYP_CPU_FTR_REG(arm64_ftr_reg_id_aa64mmfr0_el1); -DECLARE_KVM_HYP_CPU_FTR_REG(arm64_ftr_reg_id_aa64mmfr1_el1); - -#endif diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 4859c9de75d7..09979cdec28b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -740,13 +740,9 @@ void kvm_clr_pmu_events(u32 clr); void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu); void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu); - -void setup_kvm_el2_caps(void); #else static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {} static inline void kvm_clr_pmu_events(u32 clr) {} - -static inline void setup_kvm_el2_caps(void) {} #endif void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index de40a565d7e5..8ef9d88826d4 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -116,4 +116,7 @@ int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus, void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt); #endif +extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val); +extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val); + #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 6252476e4e73..066030717a4c 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1154,18 +1154,6 @@ u64 read_sanitised_ftr_reg(u32 id) } EXPORT_SYMBOL_GPL(read_sanitised_ftr_reg); -int copy_ftr_reg(u32 id, struct arm64_ftr_reg *dst) -{ - struct arm64_ftr_reg *regp = get_arm64_ftr_reg(id); - - if (!regp) - return -EINVAL; - - *dst = *regp; - - return 0; -} - #define read_sysreg_case(r) \ case r: val = read_sysreg_s(r); break; @@ -2785,7 +2773,6 @@ void __init setup_cpu_features(void) setup_system_capabilities(); setup_elf_hwcaps(arm64_elf_hwcaps); - setup_kvm_el2_caps(); if (system_supports_32bit_el0()) setup_elf_hwcaps(compat_elf_hwcaps); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 368159021dee..2835400fd298 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1730,6 +1730,9 @@ static int kvm_hyp_init_protection(u32 hyp_va_bits) void *addr = phys_to_virt(hyp_mem_base); int ret; + kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); + kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); + ret = create_hyp_mappings(addr, addr + hyp_mem_size, PAGE_HYP); if (ret) return ret; diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c index 17ad1b3a9530..879559057dee 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c @@ -5,17 +5,9 @@ */ #include -#include #include #include -/* - * Copies of the host's CPU features registers holding sanitized values. - */ -DEFINE_KVM_HYP_CPU_FTR_REG(arm64_ftr_reg_ctrel0); -DEFINE_KVM_HYP_CPU_FTR_REG(arm64_ftr_reg_id_aa64mmfr0_el1); -DEFINE_KVM_HYP_CPU_FTR_REG(arm64_ftr_reg_id_aa64mmfr1_el1); - /* * nVHE copy of data structures tracking available CPU cores. * Only entries for CPUs that were online at KVM init are populated. diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 808e2471091b..f4f364aa3282 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -5,7 +5,6 @@ */ #include -#include #include #include #include @@ -27,6 +26,12 @@ struct host_kvm host_kvm; struct hyp_pool host_s2_mem; struct hyp_pool host_s2_dev; +/* + * Copies of the host's CPU features registers holding sanitized values. + */ +u64 id_aa64mmfr0_el1_sys_val; +u64 id_aa64mmfr1_el1_sys_val; + static const u8 pkvm_hyp_id = 1; static void *host_s2_zalloc_pages_exact(size_t size) @@ -72,16 +77,13 @@ static int prepare_s2_pools(void *mem_pgt_pool, void *dev_pgt_pool) static void prepare_host_vtcr(void) { u32 parange, phys_shift; - u64 mmfr0, mmfr1; - - mmfr0 = arm64_ftr_reg_id_aa64mmfr0_el1.sys_val; - mmfr1 = arm64_ftr_reg_id_aa64mmfr1_el1.sys_val; /* The host stage 2 is id-mapped, so use parange for T0SZ */ - parange = kvm_get_parange(mmfr0); + parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val); phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange); - host_kvm.arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift); + host_kvm.arch.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val, + id_aa64mmfr1_el1_sys_val, phys_shift); } int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index dfb3b4f9ca84..4f2f1e3145de 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -2776,24 +2775,3 @@ void kvm_sys_reg_table_init(void) /* Clear all higher bits. */ cache_levels &= (1 << (i*3))-1; } - -#define CPU_FTR_REG_HYP_COPY(id, name) \ - { .sys_id = id, .dst = (struct arm64_ftr_reg *)&kvm_nvhe_sym(name) } -struct __ftr_reg_copy_entry { - u32 sys_id; - struct arm64_ftr_reg *dst; -} hyp_ftr_regs[] __initdata = { - CPU_FTR_REG_HYP_COPY(SYS_CTR_EL0, arm64_ftr_reg_ctrel0), - CPU_FTR_REG_HYP_COPY(SYS_ID_AA64MMFR0_EL1, arm64_ftr_reg_id_aa64mmfr0_el1), - CPU_FTR_REG_HYP_COPY(SYS_ID_AA64MMFR1_EL1, arm64_ftr_reg_id_aa64mmfr1_el1), -}; - -void __init setup_kvm_el2_caps(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(hyp_ftr_regs); i++) { - WARN(copy_ftr_reg(hyp_ftr_regs[i].sys_id, hyp_ftr_regs[i].dst), - "%u feature register not found\n", hyp_ftr_regs[i].sys_id); - } -} -- cgit v1.2.3 From b1306fef1f48c0af1d659c18c53cf275fdcc94be Mon Sep 17 00:00:00 2001 From: Xu Jia Date: Wed, 31 Mar 2021 15:36:19 +0800 Subject: KVM: arm64: Make symbol '_kvm_host_prot_finalize' static The sparse tool complains as follows: arch/arm64/kvm/arm.c:1900:6: warning: symbol '_kvm_host_prot_finalize' was not declared. Should it be static? This symbol is not used outside of arm.c, so this commit marks it static. Reported-by: Hulk Robot Signed-off-by: Xu Jia Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1617176179-31931-1-git-send-email-xujia39@huawei.com --- arch/arm64/kvm/arm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 2835400fd298..76a7de16e5a6 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1897,7 +1897,7 @@ out_err: return err; } -void _kvm_host_prot_finalize(void *discard) +static void _kvm_host_prot_finalize(void *discard) { WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)); } -- cgit v1.2.3