diff options
author | Marc Zyngier <maz@kernel.org> | 2022-05-04 09:42:37 +0100 |
---|---|---|
committer | Marc Zyngier <maz@kernel.org> | 2022-05-04 09:42:37 +0100 |
commit | 904cabf4712426fdcb902b149b97c62ac0fbc8f6 (patch) | |
tree | 5eec3b136099513bd8bb8221c2a5170adbea43d9 /arch/arm64/kvm | |
parent | b2c4caf3316ce535af333bbd2cd99ad96f991074 (diff) | |
parent | 6ccf9cb557bd32073b0d68baed97f1bd8a40ff1d (diff) | |
download | linux-904cabf4712426fdcb902b149b97c62ac0fbc8f6.tar.bz2 |
Merge branch kvm-arm64/hyp-stack-guard into kvmarm-master/next
* kvm-arm64/hyp-stack-guard:
: .
: Harden the EL2 stack by providing stack guards, courtesy of
: Kalesh Singh.
: .
KVM: arm64: Symbolize the nVHE HYP addresses
KVM: arm64: Detect and handle hypervisor stack overflows
KVM: arm64: Add guard pages for pKVM (protected nVHE) hypervisor stack
KVM: arm64: Add guard pages for KVM nVHE hypervisor stack
KVM: arm64: Introduce pkvm_alloc_private_va_range()
KVM: arm64: Introduce hyp_alloc_private_va_range()
Signed-off-by: Marc Zyngier <maz@kernel.org>
Diffstat (limited to 'arch/arm64/kvm')
-rw-r--r-- | arch/arm64/kvm/arm.c | 37 | ||||
-rw-r--r-- | arch/arm64/kvm/handle_exit.c | 13 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/nvhe/mm.h | 6 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/host.S | 24 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/hyp-main.c | 18 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/mm.c | 78 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/setup.c | 31 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/switch.c | 7 | ||||
-rw-r--r-- | arch/arm64/kvm/mmu.c | 68 |
9 files changed, 214 insertions, 68 deletions
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e7cb8a4d2e81..cee02ea86159 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1479,7 +1479,6 @@ static void cpu_prepare_hyp_mode(int cpu) tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET; params->tcr_el2 = tcr; - params->stack_hyp_va = kern_hyp_va(per_cpu(kvm_arm_hyp_stack_page, cpu) + PAGE_SIZE); params->pgd_pa = kvm_mmu_get_httbr(); if (is_protected_kvm_enabled()) params->hcr_el2 = HCR_HOST_NVHE_PROTECTED_FLAGS; @@ -1929,14 +1928,46 @@ static int init_hyp_mode(void) * Map the Hyp stack pages */ for_each_possible_cpu(cpu) { + struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu); char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu); - err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE, - PAGE_HYP); + unsigned long hyp_addr; + /* + * Allocate a contiguous HYP private VA range for the stack + * and guard page. The allocation is also aligned based on + * the order of its size. + */ + err = hyp_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr); + if (err) { + kvm_err("Cannot allocate hyp stack guard page\n"); + goto out_err; + } + + /* + * Since the stack grows downwards, map the stack to the page + * at the higher address and leave the lower guard page + * unbacked. + * + * Any valid stack address now has the PAGE_SHIFT bit as 1 + * and addresses corresponding to the guard page have the + * PAGE_SHIFT bit as 0 - this is used for overflow detection. + */ + err = __create_hyp_mappings(hyp_addr + PAGE_SIZE, PAGE_SIZE, + __pa(stack_page), PAGE_HYP); if (err) { kvm_err("Cannot map hyp stack\n"); goto out_err; } + + /* + * Save the stack PA in nvhe_init_params. This will be needed + * to recreate the stack mapping in protected nVHE mode. + * __hyp_pa() won't do the right thing there, since the stack + * has been mapped in the flexible private VA space. + */ + params->stack_pa = __pa(stack_page); + + params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE); } for_each_possible_cpu(cpu) { diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 7726b01dc09a..c9f1310e2e5f 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -322,13 +322,8 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr_in_kimg = __phys_to_kimg(elr_phys); u64 hyp_offset = elr_in_kimg - kaslr_offset() - elr_virt; u64 mode = spsr & PSR_MODE_MASK; + u64 panic_addr = elr_virt + hyp_offset; - /* - * The nVHE hyp symbols are not included by kallsyms to avoid issues - * with aliasing. That means that the symbols cannot be printed with the - * "%pS" format specifier, so fall back to the vmlinux address if - * there's no better option. - */ if (mode != PSR_MODE_EL2t && mode != PSR_MODE_EL2h) { kvm_err("Invalid host exception to nVHE hyp!\n"); } else if (ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 && @@ -348,9 +343,11 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, if (file) kvm_err("nVHE hyp BUG at: %s:%u!\n", file, line); else - kvm_err("nVHE hyp BUG at: %016llx!\n", elr_virt + hyp_offset); + kvm_err("nVHE hyp BUG at: [<%016llx>] %pB!\n", panic_addr, + (void *)panic_addr); } else { - kvm_err("nVHE hyp panic at: %016llx!\n", elr_virt + hyp_offset); + kvm_err("nVHE hyp panic at: [<%016llx>] %pB!\n", panic_addr, + (void *)panic_addr); } /* diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index 2d08510c6cc1..42d8eb9bfe72 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -19,8 +19,10 @@ int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back); int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot); int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot); int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot); -unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size, - enum kvm_pgtable_prot prot); +int __pkvm_create_private_mapping(phys_addr_t phys, size_t size, + enum kvm_pgtable_prot prot, + unsigned long *haddr); +int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr); static inline void hyp_vmemmap_range(phys_addr_t phys, unsigned long size, unsigned long *start, unsigned long *end) diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index 727c979b2b69..20bf1327e6b9 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -153,6 +153,18 @@ SYM_FUNC_END(__host_hvc) .macro invalid_host_el2_vect .align 7 + + /* + * Test whether the SP has overflowed, without corrupting a GPR. + * nVHE hypervisor stacks are aligned so that the PAGE_SHIFT bit + * of SP should always be 1. + */ + add sp, sp, x0 // sp' = sp + x0 + sub x0, sp, x0 // x0' = sp' - x0 = (sp + x0) - x0 = sp + tbz x0, #PAGE_SHIFT, .L__hyp_sp_overflow\@ + sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0 + sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp + /* If a guest is loaded, panic out of it. */ stp x0, x1, [sp, #-16]! get_loaded_vcpu x0, x1 @@ -165,6 +177,18 @@ SYM_FUNC_END(__host_hvc) * been partially clobbered by __host_enter. */ b hyp_panic + +.L__hyp_sp_overflow\@: + /* + * Reset SP to the top of the stack, to allow handling the hyp_panic. + * This corrupts the stack but is ok, since we won't be attempting + * any unwinding here. + */ + ldr_this_cpu x0, kvm_init_params + NVHE_INIT_STACK_HYP_VA, x1 + mov sp, x0 + + b hyp_panic_bad_stack + ASM_BUG() .endm .macro invalid_host_el1_vect diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 5e2197db0d32..3cea4b6ac23e 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -160,7 +160,23 @@ static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ct DECLARE_REG(size_t, size, host_ctxt, 2); DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 3); - cpu_reg(host_ctxt, 1) = __pkvm_create_private_mapping(phys, size, prot); + /* + * __pkvm_create_private_mapping() populates a pointer with the + * hypervisor start address of the allocation. + * + * However, handle___pkvm_create_private_mapping() hypercall crosses the + * EL1/EL2 boundary so the pointer would not be valid in this context. + * + * Instead pass the allocation address as the return value (or return + * ERR_PTR() on failure). + */ + unsigned long haddr; + int err = __pkvm_create_private_mapping(phys, size, prot, &haddr); + + if (err) + haddr = (unsigned long)ERR_PTR(err); + + cpu_reg(host_ctxt, 1) = haddr; } static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt) diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index cdbe8e246418..96193cb31a39 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -37,36 +37,60 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size, return err; } -unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size, - enum kvm_pgtable_prot prot) +/** + * pkvm_alloc_private_va_range - Allocates a private VA range. + * @size: The size of the VA range to reserve. + * @haddr: The hypervisor virtual start address of the allocation. + * + * The private virtual address (VA) range is allocated above __io_map_base + * and aligned based on the order of @size. + * + * Return: 0 on success or negative error code on failure. + */ +int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr) { - unsigned long addr; - int err; + unsigned long base, addr; + int ret = 0; hyp_spin_lock(&pkvm_pgd_lock); - size = PAGE_ALIGN(size + offset_in_page(phys)); - addr = __io_map_base; - __io_map_base += size; + /* Align the allocation based on the order of its size */ + addr = ALIGN(__io_map_base, PAGE_SIZE << get_order(size)); - /* Are we overflowing on the vmemmap ? */ - if (__io_map_base > __hyp_vmemmap) { - __io_map_base -= size; - addr = (unsigned long)ERR_PTR(-ENOMEM); - goto out; - } + /* The allocated size is always a multiple of PAGE_SIZE */ + base = addr + PAGE_ALIGN(size); - err = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, size, phys, prot); - if (err) { - addr = (unsigned long)ERR_PTR(err); - goto out; + /* Are we overflowing on the vmemmap ? */ + if (!addr || base > __hyp_vmemmap) + ret = -ENOMEM; + else { + __io_map_base = base; + *haddr = addr; } - addr = addr + offset_in_page(phys); -out: hyp_spin_unlock(&pkvm_pgd_lock); - return addr; + return ret; +} + +int __pkvm_create_private_mapping(phys_addr_t phys, size_t size, + enum kvm_pgtable_prot prot, + unsigned long *haddr) +{ + unsigned long addr; + int err; + + size = PAGE_ALIGN(size + offset_in_page(phys)); + err = pkvm_alloc_private_va_range(size, &addr); + if (err) + return err; + + err = __pkvm_create_mappings(addr, size, phys, prot); + if (err) + return err; + + *haddr = addr + offset_in_page(phys); + return err; } int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot) @@ -146,7 +170,8 @@ int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot) int hyp_map_vectors(void) { phys_addr_t phys; - void *bp_base; + unsigned long bp_base; + int ret; if (!kvm_system_needs_idmapped_vectors()) { __hyp_bp_vect_base = __bp_harden_hyp_vecs; @@ -154,13 +179,12 @@ int hyp_map_vectors(void) } phys = __hyp_pa(__bp_harden_hyp_vecs); - bp_base = (void *)__pkvm_create_private_mapping(phys, - __BP_HARDEN_HYP_VECS_SZ, - PAGE_HYP_EXEC); - if (IS_ERR_OR_NULL(bp_base)) - return PTR_ERR(bp_base); + ret = __pkvm_create_private_mapping(phys, __BP_HARDEN_HYP_VECS_SZ, + PAGE_HYP_EXEC, &bp_base); + if (ret) + return ret; - __hyp_bp_vect_base = bp_base; + __hyp_bp_vect_base = (void *)bp_base; return 0; } diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 27af337f9fea..e8d4ea2fcfa0 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -99,17 +99,42 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, return ret; for (i = 0; i < hyp_nr_cpus; i++) { + struct kvm_nvhe_init_params *params = per_cpu_ptr(&kvm_init_params, i); + unsigned long hyp_addr; + start = (void *)kern_hyp_va(per_cpu_base[i]); end = start + PAGE_ALIGN(hyp_percpu_size); ret = pkvm_create_mappings(start, end, PAGE_HYP); if (ret) return ret; - end = (void *)per_cpu_ptr(&kvm_init_params, i)->stack_hyp_va; - start = end - PAGE_SIZE; - ret = pkvm_create_mappings(start, end, PAGE_HYP); + /* + * Allocate a contiguous HYP private VA range for the stack + * and guard page. The allocation is also aligned based on + * the order of its size. + */ + ret = pkvm_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr); + if (ret) + return ret; + + /* + * Since the stack grows downwards, map the stack to the page + * at the higher address and leave the lower guard page + * unbacked. + * + * Any valid stack address now has the PAGE_SHIFT bit as 1 + * and addresses corresponding to the guard page have the + * PAGE_SHIFT bit as 0 - this is used for overflow detection. + */ + hyp_spin_lock(&pkvm_pgd_lock); + ret = kvm_pgtable_hyp_map(&pkvm_pgtable, hyp_addr + PAGE_SIZE, + PAGE_SIZE, params->stack_pa, PAGE_HYP); + hyp_spin_unlock(&pkvm_pgd_lock); if (ret) return ret; + + /* Update stack_hyp_va to end of the stack's private VA range */ + params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE); } /* diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index caace61ea459..978f1b94fb25 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -377,7 +377,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) return exit_code; } -void __noreturn hyp_panic(void) +asmlinkage void __noreturn hyp_panic(void) { u64 spsr = read_sysreg_el2(SYS_SPSR); u64 elr = read_sysreg_el2(SYS_ELR); @@ -399,6 +399,11 @@ void __noreturn hyp_panic(void) unreachable(); } +asmlinkage void __noreturn hyp_panic_bad_stack(void) +{ + hyp_panic(); +} + asmlinkage void kvm_unexpected_el2_exception(void) { return __kvm_unexpected_el2_exception(); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 5400fc020164..f5651a05b6a8 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -258,8 +258,8 @@ static bool kvm_host_owns_hyp_mappings(void) return true; } -static int __create_hyp_mappings(unsigned long start, unsigned long size, - unsigned long phys, enum kvm_pgtable_prot prot) +int __create_hyp_mappings(unsigned long start, unsigned long size, + unsigned long phys, enum kvm_pgtable_prot prot) { int err; @@ -457,23 +457,22 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot) return 0; } -static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size, - unsigned long *haddr, - enum kvm_pgtable_prot prot) + +/** + * hyp_alloc_private_va_range - Allocates a private VA range. + * @size: The size of the VA range to reserve. + * @haddr: The hypervisor virtual start address of the allocation. + * + * The private virtual address (VA) range is allocated below io_map_base + * and aligned based on the order of @size. + * + * Return: 0 on success or negative error code on failure. + */ +int hyp_alloc_private_va_range(size_t size, unsigned long *haddr) { unsigned long base; int ret = 0; - if (!kvm_host_owns_hyp_mappings()) { - base = kvm_call_hyp_nvhe(__pkvm_create_private_mapping, - phys_addr, size, prot); - if (IS_ERR_OR_NULL((void *)base)) - return PTR_ERR((void *)base); - *haddr = base; - - return 0; - } - mutex_lock(&kvm_hyp_pgd_mutex); /* @@ -484,8 +483,10 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size, * * The allocated size is always a multiple of PAGE_SIZE. */ - size = PAGE_ALIGN(size + offset_in_page(phys_addr)); - base = io_map_base - size; + base = io_map_base - PAGE_ALIGN(size); + + /* Align the allocation based on the order of its size */ + base = ALIGN_DOWN(base, PAGE_SIZE << get_order(size)); /* * Verify that BIT(VA_BITS - 1) hasn't been flipped by @@ -495,19 +496,40 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size, if ((base ^ io_map_base) & BIT(VA_BITS - 1)) ret = -ENOMEM; else - io_map_base = base; + *haddr = io_map_base = base; mutex_unlock(&kvm_hyp_pgd_mutex); + return ret; +} + +static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size, + unsigned long *haddr, + enum kvm_pgtable_prot prot) +{ + unsigned long addr; + int ret = 0; + + if (!kvm_host_owns_hyp_mappings()) { + addr = kvm_call_hyp_nvhe(__pkvm_create_private_mapping, + phys_addr, size, prot); + if (IS_ERR_VALUE(addr)) + return addr; + *haddr = addr; + + return 0; + } + + size = PAGE_ALIGN(size + offset_in_page(phys_addr)); + ret = hyp_alloc_private_va_range(size, &addr); if (ret) - goto out; + return ret; - ret = __create_hyp_mappings(base, size, phys_addr, prot); + ret = __create_hyp_mappings(addr, size, phys_addr, prot); if (ret) - goto out; + return ret; - *haddr = base + offset_in_page(phys_addr); -out: + *haddr = addr + offset_in_page(phys_addr); return ret; } |