From 096622104e14d8a1db4860bd557717067a0515d2 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 18 Aug 2017 16:57:01 +0100 Subject: arm64: fpsimd: Prevent registers leaking across exec There are some tricky dependencies between the different stages of flushing the FPSIMD register state during exec, and these can race with context switch in ways that can cause the old task's regs to leak across. In particular, a context switch during the memset() can cause some of the task's old FPSIMD registers to reappear. Disabling preemption for this small window would be no big deal for performance: preemption is already disabled for similar scenarios like updating the FPSIMD registers in sigreturn. So, instead of rearranging things in ways that might swap existing subtle bugs for new ones, this patch just disables preemption around the FPSIMD state flushing so that races of this type can't occur here. This brings fpsimd_flush_thread() into line with other code paths. Cc: stable@vger.kernel.org Fixes: 674c242c9323 ("arm64: flush FP/SIMD state correctly after execve()") Reviewed-by: Ard Biesheuvel Signed-off-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 06da8ea16bbe..c7b4995868e1 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -161,9 +161,11 @@ void fpsimd_flush_thread(void) { if (!system_supports_fpsimd()) return; + preempt_disable(); memset(¤t->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); fpsimd_flush_task_state(current); set_thread_flag(TIF_FOREIGN_FPSTATE); + preempt_enable(); } /* -- cgit v1.2.3 From 289d07a2dc6c6b6f3e4b8a62669320d99dbe6c3d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 11 Jul 2017 15:19:22 +0100 Subject: arm64: mm: abort uaccess retries upon fatal signal When there's a fatal signal pending, arm64's do_page_fault() implementation returns 0. The intent is that we'll return to the faulting userspace instruction, delivering the signal on the way. However, if we take a fatal signal during fixing up a uaccess, this results in a return to the faulting kernel instruction, which will be instantly retried, resulting in the same fault being taken forever. As the task never reaches userspace, the signal is not delivered, and the task is left unkillable. While the task is stuck in this state, it can inhibit the forward progress of the system. To avoid this, we must ensure that when a fatal signal is pending, we apply any necessary fixup for a faulting kernel instruction. Thus we will return to an error path, and it is up to that code to make forward progress towards delivering the fatal signal. Cc: Catalin Marinas Cc: Laura Abbott Cc: stable@vger.kernel.org Reviewed-by: Steve Capper Tested-by: Steve Capper Reviewed-by: James Morse Tested-by: James Morse Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/arm64') diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 2509e4fe6992..1f22a41565a3 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -435,8 +435,11 @@ retry: * the mmap_sem because it would already be released * in __lock_page_or_retry in mm/filemap.c. */ - if (fatal_signal_pending(current)) + if (fatal_signal_pending(current)) { + if (!user_mode(regs)) + goto no_context; return 0; + } /* * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of -- cgit v1.2.3 From 4a23e56ad6549d0b8c0fac6d9eb752884379c391 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 18 Aug 2017 18:42:30 +0100 Subject: arm64: kaslr: ignore modulo offset when validating virtual displacement In the KASLR setup routine, we ensure that the early virtual mapping of the kernel image does not cover more than a single table entry at the level above the swapper block level, so that the assembler routines involved in setting up this mapping can remain simple. In this calculation we add the proposed KASLR offset to the values of the _text and _end markers, and reject it if they would end up falling in different swapper table sized windows. However, when taking the addresses of _text and _end, the modulo offset (the physical displacement modulo 2 MB) is already accounted for, and so adding it again results in incorrect results. So disregard the modulo offset from the calculation. Fixes: 08cdac619c81 ("arm64: relocatable: deal with physically misaligned ...") Reviewed-by: Catalin Marinas Tested-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 1 - arch/arm64/kernel/kaslr.c | 12 +++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 973df7de7bf8..adb0910b88f5 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -354,7 +354,6 @@ __primary_switched: tst x23, ~(MIN_KIMG_ALIGN - 1) // already running randomized? b.ne 0f mov x0, x21 // pass FDT address in x0 - mov x1, x23 // pass modulo offset in x1 bl kaslr_early_init // parse FDT for KASLR options cbz x0, 0f // KASLR disabled? just proceed orr x23, x23, x0 // record KASLR offset diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index a9710efb8c01..1d95c204186b 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -75,7 +75,7 @@ extern void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, * containing function pointers) to be reinitialized, and zero-initialized * .bss variables will be reset to 0. */ -u64 __init kaslr_early_init(u64 dt_phys, u64 modulo_offset) +u64 __init kaslr_early_init(u64 dt_phys) { void *fdt; u64 seed, offset, mask, module_range; @@ -133,9 +133,15 @@ u64 __init kaslr_early_init(u64 dt_phys, u64 modulo_offset) * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this * happens, increase the KASLR offset by the size of the kernel image * rounded up by SWAPPER_BLOCK_SIZE. + * + * NOTE: The references to _text and _end below will already take the + * modulo offset (the physical displacement modulo 2 MB) into + * account, given that the physical placement is controlled by + * the loader, and will not change as a result of the virtual + * mapping we choose. */ - if ((((u64)_text + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT) != - (((u64)_end + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT)) { + if ((((u64)_text + offset) >> SWAPPER_TABLE_SHIFT) != + (((u64)_end + offset) >> SWAPPER_TABLE_SHIFT)) { u64 kimg_sz = _end - _text; offset = (offset + round_up(kimg_sz, SWAPPER_BLOCK_SIZE)) & mask; -- cgit v1.2.3 From a067d94d37ed590fd17684d18c3edf52110d305a Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 22 Aug 2017 15:39:00 +0100 Subject: arm64: kaslr: Adjust the offset to avoid Image across alignment boundary With 16KB pages and a kernel Image larger than 16MB, the current kaslr_early_init() logic for avoiding mappings across swapper table boundaries fails since increasing the offset by kimg_sz just moves the problem to the next boundary. This patch rounds the offset down to (1 << SWAPPER_TABLE_SHIFT) if the Image crosses a PMD_SIZE boundary. Fixes: afd0e5a87670 ("arm64: kaslr: Fix up the kernel image alignment") Cc: Ard Biesheuvel Cc: Mark Rutland Cc: Will Deacon Cc: Neeraj Upadhyay Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/kernel/kaslr.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 1d95c204186b..47080c49cc7e 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -131,8 +131,7 @@ u64 __init kaslr_early_init(u64 dt_phys) /* * The kernel Image should not extend across a 1GB/32MB/512MB alignment * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this - * happens, increase the KASLR offset by the size of the kernel image - * rounded up by SWAPPER_BLOCK_SIZE. + * happens, round down the KASLR offset by (1 << SWAPPER_TABLE_SHIFT). * * NOTE: The references to _text and _end below will already take the * modulo offset (the physical displacement modulo 2 MB) into @@ -141,11 +140,8 @@ u64 __init kaslr_early_init(u64 dt_phys) * mapping we choose. */ if ((((u64)_text + offset) >> SWAPPER_TABLE_SHIFT) != - (((u64)_end + offset) >> SWAPPER_TABLE_SHIFT)) { - u64 kimg_sz = _end - _text; - offset = (offset + round_up(kimg_sz, SWAPPER_BLOCK_SIZE)) - & mask; - } + (((u64)_end + offset) >> SWAPPER_TABLE_SHIFT)) + offset = round_down(offset, 1 << SWAPPER_TABLE_SHIFT); if (IS_ENABLED(CONFIG_KASAN)) /* -- cgit v1.2.3 From fb1522e099f0c69f36655af233a64e3f55941f5b Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Thu, 31 Aug 2017 17:17:37 -0400 Subject: KVM: update to new mmu_notifier semantic v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calls to mmu_notifier_invalidate_page() were replaced by calls to mmu_notifier_invalidate_range() and are now bracketed by calls to mmu_notifier_invalidate_range_start()/end() Remove now useless invalidate_page callback. Changed since v1 (Linus Torvalds) - remove now useless kvm_arch_mmu_notifier_invalidate_page() Signed-off-by: Jérôme Glisse Tested-by: Mike Galbraith Tested-by: Adam Borowski Cc: Paolo Bonzini Cc: Radim Krčmář Cc: kvm@vger.kernel.org Cc: Kirill A. Shutemov Cc: Andrew Morton Cc: Andrea Arcangeli Signed-off-by: Linus Torvalds --- arch/arm/include/asm/kvm_host.h | 6 ------ arch/arm64/include/asm/kvm_host.h | 6 ------ arch/mips/include/asm/kvm_host.h | 5 ----- arch/powerpc/include/asm/kvm_host.h | 5 ----- arch/x86/include/asm/kvm_host.h | 2 -- arch/x86/kvm/x86.c | 11 ---------- virt/kvm/kvm_main.c | 42 ------------------------------------- 7 files changed, 77 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 127e2dd2e21c..4a879f6ff13b 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -225,12 +225,6 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); -/* We do not have shadow page tables, hence the empty hooks */ -static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, - unsigned long address) -{ -} - struct kvm_vcpu *kvm_arm_get_running_vcpu(void); struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); void kvm_arm_halt_guest(struct kvm *kvm); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index d68630007b14..e923b58606e2 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -326,12 +326,6 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); -/* We do not have shadow page tables, hence the empty hooks */ -static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, - unsigned long address) -{ -} - struct kvm_vcpu *kvm_arm_get_running_vcpu(void); struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); void kvm_arm_halt_guest(struct kvm *kvm); diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 2998479fd4e8..a9af1d2dcd69 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -938,11 +938,6 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); -static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, - unsigned long address) -{ -} - /* Emulation */ int kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu, u32 *out); enum emulation_result update_pc(struct kvm_vcpu *vcpu, u32 cause); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 8b3f1238d07f..e372ed871c51 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -67,11 +67,6 @@ extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); -static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, - unsigned long address) -{ -} - #define HPTEG_CACHE_NUM (1 << 15) #define HPTEG_HASH_BITS_PTE 13 #define HPTEG_HASH_BITS_PTE_LONG 12 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f4d120a3e22e..92c9032502d8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1375,8 +1375,6 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu); -void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, - unsigned long address); void kvm_define_shared_msr(unsigned index, u32 msr); int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 05a5e57c6f39..272320eb328c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6734,17 +6734,6 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page); -void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, - unsigned long address) -{ - /* - * The physical address of apic access page is stored in the VMCS. - * Update it when it becomes invalid. - */ - if (address == gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT)) - kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); -} - /* * Returns 1 to let vcpu_run() continue the guest execution loop without * exiting to the userspace. Otherwise, the value will be returned to the diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 15252d723b54..4d81f6ded88e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -322,47 +322,6 @@ static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) return container_of(mn, struct kvm, mmu_notifier); } -static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long address) -{ - struct kvm *kvm = mmu_notifier_to_kvm(mn); - int need_tlb_flush, idx; - - /* - * When ->invalidate_page runs, the linux pte has been zapped - * already but the page is still allocated until - * ->invalidate_page returns. So if we increase the sequence - * here the kvm page fault will notice if the spte can't be - * established because the page is going to be freed. If - * instead the kvm page fault establishes the spte before - * ->invalidate_page runs, kvm_unmap_hva will release it - * before returning. - * - * The sequence increase only need to be seen at spin_unlock - * time, and not at spin_lock time. - * - * Increasing the sequence after the spin_unlock would be - * unsafe because the kvm page fault could then establish the - * pte after kvm_unmap_hva returned, without noticing the page - * is going to be freed. - */ - idx = srcu_read_lock(&kvm->srcu); - spin_lock(&kvm->mmu_lock); - - kvm->mmu_notifier_seq++; - need_tlb_flush = kvm_unmap_hva(kvm, address) | kvm->tlbs_dirty; - /* we've to flush the tlb before the pages can be freed */ - if (need_tlb_flush) - kvm_flush_remote_tlbs(kvm); - - spin_unlock(&kvm->mmu_lock); - - kvm_arch_mmu_notifier_invalidate_page(kvm, address); - - srcu_read_unlock(&kvm->srcu, idx); -} - static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long address, @@ -510,7 +469,6 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn, } static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { - .invalidate_page = kvm_mmu_notifier_invalidate_page, .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, .clear_flush_young = kvm_mmu_notifier_clear_flush_young, -- cgit v1.2.3