From 071b6d4a5d343046f253a5a8835d477d93992002 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Tue, 5 Dec 2017 14:56:42 +0000 Subject: arm64: fpsimd: Prevent registers leaking from dead tasks Currently, loading of a task's fpsimd state into the CPU registers is skipped if that task's state is already present in the registers of that CPU. However, the code relies on the struct fpsimd_state * (and by extension struct task_struct *) to unambiguously identify a task. There is a particular case in which this doesn't work reliably: when a task exits, its task_struct may be recycled to describe a new task. Consider the following scenario: 1) Task P loads its fpsimd state onto cpu C. per_cpu(fpsimd_last_state, C) := P; P->thread.fpsimd_state.cpu := C; 2) Task X is scheduled onto C and loads its fpsimd state on C. per_cpu(fpsimd_last_state, C) := X; X->thread.fpsimd_state.cpu := C; 3) X exits, causing X's task_struct to be freed. 4) P forks a new child T, which obtains X's recycled task_struct. T == X. T->thread.fpsimd_state.cpu == C (inherited from P). 5) T is scheduled on C. T's fpsimd state is not loaded, because per_cpu(fpsimd_last_state, C) == T (== X) && T->thread.fpsimd_state.cpu == C. (This is the check performed by fpsimd_thread_switch().) So, T gets X's registers because the last registers loaded onto C were those of X, in (2). This patch fixes the problem by ensuring that the sched-in check fails in (5): fpsimd_flush_task_state(T) is called when T is forked, so that T->thread.fpsimd_state.cpu == C cannot be true. This relies on the fact that T is not schedulable until after copy_thread() completes. Once T's fpsimd state has been loaded on some CPU C there may still be other cpus D for which per_cpu(fpsimd_last_state, D) == &X->thread.fpsimd_state. But D is necessarily != C in this case, and the check in (5) must fail. An alternative fix would be to do refcounting on task_struct. This would result in each CPU holding a reference to the last task whose fpsimd state was loaded there. It's not clear whether this is preferable, and it involves higher overhead than the fix proposed in this patch. It would also move all the task_struct freeing work into the context switch critical section, or otherwise some deferred cleanup mechanism would need to be introduced, neither of which seems obviously justified. Cc: Fixes: 005f78cd8849 ("arm64: defer reloading a task's FPSIMD state to userland resume") Signed-off-by: Dave Martin Reviewed-by: Ard Biesheuvel [will: word-smithed the comment so it makes more sense] Signed-off-by: Will Deacon --- arch/arm64/kernel/process.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index b2adcce7bc18..6b7dcf4310ac 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -314,6 +314,15 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, clear_tsk_thread_flag(p, TIF_SVE); p->thread.sve_state = NULL; + /* + * In case p was allocated the same task_struct pointer as some + * other recently-exited task, make sure p is disassociated from + * any cpu that may have run that now-exited task recently. + * Otherwise we could erroneously skip reloading the FPSIMD + * registers for p. + */ + fpsimd_flush_task_state(p); + if (likely(!(p->flags & PF_KTHREAD))) { *childregs = *current_pt_regs(); childregs->regs[0] = 0; -- cgit v1.2.3 From 8884b7bd7e52de20a801c5f457954ed212c0f625 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 6 Dec 2017 16:45:46 +0000 Subject: arm64: fpsimd: Abstract out binding of task's fpsimd context to the cpu. There is currently some duplicate logic to associate current's FPSIMD context with the cpu when loading FPSIMD state into the cpu regs. Subsequent patches will update that logic, so in order to ensure it only needs to be done in one place, this patch factors the relevant code out into a new function fpsimd_bind_to_cpu(). Signed-off-by: Dave Martin Reviewed-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 5084e699447a..e330cc0b573d 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -991,6 +991,18 @@ void fpsimd_signal_preserve_current_state(void) sve_to_fpsimd(current); } +/* + * Associate current's FPSIMD context with this cpu + * Preemption must be disabled when calling this function. + */ +static void fpsimd_bind_to_cpu(void) +{ + struct fpsimd_state *st = ¤t->thread.fpsimd_state; + + __this_cpu_write(fpsimd_last_state, st); + st->cpu = smp_processor_id(); +} + /* * Load the userland FPSIMD state of 'current' from memory, but only if the * FPSIMD state already held in the registers is /not/ the most recent FPSIMD @@ -1004,11 +1016,8 @@ void fpsimd_restore_current_state(void) local_bh_disable(); if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { - struct fpsimd_state *st = ¤t->thread.fpsimd_state; - task_fpsimd_load(); - __this_cpu_write(fpsimd_last_state, st); - st->cpu = smp_processor_id(); + fpsimd_bind_to_cpu(); } local_bh_enable(); @@ -1032,12 +1041,8 @@ void fpsimd_update_current_state(struct fpsimd_state *state) task_fpsimd_load(); - if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { - struct fpsimd_state *st = ¤t->thread.fpsimd_state; - - __this_cpu_write(fpsimd_last_state, st); - st->cpu = smp_processor_id(); - } + if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) + fpsimd_bind_to_cpu(); local_bh_enable(); } -- cgit v1.2.3 From 0adbdfde8cfc9415aeed2a4955d2d17b3bd9bf13 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 6 Dec 2017 10:42:10 +0000 Subject: arm64: SW PAN: Point saved ttbr0 at the zero page when switching to init_mm update_saved_ttbr0 mandates that mm->pgd is not swapper, since swapper contains kernel mappings and should never be installed into ttbr0. However, this means that callers must avoid passing the init_mm to update_saved_ttbr0 which in turn can cause the saved ttbr0 value to be out-of-date in the context of the idle thread. For example, EFI runtime services may leave the saved ttbr0 pointing at the EFI page table, and kernel threads may end up with stale references to freed page tables. This patch changes update_saved_ttbr0 so that the init_mm points the saved ttbr0 value to the empty zero page, which always exists and never contains valid translations. EFI and switch can then call into update_saved_ttbr0 unconditionally. Cc: Mark Rutland Cc: Ard Biesheuvel Cc: Vinayak Menon Cc: Fixes: 39bc88e5e38e9b21 ("arm64: Disable TTBR0_EL1 during normal kernel execution") Reviewed-by: Catalin Marinas Reviewed-by: Mark Rutland Reported-by: Vinayak Menon Signed-off-by: Will Deacon --- arch/arm64/include/asm/efi.h | 4 +--- arch/arm64/include/asm/mmu_context.h | 22 +++++++++++++--------- 2 files changed, 14 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 650344d01124..c4cd5081d78b 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -132,11 +132,9 @@ static inline void efi_set_pgd(struct mm_struct *mm) * Defer the switch to the current thread's TTBR0_EL1 * until uaccess_enable(). Restore the current * thread's saved ttbr0 corresponding to its active_mm - * (if different from init_mm). */ cpu_set_reserved_ttbr0(); - if (current->active_mm != &init_mm) - update_saved_ttbr0(current, current->active_mm); + update_saved_ttbr0(current, current->active_mm); } } } diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 3257895a9b5e..f7773f90546e 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -174,11 +174,17 @@ enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) static inline void update_saved_ttbr0(struct task_struct *tsk, struct mm_struct *mm) { - if (system_uses_ttbr0_pan()) { - BUG_ON(mm->pgd == swapper_pg_dir); - task_thread_info(tsk)->ttbr0 = - virt_to_phys(mm->pgd) | ASID(mm) << 48; - } + u64 ttbr; + + if (!system_uses_ttbr0_pan()) + return; + + if (mm == &init_mm) + ttbr = __pa_symbol(empty_zero_page); + else + ttbr = virt_to_phys(mm->pgd) | ASID(mm) << 48; + + task_thread_info(tsk)->ttbr0 = ttbr; } #else static inline void update_saved_ttbr0(struct task_struct *tsk, @@ -214,11 +220,9 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, * Update the saved TTBR0_EL1 of the scheduled-in task as the previous * value may have not been initialised yet (activate_mm caller) or the * ASID has changed since the last run (following the context switch - * of another thread of the same process). Avoid setting the reserved - * TTBR0_EL1 to swapper_pg_dir (init_mm; e.g. via idle_task_exit). + * of another thread of the same process). */ - if (next != &init_mm) - update_saved_ttbr0(tsk, next); + update_saved_ttbr0(tsk, next); } #define deactivate_mm(tsk,mm) do { } while (0) -- cgit v1.2.3 From d96cc49bff5a7735576cc6f6f111f875d101cec8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 6 Dec 2017 10:51:12 +0000 Subject: arm64: SW PAN: Update saved ttbr0 value on enter_lazy_tlb enter_lazy_tlb is called when a kernel thread rides on the back of another mm, due to a context switch or an explicit call to unuse_mm where a call to switch_mm is elided. In these cases, it's important to keep the saved ttbr value up to date with the active mm, otherwise we can end up with a stale value which points to a potentially freed page table. This patch implements enter_lazy_tlb for arm64, so that the saved ttbr0 is kept up-to-date with the active mm for kernel threads. Cc: Mark Rutland Cc: Ard Biesheuvel Cc: Vinayak Menon Cc: Fixes: 39bc88e5e38e9b21 ("arm64: Disable TTBR0_EL1 during normal kernel execution") Reviewed-by: Catalin Marinas Reviewed-by: Mark Rutland Reported-by: Vinayak Menon Signed-off-by: Will Deacon --- arch/arm64/include/asm/mmu_context.h | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index f7773f90546e..9d155fa9a507 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -156,20 +156,6 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu); #define init_new_context(tsk,mm) ({ atomic64_set(&(mm)->context.id, 0); 0; }) -/* - * This is called when "tsk" is about to enter lazy TLB mode. - * - * mm: describes the currently active mm context - * tsk: task which is entering lazy tlb - * cpu: cpu number which is entering lazy tlb - * - * tsk->mm will be NULL - */ -static inline void -enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) -{ -} - #ifdef CONFIG_ARM64_SW_TTBR0_PAN static inline void update_saved_ttbr0(struct task_struct *tsk, struct mm_struct *mm) @@ -193,6 +179,16 @@ static inline void update_saved_ttbr0(struct task_struct *tsk, } #endif +static inline void +enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) +{ + /* + * We don't actually care about the ttbr0 mapping, so point it at the + * zero page. + */ + update_saved_ttbr0(tsk, &init_mm); +} + static inline void __switch_mm(struct mm_struct *next) { unsigned int cpu = smp_processor_id(); -- cgit v1.2.3 From cb968afc789821cdf9e17e79ef08ab90e5bae0f2 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 6 Dec 2017 16:45:47 +0000 Subject: arm64/sve: Avoid dereference of dead task_struct in KVM guest entry When deciding whether to invalidate FPSIMD state cached in the cpu, the backend function sve_flush_cpu_state() attempts to dereference __this_cpu_read(fpsimd_last_state). However, this is not safe: there is no guarantee that this task_struct pointer is still valid, because the task could have exited in the meantime. This means that we need another means to get the appropriate value of TIF_SVE for the associated task. This patch solves this issue by adding a cached copy of the TIF_SVE flag in fpsimd_last_state, which we can check without dereferencing the task pointer. In particular, although this patch is not a KVM fix per se, this means that this check is now done safely in the KVM world switch path (which is currently the only user of this code). Signed-off-by: Dave Martin Cc: Ard Biesheuvel Cc: Christoffer Dall Cc: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index e330cc0b573d..540a1e010eb5 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -114,7 +114,12 @@ * returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so * whatever is in the FPSIMD registers is not saved to memory, but discarded. */ -static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state); +struct fpsimd_last_state_struct { + struct fpsimd_state *st; + bool sve_in_use; +}; + +static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state); /* Default VL for tasks that don't set it explicitly: */ static int sve_default_vl = -1; @@ -905,7 +910,7 @@ void fpsimd_thread_switch(struct task_struct *next) */ struct fpsimd_state *st = &next->thread.fpsimd_state; - if (__this_cpu_read(fpsimd_last_state) == st + if (__this_cpu_read(fpsimd_last_state.st) == st && st->cpu == smp_processor_id()) clear_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE); else @@ -997,9 +1002,12 @@ void fpsimd_signal_preserve_current_state(void) */ static void fpsimd_bind_to_cpu(void) { + struct fpsimd_last_state_struct *last = + this_cpu_ptr(&fpsimd_last_state); struct fpsimd_state *st = ¤t->thread.fpsimd_state; - __this_cpu_write(fpsimd_last_state, st); + last->st = st; + last->sve_in_use = test_thread_flag(TIF_SVE); st->cpu = smp_processor_id(); } @@ -1057,7 +1065,7 @@ void fpsimd_flush_task_state(struct task_struct *t) static inline void fpsimd_flush_cpu_state(void) { - __this_cpu_write(fpsimd_last_state, NULL); + __this_cpu_write(fpsimd_last_state.st, NULL); } /* @@ -1070,14 +1078,10 @@ static inline void fpsimd_flush_cpu_state(void) #ifdef CONFIG_ARM64_SVE void sve_flush_cpu_state(void) { - struct fpsimd_state *const fpstate = __this_cpu_read(fpsimd_last_state); - struct task_struct *tsk; - - if (!fpstate) - return; + struct fpsimd_last_state_struct const *last = + this_cpu_ptr(&fpsimd_last_state); - tsk = container_of(fpstate, struct task_struct, thread.fpsimd_state); - if (test_tsk_thread_flag(tsk, TIF_SVE)) + if (last->st && last->sve_in_use) fpsimd_flush_cpu_state(); } #endif /* CONFIG_ARM64_SVE */ @@ -1272,7 +1276,7 @@ static inline void fpsimd_pm_init(void) { } #ifdef CONFIG_HOTPLUG_CPU static int fpsimd_cpu_dead(unsigned int cpu) { - per_cpu(fpsimd_last_state, cpu) = NULL; + per_cpu(fpsimd_last_state.st, cpu) = NULL; return 0; } -- cgit v1.2.3