From 7d7b720a4b8049446cffce870b1dd3ffa89d4b40 Mon Sep 17 00:00:00 2001 From: "Madhavan T. Venkataraman" Date: Mon, 10 May 2021 12:00:26 +0100 Subject: arm64: Implement stack trace termination record Reliable stacktracing requires that we identify when a stacktrace is terminated early. We can do this by ensuring all tasks have a final frame record at a known location on their task stack, and checking that this is the final frame record in the chain. We'd like to use task_pt_regs(task)->stackframe as the final frame record, as this is already setup upon exception entry from EL0. For kernel tasks we need to consistently reserve the pt_regs and point x29 at this, which we can do with small changes to __primary_switched, __secondary_switched, and copy_process(). Since the final frame record must be at a specific location, we must create the final frame record in __primary_switched and __secondary_switched rather than leaving this to start_kernel and secondary_start_kernel. Thus, __primary_switched and __secondary_switched will now show up in stacktraces for the idle tasks. Since the final frame record is now identified by its location rather than by its contents, we identify it at the start of unwind_frame(), before we read any values from it. External debuggers may terminate the stack trace when FP == 0. In the pt_regs->stackframe, the PC is 0 as well. So, stack traces taken in the debugger may print an extra record 0x0 at the end. While this is not pretty, this does not do any harm. This is a small price to pay for having reliable stack trace termination in the kernel. That said, gdb does not show the extra record probably because it uses DWARF and not frame pointers for stack traces. Signed-off-by: Madhavan T. Venkataraman Reviewed-by: Mark Brown [Mark: rebase, use ASM_BUG(), update comments, update commit message] Signed-off-by: Mark Rutland Link: https://lore.kernel.org/r/20210510110026.18061-1-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/process.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/arm64/kernel/process.c') diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index b4bb67f17a2c..8928fba54e4b 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -435,6 +435,11 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, } p->thread.cpu_context.pc = (unsigned long)ret_from_fork; p->thread.cpu_context.sp = (unsigned long)childregs; + /* + * For the benefit of the unwinder, set up childregs->stackframe + * as the final frame for the new task. + */ + p->thread.cpu_context.fp = (unsigned long)childregs->stackframe; ptrace_hw_copy_thread(p); -- cgit v1.2.3 From 33a3581a76f3a36c7dcc9864120ce681bcfbcff1 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 7 Jun 2021 10:46:08 +0100 Subject: arm64: entry: move arm64_preempt_schedule_irq to entry-common.c Subsequent patches will pull more of the IRQ entry handling into C. To keep this in one place, let's move arm64_preempt_schedule_irq() into entry-common.c along with the other entry management functions. We no longer need to include in process.c, so the include directive is removed. There should be no functional change as a result of this patch. Reviewed-by Joey Gouly Signed-off-by: Mark Rutland Acked-by: Catalin Marinas Acked-by: Marc Zyngier Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20210607094624.34689-5-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/entry-common.c | 20 ++++++++++++++++++++ arch/arm64/kernel/process.c | 17 ----------------- 2 files changed, 20 insertions(+), 17 deletions(-) (limited to 'arch/arm64/kernel/process.c') diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 3b7943721077..1fe60578e556 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -6,7 +6,11 @@ */ #include +#include +#include #include +#include +#include #include #include @@ -113,6 +117,22 @@ asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs) exit_to_kernel_mode(regs); } +asmlinkage void __sched arm64_preempt_schedule_irq(void) +{ + lockdep_assert_irqs_disabled(); + + /* + * Preempting a task from an IRQ means we leave copies of PSTATE + * on the stack. cpufeature's enable calls may modify PSTATE, but + * resuming one of these preempted tasks would undo those changes. + * + * Only allow a task to be preempted once cpufeatures have been + * enabled. + */ + if (system_capabilities_finalized()) + preempt_schedule_irq(); +} + #ifdef CONFIG_ARM64_ERRATUM_1463225 static DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa); diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index b4bb67f17a2c..2e7337709155 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -724,22 +723,6 @@ static int __init tagged_addr_init(void) core_initcall(tagged_addr_init); #endif /* CONFIG_ARM64_TAGGED_ADDR_ABI */ -asmlinkage void __sched arm64_preempt_schedule_irq(void) -{ - lockdep_assert_irqs_disabled(); - - /* - * Preempting a task from an IRQ means we leave copies of PSTATE - * on the stack. cpufeature's enable calls may modify PSTATE, but - * resuming one of these preempted tasks would undo those changes. - * - * Only allow a task to be preempted once cpufeatures have been - * enabled. - */ - if (system_capabilities_finalized()) - preempt_schedule_irq(); -} - #ifdef CONFIG_BINFMT_ELF int arch_elf_adjust_prot(int prot, const struct arch_elf_state *state, bool has_interp, bool is_interp) -- cgit v1.2.3 From b5df5b8307b1db6d168ffac29eff3974779bb34b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 7 Jun 2021 10:46:24 +0100 Subject: arm64: idle: don't instrument idle code with KCOV The low-level idle code in arch_cpu_idle() and its callees runs at a time where where portions of the kernel environment aren't available. For example, RCU may not be watching, and lockdep state may be out-of-sync with the hardware. Due to this, it is not sound to instrument this code. We generally avoid instrumentation by marking the entry functions as `noinstr`, but currently this doesn't inhibit KCOV instrumentation. Prevent this by factoring these functions into a new idle.c so that we can disable KCOV for the entire compilation unit, as is done for the core idle code in kernel/sched/idle.c. We'd like to keep instrumentation of the rest of process.c, and for the existing code in cpuidle.c, so a new compilation unit is preferable. The arch_cpu_idle_dead() function in process.c is a cpu hotplug function that is safe to instrument, so it is left as-is in process.c. Signed-off-by: Mark Rutland Acked-by: Catalin Marinas Acked-by: Marc Zyngier Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20210607094624.34689-21-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/Makefile | 3 +- arch/arm64/kernel/idle.c | 69 +++++++++++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/process.c | 57 ------------------------------------- 3 files changed, 71 insertions(+), 58 deletions(-) create mode 100644 arch/arm64/kernel/idle.c (limited to 'arch/arm64/kernel/process.c') diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 787c3c83edd7..de434204d723 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -18,6 +18,7 @@ CFLAGS_syscall.o += -fno-stack-protector # available or are out-of-sync with HW state. Since `noinstr` doesn't always # inhibit KCOV instrumentation, disable it for the entire compilation unit. KCOV_INSTRUMENT_entry.o := n +KCOV_INSTRUMENT_idle.o := n # Object file lists. obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ @@ -27,7 +28,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ return_address.o cpuinfo.o cpu_errata.o \ cpufeature.o alternative.o cacheinfo.o \ smp.o smp_spin_table.o topology.o smccc-call.o \ - syscall.o proton-pack.o idreg-override.o + syscall.o proton-pack.o idreg-override.o idle.o targets += efi-entry.o diff --git a/arch/arm64/kernel/idle.c b/arch/arm64/kernel/idle.c new file mode 100644 index 000000000000..45c79204dc40 --- /dev/null +++ b/arch/arm64/kernel/idle.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Low-level idle sequences + */ + +#include +#include + +#include +#include +#include +#include + +static void noinstr __cpu_do_idle(void) +{ + dsb(sy); + wfi(); +} + +static void noinstr __cpu_do_idle_irqprio(void) +{ + unsigned long pmr; + unsigned long daif_bits; + + daif_bits = read_sysreg(daif); + write_sysreg(daif_bits | PSR_I_BIT | PSR_F_BIT, daif); + + /* + * Unmask PMR before going idle to make sure interrupts can + * be raised. + */ + pmr = gic_read_pmr(); + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + + __cpu_do_idle(); + + gic_write_pmr(pmr); + write_sysreg(daif_bits, daif); +} + +/* + * cpu_do_idle() + * + * Idle the processor (wait for interrupt). + * + * If the CPU supports priority masking we must do additional work to + * ensure that interrupts are not masked at the PMR (because the core will + * not wake up if we block the wake up signal in the interrupt controller). + */ +void noinstr cpu_do_idle(void) +{ + if (system_uses_irq_prio_masking()) + __cpu_do_idle_irqprio(); + else + __cpu_do_idle(); +} + +/* + * This is our default idle handler. + */ +void noinstr arch_cpu_idle(void) +{ + /* + * This should do all the clock switching and wait for interrupt + * tricks + */ + cpu_do_idle(); + raw_local_irq_enable(); +} diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 2e7337709155..72c5d80f03fa 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -73,63 +73,6 @@ EXPORT_SYMBOL_GPL(pm_power_off); void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); -static void noinstr __cpu_do_idle(void) -{ - dsb(sy); - wfi(); -} - -static void noinstr __cpu_do_idle_irqprio(void) -{ - unsigned long pmr; - unsigned long daif_bits; - - daif_bits = read_sysreg(daif); - write_sysreg(daif_bits | PSR_I_BIT | PSR_F_BIT, daif); - - /* - * Unmask PMR before going idle to make sure interrupts can - * be raised. - */ - pmr = gic_read_pmr(); - gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); - - __cpu_do_idle(); - - gic_write_pmr(pmr); - write_sysreg(daif_bits, daif); -} - -/* - * cpu_do_idle() - * - * Idle the processor (wait for interrupt). - * - * If the CPU supports priority masking we must do additional work to - * ensure that interrupts are not masked at the PMR (because the core will - * not wake up if we block the wake up signal in the interrupt controller). - */ -void noinstr cpu_do_idle(void) -{ - if (system_uses_irq_prio_masking()) - __cpu_do_idle_irqprio(); - else - __cpu_do_idle(); -} - -/* - * This is our default idle handler. - */ -void noinstr arch_cpu_idle(void) -{ - /* - * This should do all the clock switching and wait for interrupt - * tricks - */ - cpu_do_idle(); - raw_local_irq_enable(); -} - #ifdef CONFIG_HOTPLUG_CPU void arch_cpu_idle_dead(void) { -- cgit v1.2.3 From 873c3e89777c8c56f936ae7aceca1a102aac6b9e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 8 Jun 2021 19:02:57 +0100 Subject: arm64: Kill 32-bit applications scheduled on 64-bit-only CPUs Scheduling a 32-bit application on a 64-bit-only CPU is a bad idea. Ensure that 32-bit applications always take the slow-path when returning to userspace on a system with mismatched support at EL0, so that we can avoid trying to run on a 64-bit-only CPU and force a SIGKILL instead. Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20210608180313.11502-5-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/process.c | 19 ++++++++++++++++++- arch/arm64/kernel/signal.c | 26 ++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) (limited to 'arch/arm64/kernel/process.c') diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index b4bb67f17a2c..f4a91bf1ce0c 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -527,6 +527,15 @@ static void erratum_1418040_thread_switch(struct task_struct *prev, write_sysreg(val, cntkctl_el1); } +static void compat_thread_switch(struct task_struct *next) +{ + if (!is_compat_thread(task_thread_info(next))) + return; + + if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) + set_tsk_thread_flag(next, TIF_NOTIFY_RESUME); +} + static void update_sctlr_el1(u64 sctlr) { /* @@ -568,6 +577,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, ssbs_thread_switch(next); erratum_1418040_thread_switch(prev, next); ptrauth_thread_switch_user(next); + compat_thread_switch(next); /* * Complete any pending TLB or cache maintenance on this CPU in case @@ -633,8 +643,15 @@ unsigned long arch_align_stack(unsigned long sp) */ void arch_setup_new_exec(void) { - current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0; + unsigned long mmflags = 0; + + if (is_compat_task()) { + mmflags = MMCF_AARCH32; + if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) + set_tsk_thread_flag(current, TIF_NOTIFY_RESUME); + } + current->mm->context.flags = mmflags; ptrauth_thread_init_user(); mte_thread_init_user(); diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 6237486ff6bb..f8192f4ae0b8 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -911,6 +911,19 @@ static void do_signal(struct pt_regs *regs) restore_saved_sigmask(); } +static bool cpu_affinity_invalid(struct pt_regs *regs) +{ + if (!compat_user_mode(regs)) + return false; + + /* + * We're preemptible, but a reschedule will cause us to check the + * affinity again. + */ + return !cpumask_test_cpu(raw_smp_processor_id(), + system_32bit_el0_cpumask()); +} + asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags) { @@ -938,6 +951,19 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, if (thread_flags & _TIF_NOTIFY_RESUME) { tracehook_notify_resume(regs); rseq_handle_notify_resume(NULL, regs); + + /* + * If we reschedule after checking the affinity + * then we must ensure that TIF_NOTIFY_RESUME + * is set so that we check the affinity again. + * Since tracehook_notify_resume() clears the + * flag, ensure that the compiler doesn't move + * it after the affinity check. + */ + barrier(); + + if (cpu_affinity_invalid(regs)) + force_sig(SIGKILL); } if (thread_flags & _TIF_FOREIGN_FPSTATE) -- cgit v1.2.3 From d4dc10277255afc303de4f00cbee0b9ce74d870f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 15 Jun 2021 12:12:25 +0100 Subject: arm64: Convert cpu_do_idle() to using cpuidle context helpers Now that we have helpers that are aware of the pseudo-NMI feature, introduce them to cpu_do_idle(). This allows for some nice cleanup. No functional change intended. Tested-by: Valentin Schneider Reviewed-by: Lorenzo Pieralisi Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210615111227.2454465-3-maz@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/process.c | 41 +++++++++-------------------------------- 1 file changed, 9 insertions(+), 32 deletions(-) (limited to 'arch/arm64/kernel/process.c') diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index b4bb67f17a2c..b715c6b2558f 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -46,9 +46,9 @@ #include #include -#include #include #include +#include #include #include #include @@ -74,33 +74,6 @@ EXPORT_SYMBOL_GPL(pm_power_off); void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); -static void noinstr __cpu_do_idle(void) -{ - dsb(sy); - wfi(); -} - -static void noinstr __cpu_do_idle_irqprio(void) -{ - unsigned long pmr; - unsigned long daif_bits; - - daif_bits = read_sysreg(daif); - write_sysreg(daif_bits | PSR_I_BIT | PSR_F_BIT, daif); - - /* - * Unmask PMR before going idle to make sure interrupts can - * be raised. - */ - pmr = gic_read_pmr(); - gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); - - __cpu_do_idle(); - - gic_write_pmr(pmr); - write_sysreg(daif_bits, daif); -} - /* * cpu_do_idle() * @@ -112,10 +85,14 @@ static void noinstr __cpu_do_idle_irqprio(void) */ void noinstr cpu_do_idle(void) { - if (system_uses_irq_prio_masking()) - __cpu_do_idle_irqprio(); - else - __cpu_do_idle(); + struct arm_cpuidle_irq_context context; + + arm_cpuidle_save_irq_context(&context); + + dsb(sy); + wfi(); + + arm_cpuidle_restore_irq_context(&context); } /* -- cgit v1.2.3