Diffstat (limited to 'arch/arm64/kernel')
30 files changed, 776 insertions, 625 deletions
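A large share of the cpufeature.c, process.c and signal.c changes below wire up support for systems where only some CPUs can run 32-bit EL0 code, gated by a new static key (arm64_mismatched_32bit_el0) and a cpumask of 32-bit-capable CPUs. As a rough illustration only (not part of the patch; the function name is hypothetical and assumes a kernel context), a caller could use the new system_32bit_el0_cpumask() helper to ask whether the current CPU can run a compat task, which is essentially the cpu_affinity_invalid() check the series adds to signal.c:

static bool cpu_can_run_compat_task(void)
{
	/*
	 * system_32bit_el0_cpumask() returns cpu_none_mask when no CPU
	 * supports AArch32 at EL0, cpu_possible_mask when all of them do,
	 * and the tracked cpu_32bit_el0_mask on mismatched systems.
	 */
	return cpumask_test_cpu(raw_smp_processor_id(),
				system_32bit_el0_cpumask());
}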
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 03e8311ce576..cce308586fcc 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -14,6 +14,12 @@ CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_syscall.o = -fstack-protector -fstack-protector-strong CFLAGS_syscall.o += -fno-stack-protector +# It's not safe to invoke KCOV when portions of the kernel environment aren't +# available or are out-of-sync with HW state. Since `noinstr` doesn't always +# inhibit KCOV instrumentation, disable it for the entire compilation unit. +KCOV_INSTRUMENT_entry.o := n +KCOV_INSTRUMENT_idle.o := n + # Object file lists. obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ entry-common.o entry-fpsimd.o process.o ptrace.o \ @@ -22,7 +28,8 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ return_address.o cpuinfo.o cpu_errata.o \ cpufeature.o alternative.o cacheinfo.o \ smp.o smp_spin_table.o topology.o smccc-call.o \ - syscall.o proton-pack.o idreg-override.o patching.o + syscall.o proton-pack.o idreg-override.o idle.o \ + patching.o targets += efi-entry.o diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index c906d20c7b52..3fb79b76e9d9 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -181,7 +181,7 @@ static void __nocfi __apply_alternatives(struct alt_region *region, bool is_modu */ if (!is_module) { dsb(ish); - __flush_icache_all(); + icache_inval_all_pou(); isb(); /* Ignore ARM64_CB bit from feature mask */ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 0cb34ccb6e73..bd0fc23d8719 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -27,6 +27,7 @@ int main(void) { DEFINE(TSK_ACTIVE_MM, offsetof(struct task_struct, active_mm)); + DEFINE(TSK_CPU, offsetof(struct task_struct, cpu)); BLANK(); DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags)); DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count)); @@ -99,7 +100,6 @@ int main(void) DEFINE(SOFTIRQ_SHIFT, SOFTIRQ_SHIFT); DEFINE(IRQ_CPUSTAT_SOFTIRQ_PENDING, offsetof(irq_cpustat_t, __softirq_pending)); BLANK(); - DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack)); DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task)); BLANK(); DEFINE(FTR_OVR_VAL_OFFSET, offsetof(struct arm64_ftr_override, val)); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 16d35cfffcea..125d5c9471ac 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -109,6 +109,24 @@ bool arm64_use_ng_mappings = false; EXPORT_SYMBOL(arm64_use_ng_mappings); /* + * Permit PER_LINUX32 and execve() of 32-bit binaries even if not all CPUs + * support it? + */ +static bool __read_mostly allow_mismatched_32bit_el0; + +/* + * Static branch enabled only if allow_mismatched_32bit_el0 is set and we have + * seen at least one CPU capable of 32-bit EL0. + */ +DEFINE_STATIC_KEY_FALSE(arm64_mismatched_32bit_el0); + +/* + * Mask of CPUs supporting 32-bit EL0. + * Only valid if arm64_mismatched_32bit_el0 is enabled. + */ +static cpumask_var_t cpu_32bit_el0_mask __cpumask_var_read_mostly; + +/* * Flag to indicate if we have computed the system wide * capabilities based on the boot time active CPUs. 
This * will be used to determine if a new booting CPU should @@ -401,6 +419,11 @@ static const struct arm64_ftr_bits ftr_dczid[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_gmid[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, SYS_GMID_EL1_BS_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_isar0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DIVIDE_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DEBUG_SHIFT, 4, 0), @@ -618,6 +641,9 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 1, CRm = 2 */ ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr), + /* Op1 = 1, CRn = 0, CRm = 0 */ + ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid), + /* Op1 = 3, CRn = 0, CRm = 0 */ { SYS_CTR_EL0, &arm64_ftr_reg_ctrel0 }, ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid), @@ -768,7 +794,7 @@ static void __init sort_ftr_regs(void) * Any bits that are not covered by an arm64_ftr_bits entry are considered * RES0 for the system-wide value, and must strictly match. */ -static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) +static void init_cpu_ftr_reg(u32 sys_reg, u64 new) { u64 val = 0; u64 strict_mask = ~0x0ULL; @@ -864,6 +890,31 @@ static void __init init_cpu_hwcaps_indirect_list(void) static void __init setup_boot_cpu_capabilities(void); +static void init_32bit_cpu_features(struct cpuinfo_32bit *info) +{ + init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0); + init_cpu_ftr_reg(SYS_ID_DFR1_EL1, info->reg_id_dfr1); + init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0); + init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1); + init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2); + init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3); + init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4); + init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5); + init_cpu_ftr_reg(SYS_ID_ISAR6_EL1, info->reg_id_isar6); + init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0); + init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1); + init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2); + init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3); + init_cpu_ftr_reg(SYS_ID_MMFR4_EL1, info->reg_id_mmfr4); + init_cpu_ftr_reg(SYS_ID_MMFR5_EL1, info->reg_id_mmfr5); + init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0); + init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1); + init_cpu_ftr_reg(SYS_ID_PFR2_EL1, info->reg_id_pfr2); + init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0); + init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1); + init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2); +} + void __init init_cpu_features(struct cpuinfo_arm64 *info) { /* Before we start using the tables, make sure it is sorted */ @@ -883,35 +934,17 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1); init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0); - if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { - init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0); - init_cpu_ftr_reg(SYS_ID_DFR1_EL1, info->reg_id_dfr1); - init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0); - init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1); - init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2); - init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3); - init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4); - init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5); - init_cpu_ftr_reg(SYS_ID_ISAR6_EL1, info->reg_id_isar6); - init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0); - 
init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1); - init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2); - init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3); - init_cpu_ftr_reg(SYS_ID_MMFR4_EL1, info->reg_id_mmfr4); - init_cpu_ftr_reg(SYS_ID_MMFR5_EL1, info->reg_id_mmfr5); - init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0); - init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1); - init_cpu_ftr_reg(SYS_ID_PFR2_EL1, info->reg_id_pfr2); - init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0); - init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1); - init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2); - } + if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) + init_32bit_cpu_features(&info->aarch32); if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) { init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr); sve_init_vq_map(); } + if (id_aa64pfr1_mte(info->reg_id_aa64pfr1)) + init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid); + /* * Initialize the indirect array of CPU hwcaps capabilities pointers * before we handle the boot CPU below. @@ -976,21 +1009,29 @@ static void relax_cpu_ftr_reg(u32 sys_id, int field) WARN_ON(!ftrp->width); } -static int update_32bit_cpu_features(int cpu, struct cpuinfo_arm64 *info, - struct cpuinfo_arm64 *boot) +static void lazy_init_32bit_cpu_features(struct cpuinfo_arm64 *info, + struct cpuinfo_arm64 *boot) +{ + static bool boot_cpu_32bit_regs_overridden = false; + + if (!allow_mismatched_32bit_el0 || boot_cpu_32bit_regs_overridden) + return; + + if (id_aa64pfr0_32bit_el0(boot->reg_id_aa64pfr0)) + return; + + boot->aarch32 = info->aarch32; + init_32bit_cpu_features(&boot->aarch32); + boot_cpu_32bit_regs_overridden = true; +} + +static int update_32bit_cpu_features(int cpu, struct cpuinfo_32bit *info, + struct cpuinfo_32bit *boot) { int taint = 0; u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); /* - * If we don't have AArch32 at all then skip the checks entirely - * as the register values may be UNKNOWN and we're not going to be - * using them for anything. - */ - if (!id_aa64pfr0_32bit_el0(pfr0)) - return taint; - - /* * If we don't have AArch32 at EL1, then relax the strictness of * EL1-dependent register fields to avoid spurious sanity check fails. */ @@ -1136,10 +1177,29 @@ void update_cpu_features(int cpu, } /* + * The kernel uses the LDGM/STGM instructions and the number of tags + * they read/write depends on the GMID_EL1.BS field. Check that the + * value is the same on all CPUs. + */ + if (IS_ENABLED(CONFIG_ARM64_MTE) && + id_aa64pfr1_mte(info->reg_id_aa64pfr1)) { + taint |= check_update_ftr_reg(SYS_GMID_EL1, cpu, + info->reg_gmid, boot->reg_gmid); + } + + /* + * If we don't have AArch32 at all then skip the checks entirely + * as the register values may be UNKNOWN and we're not going to be + * using them for anything. + * * This relies on a sanitised view of the AArch64 ID registers * (e.g. SYS_ID_AA64PFR0_EL1), so we call it last. */ - taint |= update_32bit_cpu_features(cpu, info, boot); + if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { + lazy_init_32bit_cpu_features(info, boot); + taint |= update_32bit_cpu_features(cpu, &info->aarch32, + &boot->aarch32); + } /* * Mismatched CPU features are a recipe for disaster. 
Don't even @@ -1249,6 +1309,28 @@ has_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope) return feature_matches(val, entry); } +const struct cpumask *system_32bit_el0_cpumask(void) +{ + if (!system_supports_32bit_el0()) + return cpu_none_mask; + + if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) + return cpu_32bit_el0_mask; + + return cpu_possible_mask; +} + +static bool has_32bit_el0(const struct arm64_cpu_capabilities *entry, int scope) +{ + if (!has_cpuid_feature(entry, scope)) + return allow_mismatched_32bit_el0; + + if (scope == SCOPE_SYSTEM) + pr_info("detected: 32-bit EL0 Support\n"); + + return true; +} + static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry, int scope) { bool has_sre; @@ -1867,10 +1949,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_copy_el2regs, }, { - .desc = "32-bit EL0 Support", - .capability = ARM64_HAS_32BIT_EL0, + .capability = ARM64_HAS_32BIT_EL0_DO_NOT_USE, .type = ARM64_CPUCAP_SYSTEM_FEATURE, - .matches = has_cpuid_feature, + .matches = has_32bit_el0, .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_EL0_SHIFT, @@ -2379,7 +2460,7 @@ static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = { {}, }; -static void __init cap_set_elf_hwcap(const struct arm64_cpu_capabilities *cap) +static void cap_set_elf_hwcap(const struct arm64_cpu_capabilities *cap) { switch (cap->hwcap_type) { case CAP_HWCAP: @@ -2424,7 +2505,7 @@ static bool cpus_have_elf_hwcap(const struct arm64_cpu_capabilities *cap) return rc; } -static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps) +static void setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps) { /* We support emulation of accesses to CPU ID feature registers */ cpu_set_named_feature(CPUID); @@ -2599,7 +2680,7 @@ static void check_early_cpu_features(void) } static void -verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps) +__verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps) { for (; caps->matches; caps++) @@ -2610,6 +2691,14 @@ verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps) } } +static void verify_local_elf_hwcaps(void) +{ + __verify_local_elf_hwcaps(arm64_elf_hwcaps); + + if (id_aa64pfr0_32bit_el0(read_cpuid(ID_AA64PFR0_EL1))) + __verify_local_elf_hwcaps(compat_elf_hwcaps); +} + static void verify_sve_features(void) { u64 safe_zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1); @@ -2674,11 +2763,7 @@ static void verify_local_cpu_capabilities(void) * on all secondary CPUs. 
*/ verify_local_cpu_caps(SCOPE_ALL & ~SCOPE_BOOT_CPU); - - verify_local_elf_hwcaps(arm64_elf_hwcaps); - - if (system_supports_32bit_el0()) - verify_local_elf_hwcaps(compat_elf_hwcaps); + verify_local_elf_hwcaps(); if (system_supports_sve()) verify_sve_features(); @@ -2813,6 +2898,34 @@ void __init setup_cpu_features(void) ARCH_DMA_MINALIGN); } +static int enable_mismatched_32bit_el0(unsigned int cpu) +{ + struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu); + bool cpu_32bit = id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0); + + if (cpu_32bit) { + cpumask_set_cpu(cpu, cpu_32bit_el0_mask); + static_branch_enable_cpuslocked(&arm64_mismatched_32bit_el0); + setup_elf_hwcaps(compat_elf_hwcaps); + } + + return 0; +} + +static int __init init_32bit_el0_mask(void) +{ + if (!allow_mismatched_32bit_el0) + return 0; + + if (!zalloc_cpumask_var(&cpu_32bit_el0_mask, GFP_KERNEL)) + return -ENOMEM; + + return cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "arm64/mismatched_32bit_el0:online", + enable_mismatched_32bit_el0, NULL); +} +subsys_initcall_sync(init_32bit_el0_mask); + static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap) { cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); @@ -2906,8 +3019,8 @@ static int emulate_mrs(struct pt_regs *regs, u32 insn) } static struct undef_hook mrs_hook = { - .instr_mask = 0xfff00000, - .instr_val = 0xd5300000, + .instr_mask = 0xffff0000, + .instr_val = 0xd5380000, .pstate_mask = PSR_AA32_MODE_MASK, .pstate_val = PSR_MODE_EL0t, .fn = emulate_mrs, diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 51fcf99d5351..87731fea5e41 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -246,7 +246,7 @@ static struct kobj_type cpuregs_kobj_type = { struct cpuinfo_arm64 *info = kobj_to_cpuinfo(kobj); \ \ if (info->reg_midr) \ - return sprintf(buf, "0x%016x\n", info->reg_##_field); \ + return sprintf(buf, "0x%016llx\n", info->reg_##_field); \ else \ return 0; \ } \ @@ -344,6 +344,32 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu); } +static void __cpuinfo_store_cpu_32bit(struct cpuinfo_32bit *info) +{ + info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1); + info->reg_id_dfr1 = read_cpuid(ID_DFR1_EL1); + info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1); + info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1); + info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1); + info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1); + info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1); + info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1); + info->reg_id_isar6 = read_cpuid(ID_ISAR6_EL1); + info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1); + info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1); + info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1); + info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1); + info->reg_id_mmfr4 = read_cpuid(ID_MMFR4_EL1); + info->reg_id_mmfr5 = read_cpuid(ID_MMFR5_EL1); + info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1); + info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1); + info->reg_id_pfr2 = read_cpuid(ID_PFR2_EL1); + + info->reg_mvfr0 = read_cpuid(MVFR0_EL1); + info->reg_mvfr1 = read_cpuid(MVFR1_EL1); + info->reg_mvfr2 = read_cpuid(MVFR2_EL1); +} + static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) { info->reg_cntfrq = arch_timer_get_cntfrq(); @@ -371,31 +397,11 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1); info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1); - /* Update the 32bit ID registers only if 
AArch32 is implemented */ - if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { - info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1); - info->reg_id_dfr1 = read_cpuid(ID_DFR1_EL1); - info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1); - info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1); - info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1); - info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1); - info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1); - info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1); - info->reg_id_isar6 = read_cpuid(ID_ISAR6_EL1); - info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1); - info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1); - info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1); - info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1); - info->reg_id_mmfr4 = read_cpuid(ID_MMFR4_EL1); - info->reg_id_mmfr5 = read_cpuid(ID_MMFR5_EL1); - info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1); - info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1); - info->reg_id_pfr2 = read_cpuid(ID_PFR2_EL1); - - info->reg_mvfr0 = read_cpuid(MVFR0_EL1); - info->reg_mvfr1 = read_cpuid(MVFR1_EL1); - info->reg_mvfr2 = read_cpuid(MVFR2_EL1); - } + if (id_aa64pfr1_mte(info->reg_id_aa64pfr1)) + info->reg_gmid = read_cpuid(GMID_EL1); + + if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) + __cpuinfo_store_cpu_32bit(&info->aarch32); if (IS_ENABLED(CONFIG_ARM64_SVE) && id_aa64pfr0_sve(info->reg_id_aa64pfr0)) diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index 0073b24b5d25..61a87fa1c305 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -28,7 +28,8 @@ SYM_CODE_START(efi_enter_kernel) * stale icache entries from before relocation. */ ldr w1, =kernel_size - bl __clean_dcache_area_poc + add x1, x0, x1 + bl dcache_clean_poc ic ialluis /* @@ -36,8 +37,8 @@ SYM_CODE_START(efi_enter_kernel) * so that we can safely disable the MMU and caches. */ adr x0, 0f - ldr w1, 3f - bl __clean_dcache_area_poc + adr x1, 3f + bl dcache_clean_poc 0: /* Turn off Dcache and MMU */ mrs x0, CurrentEL @@ -64,5 +65,5 @@ SYM_CODE_START(efi_enter_kernel) mov x2, xzr mov x3, xzr br x19 +3: SYM_CODE_END(efi_enter_kernel) -3: .long . 
- 0b diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 340d04e13617..12ce14a98b7c 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -6,7 +6,11 @@ */ #include <linux/context_tracking.h> +#include <linux/linkage.h> +#include <linux/lockdep.h> #include <linux/ptrace.h> +#include <linux/sched.h> +#include <linux/sched/debug.h> #include <linux/thread_info.h> #include <asm/cpufeature.h> @@ -15,7 +19,11 @@ #include <asm/exception.h> #include <asm/kprobes.h> #include <asm/mmu.h> +#include <asm/processor.h> +#include <asm/sdei.h> +#include <asm/stacktrace.h> #include <asm/sysreg.h> +#include <asm/system_misc.h> /* * This is intended to match the logic in irqentry_enter(), handling the kernel @@ -67,7 +75,7 @@ static void noinstr exit_to_kernel_mode(struct pt_regs *regs) } } -void noinstr arm64_enter_nmi(struct pt_regs *regs) +static void noinstr arm64_enter_nmi(struct pt_regs *regs) { regs->lockdep_hardirqs = lockdep_hardirqs_enabled(); @@ -80,7 +88,7 @@ void noinstr arm64_enter_nmi(struct pt_regs *regs) ftrace_nmi_enter(); } -void noinstr arm64_exit_nmi(struct pt_regs *regs) +static void noinstr arm64_exit_nmi(struct pt_regs *regs) { bool restore = regs->lockdep_hardirqs; @@ -97,7 +105,7 @@ void noinstr arm64_exit_nmi(struct pt_regs *regs) __nmi_exit(); } -asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs) +static void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs) { if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) arm64_enter_nmi(regs); @@ -105,7 +113,7 @@ asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs) enter_from_kernel_mode(regs); } -asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs) +static void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs) { if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) arm64_exit_nmi(regs); @@ -113,6 +121,65 @@ asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs) exit_to_kernel_mode(regs); } +static void __sched arm64_preempt_schedule_irq(void) +{ + lockdep_assert_irqs_disabled(); + + /* + * DAIF.DA are cleared at the start of IRQ/FIQ handling, and when GIC + * priority masking is used the GIC irqchip driver will clear DAIF.IF + * using gic_arch_enable_irqs() for normal IRQs. If anything is set in + * DAIF we must have handled an NMI, so skip preemption. + */ + if (system_uses_irq_prio_masking() && read_sysreg(daif)) + return; + + /* + * Preempting a task from an IRQ means we leave copies of PSTATE + * on the stack. cpufeature's enable calls may modify PSTATE, but + * resuming one of these preempted tasks would undo those changes. + * + * Only allow a task to be preempted once cpufeatures have been + * enabled. 
+ */ + if (system_capabilities_finalized()) + preempt_schedule_irq(); +} + +static void do_interrupt_handler(struct pt_regs *regs, + void (*handler)(struct pt_regs *)) +{ + if (on_thread_stack()) + call_on_irq_stack(regs, handler); + else + handler(regs); +} + +extern void (*handle_arch_irq)(struct pt_regs *); +extern void (*handle_arch_fiq)(struct pt_regs *); + +static void noinstr __panic_unhandled(struct pt_regs *regs, const char *vector, + unsigned int esr) +{ + arm64_enter_nmi(regs); + + console_verbose(); + + pr_crit("Unhandled %s exception on CPU%d, ESR 0x%08x -- %s\n", + vector, smp_processor_id(), esr, + esr_get_class_string(esr)); + + __show_regs(regs); + panic("Unhandled exception"); +} + +#define UNHANDLED(el, regsize, vector) \ +asmlinkage void noinstr el##_##regsize##_##vector##_handler(struct pt_regs *regs) \ +{ \ + const char *desc = #regsize "-bit " #el " " #vector; \ + __panic_unhandled(regs, desc, read_sysreg(esr_el1)); \ +} + #ifdef CONFIG_ARM64_ERRATUM_1463225 static DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa); @@ -162,6 +229,11 @@ static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) } #endif /* CONFIG_ARM64_ERRATUM_1463225 */ +UNHANDLED(el1t, 64, sync) +UNHANDLED(el1t, 64, irq) +UNHANDLED(el1t, 64, fiq) +UNHANDLED(el1t, 64, error) + static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -193,15 +265,6 @@ static void noinstr el1_undef(struct pt_regs *regs) exit_to_kernel_mode(regs); } -static void noinstr el1_inv(struct pt_regs *regs, unsigned long esr) -{ - enter_from_kernel_mode(regs); - local_daif_inherit(regs); - bad_mode(regs, 0, esr); - local_daif_mask(); - exit_to_kernel_mode(regs); -} - static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs) { regs->lockdep_hardirqs = lockdep_hardirqs_enabled(); @@ -245,7 +308,7 @@ static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr) exit_to_kernel_mode(regs); } -asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs) +asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -275,10 +338,50 @@ asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs) el1_fpac(regs, esr); break; default: - el1_inv(regs, esr); + __panic_unhandled(regs, "64-bit el1h sync", esr); } } +static void noinstr el1_interrupt(struct pt_regs *regs, + void (*handler)(struct pt_regs *)) +{ + write_sysreg(DAIF_PROCCTX_NOIRQ, daif); + + enter_el1_irq_or_nmi(regs); + do_interrupt_handler(regs, handler); + + /* + * Note: thread_info::preempt_count includes both thread_info::count + * and thread_info::need_resched, and is not equivalent to + * preempt_count(). 
+ */ + if (IS_ENABLED(CONFIG_PREEMPTION) && + READ_ONCE(current_thread_info()->preempt_count) == 0) + arm64_preempt_schedule_irq(); + + exit_el1_irq_or_nmi(regs); +} + +asmlinkage void noinstr el1h_64_irq_handler(struct pt_regs *regs) +{ + el1_interrupt(regs, handle_arch_irq); +} + +asmlinkage void noinstr el1h_64_fiq_handler(struct pt_regs *regs) +{ + el1_interrupt(regs, handle_arch_fiq); +} + +asmlinkage void noinstr el1h_64_error_handler(struct pt_regs *regs) +{ + unsigned long esr = read_sysreg(esr_el1); + + local_daif_restore(DAIF_ERRCTX); + arm64_enter_nmi(regs); + do_serror(regs, esr); + arm64_exit_nmi(regs); +} + asmlinkage void noinstr enter_from_user_mode(void) { lockdep_hardirqs_off(CALLER_ADDR0); @@ -398,7 +501,7 @@ static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr) enter_from_user_mode(); do_debug_exception(far, esr, regs); - local_daif_restore(DAIF_PROCCTX_NOIRQ); + local_daif_restore(DAIF_PROCCTX); } static void noinstr el0_svc(struct pt_regs *regs) @@ -415,7 +518,7 @@ static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr) do_ptrauth_fault(regs, esr); } -asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs) +asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -468,6 +571,56 @@ asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs) } } +static void noinstr el0_interrupt(struct pt_regs *regs, + void (*handler)(struct pt_regs *)) +{ + enter_from_user_mode(); + + write_sysreg(DAIF_PROCCTX_NOIRQ, daif); + + if (regs->pc & BIT(55)) + arm64_apply_bp_hardening(); + + do_interrupt_handler(regs, handler); +} + +static void noinstr __el0_irq_handler_common(struct pt_regs *regs) +{ + el0_interrupt(regs, handle_arch_irq); +} + +asmlinkage void noinstr el0t_64_irq_handler(struct pt_regs *regs) +{ + __el0_irq_handler_common(regs); +} + +static void noinstr __el0_fiq_handler_common(struct pt_regs *regs) +{ + el0_interrupt(regs, handle_arch_fiq); +} + +asmlinkage void noinstr el0t_64_fiq_handler(struct pt_regs *regs) +{ + __el0_fiq_handler_common(regs); +} + +static void __el0_error_handler_common(struct pt_regs *regs) +{ + unsigned long esr = read_sysreg(esr_el1); + + enter_from_user_mode(); + local_daif_restore(DAIF_ERRCTX); + arm64_enter_nmi(regs); + do_serror(regs, esr); + arm64_exit_nmi(regs); + local_daif_restore(DAIF_PROCCTX); +} + +asmlinkage void noinstr el0t_64_error_handler(struct pt_regs *regs) +{ + __el0_error_handler_common(regs); +} + #ifdef CONFIG_COMPAT static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr) { @@ -483,7 +636,7 @@ static void noinstr el0_svc_compat(struct pt_regs *regs) do_el0_svc_compat(regs); } -asmlinkage void noinstr el0_sync_compat_handler(struct pt_regs *regs) +asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -526,4 +679,71 @@ asmlinkage void noinstr el0_sync_compat_handler(struct pt_regs *regs) el0_inv(regs, esr); } } + +asmlinkage void noinstr el0t_32_irq_handler(struct pt_regs *regs) +{ + __el0_irq_handler_common(regs); +} + +asmlinkage void noinstr el0t_32_fiq_handler(struct pt_regs *regs) +{ + __el0_fiq_handler_common(regs); +} + +asmlinkage void noinstr el0t_32_error_handler(struct pt_regs *regs) +{ + __el0_error_handler_common(regs); +} +#else /* CONFIG_COMPAT */ +UNHANDLED(el0t, 32, sync) +UNHANDLED(el0t, 32, irq) +UNHANDLED(el0t, 32, fiq) +UNHANDLED(el0t, 32, error) #endif /* CONFIG_COMPAT */ + +#ifdef CONFIG_VMAP_STACK +asmlinkage void noinstr 
handle_bad_stack(struct pt_regs *regs) +{ + unsigned int esr = read_sysreg(esr_el1); + unsigned long far = read_sysreg(far_el1); + + arm64_enter_nmi(regs); + panic_bad_stack(regs, esr, far); +} +#endif /* CONFIG_VMAP_STACK */ + +#ifdef CONFIG_ARM_SDE_INTERFACE +asmlinkage noinstr unsigned long +__sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg) +{ + unsigned long ret; + + /* + * We didn't take an exception to get here, so the HW hasn't + * set/cleared bits in PSTATE that we may rely on. + * + * The original SDEI spec (ARM DEN 0054A) can be read ambiguously as to + * whether PSTATE bits are inherited unchanged or generated from + * scratch, and the TF-A implementation always clears PAN and always + * clears UAO. There are no other known implementations. + * + * Subsequent revisions (ARM DEN 0054B) follow the usual rules for how + * PSTATE is modified upon architectural exceptions, and so PAN is + * either inherited or set per SCTLR_ELx.SPAN, and UAO is always + * cleared. + * + * We must explicitly reset PAN to the expected state, including + * clearing it when the host isn't using it, in case a VM had it set. + */ + if (system_uses_hw_pan()) + set_pstate_pan(1); + else if (cpu_has_pan()) + set_pstate_pan(0); + + arm64_enter_nmi(regs); + ret = do_sdei_event(regs, arg); + arm64_exit_nmi(regs); + + return ret; +} +#endif /* CONFIG_ARM_SDE_INTERFACE */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 3513984a88bd..863d44f73028 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -33,12 +33,6 @@ * Context tracking and irqflag tracing need to instrument transitions between * user and kernel mode. */ - .macro user_exit_irqoff -#if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS) - bl enter_from_user_mode -#endif - .endm - .macro user_enter_irqoff #if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS) bl exit_to_user_mode @@ -51,16 +45,7 @@ .endr .endm -/* - * Bad Abort numbers - *----------------- - */ -#define BAD_SYNC 0 -#define BAD_IRQ 1 -#define BAD_FIQ 2 -#define BAD_ERROR 3 - - .macro kernel_ventry, el, label, regsize = 64 + .macro kernel_ventry, el:req, ht:req, regsize:req, label:req .align 7 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 .if \el == 0 @@ -87,7 +72,7 @@ alternative_else_nop_endif tbnz x0, #THREAD_SHIFT, 0f sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0 sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp - b el\()\el\()_\label + b el\el\ht\()_\regsize\()_\label 0: /* @@ -119,7 +104,7 @@ alternative_else_nop_endif sub sp, sp, x0 mrs x0, tpidrro_el0 #endif - b el\()\el\()_\label + b el\el\ht\()_\regsize\()_\label .endm .macro tramp_alias, dst, sym @@ -275,7 +260,7 @@ alternative_else_nop_endif mte_set_kernel_gcr x22, x23 - scs_load tsk, x20 + scs_load tsk .else add x21, sp, #PT_REGS_SIZE get_current_task tsk @@ -285,7 +270,7 @@ alternative_else_nop_endif stp lr, x21, [sp, #S_LR] /* - * For exceptions from EL0, create a terminal frame record. + * For exceptions from EL0, create a final frame record. * For exceptions from EL1, create a synthetic frame record so the * interrupted code shows up in the backtrace. 
*/ @@ -375,7 +360,7 @@ alternative_if ARM64_WORKAROUND_845719 alternative_else_nop_endif #endif 3: - scs_save tsk, x0 + scs_save tsk #ifdef CONFIG_ARM64_PTR_AUTH alternative_if ARM64_HAS_ADDRESS_AUTH @@ -486,63 +471,12 @@ SYM_CODE_START_LOCAL(__swpan_exit_el0) SYM_CODE_END(__swpan_exit_el0) #endif - .macro irq_stack_entry - mov x19, sp // preserve the original sp -#ifdef CONFIG_SHADOW_CALL_STACK - mov x24, scs_sp // preserve the original shadow stack -#endif - - /* - * Compare sp with the base of the task stack. - * If the top ~(THREAD_SIZE - 1) bits match, we are on a task stack, - * and should switch to the irq stack. - */ - ldr x25, [tsk, TSK_STACK] - eor x25, x25, x19 - and x25, x25, #~(THREAD_SIZE - 1) - cbnz x25, 9998f - - ldr_this_cpu x25, irq_stack_ptr, x26 - mov x26, #IRQ_STACK_SIZE - add x26, x25, x26 - - /* switch to the irq stack */ - mov sp, x26 - -#ifdef CONFIG_SHADOW_CALL_STACK - /* also switch to the irq shadow stack */ - ldr_this_cpu scs_sp, irq_shadow_call_stack_ptr, x26 -#endif - -9998: - .endm - - /* - * The callee-saved regs (x19-x29) should be preserved between - * irq_stack_entry and irq_stack_exit, but note that kernel_entry - * uses x20-x23 to store data for later use. - */ - .macro irq_stack_exit - mov sp, x19 -#ifdef CONFIG_SHADOW_CALL_STACK - mov scs_sp, x24 -#endif - .endm - /* GPRs used by entry code */ tsk .req x28 // current thread_info /* * Interrupt handling. */ - .macro irq_handler, handler:req - ldr_l x1, \handler - mov x0, sp - irq_stack_entry - blr x1 - irq_stack_exit - .endm - .macro gic_prio_kentry_setup, tmp:req #ifdef CONFIG_ARM64_PSEUDO_NMI alternative_if ARM64_HAS_IRQ_PRIO_MASKING @@ -552,45 +486,6 @@ tsk .req x28 // current thread_info #endif .endm - .macro el1_interrupt_handler, handler:req - enable_da - - mov x0, sp - bl enter_el1_irq_or_nmi - - irq_handler \handler - -#ifdef CONFIG_PREEMPTION - ldr x24, [tsk, #TSK_TI_PREEMPT] // get preempt count -alternative_if ARM64_HAS_IRQ_PRIO_MASKING - /* - * DA were cleared at start of handling, and IF are cleared by - * the GIC irqchip driver using gic_arch_enable_irqs() for - * normal IRQs. 
If anything is set, it means we come back from - * an NMI instead of a normal IRQ, so skip preemption - */ - mrs x0, daif - orr x24, x24, x0 -alternative_else_nop_endif - cbnz x24, 1f // preempt count != 0 || NMI return path - bl arm64_preempt_schedule_irq // irq en/disable is done inside -1: -#endif - - mov x0, sp - bl exit_el1_irq_or_nmi - .endm - - .macro el0_interrupt_handler, handler:req - user_exit_irqoff - enable_da - - tbz x22, #55, 1f - bl do_el0_irq_bp_hardening -1: - irq_handler \handler - .endm - .text /* @@ -600,32 +495,25 @@ alternative_else_nop_endif .align 11 SYM_CODE_START(vectors) - kernel_ventry 1, sync_invalid // Synchronous EL1t - kernel_ventry 1, irq_invalid // IRQ EL1t - kernel_ventry 1, fiq_invalid // FIQ EL1t - kernel_ventry 1, error_invalid // Error EL1t - - kernel_ventry 1, sync // Synchronous EL1h - kernel_ventry 1, irq // IRQ EL1h - kernel_ventry 1, fiq // FIQ EL1h - kernel_ventry 1, error // Error EL1h - - kernel_ventry 0, sync // Synchronous 64-bit EL0 - kernel_ventry 0, irq // IRQ 64-bit EL0 - kernel_ventry 0, fiq // FIQ 64-bit EL0 - kernel_ventry 0, error // Error 64-bit EL0 - -#ifdef CONFIG_COMPAT - kernel_ventry 0, sync_compat, 32 // Synchronous 32-bit EL0 - kernel_ventry 0, irq_compat, 32 // IRQ 32-bit EL0 - kernel_ventry 0, fiq_compat, 32 // FIQ 32-bit EL0 - kernel_ventry 0, error_compat, 32 // Error 32-bit EL0 -#else - kernel_ventry 0, sync_invalid, 32 // Synchronous 32-bit EL0 - kernel_ventry 0, irq_invalid, 32 // IRQ 32-bit EL0 - kernel_ventry 0, fiq_invalid, 32 // FIQ 32-bit EL0 - kernel_ventry 0, error_invalid, 32 // Error 32-bit EL0 -#endif + kernel_ventry 1, t, 64, sync // Synchronous EL1t + kernel_ventry 1, t, 64, irq // IRQ EL1t + kernel_ventry 1, t, 64, fiq // FIQ EL1h + kernel_ventry 1, t, 64, error // Error EL1t + + kernel_ventry 1, h, 64, sync // Synchronous EL1h + kernel_ventry 1, h, 64, irq // IRQ EL1h + kernel_ventry 1, h, 64, fiq // FIQ EL1h + kernel_ventry 1, h, 64, error // Error EL1h + + kernel_ventry 0, t, 64, sync // Synchronous 64-bit EL0 + kernel_ventry 0, t, 64, irq // IRQ 64-bit EL0 + kernel_ventry 0, t, 64, fiq // FIQ 64-bit EL0 + kernel_ventry 0, t, 64, error // Error 64-bit EL0 + + kernel_ventry 0, t, 32, sync // Synchronous 32-bit EL0 + kernel_ventry 0, t, 32, irq // IRQ 32-bit EL0 + kernel_ventry 0, t, 32, fiq // FIQ 32-bit EL0 + kernel_ventry 0, t, 32, error // Error 32-bit EL0 SYM_CODE_END(vectors) #ifdef CONFIG_VMAP_STACK @@ -656,147 +544,46 @@ __bad_stack: ASM_BUG() #endif /* CONFIG_VMAP_STACK */ -/* - * Invalid mode handlers - */ - .macro inv_entry, el, reason, regsize = 64 + + .macro entry_handler el:req, ht:req, regsize:req, label:req +SYM_CODE_START_LOCAL(el\el\ht\()_\regsize\()_\label) kernel_entry \el, \regsize mov x0, sp - mov x1, #\reason - mrs x2, esr_el1 - bl bad_mode - ASM_BUG() + bl el\el\ht\()_\regsize\()_\label\()_handler + .if \el == 0 + b ret_to_user + .else + b ret_to_kernel + .endif +SYM_CODE_END(el\el\ht\()_\regsize\()_\label) .endm -SYM_CODE_START_LOCAL(el0_sync_invalid) - inv_entry 0, BAD_SYNC -SYM_CODE_END(el0_sync_invalid) - -SYM_CODE_START_LOCAL(el0_irq_invalid) - inv_entry 0, BAD_IRQ -SYM_CODE_END(el0_irq_invalid) - -SYM_CODE_START_LOCAL(el0_fiq_invalid) - inv_entry 0, BAD_FIQ -SYM_CODE_END(el0_fiq_invalid) - -SYM_CODE_START_LOCAL(el0_error_invalid) - inv_entry 0, BAD_ERROR -SYM_CODE_END(el0_error_invalid) - -SYM_CODE_START_LOCAL(el1_sync_invalid) - inv_entry 1, BAD_SYNC -SYM_CODE_END(el1_sync_invalid) - -SYM_CODE_START_LOCAL(el1_irq_invalid) - inv_entry 1, BAD_IRQ 
-SYM_CODE_END(el1_irq_invalid) - -SYM_CODE_START_LOCAL(el1_fiq_invalid) - inv_entry 1, BAD_FIQ -SYM_CODE_END(el1_fiq_invalid) - -SYM_CODE_START_LOCAL(el1_error_invalid) - inv_entry 1, BAD_ERROR -SYM_CODE_END(el1_error_invalid) - /* - * EL1 mode handlers. + * Early exception handlers */ - .align 6 -SYM_CODE_START_LOCAL_NOALIGN(el1_sync) - kernel_entry 1 - mov x0, sp - bl el1_sync_handler - kernel_exit 1 -SYM_CODE_END(el1_sync) - - .align 6 -SYM_CODE_START_LOCAL_NOALIGN(el1_irq) - kernel_entry 1 - el1_interrupt_handler handle_arch_irq - kernel_exit 1 -SYM_CODE_END(el1_irq) - -SYM_CODE_START_LOCAL_NOALIGN(el1_fiq) - kernel_entry 1 - el1_interrupt_handler handle_arch_fiq - kernel_exit 1 -SYM_CODE_END(el1_fiq) - -/* - * EL0 mode handlers. - */ - .align 6 -SYM_CODE_START_LOCAL_NOALIGN(el0_sync) - kernel_entry 0 - mov x0, sp - bl el0_sync_handler - b ret_to_user -SYM_CODE_END(el0_sync) - -#ifdef CONFIG_COMPAT - .align 6 -SYM_CODE_START_LOCAL_NOALIGN(el0_sync_compat) - kernel_entry 0, 32 - mov x0, sp - bl el0_sync_compat_handler - b ret_to_user -SYM_CODE_END(el0_sync_compat) - - .align 6 -SYM_CODE_START_LOCAL_NOALIGN(el0_irq_compat) - kernel_entry 0, 32 - b el0_irq_naked -SYM_CODE_END(el0_irq_compat) - -SYM_CODE_START_LOCAL_NOALIGN(el0_fiq_compat) - kernel_entry 0, 32 - b el0_fiq_naked -SYM_CODE_END(el0_fiq_compat) - -SYM_CODE_START_LOCAL_NOALIGN(el0_error_compat) - kernel_entry 0, 32 - b el0_error_naked -SYM_CODE_END(el0_error_compat) -#endif - - .align 6 -SYM_CODE_START_LOCAL_NOALIGN(el0_irq) - kernel_entry 0 -el0_irq_naked: - el0_interrupt_handler handle_arch_irq - b ret_to_user -SYM_CODE_END(el0_irq) - -SYM_CODE_START_LOCAL_NOALIGN(el0_fiq) - kernel_entry 0 -el0_fiq_naked: - el0_interrupt_handler handle_arch_fiq - b ret_to_user -SYM_CODE_END(el0_fiq) - -SYM_CODE_START_LOCAL(el1_error) - kernel_entry 1 - mrs x1, esr_el1 - enable_dbg - mov x0, sp - bl do_serror + entry_handler 1, t, 64, sync + entry_handler 1, t, 64, irq + entry_handler 1, t, 64, fiq + entry_handler 1, t, 64, error + + entry_handler 1, h, 64, sync + entry_handler 1, h, 64, irq + entry_handler 1, h, 64, fiq + entry_handler 1, h, 64, error + + entry_handler 0, t, 64, sync + entry_handler 0, t, 64, irq + entry_handler 0, t, 64, fiq + entry_handler 0, t, 64, error + + entry_handler 0, t, 32, sync + entry_handler 0, t, 32, irq + entry_handler 0, t, 32, fiq + entry_handler 0, t, 32, error + +SYM_CODE_START_LOCAL(ret_to_kernel) kernel_exit 1 -SYM_CODE_END(el1_error) - -SYM_CODE_START_LOCAL(el0_error) - kernel_entry 0 -el0_error_naked: - mrs x25, esr_el1 - user_exit_irqoff - enable_dbg - mov x0, sp - mov x1, x25 - bl do_serror - enable_da - b ret_to_user -SYM_CODE_END(el0_error) +SYM_CODE_END(ret_to_kernel) /* * "slow" syscall return path. @@ -979,8 +766,8 @@ SYM_FUNC_START(cpu_switch_to) mov sp, x9 msr sp_el0, x1 ptrauth_keys_install_kernel x1, x8, x9, x10 - scs_save x0, x8 - scs_load x1, x8 + scs_save x0 + scs_load x1 ret SYM_FUNC_END(cpu_switch_to) NOKPROBE(cpu_switch_to) @@ -998,6 +785,42 @@ SYM_CODE_START(ret_from_fork) SYM_CODE_END(ret_from_fork) NOKPROBE(ret_from_fork) +/* + * void call_on_irq_stack(struct pt_regs *regs, + * void (*func)(struct pt_regs *)); + * + * Calls func(regs) using this CPU's irq stack and shadow irq stack. + */ +SYM_FUNC_START(call_on_irq_stack) +#ifdef CONFIG_SHADOW_CALL_STACK + stp scs_sp, xzr, [sp, #-16]! + ldr_this_cpu scs_sp, irq_shadow_call_stack_ptr, x17 +#endif + /* Create a frame record to save our LR and SP (implicit in FP) */ + stp x29, x30, [sp, #-16]! 
+ mov x29, sp + + ldr_this_cpu x16, irq_stack_ptr, x17 + mov x15, #IRQ_STACK_SIZE + add x16, x16, x15 + + /* Move to the new stack and call the function there */ + mov sp, x16 + blr x1 + + /* + * Restore the SP from the FP, and restore the FP and LR from the frame + * record. + */ + mov sp, x29 + ldp x29, x30, [sp], #16 +#ifdef CONFIG_SHADOW_CALL_STACK + ldp scs_sp, xzr, [sp], #16 +#endif + ret +SYM_FUNC_END(call_on_irq_stack) +NOKPROBE(call_on_irq_stack) + #ifdef CONFIG_ARM_SDE_INTERFACE #include <asm/sdei.h> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 96873dfa67fd..a6ccd6557d19 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -16,6 +16,7 @@ #include <asm/asm_pointer_auth.h> #include <asm/assembler.h> #include <asm/boot.h> +#include <asm/bug.h> #include <asm/ptrace.h> #include <asm/asm-offsets.h> #include <asm/cache.h> @@ -117,8 +118,8 @@ SYM_CODE_START_LOCAL(preserve_boot_args) dmb sy // needed before dc ivac with // MMU off - mov x1, #0x20 // 4 x 8 bytes - b __inval_dcache_area // tail call + add x1, x0, #0x20 // 4 x 8 bytes + b dcache_inval_poc // tail call SYM_CODE_END(preserve_boot_args) /* @@ -268,8 +269,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables) */ adrp x0, init_pg_dir adrp x1, init_pg_end - sub x1, x1, x0 - bl __inval_dcache_area + bl dcache_inval_poc /* * Clear the init page tables. @@ -382,39 +382,57 @@ SYM_FUNC_START_LOCAL(__create_page_tables) adrp x0, idmap_pg_dir adrp x1, idmap_pg_end - sub x1, x1, x0 - bl __inval_dcache_area + bl dcache_inval_poc adrp x0, init_pg_dir adrp x1, init_pg_end - sub x1, x1, x0 - bl __inval_dcache_area + bl dcache_inval_poc ret x28 SYM_FUNC_END(__create_page_tables) + /* + * Initialize CPU registers with task-specific and cpu-specific context. + * + * Create a final frame record at task_pt_regs(current)->stackframe, so + * that the unwinder can identify the final frame record of any task by + * its location in the task stack. We reserve the entire pt_regs space + * for consistency with user tasks and kthreads. + */ + .macro init_cpu_task tsk, tmp1, tmp2 + msr sp_el0, \tsk + + ldr \tmp1, [\tsk, #TSK_STACK] + add sp, \tmp1, #THREAD_SIZE + sub sp, sp, #PT_REGS_SIZE + + stp xzr, xzr, [sp, #S_STACKFRAME] + add x29, sp, #S_STACKFRAME + + scs_load \tsk + + adr_l \tmp1, __per_cpu_offset + ldr w\tmp2, [\tsk, #TSK_CPU] + ldr \tmp1, [\tmp1, \tmp2, lsl #3] + set_this_cpu_offset \tmp1 + .endm + /* * The following fragment of code is executed with the MMU enabled. * * x0 = __PHYS_OFFSET */ SYM_FUNC_START_LOCAL(__primary_switched) - adrp x4, init_thread_union - add sp, x4, #THREAD_SIZE - adr_l x5, init_task - msr sp_el0, x5 // Save thread_info + adr_l x4, init_task + init_cpu_task x4, x5, x6 adr_l x8, vectors // load VBAR_EL1 with virtual msr vbar_el1, x8 // vector table address isb - stp xzr, x30, [sp, #-16]! + stp x29, x30, [sp, #-16]! 
mov x29, sp -#ifdef CONFIG_SHADOW_CALL_STACK - adr_l scs_sp, init_shadow_call_stack // Set shadow call stack -#endif - str_l x21, __fdt_pointer, x5 // Save FDT pointer ldr_l x4, kimage_vaddr // Save the offset between @@ -446,10 +464,9 @@ SYM_FUNC_START_LOCAL(__primary_switched) 0: #endif bl switch_to_vhe // Prefer VHE if possible - add sp, sp, #16 - mov x29, #0 - mov x30, #0 - b start_kernel + ldp x29, x30, [sp], #16 + bl start_kernel + ASM_BUG() SYM_FUNC_END(__primary_switched) .pushsection ".rodata", "a" @@ -632,21 +649,17 @@ SYM_FUNC_START_LOCAL(__secondary_switched) isb adr_l x0, secondary_data - ldr x1, [x0, #CPU_BOOT_STACK] // get secondary_data.stack - cbz x1, __secondary_too_slow - mov sp, x1 ldr x2, [x0, #CPU_BOOT_TASK] cbz x2, __secondary_too_slow - msr sp_el0, x2 - scs_load x2, x3 - mov x29, #0 - mov x30, #0 + + init_cpu_task x2, x1, x3 #ifdef CONFIG_ARM64_PTR_AUTH ptrauth_keys_init_cpu x2, x3, x4, x5 #endif - b secondary_start_kernel + bl secondary_start_kernel + ASM_BUG() SYM_FUNC_END(__secondary_switched) SYM_FUNC_START_LOCAL(__secondary_too_slow) diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S index 8ccca660034e..81c0186a5e32 100644 --- a/arch/arm64/kernel/hibernate-asm.S +++ b/arch/arm64/kernel/hibernate-asm.S @@ -45,7 +45,7 @@ * Because this code has to be copied to a 'safe' page, it can't call out to * other functions by PC-relative address. Also remember that it may be * mid-way through over-writing other functions. For this reason it contains - * code from flush_icache_range() and uses the copy_page() macro. + * code from caches_clean_inval_pou() and uses the copy_page() macro. * * This 'safe' page is mapped via ttbr0, and executed from there. This function * switches to a copy of the linear map in ttbr1, performs the restore, then @@ -87,11 +87,12 @@ SYM_CODE_START(swsusp_arch_suspend_exit) copy_page x0, x1, x2, x3, x4, x5, x6, x7, x8, x9 add x1, x10, #PAGE_SIZE - /* Clean the copied page to PoU - based on flush_icache_range() */ + /* Clean the copied page to PoU - based on caches_clean_inval_pou() */ raw_dcache_line_size x2, x3 sub x3, x2, #1 bic x4, x10, x3 -2: dc cvau, x4 /* clean D line / unified line */ +2: /* clean D line / unified line */ +alternative_insn "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE add x4, x4, x2 cmp x4, x1 b.lo 2b diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index b1cef371df2b..46a0b4d6e251 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -210,7 +210,7 @@ static int create_safe_exec_page(void *src_start, size_t length, return -ENOMEM; memcpy(page, src_start, length); - __flush_icache_range((unsigned long)page, (unsigned long)page + length); + caches_clean_inval_pou((unsigned long)page, (unsigned long)page + length); rc = trans_pgd_idmap_page(&trans_info, &trans_ttbr0, &t0sz, page); if (rc) return rc; @@ -240,8 +240,6 @@ static int create_safe_exec_page(void *src_start, size_t length, return 0; } -#define dcache_clean_range(start, end) __flush_dcache_area(start, (end - start)) - #ifdef CONFIG_ARM64_MTE static DEFINE_XARRAY(mte_pages); @@ -383,13 +381,18 @@ int swsusp_arch_suspend(void) ret = swsusp_save(); } else { /* Clean kernel core startup/idle code to PoC*/ - dcache_clean_range(__mmuoff_data_start, __mmuoff_data_end); - dcache_clean_range(__idmap_text_start, __idmap_text_end); + dcache_clean_inval_poc((unsigned long)__mmuoff_data_start, + (unsigned long)__mmuoff_data_end); + dcache_clean_inval_poc((unsigned 
long)__idmap_text_start, + (unsigned long)__idmap_text_end); /* Clean kvm setup code to PoC? */ if (el2_reset_needed()) { - dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end); - dcache_clean_range(__hyp_text_start, __hyp_text_end); + dcache_clean_inval_poc( + (unsigned long)__hyp_idmap_text_start, + (unsigned long)__hyp_idmap_text_end); + dcache_clean_inval_poc((unsigned long)__hyp_text_start, + (unsigned long)__hyp_text_end); } swsusp_mte_restore_tags(); @@ -474,7 +477,8 @@ int swsusp_arch_resume(void) * The hibernate exit text contains a set of el2 vectors, that will * be executed at el2 with the mmu off in order to reload hyp-stub. */ - __flush_dcache_area(hibernate_exit, exit_size); + dcache_clean_inval_poc((unsigned long)hibernate_exit, + (unsigned long)hibernate_exit + exit_size); /* * KASLR will cause the el2 vectors to be in a different location in diff --git a/arch/arm64/kernel/idle.c b/arch/arm64/kernel/idle.c new file mode 100644 index 000000000000..a2cfbacec2bb --- /dev/null +++ b/arch/arm64/kernel/idle.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Low-level idle sequences + */ + +#include <linux/cpu.h> +#include <linux/irqflags.h> + +#include <asm/barrier.h> +#include <asm/cpuidle.h> +#include <asm/cpufeature.h> +#include <asm/sysreg.h> + +/* + * cpu_do_idle() + * + * Idle the processor (wait for interrupt). + * + * If the CPU supports priority masking we must do additional work to + * ensure that interrupts are not masked at the PMR (because the core will + * not wake up if we block the wake up signal in the interrupt controller). + */ +void noinstr cpu_do_idle(void) +{ + struct arm_cpuidle_irq_context context; + + arm_cpuidle_save_irq_context(&context); + + dsb(sy); + wfi(); + + arm_cpuidle_restore_irq_context(&context); +} + +/* + * This is our default idle handler. + */ +void noinstr arch_cpu_idle(void) +{ + /* + * This should do all the clock switching and wait for interrupt + * tricks + */ + cpu_do_idle(); + raw_local_irq_enable(); +} diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index e628c8ce1ffe..53a381a7f65d 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -237,7 +237,8 @@ asmlinkage void __init init_feature_override(void) for (i = 0; i < ARRAY_SIZE(regs); i++) { if (regs[i]->override) - __flush_dcache_area(regs[i]->override, + dcache_clean_inval_poc((unsigned long)regs[i]->override, + (unsigned long)regs[i]->override + sizeof(*regs[i]->override)); } } diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index bcf3c2755370..c96a9a0043bf 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -35,7 +35,7 @@ __efistub_strnlen = __pi_strnlen; __efistub_strcmp = __pi_strcmp; __efistub_strncmp = __pi_strncmp; __efistub_strrchr = __pi_strrchr; -__efistub___clean_dcache_area_poc = __pi___clean_dcache_area_poc; +__efistub_dcache_clean_poc = __pi_dcache_clean_poc; #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) __efistub___memcpy = __pi_memcpy; diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 341342b207f6..cfa2cfde3019 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -72,7 +72,9 @@ u64 __init kaslr_early_init(void) * we end up running with module randomization disabled. 
*/ module_alloc_base = (u64)_etext - MODULES_VSIZE; - __flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base)); + dcache_clean_inval_poc((unsigned long)&module_alloc_base, + (unsigned long)&module_alloc_base + + sizeof(module_alloc_base)); /* * Try to map the FDT early. If this fails, we simply bail, @@ -170,8 +172,12 @@ u64 __init kaslr_early_init(void) module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21; module_alloc_base &= PAGE_MASK; - __flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base)); - __flush_dcache_area(&memstart_offset_seed, sizeof(memstart_offset_seed)); + dcache_clean_inval_poc((unsigned long)&module_alloc_base, + (unsigned long)&module_alloc_base + + sizeof(module_alloc_base)); + dcache_clean_inval_poc((unsigned long)&memstart_offset_seed, + (unsigned long)&memstart_offset_seed + + sizeof(memstart_offset_seed)); return offset; } diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 90a335c74442..03ceabe4d912 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -68,10 +68,16 @@ int machine_kexec_post_load(struct kimage *kimage) kimage->arch.kern_reloc = __pa(reloc_code); kexec_image_info(kimage); - /* Flush the reloc_code in preparation for its execution. */ - __flush_dcache_area(reloc_code, arm64_relocate_new_kernel_size); - flush_icache_range((uintptr_t)reloc_code, (uintptr_t)reloc_code + - arm64_relocate_new_kernel_size); + /* + * For execution with the MMU off, reloc_code needs to be cleaned to the + * PoC and invalidated from the I-cache. + */ + dcache_clean_inval_poc((unsigned long)reloc_code, + (unsigned long)reloc_code + + arm64_relocate_new_kernel_size); + icache_inval_pou((uintptr_t)reloc_code, + (uintptr_t)reloc_code + + arm64_relocate_new_kernel_size); return 0; } @@ -102,16 +108,18 @@ static void kexec_list_flush(struct kimage *kimage) for (entry = &kimage->head; ; entry++) { unsigned int flag; - void *addr; + unsigned long addr; /* flush the list entries. */ - __flush_dcache_area(entry, sizeof(kimage_entry_t)); + dcache_clean_inval_poc((unsigned long)entry, + (unsigned long)entry + + sizeof(kimage_entry_t)); flag = *entry & IND_FLAGS; if (flag == IND_DONE) break; - addr = phys_to_virt(*entry & PAGE_MASK); + addr = (unsigned long)phys_to_virt(*entry & PAGE_MASK); switch (flag) { case IND_INDIRECTION: @@ -120,7 +128,7 @@ static void kexec_list_flush(struct kimage *kimage) break; case IND_SOURCE: /* flush the source pages. 
*/ - __flush_dcache_area(addr, PAGE_SIZE); + dcache_clean_inval_poc(addr, addr + PAGE_SIZE); break; case IND_DESTINATION: break; @@ -147,8 +155,10 @@ static void kexec_segment_flush(const struct kimage *kimage) kimage->segment[i].memsz, kimage->segment[i].memsz / PAGE_SIZE); - __flush_dcache_area(phys_to_virt(kimage->segment[i].mem), - kimage->segment[i].memsz); + dcache_clean_inval_poc( + (unsigned long)phys_to_virt(kimage->segment[i].mem), + (unsigned long)phys_to_virt(kimage->segment[i].mem) + + kimage->segment[i].memsz); } } diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c index 9a6edb9c48c7..771f543464e0 100644 --- a/arch/arm64/kernel/patching.c +++ b/arch/arm64/kernel/patching.c @@ -99,7 +99,7 @@ int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn) ret = aarch64_insn_write(tp, insn); if (ret == 0) - __flush_icache_range((uintptr_t)tp, + caches_clean_inval_pou((uintptr_t)tp, (uintptr_t)tp + AARCH64_INSN_SIZE); return ret; diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c index 88ff471b0bce..4a72c2727309 100644 --- a/arch/arm64/kernel/perf_callchain.c +++ b/arch/arm64/kernel/perf_callchain.c @@ -116,7 +116,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, tail = (struct frame_tail __user *)regs->regs[29]; while (entry->nr < entry->max_stack && - tail && !((unsigned long)tail & 0xf)) + tail && !((unsigned long)tail & 0x7)) tail = user_backtrace(tail, entry); } else { #ifdef CONFIG_COMPAT diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index 2c247634552b..9be668f3f034 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -21,7 +21,7 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, memcpy(dst, src, len); /* flush caches (dcache/icache) */ - sync_icache_aliases(dst, len); + sync_icache_aliases((unsigned long)dst, (unsigned long)dst + len); kunmap_atomic(xol_page_kaddr); } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index b4bb67f17a2c..6b8b4b3e357a 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -18,7 +18,6 @@ #include <linux/sched/task.h> #include <linux/sched/task_stack.h> #include <linux/kernel.h> -#include <linux/lockdep.h> #include <linux/mman.h> #include <linux/mm.h> #include <linux/nospec.h> @@ -46,7 +45,6 @@ #include <linux/prctl.h> #include <asm/alternative.h> -#include <asm/arch_gicv3.h> #include <asm/compat.h> #include <asm/cpufeature.h> #include <asm/cacheflush.h> @@ -74,63 +72,6 @@ EXPORT_SYMBOL_GPL(pm_power_off); void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); -static void noinstr __cpu_do_idle(void) -{ - dsb(sy); - wfi(); -} - -static void noinstr __cpu_do_idle_irqprio(void) -{ - unsigned long pmr; - unsigned long daif_bits; - - daif_bits = read_sysreg(daif); - write_sysreg(daif_bits | PSR_I_BIT | PSR_F_BIT, daif); - - /* - * Unmask PMR before going idle to make sure interrupts can - * be raised. - */ - pmr = gic_read_pmr(); - gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); - - __cpu_do_idle(); - - gic_write_pmr(pmr); - write_sysreg(daif_bits, daif); -} - -/* - * cpu_do_idle() - * - * Idle the processor (wait for interrupt). - * - * If the CPU supports priority masking we must do additional work to - * ensure that interrupts are not masked at the PMR (because the core will - * not wake up if we block the wake up signal in the interrupt controller). 
- */ -void noinstr cpu_do_idle(void) -{ - if (system_uses_irq_prio_masking()) - __cpu_do_idle_irqprio(); - else - __cpu_do_idle(); -} - -/* - * This is our default idle handler. - */ -void noinstr arch_cpu_idle(void) -{ - /* - * This should do all the clock switching and wait for interrupt - * tricks - */ - cpu_do_idle(); - raw_local_irq_enable(); -} - #ifdef CONFIG_HOTPLUG_CPU void arch_cpu_idle_dead(void) { @@ -435,6 +376,11 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, } p->thread.cpu_context.pc = (unsigned long)ret_from_fork; p->thread.cpu_context.sp = (unsigned long)childregs; + /* + * For the benefit of the unwinder, set up childregs->stackframe + * as the final frame for the new task. + */ + p->thread.cpu_context.fp = (unsigned long)childregs->stackframe; ptrace_hw_copy_thread(p); @@ -527,6 +473,15 @@ static void erratum_1418040_thread_switch(struct task_struct *prev, write_sysreg(val, cntkctl_el1); } +static void compat_thread_switch(struct task_struct *next) +{ + if (!is_compat_thread(task_thread_info(next))) + return; + + if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) + set_tsk_thread_flag(next, TIF_NOTIFY_RESUME); +} + static void update_sctlr_el1(u64 sctlr) { /* @@ -568,6 +523,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, ssbs_thread_switch(next); erratum_1418040_thread_switch(prev, next); ptrauth_thread_switch_user(next); + compat_thread_switch(next); /* * Complete any pending TLB or cache maintenance on this CPU in case @@ -633,8 +589,15 @@ unsigned long arch_align_stack(unsigned long sp) */ void arch_setup_new_exec(void) { - current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0; + unsigned long mmflags = 0; + + if (is_compat_task()) { + mmflags = MMCF_AARCH32; + if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) + set_tsk_thread_flag(current, TIF_NOTIFY_RESUME); + } + current->mm->context.flags = mmflags; ptrauth_thread_init_user(); mte_thread_init_user(); @@ -724,22 +687,6 @@ static int __init tagged_addr_init(void) core_initcall(tagged_addr_init); #endif /* CONFIG_ARM64_TAGGED_ADDR_ABI */ -asmlinkage void __sched arm64_preempt_schedule_irq(void) -{ - lockdep_assert_irqs_disabled(); - - /* - * Preempting a task from an IRQ means we leave copies of PSTATE - * on the stack. cpufeature's enable calls may modify PSTATE, but - * resuming one of these preempted tasks would undo those changes. - * - * Only allow a task to be preempted once cpufeatures have been - * enabled. 
- */ - if (system_capabilities_finalized()) - preempt_schedule_irq(); -} - #ifdef CONFIG_BINFMT_ELF int arch_elf_adjust_prot(int prot, const struct arch_elf_state *state, bool has_interp, bool is_interp) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index eb2f73939b7b..499b6b2f9757 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -122,7 +122,7 @@ static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) { return ((addr & ~(THREAD_SIZE - 1)) == (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))) || - on_irq_stack(addr, NULL); + on_irq_stack(addr, sizeof(unsigned long), NULL); } /** diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index 2c7ca449dd51..47f77d1234cb 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -162,31 +162,33 @@ static int init_sdei_scs(void) return err; } -static bool on_sdei_normal_stack(unsigned long sp, struct stack_info *info) +static bool on_sdei_normal_stack(unsigned long sp, unsigned long size, + struct stack_info *info) { unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr); unsigned long high = low + SDEI_STACK_SIZE; - return on_stack(sp, low, high, STACK_TYPE_SDEI_NORMAL, info); + return on_stack(sp, size, low, high, STACK_TYPE_SDEI_NORMAL, info); } -static bool on_sdei_critical_stack(unsigned long sp, struct stack_info *info) +static bool on_sdei_critical_stack(unsigned long sp, unsigned long size, + struct stack_info *info) { unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr); unsigned long high = low + SDEI_STACK_SIZE; - return on_stack(sp, low, high, STACK_TYPE_SDEI_CRITICAL, info); + return on_stack(sp, size, low, high, STACK_TYPE_SDEI_CRITICAL, info); } -bool _on_sdei_stack(unsigned long sp, struct stack_info *info) +bool _on_sdei_stack(unsigned long sp, unsigned long size, struct stack_info *info) { if (!IS_ENABLED(CONFIG_VMAP_STACK)) return false; - if (on_sdei_critical_stack(sp, info)) + if (on_sdei_critical_stack(sp, size, info)) return true; - if (on_sdei_normal_stack(sp, info)) + if (on_sdei_normal_stack(sp, size, info)) return true; return false; @@ -231,13 +233,13 @@ out_err: } /* - * __sdei_handler() returns one of: + * do_sdei_event() returns one of: * SDEI_EV_HANDLED - success, return to the interrupted context. * SDEI_EV_FAILED - failure, return this error code to firmare. * virtual-address - success, return to this address. */ -static __kprobes unsigned long _sdei_handler(struct pt_regs *regs, - struct sdei_registered_event *arg) +unsigned long __kprobes do_sdei_event(struct pt_regs *regs, + struct sdei_registered_event *arg) { u32 mode; int i, err = 0; @@ -292,45 +294,3 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs, return vbar + 0x480; } - -static void __kprobes notrace __sdei_pstate_entry(void) -{ - /* - * The original SDEI spec (ARM DEN 0054A) can be read ambiguously as to - * whether PSTATE bits are inherited unchanged or generated from - * scratch, and the TF-A implementation always clears PAN and always - * clears UAO. There are no other known implementations. - * - * Subsequent revisions (ARM DEN 0054B) follow the usual rules for how - * PSTATE is modified upon architectural exceptions, and so PAN is - * either inherited or set per SCTLR_ELx.SPAN, and UAO is always - * cleared. - * - * We must explicitly reset PAN to the expected state, including - * clearing it when the host isn't using it, in case a VM had it set. 
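The ptrace.c and sdei.c hunks above thread a new size argument through the on_*_stack() helpers, so callers can check that a whole object (for example a 16-byte frame record) lies on a single stack rather than only its first byte. A standalone sketch of the underlying containment test; on_range() is an illustrative name, the in-tree helper is on_stack():

#include <stdbool.h>

/* Does the object at [sp, sp + size) fit entirely inside [low, high)? */
static bool on_range(unsigned long sp, unsigned long size,
                     unsigned long low, unsigned long high)
{
        if (!low)
                return false;
        if (sp + size < sp)             /* overflow */
                return false;
        return sp >= low && sp + size <= high;
}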
- */ - if (system_uses_hw_pan()) - set_pstate_pan(1); - else if (cpu_has_pan()) - set_pstate_pan(0); -} - -asmlinkage noinstr unsigned long -__sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg) -{ - unsigned long ret; - - /* - * We didn't take an exception to get here, so the HW hasn't - * set/cleared bits in PSTATE that we may rely on. Initialize PAN. - */ - __sdei_pstate_entry(); - - arm64_enter_nmi(regs); - - ret = _sdei_handler(regs, arg); - - arm64_exit_nmi(regs); - - return ret; -} diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 61845c0821d9..b7a35a03e9b9 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -87,12 +87,6 @@ void __init smp_setup_processor_id(void) u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; set_cpu_logical_map(0, mpidr); - /* - * clear __my_cpu_offset on boot CPU to avoid hang caused by - * using percpu variable early, for example, lockdep will - * access percpu variable inside lock_release - */ - set_my_cpu_offset(0); pr_info("Booting Linux on physical CPU 0x%010lx [0x%08x]\n", (unsigned long)mpidr, read_cpuid_id()); } diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 6237486ff6bb..f8192f4ae0b8 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -911,6 +911,19 @@ static void do_signal(struct pt_regs *regs) restore_saved_sigmask(); } +static bool cpu_affinity_invalid(struct pt_regs *regs) +{ + if (!compat_user_mode(regs)) + return false; + + /* + * We're preemptible, but a reschedule will cause us to check the + * affinity again. + */ + return !cpumask_test_cpu(raw_smp_processor_id(), + system_32bit_el0_cpumask()); +} + asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags) { @@ -938,6 +951,19 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, if (thread_flags & _TIF_NOTIFY_RESUME) { tracehook_notify_resume(regs); rseq_handle_notify_resume(NULL, regs); + + /* + * If we reschedule after checking the affinity + * then we must ensure that TIF_NOTIFY_RESUME + * is set so that we check the affinity again. + * Since tracehook_notify_resume() clears the + * flag, ensure that the compiler doesn't move + * it after the affinity check. + */ + barrier(); + + if (cpu_affinity_invalid(regs)) + force_sig(SIGKILL); } if (thread_flags & _TIF_FOREIGN_FPSTATE) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index dcd7041b2b07..2fe8fab886e2 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -120,9 +120,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) * page tables. 
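The do_notify_resume() hunk above is where a system with mismatched 32-bit support finally enforces placement: once tracehook_notify_resume() has cleared TIF_NOTIFY_RESUME, the affinity is re-checked and a compat task still running on a 64-bit-only CPU is killed. Other code can consult the same state with the static-key-plus-capable-mask pattern; a hedged sketch using only symbols introduced by this series, where can_run_compat_here() is a hypothetical helper:

#include <linux/cpumask.h>
#include <linux/jump_label.h>
#include <asm/cpufeature.h>

/* Hypothetical helper: may 32-bit EL0 code run on this CPU? */
static bool can_run_compat_here(int cpu)
{
        /* Fast path: 32-bit support is symmetric (all CPUs or none). */
        if (!static_branch_unlikely(&arm64_mismatched_32bit_el0))
                return system_supports_32bit_el0();

        return cpumask_test_cpu(cpu, system_32bit_el0_cpumask());
}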
*/ secondary_data.task = idle; - secondary_data.stack = task_stack_page(idle) + THREAD_SIZE; update_cpu_boot_status(CPU_MMU_OFF); - __flush_dcache_area(&secondary_data, sizeof(secondary_data)); /* Now bring the CPU into our world */ ret = boot_secondary(cpu, idle); @@ -142,8 +140,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) pr_crit("CPU%u: failed to come online\n", cpu); secondary_data.task = NULL; - secondary_data.stack = NULL; - __flush_dcache_area(&secondary_data, sizeof(secondary_data)); status = READ_ONCE(secondary_data.status); if (status == CPU_MMU_OFF) status = READ_ONCE(__early_cpu_boot_status); @@ -202,10 +198,7 @@ asmlinkage notrace void secondary_start_kernel(void) u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; struct mm_struct *mm = &init_mm; const struct cpu_operations *ops; - unsigned int cpu; - - cpu = task_cpu(current); - set_my_cpu_offset(per_cpu_offset(cpu)); + unsigned int cpu = smp_processor_id(); /* * All kernel threads share the same mm context; grab a @@ -452,6 +445,11 @@ void __init smp_cpus_done(unsigned int max_cpus) void __init smp_prepare_boot_cpu(void) { + /* + * The runtime per-cpu areas have been allocated by + * setup_per_cpu_areas(), and CPU0's boot time per-cpu area will be + * freed shortly, so we must move over to the runtime per-cpu area. + */ set_my_cpu_offset(per_cpu_offset(smp_processor_id())); cpuinfo_store_boot_cpu(); diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index c45a83512805..7e1624ecab3c 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -36,7 +36,7 @@ static void write_pen_release(u64 val) unsigned long size = sizeof(secondary_holding_pen_release); secondary_holding_pen_release = val; - __flush_dcache_area(start, size); + dcache_clean_inval_poc((unsigned long)start, (unsigned long)start + size); } @@ -90,8 +90,9 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu) * the boot protocol. */ writeq_relaxed(pa_holding_pen, release_addr); - __flush_dcache_area((__force void *)release_addr, - sizeof(*release_addr)); + dcache_clean_inval_poc((__force unsigned long)release_addr, + (__force unsigned long)release_addr + + sizeof(*release_addr)); /* * Send an event to wake up the secondary CPU. diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index de07147a7926..b189de5ca6cb 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -68,13 +68,17 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) unsigned long fp = frame->fp; struct stack_info info; - if (fp & 0xf) - return -EINVAL; - if (!tsk) tsk = current; - if (!on_accessible_stack(tsk, fp, &info)) + /* Final frame; nothing to unwind */ + if (fp == (unsigned long)task_pt_regs(tsk)->stackframe) + return -ENOENT; + + if (fp & 0x7) + return -EINVAL; + + if (!on_accessible_stack(tsk, fp, 16, &info)) return -EINVAL; if (test_bit(info.type, frame->stacks_done)) @@ -128,12 +132,6 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) frame->pc = ptrauth_strip_insn_pac(frame->pc); - /* - * This is a terminal record, so we have finished unwinding. 
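The unwind_frame() hunk above tightens the stack walk: the terminal frame is now recognised by address (the pt_regs stackframe installed in copy_thread()), the alignment requirement on x29 is relaxed from 16 to 8 bytes, and on_accessible_stack() is asked to validate the full 16-byte frame record. A compact, purely illustrative userspace sketch of the frame-record layout being walked:

#include <stdint.h>

/* An AArch64 frame record: saved frame pointer and link register. */
struct frame_record {
        uint64_t fp;    /* caller's x29 */
        uint64_t lr;    /* return address (x30) */
};

/* One unwind step: x29 must be 8-byte aligned and point at a full record. */
static int unwind_step(uint64_t *fp, uint64_t *pc)
{
        const struct frame_record *rec;

        if (*fp & 0x7)
                return -1;

        rec = (const struct frame_record *)(uintptr_t)*fp;
        *fp = rec->fp;
        *pc = rec->lr;
        return 0;
}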
- */ - if (!frame->fp && !frame->pc) - return -ENOENT; - return 0; } NOKPROBE_SYMBOL(unwind_frame); diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index e3f72df9509d..938ce6fbee8a 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -7,6 +7,7 @@ #include <asm/alternative.h> #include <asm/cacheflush.h> #include <asm/cpufeature.h> +#include <asm/cpuidle.h> #include <asm/daifflags.h> #include <asm/debug-monitors.h> #include <asm/exec.h> @@ -91,6 +92,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) int ret = 0; unsigned long flags; struct sleep_stack_data state; + struct arm_cpuidle_irq_context context; /* Report any MTE async fault before going to suspend */ mte_suspend_enter(); @@ -103,12 +105,18 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) flags = local_daif_save(); /* - * Function graph tracer state gets incosistent when the kernel + * Function graph tracer state gets inconsistent when the kernel * calls functions that never return (aka suspend finishers) hence * disable graph tracing during their execution. */ pause_graph_tracing(); + /* + * Switch to using DAIF.IF instead of PMR in order to reliably + * resume if we're using pseudo-NMIs. + */ + arm_cpuidle_save_irq_context(&context); + if (__cpu_suspend_enter(&state)) { /* Call the suspend finisher */ ret = fn(arg); @@ -126,6 +134,8 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) RCU_NONIDLE(__cpu_suspend_exit()); } + arm_cpuidle_restore_irq_context(&context); + unpause_graph_tracing(); /* diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index 265fe3eb1069..db5159a3055f 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -41,7 +41,7 @@ __do_compat_cache_op(unsigned long start, unsigned long end) dsb(ish); } - ret = __flush_cache_user_range(start, start + chunk); + ret = caches_clean_inval_user_pou(start, start + chunk); if (ret) return ret; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 8f66072fa5cb..b03e383d944a 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -144,13 +144,6 @@ pstate_check_t * const aarch32_opcode_cond_checks[16] = { __check_gt, __check_le, __check_al, __check_al }; -static const char *handler[] = { - "Synchronous Abort", - "IRQ", - "FIQ", - "Error" -}; - int show_unhandled_signals = 0; static void dump_kernel_instr(const char *lvl, struct pt_regs *regs) @@ -850,27 +843,8 @@ const char *esr_get_class_string(u32 esr) } /* - * bad_mode handles the impossible case in the exception vector. This is always - * fatal. - */ -asmlinkage void notrace bad_mode(struct pt_regs *regs, int reason, unsigned int esr) -{ - arm64_enter_nmi(regs); - - console_verbose(); - - pr_crit("Bad mode in %s handler detected on CPU%d, code 0x%08x -- %s\n", - handler[reason], smp_processor_id(), esr, - esr_get_class_string(esr)); - - __show_regs(regs); - local_daif_mask(); - panic("bad mode"); -} - -/* * bad_el0_sync handles unexpected, but potentially recoverable synchronous - * exceptions taken from EL0. Unlike bad_mode, this returns. + * exceptions taken from EL0. 
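The suspend.c hunk above brackets the suspend finisher with arm_cpuidle_save_irq_context() and arm_cpuidle_restore_irq_context() so that, when pseudo-NMIs are in use, the core sleeps with interrupts masked through DAIF rather than through the GIC priority mask. Any comparable low-level path that calls into firmware should pair the two the same way; a sketch under that assumption, where my_idle_enter() and its enter_fw() callback are stand-ins rather than existing functions:

#include <asm/cpuidle.h>

static int my_idle_enter(int (*enter_fw)(void))
{
        struct arm_cpuidle_irq_context context;
        int ret;

        /* Mask via DAIF instead of PMR while the core is asleep. */
        arm_cpuidle_save_irq_context(&context);

        ret = enter_fw();

        arm_cpuidle_restore_irq_context(&context);
        return ret;
}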
*/ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) { @@ -888,15 +862,11 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack) __aligned(16); -asmlinkage void noinstr handle_bad_stack(struct pt_regs *regs) +void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far) { unsigned long tsk_stk = (unsigned long)current->stack; unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr); unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack); - unsigned int esr = read_sysreg(esr_el1); - unsigned long far = read_sysreg(far_el1); - - arm64_enter_nmi(regs); console_verbose(); pr_emerg("Insufficient stack space to handle exception!"); @@ -969,15 +939,11 @@ bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr) } } -asmlinkage void noinstr do_serror(struct pt_regs *regs, unsigned int esr) +void do_serror(struct pt_regs *regs, unsigned int esr) { - arm64_enter_nmi(regs); - /* non-RAS errors are not containable */ if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr)) arm64_serror_panic(regs, esr); - - arm64_exit_nmi(regs); } /* GENERIC_BUG traps */ |
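The traps.c hunks above finish a pattern that runs through this series: handlers such as panic_bad_stack() and do_serror() no longer read the syndrome registers or manage NMI state themselves; that bookkeeping happens in the noinstr entry code that calls them, and the handlers receive esr (and far where relevant) as plain arguments. A schematic sketch of that split; el1_foo_entry() and my_handle_foo() are hypothetical names, not functions in the tree:

#include <linux/compiler.h>
#include <asm/ptrace.h>
#include <asm/sysreg.h>

void my_handle_foo(struct pt_regs *regs, unsigned int esr, unsigned long far);

/* Entry stub: the only place that touches ESR_EL1/FAR_EL1 directly. */
static void noinstr el1_foo_entry(struct pt_regs *regs)
{
        unsigned int esr = read_sysreg(esr_el1);
        unsigned long far = read_sysreg(far_el1);

        /* enter/exit accounting (e.g. NMI state) would live here. */
        my_handle_foo(regs, esr, far);
}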