diff options
Diffstat (limited to 'arch/x86')
44 files changed, 226 insertions, 138 deletions
diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c index 4ff01176c1cc..21d56ae83cdf 100644 --- a/arch/x86/boot/cmdline.c +++ b/arch/x86/boot/cmdline.c @@ -54,7 +54,7 @@ int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *b /* else */ state = st_wordcmp; opptr = option; - /* fall through */ + fallthrough; case st_wordcmp: if (c == '=' && !*opptr) { @@ -129,7 +129,7 @@ int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option) state = st_wordcmp; opptr = option; wstart = pos; - /* fall through */ + fallthrough; case st_wordcmp: if (!*opptr) diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 0048269180d5..dde7cb3724df 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -178,7 +178,7 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size, } *size = 0; } - /* Fall through */ + fallthrough; default: /* * If w/o offset, only size specified, memmap=nn[KMG] has the diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 48512c7944e7..2f84c7ca74ea 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -60,16 +60,10 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs) #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs) { - unsigned int nr = (unsigned int)regs->orig_ax; - if (IS_ENABLED(CONFIG_IA32_EMULATION)) current_thread_info()->status |= TS_COMPAT; - /* - * Subtlety here: if ptrace pokes something larger than 2^32-1 into - * orig_ax, the unsigned int return value truncates it. This may - * or may not be necessary, but it matches the old asm behavior. - */ - return (unsigned int)syscall_enter_from_user_mode(regs, nr); + + return (unsigned int)regs->orig_ax; } /* @@ -91,15 +85,29 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs) { unsigned int nr = syscall_32_enter(regs); + /* + * Subtlety here: if ptrace pokes something larger than 2^32-1 into + * orig_ax, the unsigned int return value truncates it. This may + * or may not be necessary, but it matches the old asm behavior. + */ + nr = (unsigned int)syscall_enter_from_user_mode(regs, nr); + do_syscall_32_irqs_on(regs, nr); syscall_exit_to_user_mode(regs); } static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) { - unsigned int nr = syscall_32_enter(regs); + unsigned int nr = syscall_32_enter(regs); int res; + /* + * This cannot use syscall_enter_from_user_mode() as it has to + * fetch EBP before invoking any of the syscall entry work + * functions. + */ + syscall_enter_from_user_mode_prepare(regs); + instrumentation_begin(); /* Fetch EBP from where the vDSO stashed it. */ if (IS_ENABLED(CONFIG_X86_64)) { @@ -122,6 +130,9 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) return false; } + /* The case truncates any ptrace induced syscall nr > 2^32 -1 */ + nr = (unsigned int)syscall_enter_from_user_mode_work(regs, nr); + /* Now this is just like a normal syscall. */ do_syscall_32_irqs_on(regs, nr); syscall_exit_to_user_mode(regs); diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S index 3a07ce3ec70b..f1f96d4d8cd6 100644 --- a/arch/x86/entry/thunk_32.S +++ b/arch/x86/entry/thunk_32.S @@ -29,11 +29,6 @@ SYM_CODE_START_NOALIGN(\name) SYM_CODE_END(\name) .endm -#ifdef CONFIG_TRACE_IRQFLAGS - THUNK trace_hardirqs_on_thunk,trace_hardirqs_on_caller,1 - THUNK trace_hardirqs_off_thunk,trace_hardirqs_off_caller,1 -#endif - #ifdef CONFIG_PREEMPTION THUNK preempt_schedule_thunk, preempt_schedule THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 50963472ee85..31e6887d24f1 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4682,7 +4682,7 @@ __init int intel_pmu_init(void) case INTEL_FAM6_CORE2_MEROM: x86_add_quirk(intel_clovertown_quirk); - /* fall through */ + fallthrough; case INTEL_FAM6_CORE2_MEROM_L: case INTEL_FAM6_CORE2_PENRYN: @@ -5062,7 +5062,7 @@ __init int intel_pmu_init(void) case INTEL_FAM6_SKYLAKE_X: pmem = true; - /* fall through */ + fallthrough; case INTEL_FAM6_SKYLAKE_L: case INTEL_FAM6_SKYLAKE: case INTEL_FAM6_KABYLAKE_L: @@ -5114,7 +5114,7 @@ __init int intel_pmu_init(void) case INTEL_FAM6_ICELAKE_X: case INTEL_FAM6_ICELAKE_D: pmem = true; - /* fall through */ + fallthrough; case INTEL_FAM6_ICELAKE_L: case INTEL_FAM6_ICELAKE: case INTEL_FAM6_TIGERLAKE_L: diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 63f58bdf556c..8961653c5dd2 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1268,7 +1268,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort) ret = X86_BR_ZERO_CALL; break; } - /* fall through */ + fallthrough; case 0x9a: /* call far absolute */ ret = X86_BR_CALL; break; diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index a8f9315b9eae..6fe54b2813c1 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -18,8 +18,16 @@ static __always_inline void arch_check_user_regs(struct pt_regs *regs) * state, not the interrupt state as imagined by Xen. */ unsigned long flags = native_save_fl(); - WARN_ON_ONCE(flags & (X86_EFLAGS_AC | X86_EFLAGS_DF | - X86_EFLAGS_NT)); + unsigned long mask = X86_EFLAGS_DF | X86_EFLAGS_NT; + + /* + * For !SMAP hardware we patch out CLAC on entry. + */ + if (boot_cpu_has(X86_FEATURE_SMAP) || + (IS_ENABLED(CONFIG_64_BIT) && boot_cpu_has(X86_FEATURE_XENPV))) + mask |= X86_EFLAGS_AC; + + WARN_ON_ONCE(flags & mask); /* We think we came from user mode. Make sure pt_regs agrees. */ WARN_ON_ONCE(!user_mode(regs)); diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 0a301ad0b02f..9257667d13c5 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -59,5 +59,6 @@ typedef struct { } void leave_mm(int cpu); +#define leave_mm leave_mm #endif /* _ASM_X86_MMU_H */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 40aa69d04862..d8324a236696 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -327,8 +327,8 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, static const unsigned int argument_offs[] = { #ifdef __i386__ offsetof(struct pt_regs, ax), - offsetof(struct pt_regs, cx), offsetof(struct pt_regs, dx), + offsetof(struct pt_regs, cx), #define NR_REG_ARGUMENTS 3 #else offsetof(struct pt_regs, di), diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index c3daf0aaa0ee..cdaab30880b9 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -239,7 +239,7 @@ void __init arch_init_ideal_nops(void) return; } - /* fall through */ + fallthrough; default: #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 21325a4a78b9..779a89e31c4c 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -800,7 +800,7 @@ static int irq_polarity(int idx) return IOAPIC_POL_HIGH; case MP_IRQPOL_RESERVED: pr_warn("IOAPIC: Invalid polarity: 2, defaulting to low\n"); - /* fall through */ + fallthrough; case MP_IRQPOL_ACTIVE_LOW: default: /* Pointless default required due to do gcc stupidity */ return IOAPIC_POL_LOW; @@ -848,7 +848,7 @@ static int irq_trigger(int idx) return IOAPIC_EDGE; case MP_IRQTRIG_RESERVED: pr_warn("IOAPIC: Invalid trigger mode 2 defaulting to level\n"); - /* fall through */ + fallthrough; case MP_IRQTRIG_LEVEL: default: /* Pointless default required due to do gcc stupidity */ return IOAPIC_LEVEL; diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 7bda71def557..99ee61c9ba54 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -149,7 +149,7 @@ void __init default_setup_apic_routing(void) break; } /* P4 and above */ - /* fall through */ + fallthrough; case X86_VENDOR_HYGON: case X86_VENDOR_AMD: def_to_bigsmp = 1; diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index dae32d948bf2..f8a56b5dc29f 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -161,6 +161,7 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, apicd->move_in_progress = true; apicd->prev_vector = apicd->vector; apicd->prev_cpu = apicd->cpu; + WARN_ON_ONCE(apicd->cpu == newcpu); } else { irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector, managed); @@ -910,7 +911,7 @@ void send_cleanup_vector(struct irq_cfg *cfg) __send_cleanup_vector(apicd); } -static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector) +void irq_complete_move(struct irq_cfg *cfg) { struct apic_chip_data *apicd; @@ -918,15 +919,16 @@ static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector) if (likely(!apicd->move_in_progress)) return; - if (vector == apicd->vector && apicd->cpu == smp_processor_id()) + /* + * If the interrupt arrived on the new target CPU, cleanup the + * vector on the old target CPU. A vector check is not required + * because an interrupt can never move from one vector to another + * on the same CPU. + */ + if (apicd->cpu == smp_processor_id()) __send_cleanup_vector(apicd); } -void irq_complete_move(struct irq_cfg *cfg) -{ - __irq_complete_move(cfg, ~get_irq_regs()->orig_ax); -} - /* * Called from fixup_irqs() with @desc->lock held and interrupts disabled. */ diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index c7503be92f35..57074cf3ad7c 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -248,7 +248,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, switch (leaf) { case 1: l1 = &l1i; - /* fall through */ + fallthrough; case 0: if (!l1->val) return; diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 7843ab3fde09..3a44346f2276 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -199,7 +199,7 @@ static int raise_local(void) * calling irq_enter, but the necessary * machinery isn't exported currently. */ - /*FALL THROUGH*/ + fallthrough; case MCJ_CTX_PROCESS: raise_exception(m, NULL); break; diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index d8f9230d2034..abe9fe0fb851 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -193,7 +193,7 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval) if (!atomic_sub_return(1, &cmci_storm_on_cpus)) pr_notice("CMCI storm subsided: switching to interrupt mode\n"); - /* FALLTHROUGH */ + fallthrough; case CMCI_STORM_SUBSIDED: /* diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c index 72182809b333..ca670919b561 100644 --- a/arch/x86/kernel/cpu/mtrr/cyrix.c +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c @@ -98,7 +98,7 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) case 7: if (size < 0x40) break; - /* Else, fall through */ + fallthrough; case 6: case 5: case 4: diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 8cdf29ffd95f..b98ff620ba77 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -349,7 +349,7 @@ static int arch_build_bp_info(struct perf_event *bp, hw->len = X86_BREAKPOINT_LEN_X; return 0; } - /* fall through */ + fallthrough; default: return -EINVAL; } diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 68acd30c6b87..c2f02f308ecf 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -450,7 +450,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, ptr = &remcomInBuffer[1]; if (kgdb_hex2long(&ptr, &addr)) linux_regs->ip = addr; - /* fall through */ + fallthrough; case 'D': case 'k': /* clear the trace bit */ @@ -539,7 +539,7 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) * a system call which should be ignored */ return NOTIFY_DONE; - /* fall through */ + fallthrough; default: if (user_mode(regs)) return NOTIFY_DONE; diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 411af4aa7b51..baa21090c9be 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -312,7 +312,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) case 2: if (i == 0 || i == 13) continue; /* IRQ0 & IRQ13 not connected */ - /* fall through */ + fallthrough; default: if (i == 2) continue; /* IRQ2 is never connected */ @@ -356,7 +356,7 @@ static void __init construct_ioapic_table(int mpc_default_type) default: pr_err("???\nUnknown standard configuration %d\n", mpc_default_type); - /* fall through */ + fallthrough; case 1: case 5: memcpy(bus.bustype, "ISA ", 6); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 994d8393f2f7..13ce616cc7af 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -684,9 +684,7 @@ void arch_cpu_idle(void) */ void __cpuidle default_idle(void) { - trace_cpu_idle_rcuidle(1, smp_processor_id()); safe_halt(); - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } #if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE) EXPORT_SYMBOL(default_idle); @@ -792,7 +790,6 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) static __cpuidle void mwait_idle(void) { if (!current_set_polling_and_test()) { - trace_cpu_idle_rcuidle(1, smp_processor_id()); if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) { mb(); /* quirk */ clflush((void *)¤t_thread_info()->flags); @@ -804,7 +801,6 @@ static __cpuidle void mwait_idle(void) __sti_mwait(0, 0); else local_irq_enable(); - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } else { local_irq_enable(); } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 5679aa3fdcb8..e7537c5440bb 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -204,7 +204,7 @@ static int set_segment_reg(struct task_struct *task, case offsetof(struct user_regs_struct, ss): if (unlikely(value == 0)) return -EIO; - /* Else, fall through */ + fallthrough; default: *pt_regs_access(task_pt_regs(task), offset) = value; diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 0ec7ced727fe..a515e2d230b7 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -654,7 +654,7 @@ static void native_machine_emergency_restart(void) case BOOT_CF9_FORCE: port_cf9_safe = true; - /* Fall through */ + fallthrough; case BOOT_CF9_SAFE: if (port_cf9_safe) { diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index d5fa494c2304..be0d7d4152ec 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -726,7 +726,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) regs->ax = -EINTR; break; } - /* fallthrough */ + fallthrough; case -ERESTARTNOINTR: regs->ax = regs->orig_ax; regs->ip -= 2; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 27aa04a95702..f5ef689dd62a 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1594,14 +1594,28 @@ int native_cpu_disable(void) if (ret) return ret; - /* - * Disable the local APIC. Otherwise IPI broadcasts will reach - * it. It still responds normally to INIT, NMI, SMI, and SIPI - * messages. - */ - apic_soft_disable(); cpu_disable_common(); + /* + * Disable the local APIC. Otherwise IPI broadcasts will reach + * it. It still responds normally to INIT, NMI, SMI, and SIPI + * messages. + * + * Disabling the APIC must happen after cpu_disable_common() + * which invokes fixup_irqs(). + * + * Disabling the APIC preserves already set bits in IRR, but + * an interrupt arriving after disabling the local APIC does not + * set the corresponding IRR bit. + * + * fixup_irqs() scans IRR for set bits so it can raise a not + * yet handled interrupt on the new destination CPU via an IPI + * but obviously it can't do so for IRR bits which are not set. + * IOW, interrupts arriving after disabling the local APIC will + * be lost. + */ + apic_soft_disable(); + return 0; } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 1f66d2d1e998..81a2fb711091 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -729,20 +729,9 @@ static bool is_sysenter_singlestep(struct pt_regs *regs) #endif } -static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7) +static __always_inline unsigned long debug_read_clear_dr6(void) { - /* - * Disable breakpoints during exception handling; recursive exceptions - * are exceedingly 'fun'. - * - * Since this function is NOKPROBE, and that also applies to - * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a - * HW_BREAKPOINT_W on our stack) - * - * Entry text is excluded for HW_BP_X and cpu_entry_area, which - * includes the entry stack is excluded for everything. - */ - *dr7 = local_db_save(); + unsigned long dr6; /* * The Intel SDM says: @@ -755,15 +744,12 @@ static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7) * * Keep it simple: clear DR6 immediately. */ - get_debugreg(*dr6, 6); + get_debugreg(dr6, 6); set_debugreg(0, 6); /* Filter out all the reserved bits which are preset to 1 */ - *dr6 &= ~DR6_RESERVED; -} + dr6 &= ~DR6_RESERVED; -static __always_inline void debug_exit(unsigned long dr7) -{ - local_db_restore(dr7); + return dr6; } /* @@ -863,6 +849,18 @@ out: static __always_inline void exc_debug_kernel(struct pt_regs *regs, unsigned long dr6) { + /* + * Disable breakpoints during exception handling; recursive exceptions + * are exceedingly 'fun'. + * + * Since this function is NOKPROBE, and that also applies to + * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a + * HW_BREAKPOINT_W on our stack) + * + * Entry text is excluded for HW_BP_X and cpu_entry_area, which + * includes the entry stack is excluded for everything. + */ + unsigned long dr7 = local_db_save(); bool irq_state = idtentry_enter_nmi(regs); instrumentation_begin(); @@ -883,6 +881,8 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, instrumentation_end(); idtentry_exit_nmi(regs, irq_state); + + local_db_restore(dr7); } static __always_inline void exc_debug_user(struct pt_regs *regs, @@ -894,6 +894,15 @@ static __always_inline void exc_debug_user(struct pt_regs *regs, */ WARN_ON_ONCE(!user_mode(regs)); + /* + * NB: We can't easily clear DR7 here because + * idtentry_exit_to_usermode() can invoke ptrace, schedule, access + * user memory, etc. This means that a recursive #DB is possible. If + * this happens, that #DB will hit exc_debug_kernel() and clear DR7. + * Since we're not on the IST stack right now, everything will be + * fine. + */ + irqentry_enter_from_user_mode(regs); instrumentation_begin(); @@ -907,36 +916,24 @@ static __always_inline void exc_debug_user(struct pt_regs *regs, /* IST stack entry */ DEFINE_IDTENTRY_DEBUG(exc_debug) { - unsigned long dr6, dr7; - - debug_enter(&dr6, &dr7); - exc_debug_kernel(regs, dr6); - debug_exit(dr7); + exc_debug_kernel(regs, debug_read_clear_dr6()); } /* User entry, runs on regular task stack */ DEFINE_IDTENTRY_DEBUG_USER(exc_debug) { - unsigned long dr6, dr7; - - debug_enter(&dr6, &dr7); - exc_debug_user(regs, dr6); - debug_exit(dr7); + exc_debug_user(regs, debug_read_clear_dr6()); } #else /* 32 bit does not have separate entry points. */ DEFINE_IDTENTRY_RAW(exc_debug) { - unsigned long dr6, dr7; - - debug_enter(&dr6, &dr7); + unsigned long dr6 = debug_read_clear_dr6(); if (user_mode(regs)) exc_debug_user(regs, dr6); else exc_debug_kernel(regs, dr6); - - debug_exit(dr7); } #endif diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 15e5aad8ac2c..3fdaa042823d 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -735,7 +735,7 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) * OPCODE1() of the "short" jmp which checks the same condition. */ opc1 = OPCODE2(insn) - 0x10; - /* fall through */ + fallthrough; default: if (!is_cond_jmp_opcode(opc1)) return -ENOSYS; @@ -892,7 +892,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, fix_ip_or_call = 0; break; } - /* fall through */ + fallthrough; default: riprel_analyze(auprobe, &insn); } diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index d0e2825ae617..5299ef5ff18d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3016,7 +3016,7 @@ static void string_registers_quirk(struct x86_emulate_ctxt *ctxt) case 0xa4: /* movsb */ case 0xa5: /* movsd/w */ *reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1; - /* fall through */ + fallthrough; case 0xaa: /* stosb */ case 0xab: /* stosd/w */ *reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1; diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 814d3aee5cef..1d330564eed8 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1779,7 +1779,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) ret = kvm_hvcall_signal_event(vcpu, fast, ingpa); if (ret != HV_STATUS_INVALID_PORT_ID) break; - /* fall through - maybe userspace knows this conn_id. */ + fallthrough; /* maybe userspace knows this conn_id */ case HVCALL_POST_MESSAGE: /* don't bother userspace if it has no way to handle it */ if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) { diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index c47d2acec529..4aa1c2e00e2a 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -285,7 +285,7 @@ int kvm_set_routing_entry(struct kvm *kvm, switch (ue->u.irqchip.irqchip) { case KVM_IRQCHIP_PIC_SLAVE: e->irqchip.pin += PIC_NUM_PINS / 2; - /* fall through */ + fallthrough; case KVM_IRQCHIP_PIC_MASTER: if (ue->u.irqchip.pin >= PIC_NUM_PINS / 2) return -EINVAL; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 5ccbee7165a2..35cca2e0c802 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1053,7 +1053,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, switch (delivery_mode) { case APIC_DM_LOWEST: vcpu->arch.apic_arb_prio++; - /* fall through */ + fallthrough; case APIC_DM_FIXED: if (unlikely(trig_mode && !level)) break; @@ -1341,7 +1341,7 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) break; case APIC_TASKPRI: report_tpr_access(apic, false); - /* fall thru */ + fallthrough; default: val = kvm_lapic_get_reg(apic, offset); break; @@ -2027,7 +2027,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) case APIC_LVT0: apic_manage_nmi_watchdog(apic, val); - /* fall through */ + fallthrough; case APIC_LVTTHMR: case APIC_LVTPC: case APIC_LVT1: diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index a5d0207e7189..43fdb0c12a5d 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4422,7 +4422,7 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, rsvd_bits(maxphyaddr, 51); rsvd_check->rsvd_bits_mask[1][4] = rsvd_check->rsvd_bits_mask[0][4]; - /* fall through */ + fallthrough; case PT64_ROOT_4LEVEL: rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd | nonleaf_bit8_rsvd | rsvd_bits(7, 7) | diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 03dd7bac8034..0194336b64a4 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2668,7 +2668,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_IA32_APICBASE: if (kvm_vcpu_apicv_active(vcpu)) avic_update_vapic_bar(to_svm(vcpu), data); - /* Fall through */ + fallthrough; default: return kvm_set_msr_common(vcpu, msr); } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 46ba2e03a892..819c185adf09 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4654,7 +4654,7 @@ static bool rmode_exception(struct kvm_vcpu *vcpu, int vec) vmcs_read32(VM_EXIT_INSTRUCTION_LEN); if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) return false; - /* fall through */ + fallthrough; case DB_VECTOR: return !(vcpu->guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)); @@ -4827,7 +4827,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) } kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM; kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7); - /* fall through */ + fallthrough; case BP_VECTOR: /* * Update instruction length as we may reinject #BP from @@ -5257,7 +5257,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) error_code = vmcs_read32(IDT_VECTORING_ERROR_CODE); } - /* fall through */ + fallthrough; case INTR_TYPE_SOFT_EXCEPTION: kvm_clear_exception_queue(vcpu); break; @@ -5610,7 +5610,7 @@ static int handle_invpcid(struct kvm_vcpu *vcpu) * keeping track of global entries in shadow page tables. */ - /* fall-through */ + fallthrough; case INVPCID_TYPE_ALL_INCL_GLOBAL: kvm_mmu_unload(vcpu); return kvm_skip_emulated_instruction(vcpu); @@ -6578,7 +6578,7 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, break; case INTR_TYPE_SOFT_EXCEPTION: vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); - /* fall through */ + fallthrough; case INTR_TYPE_HARD_EXCEPTION: if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { u32 err = vmcs_read32(error_code_field); @@ -6588,7 +6588,7 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, break; case INTR_TYPE_SOFT_INTR: vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); - /* fall through */ + fallthrough; case INTR_TYPE_EXT_INTR: kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR); break; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 33945283fe07..d39d6cf1d473 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1116,14 +1116,12 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) vcpu->arch.eff_db[dr] = val; break; case 4: - /* fall through */ case 6: if (!kvm_dr6_valid(val)) return -1; /* #GP */ vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu); break; case 5: - /* fall through */ default: /* 7 */ if (!kvm_dr7_valid(val)) return -1; /* #GP */ @@ -1154,12 +1152,10 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) *val = vcpu->arch.db[array_index_nospec(dr, size)]; break; case 4: - /* fall through */ case 6: *val = vcpu->arch.dr6; break; case 5: - /* fall through */ default: /* 7 */ *val = vcpu->arch.dr7; break; @@ -3051,7 +3047,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3: case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1: - pr = true; /* fall through */ + pr = true; + fallthrough; case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3: case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1: if (kvm_pmu_is_valid_msr(vcpu, msr)) @@ -4359,7 +4356,7 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, case KVM_CAP_HYPERV_SYNIC2: if (cap->args[0]) return -EINVAL; - /* fall through */ + fallthrough; case KVM_CAP_HYPERV_SYNIC: if (!irqchip_in_kernel(vcpu->kvm)) @@ -8672,7 +8669,7 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) vcpu->arch.pv.pv_unhalted = false; vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; - /* fall through */ + fallthrough; case KVM_MP_STATE_RUNNABLE: vcpu->arch.apf.halted = false; break; diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index d46fff11f06f..aa067859a70b 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -24,7 +24,7 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_cmdline.o = -pg endif -CFLAGS_cmdline.o := -fno-stack-protector +CFLAGS_cmdline.o := -fno-stack-protector -fno-jump-tables endif inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c index 4f1719e22d3c..b6da09339308 100644 --- a/arch/x86/lib/cmdline.c +++ b/arch/x86/lib/cmdline.c @@ -58,7 +58,7 @@ __cmdline_find_option_bool(const char *cmdline, int max_cmdline_size, state = st_wordcmp; opptr = option; wstart = pos; - /* fall through */ + fallthrough; case st_wordcmp: if (!*opptr) { @@ -89,7 +89,7 @@ __cmdline_find_option_bool(const char *cmdline, int max_cmdline_size, break; } state = st_wordskip; - /* fall through */ + fallthrough; case st_wordskip: if (!c) @@ -151,7 +151,7 @@ __cmdline_find_option(const char *cmdline, int max_cmdline_size, state = st_wordcmp; opptr = option; - /* fall through */ + fallthrough; case st_wordcmp: if ((c == '=') && !*opptr) { @@ -172,7 +172,7 @@ __cmdline_find_option(const char *cmdline, int max_cmdline_size, break; } state = st_wordskip; - /* fall through */ + fallthrough; case st_wordskip: if (myisspace(c)) diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index 31600d851fd8..5e69603ff63f 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -179,7 +179,7 @@ static int resolve_default_seg(struct insn *insn, struct pt_regs *regs, int off) if (insn->addr_bytes == 2) return -EINVAL; - /* fall through */ + fallthrough; case -EDOM: case offsetof(struct pt_regs, bx): @@ -362,7 +362,6 @@ static short get_segment_selector(struct pt_regs *regs, int seg_reg_idx) case INAT_SEG_REG_GS: return vm86regs->gs; case INAT_SEG_REG_IGNORE: - /* fall through */ default: return -EINVAL; } @@ -386,7 +385,6 @@ static short get_segment_selector(struct pt_regs *regs, int seg_reg_idx) */ return get_user_gs(regs); case INAT_SEG_REG_IGNORE: - /* fall through */ default: return -EINVAL; } @@ -786,7 +784,7 @@ int insn_get_code_seg_params(struct pt_regs *regs) */ return INSN_CODE_SEG_PARAMS(4, 8); case 3: /* Invalid setting. CS.L=1, CS.D=1 */ - /* fall through */ + fallthrough; default: return -EINVAL; } diff --git a/arch/x86/math-emu/errors.c b/arch/x86/math-emu/errors.c index 73dc66d887f3..ec071cbb0804 100644 --- a/arch/x86/math-emu/errors.c +++ b/arch/x86/math-emu/errors.c @@ -186,7 +186,7 @@ void FPU_printall(void) case TAG_Special: /* Update tagi for the printk below */ tagi = FPU_Special(r); - /* fall through */ + fallthrough; case TAG_Valid: printk("st(%d) %c .%04lx %04lx %04lx %04lx e%+-6d ", i, getsign(r) ? '-' : '+', diff --git a/arch/x86/math-emu/fpu_trig.c b/arch/x86/math-emu/fpu_trig.c index 127ea54122d7..4a9887851ad8 100644 --- a/arch/x86/math-emu/fpu_trig.c +++ b/arch/x86/math-emu/fpu_trig.c @@ -1352,7 +1352,7 @@ static void fyl2xp1(FPU_REG *st0_ptr, u_char st0_tag) case TW_Denormal: if (denormal_operand() < 0) return; - /* fall through */ + fallthrough; case TAG_Zero: case TAG_Valid: setsign(st0_ptr, getsign(st0_ptr) ^ getsign(st1_ptr)); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 35f1498e9832..6e3e8a124903 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -190,6 +190,53 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) return pmd_k; } +/* + * Handle a fault on the vmalloc or module mapping area + * + * This is needed because there is a race condition between the time + * when the vmalloc mapping code updates the PMD to the point in time + * where it synchronizes this update with the other page-tables in the + * system. + * + * In this race window another thread/CPU can map an area on the same + * PMD, finds it already present and does not synchronize it with the + * rest of the system yet. As a result v[mz]alloc might return areas + * which are not mapped in every page-table in the system, causing an + * unhandled page-fault when they are accessed. + */ +static noinline int vmalloc_fault(unsigned long address) +{ + unsigned long pgd_paddr; + pmd_t *pmd_k; + pte_t *pte_k; + + /* Make sure we are in vmalloc area: */ + if (!(address >= VMALLOC_START && address < VMALLOC_END)) + return -1; + + /* + * Synchronize this task's top level page-table + * with the 'reference' page table. + * + * Do _not_ use "current" here. We might be inside + * an interrupt in the middle of a task switch.. + */ + pgd_paddr = read_cr3_pa(); + pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); + if (!pmd_k) + return -1; + + if (pmd_large(*pmd_k)) + return 0; + + pte_k = pte_offset_kernel(pmd_k, address); + if (!pte_present(*pte_k)) + return -1; + + return 0; +} +NOKPROBE_SYMBOL(vmalloc_fault); + void arch_sync_kernel_mappings(unsigned long start, unsigned long end) { unsigned long addr; @@ -1110,6 +1157,37 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code, */ WARN_ON_ONCE(hw_error_code & X86_PF_PK); +#ifdef CONFIG_X86_32 + /* + * We can fault-in kernel-space virtual memory on-demand. The + * 'reference' page table is init_mm.pgd. + * + * NOTE! We MUST NOT take any locks for this case. We may + * be in an interrupt or a critical region, and should + * only copy the information from the master page table, + * nothing more. + * + * Before doing this on-demand faulting, ensure that the + * fault is not any of the following: + * 1. A fault on a PTE with a reserved bit set. + * 2. A fault caused by a user-mode access. (Do not demand- + * fault kernel memory due to user-mode accesses). + * 3. A fault caused by a page-level protection violation. + * (A demand fault would be on a non-present page which + * would have X86_PF_PROT==0). + * + * This is only needed to close a race condition on x86-32 in + * the vmalloc mapping/unmapping code. See the comment above + * vmalloc_fault() for details. On x86-64 the race does not + * exist as the vmalloc mappings don't need to be synchronized + * there. + */ + if (!(hw_error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) { + if (vmalloc_fault(address) >= 0) + return; + } +#endif + /* Was the fault spurious, caused by lazy TLB invalidation? */ if (spurious_kernel_fault(hw_error_code, address)) return; diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 84d85dbd1dad..9e5ccc56f8e0 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -574,7 +574,7 @@ static bool memremap_should_map_decrypted(resource_size_t phys_addr, /* For SEV, these areas are encrypted */ if (sev_active()) break; - /* Fallthrough */ + fallthrough; case E820_TYPE_PRAM: return true; diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index c5174b4e318b..683cd12f4793 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c @@ -321,7 +321,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei, u64 addr, u64 max_addr, u64 size) { return split_nodes_size_interleave_uniform(ei, pi, addr, max_addr, size, - 0, NULL, NUMA_NO_NODE); + 0, NULL, 0); } static int __init setup_emu2phys_nid(int *dfl_phys_nid) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 1a3569b43aa5..0951b47e64c1 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -555,21 +555,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); load_new_mm_cr3(next->pgd, new_asid, true); - /* - * NB: This gets called via leave_mm() in the idle path - * where RCU functions differently. Tracing normally - * uses RCU, so we need to use the _rcuidle variant. - * - * (There is no good reason for this. The idle code should - * be rearranged to call this before rcu_idle_enter().) - */ - trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); + trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); } else { /* The new ASID is already up to date. */ load_new_mm_cr3(next->pgd, new_asid, false); - /* See above wrt _rcuidle. */ - trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); + trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0); } /* Make sure we write CR3 before loaded_mm. */ |