diff options
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/acpi/boot.c | 25 | ||||
-rw-r--r-- | arch/x86/kernel/apic/apic_numachip.c | 22 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/intel.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/entry_32.S | 3 | ||||
-rw-r--r-- | arch/x86/kernel/entry_64.S | 50 | ||||
-rw-r--r-- | arch/x86/kernel/kgdb.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/kprobes/core.c | 54 | ||||
-rw-r--r-- | arch/x86/kernel/kprobes/opt.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/module.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/reboot.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/setup.c | 22 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/xsave.c | 7 |
15 files changed, 169 insertions, 62 deletions
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 3d525c6124f6..803b684676ff 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1338,6 +1338,26 @@ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d) } /* + * ACPI offers an alternative platform interface model that removes + * ACPI hardware requirements for platforms that do not implement + * the PC Architecture. + * + * We initialize the Hardware-reduced ACPI model here: + */ +static void __init acpi_reduced_hw_init(void) +{ + if (acpi_gbl_reduced_hardware) { + /* + * Override x86_init functions and bypass legacy pic + * in Hardware-reduced ACPI mode + */ + x86_init.timers.timer_init = x86_init_noop; + x86_init.irqs.pre_vector_init = x86_init_noop; + legacy_pic = &null_legacy_pic; + } +} + +/* * If your system is blacklisted here, but you find that acpi=force * works for you, please contact linux-acpi@vger.kernel.org */ @@ -1536,6 +1556,11 @@ int __init early_acpi_boot_init(void) */ early_acpi_process_madt(); + /* + * Hardware-reduced ACPI mode initialization: + */ + acpi_reduced_hw_init(); + return 0; } diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index c2fd21fed002..017149cded07 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -37,10 +37,12 @@ static const struct apic apic_numachip; static unsigned int get_apic_id(unsigned long x) { unsigned long value; - unsigned int id; + unsigned int id = (x >> 24) & 0xff; - rdmsrl(MSR_FAM10H_NODE_ID, value); - id = ((x >> 24) & 0xffU) | ((value << 2) & 0xff00U); + if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) { + rdmsrl(MSR_FAM10H_NODE_ID, value); + id |= (value << 2) & 0xff00; + } return id; } @@ -155,10 +157,18 @@ static int __init numachip_probe(void) static void fixup_cpu_id(struct cpuinfo_x86 *c, int node) { - if (c->phys_proc_id != node) { - c->phys_proc_id = node; - per_cpu(cpu_llc_id, smp_processor_id()) = node; + u64 val; + u32 nodes = 1; + + this_cpu_write(cpu_llc_id, node); + + /* Account for nodes per socket in multi-core-module processors */ + if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) { + rdmsrl(MSR_FAM10H_NODE_ID, val); + nodes = ((val >> 3) & 7) + 1; } + + c->phys_proc_id = node / nodes; } static int __init numachip_system_init(void) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b5c8ff5e9dfc..2346c95c6ab1 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1396,6 +1396,12 @@ void cpu_init(void) wait_for_master_cpu(cpu); + /* + * Initialize the CR4 shadow before doing anything that could + * try to read it. + */ + cr4_init_shadow(); + show_ucode_info_early(); printk(KERN_INFO "Initializing CPU#%d\n", cpu); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 94d7dcb12145..50163fa9034f 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -565,8 +565,8 @@ static const struct _tlb_table intel_tlb_table[] = { { 0xb2, TLB_INST_4K, 64, " TLB_INST 4KByte pages, 4-way set associative" }, { 0xb3, TLB_DATA_4K, 128, " TLB_DATA 4 KByte pages, 4-way set associative" }, { 0xb4, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 4-way associative" }, - { 0xb5, TLB_INST_4K, 64, " TLB_INST 4 KByte pages, 8-way set ssociative" }, - { 0xb6, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 8-way set ssociative" }, + { 0xb5, TLB_INST_4K, 64, " TLB_INST 4 KByte pages, 8-way set associative" }, + { 0xb6, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 8-way set associative" }, { 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" }, { 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" }, { 0xc1, STLB_4K_2M, 1024, " STLB 4 KByte and 2 MByte pages, 8-way associative" }, diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 498b6d967138..258990688a5e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -212,11 +212,11 @@ static struct event_constraint intel_hsw_event_constraints[] = { INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ - INTEL_EVENT_CONSTRAINT(0x08a3, 0x4), + INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ - INTEL_EVENT_CONSTRAINT(0x0ca3, 0x4), + INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */ - INTEL_EVENT_CONSTRAINT(0x04a3, 0xf), + INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), EVENT_CONSTRAINT_END }; @@ -1649,11 +1649,11 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event if (c) return c; - c = intel_pebs_constraints(event); + c = intel_shared_regs_constraints(cpuc, event); if (c) return c; - c = intel_shared_regs_constraints(cpuc, event); + c = intel_pebs_constraints(event); if (c) return c; diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 000d4199b03e..31e2d5bf3e38 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -982,6 +982,9 @@ ENTRY(xen_hypervisor_callback) ENTRY(xen_do_upcall) 1: mov %esp, %eax call xen_evtchn_do_upcall +#ifndef CONFIG_PREEMPT + call xen_maybe_preempt_hcall +#endif jmp ret_from_intr CFI_ENDPROC ENDPROC(xen_hypervisor_callback) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index db13655c3a2a..f0095a76c182 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -269,11 +269,14 @@ ENTRY(ret_from_fork) testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? jz 1f - testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET - jnz int_ret_from_sys_call - - RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET - jmp ret_from_sys_call # go to the SYSRET fastpath + /* + * By the time we get here, we have no idea whether our pt_regs, + * ti flags, and ti status came from the 64-bit SYSCALL fast path, + * the slow path, or one of the ia32entry paths. + * Use int_ret_from_sys_call to return, since it can safely handle + * all of the above. + */ + jmp int_ret_from_sys_call 1: subq $REST_SKIP, %rsp # leave space for volatiles @@ -361,12 +364,21 @@ system_call_fastpath: * Has incomplete stack frame and undefined top of stack. */ ret_from_sys_call: - testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) - jnz int_ret_from_sys_call_fixup /* Go the the slow path */ - LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF + + /* + * We must check ti flags with interrupts (or at least preemption) + * off because we must *never* return to userspace without + * processing exit work that is enqueued if we're preempted here. + * In particular, returning to userspace with any of the one-shot + * flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is + * very bad. + */ + testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) + jnz int_ret_from_sys_call_fixup /* Go the the slow path */ + CFI_REMEMBER_STATE /* * sysretq will re-enable interrupts: @@ -383,7 +395,7 @@ ret_from_sys_call: int_ret_from_sys_call_fixup: FIXUP_TOP_OF_STACK %r11, -ARGOFFSET - jmp int_ret_from_sys_call + jmp int_ret_from_sys_call_irqs_off /* Do syscall tracing */ tracesys: @@ -429,6 +441,7 @@ tracesys_phase2: GLOBAL(int_ret_from_sys_call) DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF +int_ret_from_sys_call_irqs_off: movl $_TIF_ALLWORK_MASK,%edi /* edi: mask to check */ GLOBAL(int_with_check) @@ -786,7 +799,21 @@ retint_swapgs: /* return to user-space */ cmpq %r11,(EFLAGS-ARGOFFSET)(%rsp) /* R11 == RFLAGS */ jne opportunistic_sysret_failed - testq $X86_EFLAGS_RF,%r11 /* sysret can't restore RF */ + /* + * SYSRET can't restore RF. SYSRET can restore TF, but unlike IRET, + * restoring TF results in a trap from userspace immediately after + * SYSRET. This would cause an infinite loop whenever #DB happens + * with register state that satisfies the opportunistic SYSRET + * conditions. For example, single-stepping this user code: + * + * movq $stuck_here,%rcx + * pushfq + * popq %r11 + * stuck_here: + * + * would never get past 'stuck_here'. + */ + testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11 jnz opportunistic_sysret_failed /* nothing to check for RSP */ @@ -1208,6 +1235,9 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) popq %rsp CFI_DEF_CFA_REGISTER rsp decl PER_CPU_VAR(irq_count) +#ifndef CONFIG_PREEMPT + call xen_maybe_preempt_hcall +#endif jmp error_exit CFI_ENDPROC END(xen_do_hypervisor_callback) diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 7ec1d5f8d283..25ecd56cefa8 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -72,7 +72,7 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { "bx", 8, offsetof(struct pt_regs, bx) }, { "cx", 8, offsetof(struct pt_regs, cx) }, { "dx", 8, offsetof(struct pt_regs, dx) }, - { "si", 8, offsetof(struct pt_regs, dx) }, + { "si", 8, offsetof(struct pt_regs, si) }, { "di", 8, offsetof(struct pt_regs, di) }, { "bp", 8, offsetof(struct pt_regs, bp) }, { "sp", 8, offsetof(struct pt_regs, sp) }, diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 6a1146ea4d4d..4e3d5a9621fe 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -223,27 +223,48 @@ static unsigned long __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) { struct kprobe *kp; + unsigned long faddr; kp = get_kprobe((void *)addr); - /* There is no probe, return original address */ - if (!kp) + faddr = ftrace_location(addr); + /* + * Addresses inside the ftrace location are refused by + * arch_check_ftrace_location(). Something went terribly wrong + * if such an address is checked here. + */ + if (WARN_ON(faddr && faddr != addr)) + return 0UL; + /* + * Use the current code if it is not modified by Kprobe + * and it cannot be modified by ftrace. + */ + if (!kp && !faddr) return addr; /* - * Basically, kp->ainsn.insn has an original instruction. - * However, RIP-relative instruction can not do single-stepping - * at different place, __copy_instruction() tweaks the displacement of - * that instruction. In that case, we can't recover the instruction - * from the kp->ainsn.insn. + * Basically, kp->ainsn.insn has an original instruction. + * However, RIP-relative instruction can not do single-stepping + * at different place, __copy_instruction() tweaks the displacement of + * that instruction. In that case, we can't recover the instruction + * from the kp->ainsn.insn. * - * On the other hand, kp->opcode has a copy of the first byte of - * the probed instruction, which is overwritten by int3. And - * the instruction at kp->addr is not modified by kprobes except - * for the first byte, we can recover the original instruction - * from it and kp->opcode. + * On the other hand, in case on normal Kprobe, kp->opcode has a copy + * of the first byte of the probed instruction, which is overwritten + * by int3. And the instruction at kp->addr is not modified by kprobes + * except for the first byte, we can recover the original instruction + * from it and kp->opcode. + * + * In case of Kprobes using ftrace, we do not have a copy of + * the original instruction. In fact, the ftrace location might + * be modified at anytime and even could be in an inconsistent state. + * Fortunately, we know that the original code is the ideal 5-byte + * long NOP. */ - memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); - buf[0] = kp->opcode; + memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + if (faddr) + memcpy(buf, ideal_nops[NOP_ATOMIC5], 5); + else + buf[0] = kp->opcode; return (unsigned long)buf; } @@ -251,6 +272,7 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) * Recover the probed instruction at addr for further analysis. * Caller must lock kprobes by kprobe_mutex, or disable preemption * for preventing to release referencing kprobes. + * Returns zero if the instruction can not get recovered. */ unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) { @@ -285,6 +307,8 @@ static int can_probe(unsigned long paddr) * normally used, we just go through if there is no kprobe. */ __addr = recover_probed_instruction(buf, addr); + if (!__addr) + return 0; kernel_insn_init(&insn, (void *)__addr, MAX_INSN_SIZE); insn_get_length(&insn); @@ -333,6 +357,8 @@ int __copy_instruction(u8 *dest, u8 *src) unsigned long recovered_insn = recover_probed_instruction(buf, (unsigned long)src); + if (!recovered_insn) + return 0; kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE); insn_get_length(&insn); /* Another subsystem puts a breakpoint, failed to recover */ diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 0dd8d089c315..7b3b9d15c47a 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -259,6 +259,8 @@ static int can_optimize(unsigned long paddr) */ return 0; recovered_insn = recover_probed_instruction(buf, addr); + if (!recovered_insn) + return 0; kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE); insn_get_length(&insn); /* Another subsystem puts a breakpoint */ diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 9bbb9b35c144..d1ac80b72c72 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -47,13 +47,21 @@ do { \ #ifdef CONFIG_RANDOMIZE_BASE static unsigned long module_load_offset; +static int randomize_modules = 1; /* Mutex protects the module_load_offset. */ static DEFINE_MUTEX(module_kaslr_mutex); +static int __init parse_nokaslr(char *p) +{ + randomize_modules = 0; + return 0; +} +early_param("nokaslr", parse_nokaslr); + static unsigned long int get_module_load_offset(void) { - if (kaslr_enabled) { + if (randomize_modules) { mutex_lock(&module_kaslr_mutex); /* * Calculate the module_load_offset the first time this diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index bae6c609888e..86db4bcd7ce5 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -183,6 +183,16 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { }, }, + /* ASRock */ + { /* Handle problems with rebooting on ASRock Q1900DC-ITX */ + .callback = set_pci_reboot, + .ident = "ASRock Q1900DC-ITX", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASRock"), + DMI_MATCH(DMI_BOARD_NAME, "Q1900DC-ITX"), + }, + }, + /* ASUS */ { /* Handle problems with rebooting on ASUS P4S800 */ .callback = set_bios_reboot, diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 98dc9317286e..0a2421cca01f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -122,8 +122,6 @@ unsigned long max_low_pfn_mapped; unsigned long max_pfn_mapped; -bool __read_mostly kaslr_enabled = false; - #ifdef CONFIG_DMI RESERVE_BRK(dmi_alloc, 65536); #endif @@ -427,11 +425,6 @@ static void __init reserve_initrd(void) } #endif /* CONFIG_BLK_DEV_INITRD */ -static void __init parse_kaslr_setup(u64 pa_data, u32 data_len) -{ - kaslr_enabled = (bool)(pa_data + sizeof(struct setup_data)); -} - static void __init parse_setup_data(void) { struct setup_data *data; @@ -457,9 +450,6 @@ static void __init parse_setup_data(void) case SETUP_EFI: parse_efi_setup(pa_data, data_len); break; - case SETUP_KASLR: - parse_kaslr_setup(pa_data, data_len); - break; default: break; } @@ -842,14 +832,10 @@ static void __init trim_low_memory_range(void) static int dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) { - if (kaslr_enabled) - pr_emerg("Kernel Offset: 0x%lx from 0x%lx (relocation range: 0x%lx-0x%lx)\n", - (unsigned long)&_text - __START_KERNEL, - __START_KERNEL, - __START_KERNEL_map, - MODULES_VADDR-1); - else - pr_emerg("Kernel Offset: disabled\n"); + pr_emerg("Kernel Offset: 0x%lx from 0x%lx " + "(relocation range: 0x%lx-0x%lx)\n", + (unsigned long)&_text - __START_KERNEL, __START_KERNEL, + __START_KERNEL_map, MODULES_VADDR-1); return 0; } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 9d2073e2ecc9..4ff5d162ff9f 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -384,7 +384,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) goto exit; conditional_sti(regs); - if (!user_mode(regs)) + if (!user_mode_vm(regs)) die("bounds", regs, error_code); if (!cpu_feature_enabled(X86_FEATURE_MPX)) { @@ -637,7 +637,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) * then it's very likely the result of an icebp/int01 trap. * User wants a sigtrap for that. */ - if (!dr6 && user_mode(regs)) + if (!dr6 && user_mode_vm(regs)) user_icebp = 1; /* Catch kmemcheck conditions first of all! */ diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 34f66e58a896..cdc6cf903078 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -379,7 +379,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) * thread's fpu state, reconstruct fxstate from the fsave * header. Sanitize the copied state etc. */ - struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; + struct fpu *fpu = &tsk->thread.fpu; struct user_i387_ia32_struct env; int err = 0; @@ -393,14 +393,15 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) */ drop_fpu(tsk); - if (__copy_from_user(xsave, buf_fx, state_size) || + if (__copy_from_user(&fpu->state->xsave, buf_fx, state_size) || __copy_from_user(&env, buf, sizeof(env))) { + fpu_finit(fpu); err = -1; } else { sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only); - set_used_math(); } + set_used_math(); if (use_eager_fpu()) { preempt_disable(); math_state_restore(); |