diff options
Diffstat (limited to 'arch/x86/kernel')
27 files changed, 464 insertions, 605 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0f15af41bd80..514064897d55 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -23,8 +23,10 @@ KASAN_SANITIZE_dumpstack_$(BITS).o := n CFLAGS_irq.o := -I$(src)/../include/asm/trace obj-y := process_$(BITS).o signal.o +obj-$(CONFIG_COMPAT) += signal_compat.o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o -obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o +obj-y += time.o ioport.o dumpstack.o nmi.o +obj-$(CONFIG_MODIFY_LDT_SYSCALL) += ldt.o obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-y += probe_roms.o diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index ede92c3364d3..222a57076039 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -263,7 +263,7 @@ static int apbt_clocksource_register(void) /* Verify whether apbt counter works */ t1 = dw_apb_clocksource_read(clocksource_apbt); - rdtscll(start); + start = rdtsc(); /* * We don't know the TSC frequency yet, but waiting for @@ -273,7 +273,7 @@ static int apbt_clocksource_register(void) */ do { rep_nop(); - rdtscll(now); + now = rdtsc(); } while ((now - start) < 200000UL); /* APBT is the only always on clocksource, it has to work! */ @@ -390,13 +390,13 @@ unsigned long apbt_quick_calibrate(void) old = dw_apb_clocksource_read(clocksource_apbt); old += loop; - t1 = __native_read_tsc(); + t1 = rdtsc(); do { new = dw_apb_clocksource_read(clocksource_apbt); } while (new < old); - t2 = __native_read_tsc(); + t2 = rdtsc(); shift = 5; if (unlikely(loop >> shift == 0)) { diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index cde732c1b495..5aba9220a5ac 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -457,7 +457,7 @@ static int lapic_next_deadline(unsigned long delta, { u64 tsc; - rdtscll(tsc); + tsc = rdtsc(); wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR)); return 0; } @@ -592,7 +592,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev) unsigned long pm = acpi_pm_read_early(); if (cpu_has_tsc) - rdtscll(tsc); + tsc = rdtsc(); switch (lapic_cal_loops++) { case 0: @@ -1209,7 +1209,7 @@ void setup_local_APIC(void) long long max_loops = cpu_khz ? cpu_khz : 1000000; if (cpu_has_tsc) - rdtscll(tsc); + tsc = rdtsc(); if (disable_apic) { disable_ioapic_support(); @@ -1293,7 +1293,7 @@ void setup_local_APIC(void) } if (queued) { if (cpu_has_tsc && cpu_khz) { - rdtscll(ntsc); + ntsc = rdtsc(); max_loops = (cpu_khz << 10) - (ntsc - tsc); } else max_loops--; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index dd3a4baffe50..4a70fc6d400a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -11,6 +11,7 @@ #include <asm/cpu.h> #include <asm/smp.h> #include <asm/pci-direct.h> +#include <asm/delay.h> #ifdef CONFIG_X86_64 # include <asm/mmconfig.h> @@ -114,7 +115,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) const int K6_BUG_LOOP = 1000000; int n; void (*f_vide)(void); - unsigned long d, d2; + u64 d, d2; printk(KERN_INFO "AMD K6 stepping B detected - "); @@ -125,10 +126,10 @@ static void init_amd_k6(struct cpuinfo_x86 *c) n = K6_BUG_LOOP; f_vide = vide; - rdtscl(d); + d = rdtsc(); while (n--) f_vide(); - rdtscl(d2); + d2 = rdtsc(); d = d2-d; if (d > 20*K6_BUG_LOOP) @@ -506,6 +507,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) /* A random value per boot for bit slice [12:upper_bit) */ va_align.bits = get_random_int() & va_align.mask; } + + if (cpu_has(c, X86_FEATURE_MWAITX)) + use_mwaitx_delay(); } static void early_init_amd(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cb9e5df42dd2..b128808853a2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1185,10 +1185,10 @@ void syscall_init(void) * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. */ wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); - wrmsrl(MSR_LSTAR, entry_SYSCALL_64); + wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); #ifdef CONFIG_IA32_EMULATION - wrmsrl(MSR_CSTAR, entry_SYSCALL_compat); + wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat); /* * This only works on Intel CPUs. * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP. @@ -1199,7 +1199,7 @@ void syscall_init(void) wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); #else - wrmsrl(MSR_CSTAR, ignore_sysret); + wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret); wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG); wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 0f8f21c8284a..9d014b82a124 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -127,7 +127,7 @@ void mce_setup(struct mce *m) { memset(m, 0, sizeof(struct mce)); m->cpu = m->extcpu = smp_processor_id(); - rdtscll(m->tsc); + m->tsc = rdtsc(); /* We hope get_seconds stays lockless */ m->time = get_seconds(); m->cpuvendor = boot_cpu_data.x86_vendor; @@ -974,7 +974,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) { struct mca_config *cfg = &mca_cfg; struct mce m, *final; - enum ctx_state prev_state; int i; int worst = 0; int severity; @@ -1000,7 +999,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) int flags = MF_ACTION_REQUIRED; int lmce = 0; - prev_state = ist_enter(regs); + ist_enter(regs); this_cpu_inc(mce_exception_count); @@ -1166,7 +1165,7 @@ out: local_irq_disable(); ist_end_non_atomic(); done: - ist_exit(regs, prev_state); + ist_exit(regs); } EXPORT_SYMBOL_GPL(do_machine_check); @@ -1754,7 +1753,7 @@ static void collect_tscs(void *data) { unsigned long *cpu_tsc = (unsigned long *)data; - rdtscll(cpu_tsc[smp_processor_id()]); + cpu_tsc[smp_processor_id()] = rdtsc(); } static int mce_apei_read_done; diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index 737b0ad4e61a..12402e10aeff 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c @@ -19,10 +19,9 @@ int mce_p5_enabled __read_mostly; /* Machine check handler for Pentium class Intel CPUs: */ static void pentium_machine_check(struct pt_regs *regs, long error_code) { - enum ctx_state prev_state; u32 loaddr, hi, lotype; - prev_state = ist_enter(regs); + ist_enter(regs); rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); @@ -39,7 +38,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code) add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); - ist_exit(regs, prev_state); + ist_exit(regs); } /* Set up machine check reporting for processors with Intel style MCE: */ diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 44f138296fbe..01dd8702880b 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c @@ -15,12 +15,12 @@ /* Machine check handler for WinChip C6: */ static void winchip_machine_check(struct pt_regs *regs, long error_code) { - enum ctx_state prev_state = ist_enter(regs); + ist_enter(regs); printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); - ist_exit(regs, prev_state); + ist_exit(regs); } /* Set up machine check reporting on the Winchip C6 series */ diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index f56cf074d01a..66dd3fe99b82 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2179,6 +2179,7 @@ static unsigned long get_segment_base(unsigned int segment) int idx = segment >> 3; if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) { +#ifdef CONFIG_MODIFY_LDT_SYSCALL struct ldt_struct *ldt; if (idx > LDT_ENTRIES) @@ -2190,6 +2191,9 @@ static unsigned long get_segment_base(unsigned int segment) return 0; desc = &ldt->entries[idx]; +#else + return 0; +#endif } else { if (idx > GDT_ENTRIES) return 0; @@ -2200,7 +2204,7 @@ static unsigned long get_segment_base(unsigned int segment) return get_desc_base(desc); } -#ifdef CONFIG_COMPAT +#ifdef CONFIG_IA32_EMULATION #include <asm/compat.h> diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index ce95676abd60..4d38416e2a7f 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -110,7 +110,7 @@ static void init_espfix_random(void) */ if (!arch_get_random_long(&rand)) { /* The constant is an arbitrary large prime */ - rdtscll(rand); + rand = rdtsc(); rand *= 0xc345c6b72fd16123UL; } diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 10757d0a3fcf..f75c5908c7a6 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -735,7 +735,7 @@ static int hpet_clocksource_register(void) /* Verify whether hpet counter works */ t1 = hpet_readl(HPET_COUNTER); - rdtscll(start); + start = rdtsc(); /* * We don't know the TSC frequency yet, but waiting for @@ -745,7 +745,7 @@ static int hpet_clocksource_register(void) */ do { rep_nop(); - rdtscll(now); + now = rdtsc(); } while ((now - start) < 200000UL); if (t1 == hpet_readl(HPET_COUNTER)) { diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index c7dfe1be784e..4616672a4049 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -216,8 +216,23 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs) unsigned vector = ~regs->orig_ax; unsigned irq; + /* + * NB: Unlike exception entries, IRQ entries do not reliably + * handle context tracking in the low-level entry code. This is + * because syscall entries execute briefly with IRQs on before + * updating context tracking state, so we can take an IRQ from + * kernel mode with CONTEXT_USER. The low-level entry code only + * updates the context if we came from user mode, so we won't + * switch to CONTEXT_KERNEL. We'll fix that once the syscall + * code is cleaned up enough that we can cleanly defer enabling + * IRQs. + */ + entering_irq(); + /* entering_irq() tells RCU that we're not quiescent. Check it. */ + RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU"); + irq = __this_cpu_read(vector_irq[vector]); if (!handle_irq(irq, regs)) { diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index d05bd2e2ee91..697f90db0e37 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -110,7 +110,7 @@ static void nmi_max_handler(struct irq_work *w) a->handler, whole_msecs, decimal_msecs); } -static int nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) +static int nmi_handle(unsigned int type, struct pt_regs *regs) { struct nmi_desc *desc = nmi_to_desc(type); struct nmiaction *a; @@ -213,7 +213,7 @@ static void pci_serr_error(unsigned char reason, struct pt_regs *regs) { /* check to see if anyone registered against these types of errors */ - if (nmi_handle(NMI_SERR, regs, false)) + if (nmi_handle(NMI_SERR, regs)) return; pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", @@ -247,7 +247,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs) unsigned long i; /* check to see if anyone registered against these types of errors */ - if (nmi_handle(NMI_IO_CHECK, regs, false)) + if (nmi_handle(NMI_IO_CHECK, regs)) return; pr_emerg( @@ -284,7 +284,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) * as only the first one is ever run (unless it can actually determine * if it caused the NMI) */ - handled = nmi_handle(NMI_UNKNOWN, regs, false); + handled = nmi_handle(NMI_UNKNOWN, regs); if (handled) { __this_cpu_add(nmi_stats.unknown, handled); return; @@ -332,7 +332,7 @@ static void default_do_nmi(struct pt_regs *regs) __this_cpu_write(last_nmi_rip, regs->ip); - handled = nmi_handle(NMI_LOCAL, regs, b2b); + handled = nmi_handle(NMI_LOCAL, regs); __this_cpu_add(nmi_stats.normal, handled); if (handled) { /* diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 58bcfb67c01f..f68e48f5f6c2 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -351,9 +351,7 @@ __visible struct pv_cpu_ops pv_cpu_ops = { .wbinvd = native_wbinvd, .read_msr = native_read_msr_safe, .write_msr = native_write_msr_safe, - .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, - .read_tscp = native_read_tscp, .load_tr_desc = native_load_tr_desc, .set_ldt = native_set_ldt, .load_gdt = native_load_gdt, diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index e1b013696dde..c89f50a76e97 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -10,7 +10,6 @@ DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); DEF_NATIVE(pv_cpu_ops, clts, "clts"); -DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc"); #if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS) DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)"); @@ -52,7 +51,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_mmu_ops, read_cr3); PATCH_SITE(pv_mmu_ops, write_cr3); PATCH_SITE(pv_cpu_ops, clts); - PATCH_SITE(pv_cpu_ops, read_tsc); #if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS) case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): if (pv_is_native_spin_unlock()) { diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index d83740ab85b0..6d0e62ae8516 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -30,6 +30,7 @@ #include <asm/nmi.h> #include <asm/tlbflush.h> #include <asm/mce.h> +#include <asm/vm86.h> /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, @@ -111,6 +112,8 @@ void exit_thread(void) kfree(bp); } + free_vm86(t); + fpu__drop(fpu); } diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index f73c962fe636..c13df2c735f8 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -53,6 +53,7 @@ #include <asm/syscalls.h> #include <asm/debugreg.h> #include <asm/switch_to.h> +#include <asm/vm86.h> asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread"); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index f6b916387590..3c1bbcf12924 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -121,6 +121,7 @@ void __show_regs(struct pt_regs *regs, int all) void release_thread(struct task_struct *dead_task) { if (dead_task->mm) { +#ifdef CONFIG_MODIFY_LDT_SYSCALL if (dead_task->mm->context.ldt) { pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n", dead_task->comm, @@ -128,6 +129,7 @@ void release_thread(struct task_struct *dead_task) dead_task->mm->context.ldt->size); BUG(); } +#endif } } @@ -248,8 +250,8 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) __USER_CS, __USER_DS, 0); } -#ifdef CONFIG_IA32_EMULATION -void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp) +#ifdef CONFIG_COMPAT +void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp) { start_thread_common(regs, new_ip, new_sp, test_thread_flag(TIF_X32) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 9be72bc3613f..558f50edebca 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -37,12 +37,10 @@ #include <asm/proto.h> #include <asm/hw_breakpoint.h> #include <asm/traps.h> +#include <asm/syscall.h> #include "tls.h" -#define CREATE_TRACE_POINTS -#include <trace/events/syscalls.h> - enum x86_regset { REGSET_GENERAL, REGSET_FP, @@ -1123,6 +1121,73 @@ static int genregs32_set(struct task_struct *target, return ret; } +static long ia32_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t caddr, compat_ulong_t cdata) +{ + unsigned long addr = caddr; + unsigned long data = cdata; + void __user *datap = compat_ptr(data); + int ret; + __u32 val; + + switch (request) { + case PTRACE_PEEKUSR: + ret = getreg32(child, addr, &val); + if (ret == 0) + ret = put_user(val, (__u32 __user *)datap); + break; + + case PTRACE_POKEUSR: + ret = putreg32(child, addr, data); + break; + + case PTRACE_GETREGS: /* Get all gp regs from the child. */ + return copy_regset_to_user(child, &user_x86_32_view, + REGSET_GENERAL, + 0, sizeof(struct user_regs_struct32), + datap); + + case PTRACE_SETREGS: /* Set all gp regs in the child. */ + return copy_regset_from_user(child, &user_x86_32_view, + REGSET_GENERAL, 0, + sizeof(struct user_regs_struct32), + datap); + + case PTRACE_GETFPREGS: /* Get the child FPU state. */ + return copy_regset_to_user(child, &user_x86_32_view, + REGSET_FP, 0, + sizeof(struct user_i387_ia32_struct), + datap); + + case PTRACE_SETFPREGS: /* Set the child FPU state. */ + return copy_regset_from_user( + child, &user_x86_32_view, REGSET_FP, + 0, sizeof(struct user_i387_ia32_struct), datap); + + case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */ + return copy_regset_to_user(child, &user_x86_32_view, + REGSET_XFP, 0, + sizeof(struct user32_fxsr_struct), + datap); + + case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */ + return copy_regset_from_user(child, &user_x86_32_view, + REGSET_XFP, 0, + sizeof(struct user32_fxsr_struct), + datap); + + case PTRACE_GET_THREAD_AREA: + case PTRACE_SET_THREAD_AREA: + return arch_ptrace(child, request, addr, data); + + default: + return compat_ptrace_request(child, request, addr, data); + } + + return ret; +} +#endif /* CONFIG_IA32_EMULATION */ + #ifdef CONFIG_X86_X32_ABI static long x32_arch_ptrace(struct task_struct *child, compat_long_t request, compat_ulong_t caddr, @@ -1211,78 +1276,21 @@ static long x32_arch_ptrace(struct task_struct *child, } #endif +#ifdef CONFIG_COMPAT long compat_arch_ptrace(struct task_struct *child, compat_long_t request, compat_ulong_t caddr, compat_ulong_t cdata) { - unsigned long addr = caddr; - unsigned long data = cdata; - void __user *datap = compat_ptr(data); - int ret; - __u32 val; - #ifdef CONFIG_X86_X32_ABI if (!is_ia32_task()) return x32_arch_ptrace(child, request, caddr, cdata); #endif - - switch (request) { - case PTRACE_PEEKUSR: - ret = getreg32(child, addr, &val); - if (ret == 0) - ret = put_user(val, (__u32 __user *)datap); - break; - - case PTRACE_POKEUSR: - ret = putreg32(child, addr, data); - break; - - case PTRACE_GETREGS: /* Get all gp regs from the child. */ - return copy_regset_to_user(child, &user_x86_32_view, - REGSET_GENERAL, - 0, sizeof(struct user_regs_struct32), - datap); - - case PTRACE_SETREGS: /* Set all gp regs in the child. */ - return copy_regset_from_user(child, &user_x86_32_view, - REGSET_GENERAL, 0, - sizeof(struct user_regs_struct32), - datap); - - case PTRACE_GETFPREGS: /* Get the child FPU state. */ - return copy_regset_to_user(child, &user_x86_32_view, - REGSET_FP, 0, - sizeof(struct user_i387_ia32_struct), - datap); - - case PTRACE_SETFPREGS: /* Set the child FPU state. */ - return copy_regset_from_user( - child, &user_x86_32_view, REGSET_FP, - 0, sizeof(struct user_i387_ia32_struct), datap); - - case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */ - return copy_regset_to_user(child, &user_x86_32_view, - REGSET_XFP, 0, - sizeof(struct user32_fxsr_struct), - datap); - - case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */ - return copy_regset_from_user(child, &user_x86_32_view, - REGSET_XFP, 0, - sizeof(struct user32_fxsr_struct), - datap); - - case PTRACE_GET_THREAD_AREA: - case PTRACE_SET_THREAD_AREA: - return arch_ptrace(child, request, addr, data); - - default: - return compat_ptrace_request(child, request, addr, data); - } - - return ret; +#ifdef CONFIG_IA32_EMULATION + return ia32_arch_ptrace(child, request, caddr, cdata); +#else + return 0; +#endif } - -#endif /* CONFIG_IA32_EMULATION */ +#endif /* CONFIG_COMPAT */ #ifdef CONFIG_X86_64 @@ -1434,201 +1442,3 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, /* Send us the fake SIGTRAP */ force_sig_info(SIGTRAP, &info, tsk); } - -static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch) -{ -#ifdef CONFIG_X86_64 - if (arch == AUDIT_ARCH_X86_64) { - audit_syscall_entry(regs->orig_ax, regs->di, - regs->si, regs->dx, regs->r10); - } else -#endif - { - audit_syscall_entry(regs->orig_ax, regs->bx, - regs->cx, regs->dx, regs->si); - } -} - -/* - * We can return 0 to resume the syscall or anything else to go to phase - * 2. If we resume the syscall, we need to put something appropriate in - * regs->orig_ax. - * - * NB: We don't have full pt_regs here, but regs->orig_ax and regs->ax - * are fully functional. - * - * For phase 2's benefit, our return value is: - * 0: resume the syscall - * 1: go to phase 2; no seccomp phase 2 needed - * anything else: go to phase 2; pass return value to seccomp - */ -unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch) -{ - unsigned long ret = 0; - u32 work; - - BUG_ON(regs != task_pt_regs(current)); - - work = ACCESS_ONCE(current_thread_info()->flags) & - _TIF_WORK_SYSCALL_ENTRY; - - /* - * If TIF_NOHZ is set, we are required to call user_exit() before - * doing anything that could touch RCU. - */ - if (work & _TIF_NOHZ) { - user_exit(); - work &= ~_TIF_NOHZ; - } - -#ifdef CONFIG_SECCOMP - /* - * Do seccomp first -- it should minimize exposure of other - * code, and keeping seccomp fast is probably more valuable - * than the rest of this. - */ - if (work & _TIF_SECCOMP) { - struct seccomp_data sd; - - sd.arch = arch; - sd.nr = regs->orig_ax; - sd.instruction_pointer = regs->ip; -#ifdef CONFIG_X86_64 - if (arch == AUDIT_ARCH_X86_64) { - sd.args[0] = regs->di; - sd.args[1] = regs->si; - sd.args[2] = regs->dx; - sd.args[3] = regs->r10; - sd.args[4] = regs->r8; - sd.args[5] = regs->r9; - } else -#endif - { - sd.args[0] = regs->bx; - sd.args[1] = regs->cx; - sd.args[2] = regs->dx; - sd.args[3] = regs->si; - sd.args[4] = regs->di; - sd.args[5] = regs->bp; - } - - BUILD_BUG_ON(SECCOMP_PHASE1_OK != 0); - BUILD_BUG_ON(SECCOMP_PHASE1_SKIP != 1); - - ret = seccomp_phase1(&sd); - if (ret == SECCOMP_PHASE1_SKIP) { - regs->orig_ax = -1; - ret = 0; - } else if (ret != SECCOMP_PHASE1_OK) { - return ret; /* Go directly to phase 2 */ - } - - work &= ~_TIF_SECCOMP; - } -#endif - - /* Do our best to finish without phase 2. */ - if (work == 0) - return ret; /* seccomp and/or nohz only (ret == 0 here) */ - -#ifdef CONFIG_AUDITSYSCALL - if (work == _TIF_SYSCALL_AUDIT) { - /* - * If there is no more work to be done except auditing, - * then audit in phase 1. Phase 2 always audits, so, if - * we audit here, then we can't go on to phase 2. - */ - do_audit_syscall_entry(regs, arch); - return 0; - } -#endif - - return 1; /* Something is enabled that we can't handle in phase 1 */ -} - -/* Returns the syscall nr to run (which should match regs->orig_ax). */ -long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch, - unsigned long phase1_result) -{ - long ret = 0; - u32 work = ACCESS_ONCE(current_thread_info()->flags) & - _TIF_WORK_SYSCALL_ENTRY; - - BUG_ON(regs != task_pt_regs(current)); - - /* - * If we stepped into a sysenter/syscall insn, it trapped in - * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. - * If user-mode had set TF itself, then it's still clear from - * do_debug() and we need to set it again to restore the user - * state. If we entered on the slow path, TF was already set. - */ - if (work & _TIF_SINGLESTEP) - regs->flags |= X86_EFLAGS_TF; - -#ifdef CONFIG_SECCOMP - /* - * Call seccomp_phase2 before running the other hooks so that - * they can see any changes made by a seccomp tracer. - */ - if (phase1_result > 1 && seccomp_phase2(phase1_result)) { - /* seccomp failures shouldn't expose any additional code. */ - return -1; - } -#endif - - if (unlikely(work & _TIF_SYSCALL_EMU)) - ret = -1L; - - if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) && - tracehook_report_syscall_entry(regs)) - ret = -1L; - - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_enter(regs, regs->orig_ax); - - do_audit_syscall_entry(regs, arch); - - return ret ?: regs->orig_ax; -} - -long syscall_trace_enter(struct pt_regs *regs) -{ - u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; - unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch); - - if (phase1_result == 0) - return regs->orig_ax; - else - return syscall_trace_enter_phase2(regs, arch, phase1_result); -} - -void syscall_trace_leave(struct pt_regs *regs) -{ - bool step; - - /* - * We may come here right after calling schedule_user() - * or do_notify_resume(), in which case we can be in RCU - * user mode. - */ - user_exit(); - - audit_syscall_exit(regs); - - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_exit(regs, regs->ax); - - /* - * If TIF_SYSCALL_EMU is set, we only get here because of - * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP). - * We already reported this syscall instruction in - * syscall_trace_enter(). - */ - step = unlikely(test_thread_flag(TIF_SINGLESTEP)) && - !test_thread_flag(TIF_SYSCALL_EMU); - if (step || test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(regs, step); - - user_enter(); -} diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 71820c42b6ce..da52e6bb5c7f 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -31,11 +31,11 @@ #include <asm/vdso.h> #include <asm/mce.h> #include <asm/sighandling.h> +#include <asm/vm86.h> #ifdef CONFIG_X86_64 #include <asm/proto.h> #include <asm/ia32_unistd.h> -#include <asm/sys_ia32.h> #endif /* CONFIG_X86_64 */ #include <asm/syscall.h> @@ -632,6 +632,9 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) bool stepping, failed; struct fpu *fpu = ¤t->thread.fpu; + if (v8086_mode(regs)) + save_v86_state((struct kernel_vm86_regs *) regs, VM86_SIGNAL); + /* Are we from a system call? */ if (syscall_get_nr(current, regs) >= 0) { /* If so, check system call restarting.. */ @@ -697,7 +700,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) * want to handle. Thus you cannot kill init even with a SIGKILL even by * mistake. */ -static void do_signal(struct pt_regs *regs) +void do_signal(struct pt_regs *regs) { struct ksignal ksig; @@ -732,32 +735,6 @@ static void do_signal(struct pt_regs *regs) restore_saved_sigmask(); } -/* - * notification of userspace execution resumption - * - triggered by the TIF_WORK_MASK flags - */ -__visible void -do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) -{ - user_exit(); - - if (thread_info_flags & _TIF_UPROBE) - uprobe_notify_resume(regs); - - /* deal with pending signal delivery */ - if (thread_info_flags & _TIF_SIGPENDING) - do_signal(regs); - - if (thread_info_flags & _TIF_NOTIFY_RESUME) { - clear_thread_flag(TIF_NOTIFY_RESUME); - tracehook_notify_resume(regs); - } - if (thread_info_flags & _TIF_USER_RETURN_NOTIFY) - fire_user_return_notifiers(); - - user_enter(); -} - void signal_fault(struct pt_regs *regs, void __user *frame, char *where) { struct task_struct *me = current; diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c new file mode 100644 index 000000000000..dc3c0b1c816f --- /dev/null +++ b/arch/x86/kernel/signal_compat.c @@ -0,0 +1,95 @@ +#include <linux/compat.h> +#include <linux/uaccess.h> + +int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) +{ + int err = 0; + bool ia32 = test_thread_flag(TIF_IA32); + + if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) + return -EFAULT; + + put_user_try { + /* If you change siginfo_t structure, please make sure that + this code is fixed accordingly. + It should never copy any pad contained in the structure + to avoid security leaks, but must copy the generic + 3 ints plus the relevant union member. */ + put_user_ex(from->si_signo, &to->si_signo); + put_user_ex(from->si_errno, &to->si_errno); + put_user_ex((short)from->si_code, &to->si_code); + + if (from->si_code < 0) { + put_user_ex(from->si_pid, &to->si_pid); + put_user_ex(from->si_uid, &to->si_uid); + put_user_ex(ptr_to_compat(from->si_ptr), &to->si_ptr); + } else { + /* + * First 32bits of unions are always present: + * si_pid === si_band === si_tid === si_addr(LS half) + */ + put_user_ex(from->_sifields._pad[0], + &to->_sifields._pad[0]); + switch (from->si_code >> 16) { + case __SI_FAULT >> 16: + break; + case __SI_SYS >> 16: + put_user_ex(from->si_syscall, &to->si_syscall); + put_user_ex(from->si_arch, &to->si_arch); + break; + case __SI_CHLD >> 16: + if (ia32) { + put_user_ex(from->si_utime, &to->si_utime); + put_user_ex(from->si_stime, &to->si_stime); + } else { + put_user_ex(from->si_utime, &to->_sifields._sigchld_x32._utime); + put_user_ex(from->si_stime, &to->_sifields._sigchld_x32._stime); + } + put_user_ex(from->si_status, &to->si_status); + /* FALL THROUGH */ + default: + case __SI_KILL >> 16: + put_user_ex(from->si_uid, &to->si_uid); + break; + case __SI_POLL >> 16: + put_user_ex(from->si_fd, &to->si_fd); + break; + case __SI_TIMER >> 16: + put_user_ex(from->si_overrun, &to->si_overrun); + put_user_ex(ptr_to_compat(from->si_ptr), + &to->si_ptr); + break; + /* This is not generated by the kernel as of now. */ + case __SI_RT >> 16: + case __SI_MESGQ >> 16: + put_user_ex(from->si_uid, &to->si_uid); + put_user_ex(from->si_int, &to->si_int); + break; + } + } + } put_user_catch(err); + + return err; +} + +int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) +{ + int err = 0; + u32 ptr32; + + if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t))) + return -EFAULT; + + get_user_try { + get_user_ex(to->si_signo, &from->si_signo); + get_user_ex(to->si_errno, &from->si_errno); + get_user_ex(to->si_code, &from->si_code); + + get_user_ex(to->si_pid, &from->si_pid); + get_user_ex(to->si_uid, &from->si_uid); + get_user_ex(ptr32, &from->si_ptr); + to->si_ptr = compat_ptr(ptr32); + } get_user_catch(err); + + return err; +} diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 0ccb53a9fcd9..c9a073866ca7 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -18,6 +18,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re return addr; } +#ifdef CONFIG_MODIFY_LDT_SYSCALL /* * We'll assume that the code segments in the GDT * are all zero-based. That is largely true: the @@ -45,6 +46,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re } mutex_unlock(&child->mm->context.lock); } +#endif return addr; } diff --git a/arch/x86/kernel/trace_clock.c b/arch/x86/kernel/trace_clock.c index 25b993729f9b..80bb24d9b880 100644 --- a/arch/x86/kernel/trace_clock.c +++ b/arch/x86/kernel/trace_clock.c @@ -12,10 +12,5 @@ */ u64 notrace trace_clock_x86_tsc(void) { - u64 ret; - - rdtsc_barrier(); - rdtscll(ret); - - return ret; + return rdtsc_ordered(); } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index c5a5231d1d11..346eec73f7db 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -62,6 +62,7 @@ #include <asm/fpu/xstate.h> #include <asm/trace/mpx.h> #include <asm/mpx.h> +#include <asm/vm86.h> #ifdef CONFIG_X86_64 #include <asm/x86_init.h> @@ -108,13 +109,10 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) preempt_count_dec(); } -enum ctx_state ist_enter(struct pt_regs *regs) +void ist_enter(struct pt_regs *regs) { - enum ctx_state prev_state; - if (user_mode(regs)) { - /* Other than that, we're just an exception. */ - prev_state = exception_enter(); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); } else { /* * We might have interrupted pretty much anything. In @@ -123,32 +121,25 @@ enum ctx_state ist_enter(struct pt_regs *regs) * but we need to notify RCU. */ rcu_nmi_enter(); - prev_state = CONTEXT_KERNEL; /* the value is irrelevant. */ } /* - * We are atomic because we're on the IST stack (or we're on x86_32, - * in which case we still shouldn't schedule). - * - * This must be after exception_enter(), because exception_enter() - * won't do anything if in_interrupt() returns true. + * We are atomic because we're on the IST stack; or we're on + * x86_32, in which case we still shouldn't schedule; or we're + * on x86_64 and entered from user mode, in which case we're + * still atomic unless ist_begin_non_atomic is called. */ preempt_count_add(HARDIRQ_OFFSET); /* This code is a bit fragile. Test it. */ RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work"); - - return prev_state; } -void ist_exit(struct pt_regs *regs, enum ctx_state prev_state) +void ist_exit(struct pt_regs *regs) { - /* Must be before exception_exit. */ preempt_count_sub(HARDIRQ_OFFSET); - if (user_mode(regs)) - return exception_exit(prev_state); - else + if (!user_mode(regs)) rcu_nmi_exit(); } @@ -162,7 +153,7 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state) * a double fault, it can be safe to schedule. ist_begin_non_atomic() * begins a non-atomic section within an ist_enter()/ist_exit() region. * Callers are responsible for enabling interrupts themselves inside - * the non-atomic section, and callers must call is_end_non_atomic() + * the non-atomic section, and callers must call ist_end_non_atomic() * before ist_exit(). */ void ist_begin_non_atomic(struct pt_regs *regs) @@ -289,17 +280,16 @@ NOKPROBE_SYMBOL(do_trap); static void do_error_trap(struct pt_regs *regs, long error_code, char *str, unsigned long trapnr, int signr) { - enum ctx_state prev_state = exception_enter(); siginfo_t info; + RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != NOTIFY_STOP) { conditional_sti(regs); do_trap(trapnr, signr, str, regs, error_code, fill_trap_info(regs, signr, trapnr, &info)); } - - exception_exit(prev_state); } #define DO_ERROR(trapnr, signr, str, name) \ @@ -351,7 +341,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) } #endif - ist_enter(regs); /* Discard prev_state because we won't return. */ + ist_enter(regs); notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); tsk->thread.error_code = error_code; @@ -371,14 +361,13 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) { - enum ctx_state prev_state; const struct bndcsr *bndcsr; siginfo_t *info; - prev_state = exception_enter(); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); if (notify_die(DIE_TRAP, "bounds", regs, error_code, X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP) - goto exit; + return; conditional_sti(regs); if (!user_mode(regs)) @@ -435,9 +424,8 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) die("bounds", regs, error_code); } -exit: - exception_exit(prev_state); return; + exit_trap: /* * This path out is for all the cases where we could not @@ -447,35 +435,33 @@ exit_trap: * time.. */ do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, NULL); - exception_exit(prev_state); } dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code) { struct task_struct *tsk; - enum ctx_state prev_state; - prev_state = exception_enter(); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); conditional_sti(regs); if (v8086_mode(regs)) { local_irq_enable(); handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); - goto exit; + return; } tsk = current; if (!user_mode(regs)) { if (fixup_exception(regs)) - goto exit; + return; tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_GP; if (notify_die(DIE_GPF, "general protection fault", regs, error_code, X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP) die("general protection fault", regs, error_code); - goto exit; + return; } tsk->thread.error_code = error_code; @@ -491,16 +477,12 @@ do_general_protection(struct pt_regs *regs, long error_code) } force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); -exit: - exception_exit(prev_state); } NOKPROBE_SYMBOL(do_general_protection); /* May run on IST stack. */ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) { - enum ctx_state prev_state; - #ifdef CONFIG_DYNAMIC_FTRACE /* * ftrace must be first, everything else may cause a recursive crash. @@ -513,7 +495,8 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) if (poke_int3_handler(regs)) return; - prev_state = ist_enter(regs); + ist_enter(regs); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) @@ -539,7 +522,7 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) preempt_conditional_cli(regs); debug_stack_usage_dec(); exit: - ist_exit(regs, prev_state); + ist_exit(regs); } NOKPROBE_SYMBOL(do_int3); @@ -615,12 +598,11 @@ NOKPROBE_SYMBOL(fixup_bad_iret); dotraplinkage void do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; - enum ctx_state prev_state; int user_icebp = 0; unsigned long dr6; int si_code; - prev_state = ist_enter(regs); + ist_enter(regs); get_debugreg(dr6, 6); @@ -695,7 +677,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) debug_stack_usage_dec(); exit: - ist_exit(regs, prev_state); + ist_exit(regs); } NOKPROBE_SYMBOL(do_debug); @@ -747,21 +729,15 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) { - enum ctx_state prev_state; - - prev_state = exception_enter(); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); math_error(regs, error_code, X86_TRAP_MF); - exception_exit(prev_state); } dotraplinkage void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) { - enum ctx_state prev_state; - - prev_state = exception_enter(); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); math_error(regs, error_code, X86_TRAP_XF); - exception_exit(prev_state); } dotraplinkage void @@ -773,9 +749,7 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code) { - enum ctx_state prev_state; - - prev_state = exception_enter(); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); BUG_ON(use_eager_fpu()); #ifdef CONFIG_MATH_EMULATION @@ -786,7 +760,6 @@ do_device_not_available(struct pt_regs *regs, long error_code) info.regs = regs; math_emulate(&info); - exception_exit(prev_state); return; } #endif @@ -794,7 +767,6 @@ do_device_not_available(struct pt_regs *regs, long error_code) #ifdef CONFIG_X86_32 conditional_sti(regs); #endif - exception_exit(prev_state); } NOKPROBE_SYMBOL(do_device_not_available); @@ -802,9 +774,8 @@ NOKPROBE_SYMBOL(do_device_not_available); dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) { siginfo_t info; - enum ctx_state prev_state; - prev_state = exception_enter(); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); local_irq_enable(); info.si_signo = SIGILL; @@ -816,7 +787,6 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, &info); } - exception_exit(prev_state); } #endif diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 88e9a38c71a5..79055cf2c497 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -248,7 +248,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) data = cyc2ns_write_begin(cpu); - rdtscll(tsc_now); + tsc_now = rdtsc(); ns_now = cycles_2_ns(tsc_now); /* @@ -290,7 +290,7 @@ u64 native_sched_clock(void) } /* read the Time Stamp Counter: */ - rdtscll(tsc_now); + tsc_now = rdtsc(); /* return the value in ns */ return cycles_2_ns(tsc_now); @@ -316,12 +316,6 @@ unsigned long long sched_clock(void) __attribute__((alias("native_sched_clock"))); #endif -unsigned long long native_read_tsc(void) -{ - return __native_read_tsc(); -} -EXPORT_SYMBOL(native_read_tsc); - int check_tsc_unstable(void) { return tsc_unstable; @@ -984,7 +978,7 @@ static struct clocksource clocksource_tsc; */ static cycle_t read_tsc(struct clocksource *cs) { - return (cycle_t)get_cycles(); + return (cycle_t)rdtsc_ordered(); } /* diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index dd8d0791dfb5..78083bf23ed1 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -39,16 +39,15 @@ static cycles_t max_warp; static int nr_warps; /* - * TSC-warp measurement loop running on both CPUs: + * TSC-warp measurement loop running on both CPUs. This is not called + * if there is no TSC. */ static void check_tsc_warp(unsigned int timeout) { cycles_t start, now, prev, end; int i; - rdtsc_barrier(); - start = get_cycles(); - rdtsc_barrier(); + start = rdtsc_ordered(); /* * The measurement runs for 'timeout' msecs: */ @@ -63,9 +62,7 @@ static void check_tsc_warp(unsigned int timeout) */ arch_spin_lock(&sync_lock); prev = last_tsc; - rdtsc_barrier(); - now = get_cycles(); - rdtsc_barrier(); + now = rdtsc_ordered(); last_tsc = now; arch_spin_unlock(&sync_lock); @@ -126,7 +123,7 @@ void check_tsc_sync_source(int cpu) /* * No need to check if we already know that the TSC is not - * synchronized: + * synchronized or if we have no TSC. */ if (unsynchronized_tsc()) return; @@ -190,6 +187,7 @@ void check_tsc_sync_target(void) { int cpus = 2; + /* Also aborts if there is no TSC. */ if (unsynchronized_tsc() || tsc_clocksource_reliable) return; diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index fc9db6ef2a95..abd8b856bd2b 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -44,11 +44,14 @@ #include <linux/ptrace.h> #include <linux/audit.h> #include <linux/stddef.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include <asm/io.h> #include <asm/tlbflush.h> #include <asm/irq.h> +#include <asm/traps.h> +#include <asm/vm86.h> /* * Known problems: @@ -66,10 +69,6 @@ */ -#define KVM86 ((struct kernel_vm86_struct *)regs) -#define VMPI KVM86->vm86plus - - /* * 8- and 16-bit register defines.. */ @@ -81,8 +80,8 @@ /* * virtual flags (16 and 32-bit versions) */ -#define VFLAGS (*(unsigned short *)&(current->thread.v86flags)) -#define VEFLAGS (current->thread.v86flags) +#define VFLAGS (*(unsigned short *)&(current->thread.vm86->veflags)) +#define VEFLAGS (current->thread.vm86->veflags) #define set_flags(X, new, mask) \ ((X) = ((X) & ~(mask)) | ((new) & (mask))) @@ -90,46 +89,13 @@ #define SAFE_MASK (0xDD5) #define RETURN_MASK (0xDFF) -/* convert kernel_vm86_regs to vm86_regs */ -static int copy_vm86_regs_to_user(struct vm86_regs __user *user, - const struct kernel_vm86_regs *regs) -{ - int ret = 0; - - /* - * kernel_vm86_regs is missing gs, so copy everything up to - * (but not including) orig_eax, and then rest including orig_eax. - */ - ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.orig_ax)); - ret += copy_to_user(&user->orig_eax, ®s->pt.orig_ax, - sizeof(struct kernel_vm86_regs) - - offsetof(struct kernel_vm86_regs, pt.orig_ax)); - - return ret; -} - -/* convert vm86_regs to kernel_vm86_regs */ -static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs, - const struct vm86_regs __user *user, - unsigned extra) -{ - int ret = 0; - - /* copy ax-fs inclusive */ - ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.orig_ax)); - /* copy orig_ax-__gsh+extra */ - ret += copy_from_user(®s->pt.orig_ax, &user->orig_eax, - sizeof(struct kernel_vm86_regs) - - offsetof(struct kernel_vm86_regs, pt.orig_ax) + - extra); - return ret; -} - -struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) +void save_v86_state(struct kernel_vm86_regs *regs, int retval) { struct tss_struct *tss; - struct pt_regs *ret; - unsigned long tmp; + struct task_struct *tsk = current; + struct vm86plus_struct __user *user; + struct vm86 *vm86 = current->thread.vm86; + long err = 0; /* * This gets called from entry.S with interrupts disabled, but @@ -138,31 +104,57 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) */ local_irq_enable(); - if (!current->thread.vm86_info) { - pr_alert("no vm86_info: BAD\n"); + if (!vm86 || !vm86->user_vm86) { + pr_alert("no user_vm86: BAD\n"); do_exit(SIGSEGV); } - set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | current->thread.v86mask); - tmp = copy_vm86_regs_to_user(¤t->thread.vm86_info->regs, regs); - tmp += put_user(current->thread.screen_bitmap, ¤t->thread.vm86_info->screen_bitmap); - if (tmp) { - pr_alert("could not access userspace vm86_info\n"); + set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | vm86->veflags_mask); + user = vm86->user_vm86; + + if (!access_ok(VERIFY_WRITE, user, vm86->vm86plus.is_vm86pus ? + sizeof(struct vm86plus_struct) : + sizeof(struct vm86_struct))) { + pr_alert("could not access userspace vm86 info\n"); + do_exit(SIGSEGV); + } + + put_user_try { + put_user_ex(regs->pt.bx, &user->regs.ebx); + put_user_ex(regs->pt.cx, &user->regs.ecx); + put_user_ex(regs->pt.dx, &user->regs.edx); + put_user_ex(regs->pt.si, &user->regs.esi); + put_user_ex(regs->pt.di, &user->regs.edi); + put_user_ex(regs->pt.bp, &user->regs.ebp); + put_user_ex(regs->pt.ax, &user->regs.eax); + put_user_ex(regs->pt.ip, &user->regs.eip); + put_user_ex(regs->pt.cs, &user->regs.cs); + put_user_ex(regs->pt.flags, &user->regs.eflags); + put_user_ex(regs->pt.sp, &user->regs.esp); + put_user_ex(regs->pt.ss, &user->regs.ss); + put_user_ex(regs->es, &user->regs.es); + put_user_ex(regs->ds, &user->regs.ds); + put_user_ex(regs->fs, &user->regs.fs); + put_user_ex(regs->gs, &user->regs.gs); + + put_user_ex(vm86->screen_bitmap, &user->screen_bitmap); + } put_user_catch(err); + if (err) { + pr_alert("could not access userspace vm86 info\n"); do_exit(SIGSEGV); } tss = &per_cpu(cpu_tss, get_cpu()); - current->thread.sp0 = current->thread.saved_sp0; - current->thread.sysenter_cs = __KERNEL_CS; - load_sp0(tss, ¤t->thread); - current->thread.saved_sp0 = 0; + tsk->thread.sp0 = vm86->saved_sp0; + tsk->thread.sysenter_cs = __KERNEL_CS; + load_sp0(tss, &tsk->thread); + vm86->saved_sp0 = 0; put_cpu(); - ret = KVM86->regs32; + memcpy(®s->pt, &vm86->regs32, sizeof(struct pt_regs)); - ret->fs = current->thread.saved_fs; - set_user_gs(ret, current->thread.saved_gs); + lazy_load_gs(vm86->regs32.gs); - return ret; + regs->pt.ax = retval; } static void mark_screen_rdonly(struct mm_struct *mm) @@ -200,45 +192,16 @@ out: static int do_vm86_irq_handling(int subfunction, int irqnumber); -static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); +static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus); -SYSCALL_DEFINE1(vm86old, struct vm86_struct __user *, v86) +SYSCALL_DEFINE1(vm86old, struct vm86_struct __user *, user_vm86) { - struct kernel_vm86_struct info; /* declare this _on top_, - * this avoids wasting of stack space. - * This remains on the stack until we - * return to 32 bit user space. - */ - struct task_struct *tsk = current; - int tmp; - - if (tsk->thread.saved_sp0) - return -EPERM; - tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, - offsetof(struct kernel_vm86_struct, vm86plus) - - sizeof(info.regs)); - if (tmp) - return -EFAULT; - memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); - info.regs32 = current_pt_regs(); - tsk->thread.vm86_info = v86; - do_sys_vm86(&info, tsk); - return 0; /* we never return here */ + return do_sys_vm86((struct vm86plus_struct __user *) user_vm86, false); } SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg) { - struct kernel_vm86_struct info; /* declare this _on top_, - * this avoids wasting of stack space. - * This remains on the stack until we - * return to 32 bit user space. - */ - struct task_struct *tsk; - int tmp; - struct vm86plus_struct __user *v86; - - tsk = current; switch (cmd) { case VM86_REQUEST_IRQ: case VM86_FREE_IRQ: @@ -256,114 +219,133 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg) } /* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */ - if (tsk->thread.saved_sp0) - return -EPERM; - v86 = (struct vm86plus_struct __user *)arg; - tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, - offsetof(struct kernel_vm86_struct, regs32) - - sizeof(info.regs)); - if (tmp) - return -EFAULT; - info.regs32 = current_pt_regs(); - info.vm86plus.is_vm86pus = 1; - tsk->thread.vm86_info = (struct vm86_struct __user *)v86; - do_sys_vm86(&info, tsk); - return 0; /* we never return here */ + return do_sys_vm86((struct vm86plus_struct __user *) arg, true); } -static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk) +static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) { struct tss_struct *tss; -/* - * make sure the vm86() system call doesn't try to do anything silly - */ - info->regs.pt.ds = 0; - info->regs.pt.es = 0; - info->regs.pt.fs = 0; -#ifndef CONFIG_X86_32_LAZY_GS - info->regs.pt.gs = 0; -#endif + struct task_struct *tsk = current; + struct vm86 *vm86 = tsk->thread.vm86; + struct kernel_vm86_regs vm86regs; + struct pt_regs *regs = current_pt_regs(); + unsigned long err = 0; + + if (!vm86) { + if (!(vm86 = kzalloc(sizeof(*vm86), GFP_KERNEL))) + return -ENOMEM; + tsk->thread.vm86 = vm86; + } + if (vm86->saved_sp0) + return -EPERM; + + if (!access_ok(VERIFY_READ, user_vm86, plus ? + sizeof(struct vm86_struct) : + sizeof(struct vm86plus_struct))) + return -EFAULT; + + memset(&vm86regs, 0, sizeof(vm86regs)); + get_user_try { + unsigned short seg; + get_user_ex(vm86regs.pt.bx, &user_vm86->regs.ebx); + get_user_ex(vm86regs.pt.cx, &user_vm86->regs.ecx); + get_user_ex(vm86regs.pt.dx, &user_vm86->regs.edx); + get_user_ex(vm86regs.pt.si, &user_vm86->regs.esi); + get_user_ex(vm86regs.pt.di, &user_vm86->regs.edi); + get_user_ex(vm86regs.pt.bp, &user_vm86->regs.ebp); + get_user_ex(vm86regs.pt.ax, &user_vm86->regs.eax); + get_user_ex(vm86regs.pt.ip, &user_vm86->regs.eip); + get_user_ex(seg, &user_vm86->regs.cs); + vm86regs.pt.cs = seg; + get_user_ex(vm86regs.pt.flags, &user_vm86->regs.eflags); + get_user_ex(vm86regs.pt.sp, &user_vm86->regs.esp); + get_user_ex(seg, &user_vm86->regs.ss); + vm86regs.pt.ss = seg; + get_user_ex(vm86regs.es, &user_vm86->regs.es); + get_user_ex(vm86regs.ds, &user_vm86->regs.ds); + get_user_ex(vm86regs.fs, &user_vm86->regs.fs); + get_user_ex(vm86regs.gs, &user_vm86->regs.gs); + + get_user_ex(vm86->flags, &user_vm86->flags); + get_user_ex(vm86->screen_bitmap, &user_vm86->screen_bitmap); + get_user_ex(vm86->cpu_type, &user_vm86->cpu_type); + } get_user_catch(err); + if (err) + return err; + + if (copy_from_user(&vm86->int_revectored, + &user_vm86->int_revectored, + sizeof(struct revectored_struct))) + return -EFAULT; + if (copy_from_user(&vm86->int21_revectored, + &user_vm86->int21_revectored, + sizeof(struct revectored_struct))) + return -EFAULT; + if (plus) { + if (copy_from_user(&vm86->vm86plus, &user_vm86->vm86plus, + sizeof(struct vm86plus_info_struct))) + return -EFAULT; + vm86->vm86plus.is_vm86pus = 1; + } else + memset(&vm86->vm86plus, 0, + sizeof(struct vm86plus_info_struct)); + + memcpy(&vm86->regs32, regs, sizeof(struct pt_regs)); + vm86->user_vm86 = user_vm86; /* * The flags register is also special: we cannot trust that the user * has set it up safely, so this makes sure interrupt etc flags are * inherited from protected mode. */ - VEFLAGS = info->regs.pt.flags; - info->regs.pt.flags &= SAFE_MASK; - info->regs.pt.flags |= info->regs32->flags & ~SAFE_MASK; - info->regs.pt.flags |= X86_VM_MASK; + VEFLAGS = vm86regs.pt.flags; + vm86regs.pt.flags &= SAFE_MASK; + vm86regs.pt.flags |= regs->flags & ~SAFE_MASK; + vm86regs.pt.flags |= X86_VM_MASK; + + vm86regs.pt.orig_ax = regs->orig_ax; - switch (info->cpu_type) { + switch (vm86->cpu_type) { case CPU_286: - tsk->thread.v86mask = 0; + vm86->veflags_mask = 0; break; case CPU_386: - tsk->thread.v86mask = X86_EFLAGS_NT | X86_EFLAGS_IOPL; + vm86->veflags_mask = X86_EFLAGS_NT | X86_EFLAGS_IOPL; break; case CPU_486: - tsk->thread.v86mask = X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL; + vm86->veflags_mask = X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL; break; default: - tsk->thread.v86mask = X86_EFLAGS_ID | X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL; + vm86->veflags_mask = X86_EFLAGS_ID | X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL; break; } /* - * Save old state, set default return value (%ax) to 0 (VM86_SIGNAL) + * Save old state */ - info->regs32->ax = VM86_SIGNAL; - tsk->thread.saved_sp0 = tsk->thread.sp0; - tsk->thread.saved_fs = info->regs32->fs; - tsk->thread.saved_gs = get_user_gs(info->regs32); + vm86->saved_sp0 = tsk->thread.sp0; + lazy_save_gs(vm86->regs32.gs); tss = &per_cpu(cpu_tss, get_cpu()); - tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; + /* make room for real-mode segments */ + tsk->thread.sp0 += 16; if (cpu_has_sep) tsk->thread.sysenter_cs = 0; load_sp0(tss, &tsk->thread); put_cpu(); - tsk->thread.screen_bitmap = info->screen_bitmap; - if (info->flags & VM86_SCREEN_BITMAP) + if (vm86->flags & VM86_SCREEN_BITMAP) mark_screen_rdonly(tsk->mm); - /*call __audit_syscall_exit since we do not exit via the normal paths */ -#ifdef CONFIG_AUDITSYSCALL - if (unlikely(current->audit_context)) - __audit_syscall_exit(1, 0); -#endif - - __asm__ __volatile__( - "movl %0,%%esp\n\t" - "movl %1,%%ebp\n\t" -#ifdef CONFIG_X86_32_LAZY_GS - "mov %2, %%gs\n\t" -#endif - "jmp resume_userspace" - : /* no outputs */ - :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0)); - /* we never return here */ -} - -static inline void return_to_32bit(struct kernel_vm86_regs *regs16, int retval) -{ - struct pt_regs *regs32; - - regs32 = save_v86_state(regs16); - regs32->ax = retval; - __asm__ __volatile__("movl %0,%%esp\n\t" - "movl %1,%%ebp\n\t" - "jmp resume_userspace" - : : "r" (regs32), "r" (current_thread_info())); + memcpy((struct kernel_vm86_regs *)regs, &vm86regs, sizeof(vm86regs)); + force_iret(); + return regs->ax; } static inline void set_IF(struct kernel_vm86_regs *regs) { VEFLAGS |= X86_EFLAGS_VIF; - if (VEFLAGS & X86_EFLAGS_VIP) - return_to_32bit(regs, VM86_STI); } static inline void clear_IF(struct kernel_vm86_regs *regs) @@ -395,7 +377,7 @@ static inline void clear_AC(struct kernel_vm86_regs *regs) static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs *regs) { - set_flags(VEFLAGS, flags, current->thread.v86mask); + set_flags(VEFLAGS, flags, current->thread.vm86->veflags_mask); set_flags(regs->pt.flags, flags, SAFE_MASK); if (flags & X86_EFLAGS_IF) set_IF(regs); @@ -405,7 +387,7 @@ static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs *regs) { - set_flags(VFLAGS, flags, current->thread.v86mask); + set_flags(VFLAGS, flags, current->thread.vm86->veflags_mask); set_flags(regs->pt.flags, flags, SAFE_MASK); if (flags & X86_EFLAGS_IF) set_IF(regs); @@ -420,7 +402,7 @@ static inline unsigned long get_vflags(struct kernel_vm86_regs *regs) if (VEFLAGS & X86_EFLAGS_VIF) flags |= X86_EFLAGS_IF; flags |= X86_EFLAGS_IOPL; - return flags | (VEFLAGS & current->thread.v86mask); + return flags | (VEFLAGS & current->thread.vm86->veflags_mask); } static inline int is_revectored(int nr, struct revectored_struct *bitmap) @@ -518,12 +500,13 @@ static void do_int(struct kernel_vm86_regs *regs, int i, { unsigned long __user *intr_ptr; unsigned long segoffs; + struct vm86 *vm86 = current->thread.vm86; if (regs->pt.cs == BIOSSEG) goto cannot_handle; - if (is_revectored(i, &KVM86->int_revectored)) + if (is_revectored(i, &vm86->int_revectored)) goto cannot_handle; - if (i == 0x21 && is_revectored(AH(regs), &KVM86->int21_revectored)) + if (i == 0x21 && is_revectored(AH(regs), &vm86->int21_revectored)) goto cannot_handle; intr_ptr = (unsigned long __user *) (i << 2); if (get_user(segoffs, intr_ptr)) @@ -542,18 +525,16 @@ static void do_int(struct kernel_vm86_regs *regs, int i, return; cannot_handle: - return_to_32bit(regs, VM86_INTx + (i << 8)); + save_v86_state(regs, VM86_INTx + (i << 8)); } int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno) { - if (VMPI.is_vm86pus) { + struct vm86 *vm86 = current->thread.vm86; + + if (vm86->vm86plus.is_vm86pus) { if ((trapno == 3) || (trapno == 1)) { - KVM86->regs32->ax = VM86_TRAP + (trapno << 8); - /* setting this flag forces the code in entry_32.S to - the path where we call save_v86_state() and change - the stack pointer to KVM86->regs32 */ - set_thread_flag(TIF_NOTIFY_RESUME); + save_v86_state(regs, VM86_TRAP + (trapno << 8)); return 0; } do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs)); @@ -574,16 +555,11 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) unsigned char __user *ssp; unsigned short ip, sp, orig_flags; int data32, pref_done; + struct vm86plus_info_struct *vmpi = ¤t->thread.vm86->vm86plus; #define CHECK_IF_IN_TRAP \ - if (VMPI.vm86dbg_active && VMPI.vm86dbg_TFpendig) \ + if (vmpi->vm86dbg_active && vmpi->vm86dbg_TFpendig) \ newflags |= X86_EFLAGS_TF -#define VM86_FAULT_RETURN do { \ - if (VMPI.force_return_for_pic && (VEFLAGS & (X86_EFLAGS_IF | X86_EFLAGS_VIF))) \ - return_to_32bit(regs, VM86_PICRETURN); \ - if (orig_flags & X86_EFLAGS_TF) \ - handle_vm86_trap(regs, 0, 1); \ - return; } while (0) orig_flags = *(unsigned short *)®s->pt.flags; @@ -622,7 +598,7 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) SP(regs) -= 2; } IP(regs) = ip; - VM86_FAULT_RETURN; + goto vm86_fault_return; /* popf */ case 0x9d: @@ -642,16 +618,18 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) else set_vflags_short(newflags, regs); - VM86_FAULT_RETURN; + goto check_vip; } /* int xx */ case 0xcd: { int intno = popb(csp, ip, simulate_sigsegv); IP(regs) = ip; - if (VMPI.vm86dbg_active) { - if ((1 << (intno & 7)) & VMPI.vm86dbg_intxxtab[intno >> 3]) - return_to_32bit(regs, VM86_INTx + (intno << 8)); + if (vmpi->vm86dbg_active) { + if ((1 << (intno & 7)) & vmpi->vm86dbg_intxxtab[intno >> 3]) { + save_v86_state(regs, VM86_INTx + (intno << 8)); + return; + } } do_int(regs, intno, ssp, sp); return; @@ -682,14 +660,14 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) } else { set_vflags_short(newflags, regs); } - VM86_FAULT_RETURN; + goto check_vip; } /* cli */ case 0xfa: IP(regs) = ip; clear_IF(regs); - VM86_FAULT_RETURN; + goto vm86_fault_return; /* sti */ /* @@ -701,14 +679,29 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) case 0xfb: IP(regs) = ip; set_IF(regs); - VM86_FAULT_RETURN; + goto check_vip; default: - return_to_32bit(regs, VM86_UNKNOWN); + save_v86_state(regs, VM86_UNKNOWN); } return; +check_vip: + if (VEFLAGS & X86_EFLAGS_VIP) { + save_v86_state(regs, VM86_STI); + return; + } + +vm86_fault_return: + if (vmpi->force_return_for_pic && (VEFLAGS & (X86_EFLAGS_IF | X86_EFLAGS_VIF))) { + save_v86_state(regs, VM86_PICRETURN); + return; + } + if (orig_flags & X86_EFLAGS_TF) + handle_vm86_trap(regs, 0, X86_TRAP_DB); + return; + simulate_sigsegv: /* FIXME: After a long discussion with Stas we finally * agreed, that this is wrong. Here we should @@ -720,7 +713,7 @@ simulate_sigsegv: * should be a mixture of the two, but how do we * get the information? [KD] */ - return_to_32bit(regs, VM86_UNKNOWN); + save_v86_state(regs, VM86_UNKNOWN); } /* ---------------- vm86 special IRQ passing stuff ----------------- */ |