diff options
author | Ingo Molnar <mingo@kernel.org> | 2015-08-20 11:48:56 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2015-08-20 11:48:56 +0200 |
commit | 40a2ea1bd988e3bbdb07a0708681fdb05cd7d267 (patch) | |
tree | 05806aaa27a4b0516a14a8ccf703c1501500e588 /arch/x86 | |
parent | a897b5f0393a8a05d230c9248dc5324fb30720a0 (diff) | |
parent | 196676497f2507966f99abef63bede6a8550f8b3 (diff) | |
download | linux-40a2ea1bd988e3bbdb07a0708681fdb05cd7d267.tar.bz2 |
Merge branch 'perf/urgent' into perf/core, to pick up fixes before adding more changes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/boot/compressed/eboot.c | 4 | ||||
-rw-r--r-- | arch/x86/entry/entry_64_compat.S | 3 | ||||
-rw-r--r-- | arch/x86/include/asm/desc.h | 15 | ||||
-rw-r--r-- | arch/x86/include/asm/mmu.h | 3 | ||||
-rw-r--r-- | arch/x86/include/asm/mmu_context.h | 54 | ||||
-rw-r--r-- | arch/x86/include/asm/sigcontext.h | 6 | ||||
-rw-r--r-- | arch/x86/include/uapi/asm/sigcontext.h | 21 | ||||
-rw-r--r-- | arch/x86/kernel/apic/io_apic.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 12 | ||||
-rw-r--r-- | arch/x86/kernel/ldt.c | 262 | ||||
-rw-r--r-- | arch/x86/kernel/process_64.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/signal.c | 26 | ||||
-rw-r--r-- | arch/x86/kernel/step.c | 8 | ||||
-rw-r--r-- | arch/x86/kvm/mtrr.c | 8 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 7 | ||||
-rw-r--r-- | arch/x86/math-emu/fpu_entry.c | 3 | ||||
-rw-r--r-- | arch/x86/math-emu/fpu_system.h | 21 | ||||
-rw-r--r-- | arch/x86/math-emu/get_address.c | 3 | ||||
-rw-r--r-- | arch/x86/net/bpf_jit_comp.c | 8 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi.c | 5 | ||||
-rw-r--r-- | arch/x86/power/cpu.c | 3 | ||||
-rw-r--r-- | arch/x86/xen/Makefile | 4 | ||||
-rw-r--r-- | arch/x86/xen/enlighten.c | 40 | ||||
-rw-r--r-- | arch/x86/xen/xen-ops.h | 6 |
25 files changed, 318 insertions, 214 deletions
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 2c82bd150d43..7d69afd8b6fa 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -1193,6 +1193,10 @@ static efi_status_t setup_e820(struct boot_params *params, unsigned int e820_type = 0; unsigned long m = efi->efi_memmap; +#ifdef CONFIG_X86_64 + m |= (u64)efi->efi_memmap_hi << 32; +#endif + d = (efi_memory_desc_t *)(m + (i * efi->efi_memdesc_size)); switch (d->type) { case EFI_RESERVED_TYPE: diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 5a1844765a7a..a7e257d9cb90 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -140,6 +140,7 @@ sysexit_from_sys_call: */ andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) movl RIP(%rsp), %ecx /* User %eip */ + movq RAX(%rsp), %rax RESTORE_RSI_RDI xorl %edx, %edx /* Do not leak kernel information */ xorq %r8, %r8 @@ -219,7 +220,6 @@ sysexit_from_sys_call: 1: setbe %al /* 1 if error, 0 if not */ movzbl %al, %edi /* zero-extend that into %edi */ call __audit_syscall_exit - movq RAX(%rsp), %rax /* reload syscall return value */ movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %edi DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF @@ -368,6 +368,7 @@ sysretl_from_sys_call: RESTORE_RSI_RDI_RDX movl RIP(%rsp), %ecx movl EFLAGS(%rsp), %r11d + movq RAX(%rsp), %rax xorq %r10, %r10 xorq %r9, %r9 xorq %r8, %r8 diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index a0bf89fd2647..4e10d73cf018 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -280,21 +280,6 @@ static inline void clear_LDT(void) set_ldt(NULL, 0); } -/* - * load one particular LDT into the current CPU - */ -static inline void load_LDT_nolock(mm_context_t *pc) -{ - set_ldt(pc->ldt, pc->size); -} - -static inline void load_LDT(mm_context_t *pc) -{ - preempt_disable(); - load_LDT_nolock(pc); - preempt_enable(); -} - static inline unsigned long get_desc_base(const struct desc_struct *desc) { return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24)); diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 09b9620a73b4..364d27481a52 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -9,8 +9,7 @@ * we put the segment information here. */ typedef struct { - void *ldt; - int size; + struct ldt_struct *ldt; #ifdef CONFIG_X86_64 /* True if mm supports a task running in 32 bit compatibility mode. */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 804a3a6030ca..984abfe47edc 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -34,6 +34,50 @@ static inline void load_mm_cr4(struct mm_struct *mm) {} #endif /* + * ldt_structs can be allocated, used, and freed, but they are never + * modified while live. + */ +struct ldt_struct { + /* + * Xen requires page-aligned LDTs with special permissions. This is + * needed to prevent us from installing evil descriptors such as + * call gates. On native, we could merge the ldt_struct and LDT + * allocations, but it's not worth trying to optimize. + */ + struct desc_struct *entries; + int size; +}; + +static inline void load_mm_ldt(struct mm_struct *mm) +{ + struct ldt_struct *ldt; + + /* lockless_dereference synchronizes with smp_store_release */ + ldt = lockless_dereference(mm->context.ldt); + + /* + * Any change to mm->context.ldt is followed by an IPI to all + * CPUs with the mm active. The LDT will not be freed until + * after the IPI is handled by all such CPUs. This means that, + * if the ldt_struct changes before we return, the values we see + * will be safe, and the new values will be loaded before we run + * any user code. + * + * NB: don't try to convert this to use RCU without extreme care. + * We would still need IRQs off, because we don't want to change + * the local LDT after an IPI loaded a newer value than the one + * that we can see. + */ + + if (unlikely(ldt)) + set_ldt(ldt->entries, ldt->size); + else + clear_LDT(); + + DEBUG_LOCKS_WARN_ON(preemptible()); +} + +/* * Used for LDT copy/destruction. */ int init_new_context(struct task_struct *tsk, struct mm_struct *mm); @@ -78,12 +122,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * was called and then modify_ldt changed * prev->context.ldt but suppressed an IPI to this CPU. * In this case, prev->context.ldt != NULL, because we - * never free an LDT while the mm still exists. That - * means that next->context.ldt != prev->context.ldt, - * because mms never share an LDT. + * never set context.ldt to NULL while the mm still + * exists. That means that next->context.ldt != + * prev->context.ldt, because mms never share an LDT. */ if (unlikely(prev->context.ldt != next->context.ldt)) - load_LDT_nolock(&next->context); + load_mm_ldt(next); } #ifdef CONFIG_SMP else { @@ -106,7 +150,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, load_cr3(next->pgd); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); load_mm_cr4(next); - load_LDT_nolock(&next->context); + load_mm_ldt(next); } } #endif diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h index 6fe6b182c998..9dfce4e0417d 100644 --- a/arch/x86/include/asm/sigcontext.h +++ b/arch/x86/include/asm/sigcontext.h @@ -57,9 +57,9 @@ struct sigcontext { unsigned long ip; unsigned long flags; unsigned short cs; - unsigned short __pad2; /* Was called gs, but was always zero. */ - unsigned short __pad1; /* Was called fs, but was always zero. */ - unsigned short ss; + unsigned short gs; + unsigned short fs; + unsigned short __pad0; unsigned long err; unsigned long trapno; unsigned long oldmask; diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h index 0e8a973de9ee..40836a9a7250 100644 --- a/arch/x86/include/uapi/asm/sigcontext.h +++ b/arch/x86/include/uapi/asm/sigcontext.h @@ -177,24 +177,9 @@ struct sigcontext { __u64 rip; __u64 eflags; /* RFLAGS */ __u16 cs; - - /* - * Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"), - * Linux saved and restored fs and gs in these slots. This - * was counterproductive, as fsbase and gsbase were never - * saved, so arch_prctl was presumably unreliable. - * - * If these slots are ever needed for any other purpose, there - * is some risk that very old 64-bit binaries could get - * confused. I doubt that many such binaries still work, - * though, since the same patch in 2.5.64 also removed the - * 64-bit set_thread_area syscall, so it appears that there is - * no TLS API that works in both pre- and post-2.5.64 kernels. - */ - __u16 __pad2; /* Was gs. */ - __u16 __pad1; /* Was fs. */ - - __u16 ss; + __u16 gs; + __u16 fs; + __u16 __pad0; __u64 err; __u64 trapno; __u64 oldmask; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 845dc0df2002..206052e55517 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -943,7 +943,7 @@ static bool mp_check_pin_attr(int irq, struct irq_alloc_info *info) */ if (irq < nr_legacy_irqs() && data->count == 1) { if (info->ioapic_trigger != data->trigger) - mp_register_handler(irq, data->trigger); + mp_register_handler(irq, info->ioapic_trigger); data->entry.trigger = data->trigger = info->ioapic_trigger; data->entry.polarity = data->polarity = info->ioapic_polarity; } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 922c5e0cea4c..cb9e5df42dd2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1410,7 +1410,7 @@ void cpu_init(void) load_sp0(t, ¤t->thread); set_tss_desc(cpu, t); load_TR_desc(); - load_LDT(&init_mm.context); + load_mm_ldt(&init_mm); clear_all_debug_regs(); dbg_restore_debug_regs(); @@ -1459,7 +1459,7 @@ void cpu_init(void) load_sp0(t, thread); set_tss_desc(cpu, t); load_TR_desc(); - load_LDT(&init_mm.context); + load_mm_ldt(&init_mm); t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 8bac4bbecb9b..f56cf074d01a 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2179,21 +2179,25 @@ static unsigned long get_segment_base(unsigned int segment) int idx = segment >> 3; if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) { + struct ldt_struct *ldt; + if (idx > LDT_ENTRIES) return 0; - if (idx > current->active_mm->context.size) + /* IRQs are off, so this synchronizes with smp_store_release */ + ldt = lockless_dereference(current->active_mm->context.ldt); + if (!ldt || idx > ldt->size) return 0; - desc = current->active_mm->context.ldt; + desc = &ldt->entries[idx]; } else { if (idx > GDT_ENTRIES) return 0; - desc = raw_cpu_ptr(gdt_page.gdt); + desc = raw_cpu_ptr(gdt_page.gdt) + idx; } - return get_desc_base(desc + idx); + return get_desc_base(desc); } #ifdef CONFIG_COMPAT diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index c37886d759cc..2bcc0525f1c1 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -12,6 +12,7 @@ #include <linux/string.h> #include <linux/mm.h> #include <linux/smp.h> +#include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/uaccess.h> @@ -20,82 +21,82 @@ #include <asm/mmu_context.h> #include <asm/syscalls.h> -#ifdef CONFIG_SMP +/* context.lock is held for us, so we don't need any locking. */ static void flush_ldt(void *current_mm) { - if (current->active_mm == current_mm) - load_LDT(¤t->active_mm->context); + mm_context_t *pc; + + if (current->active_mm != current_mm) + return; + + pc = ¤t->active_mm->context; + set_ldt(pc->ldt->entries, pc->ldt->size); } -#endif -static int alloc_ldt(mm_context_t *pc, int mincount, int reload) +/* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */ +static struct ldt_struct *alloc_ldt_struct(int size) { - void *oldldt, *newldt; - int oldsize; - - if (mincount <= pc->size) - return 0; - oldsize = pc->size; - mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) & - (~(PAGE_SIZE / LDT_ENTRY_SIZE - 1)); - if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE) - newldt = vmalloc(mincount * LDT_ENTRY_SIZE); + struct ldt_struct *new_ldt; + int alloc_size; + + if (size > LDT_ENTRIES) + return NULL; + + new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL); + if (!new_ldt) + return NULL; + + BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct)); + alloc_size = size * LDT_ENTRY_SIZE; + + /* + * Xen is very picky: it requires a page-aligned LDT that has no + * trailing nonzero bytes in any page that contains LDT descriptors. + * Keep it simple: zero the whole allocation and never allocate less + * than PAGE_SIZE. + */ + if (alloc_size > PAGE_SIZE) + new_ldt->entries = vzalloc(alloc_size); else - newldt = (void *)__get_free_page(GFP_KERNEL); - - if (!newldt) - return -ENOMEM; + new_ldt->entries = kzalloc(PAGE_SIZE, GFP_KERNEL); - if (oldsize) - memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE); - oldldt = pc->ldt; - memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, - (mincount - oldsize) * LDT_ENTRY_SIZE); + if (!new_ldt->entries) { + kfree(new_ldt); + return NULL; + } - paravirt_alloc_ldt(newldt, mincount); + new_ldt->size = size; + return new_ldt; +} -#ifdef CONFIG_X86_64 - /* CHECKME: Do we really need this ? */ - wmb(); -#endif - pc->ldt = newldt; - wmb(); - pc->size = mincount; - wmb(); - - if (reload) { -#ifdef CONFIG_SMP - preempt_disable(); - load_LDT(pc); - if (!cpumask_equal(mm_cpumask(current->mm), - cpumask_of(smp_processor_id()))) - smp_call_function(flush_ldt, current->mm, 1); - preempt_enable(); -#else - load_LDT(pc); -#endif - } - if (oldsize) { - paravirt_free_ldt(oldldt, oldsize); - if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(oldldt); - else - put_page(virt_to_page(oldldt)); - } - return 0; +/* After calling this, the LDT is immutable. */ +static void finalize_ldt_struct(struct ldt_struct *ldt) +{ + paravirt_alloc_ldt(ldt->entries, ldt->size); } -static inline int copy_ldt(mm_context_t *new, mm_context_t *old) +/* context.lock is held */ +static void install_ldt(struct mm_struct *current_mm, + struct ldt_struct *ldt) { - int err = alloc_ldt(new, old->size, 0); - int i; + /* Synchronizes with lockless_dereference in load_mm_ldt. */ + smp_store_release(¤t_mm->context.ldt, ldt); + + /* Activate the LDT for all CPUs using current_mm. */ + on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true); +} - if (err < 0) - return err; +static void free_ldt_struct(struct ldt_struct *ldt) +{ + if (likely(!ldt)) + return; - for (i = 0; i < old->size; i++) - write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); - return 0; + paravirt_free_ldt(ldt->entries, ldt->size); + if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE) + vfree(ldt->entries); + else + kfree(ldt->entries); + kfree(ldt); } /* @@ -104,17 +105,37 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old) */ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + struct ldt_struct *new_ldt; struct mm_struct *old_mm; int retval = 0; mutex_init(&mm->context.lock); - mm->context.size = 0; old_mm = current->mm; - if (old_mm && old_mm->context.size > 0) { - mutex_lock(&old_mm->context.lock); - retval = copy_ldt(&mm->context, &old_mm->context); - mutex_unlock(&old_mm->context.lock); + if (!old_mm) { + mm->context.ldt = NULL; + return 0; } + + mutex_lock(&old_mm->context.lock); + if (!old_mm->context.ldt) { + mm->context.ldt = NULL; + goto out_unlock; + } + + new_ldt = alloc_ldt_struct(old_mm->context.ldt->size); + if (!new_ldt) { + retval = -ENOMEM; + goto out_unlock; + } + + memcpy(new_ldt->entries, old_mm->context.ldt->entries, + new_ldt->size * LDT_ENTRY_SIZE); + finalize_ldt_struct(new_ldt); + + mm->context.ldt = new_ldt; + +out_unlock: + mutex_unlock(&old_mm->context.lock); return retval; } @@ -125,53 +146,47 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) */ void destroy_context(struct mm_struct *mm) { - if (mm->context.size) { -#ifdef CONFIG_X86_32 - /* CHECKME: Can this ever happen ? */ - if (mm == current->active_mm) - clear_LDT(); -#endif - paravirt_free_ldt(mm->context.ldt, mm->context.size); - if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(mm->context.ldt); - else - put_page(virt_to_page(mm->context.ldt)); - mm->context.size = 0; - } + free_ldt_struct(mm->context.ldt); + mm->context.ldt = NULL; } static int read_ldt(void __user *ptr, unsigned long bytecount) { - int err; + int retval; unsigned long size; struct mm_struct *mm = current->mm; - if (!mm->context.size) - return 0; + mutex_lock(&mm->context.lock); + + if (!mm->context.ldt) { + retval = 0; + goto out_unlock; + } + if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES) bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES; - mutex_lock(&mm->context.lock); - size = mm->context.size * LDT_ENTRY_SIZE; + size = mm->context.ldt->size * LDT_ENTRY_SIZE; if (size > bytecount) size = bytecount; - err = 0; - if (copy_to_user(ptr, mm->context.ldt, size)) - err = -EFAULT; - mutex_unlock(&mm->context.lock); - if (err < 0) - goto error_return; + if (copy_to_user(ptr, mm->context.ldt->entries, size)) { + retval = -EFAULT; + goto out_unlock; + } + if (size != bytecount) { - /* zero-fill the rest */ - if (clear_user(ptr + size, bytecount - size) != 0) { - err = -EFAULT; - goto error_return; + /* Zero-fill the rest and pretend we read bytecount bytes. */ + if (clear_user(ptr + size, bytecount - size)) { + retval = -EFAULT; + goto out_unlock; } } - return bytecount; -error_return: - return err; + retval = bytecount; + +out_unlock: + mutex_unlock(&mm->context.lock); + return retval; } static int read_default_ldt(void __user *ptr, unsigned long bytecount) @@ -195,6 +210,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) struct desc_struct ldt; int error; struct user_desc ldt_info; + int oldsize, newsize; + struct ldt_struct *new_ldt, *old_ldt; error = -EINVAL; if (bytecount != sizeof(ldt_info)) @@ -213,34 +230,39 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) goto out; } - mutex_lock(&mm->context.lock); - if (ldt_info.entry_number >= mm->context.size) { - error = alloc_ldt(¤t->mm->context, - ldt_info.entry_number + 1, 1); - if (error < 0) - goto out_unlock; - } - - /* Allow LDTs to be cleared by the user. */ - if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { - if (oldmode || LDT_empty(&ldt_info)) { - memset(&ldt, 0, sizeof(ldt)); - goto install; + if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) || + LDT_empty(&ldt_info)) { + /* The user wants to clear the entry. */ + memset(&ldt, 0, sizeof(ldt)); + } else { + if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) { + error = -EINVAL; + goto out; } + + fill_ldt(&ldt, &ldt_info); + if (oldmode) + ldt.avl = 0; } - if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) { - error = -EINVAL; + mutex_lock(&mm->context.lock); + + old_ldt = mm->context.ldt; + oldsize = old_ldt ? old_ldt->size : 0; + newsize = max((int)(ldt_info.entry_number + 1), oldsize); + + error = -ENOMEM; + new_ldt = alloc_ldt_struct(newsize); + if (!new_ldt) goto out_unlock; - } - fill_ldt(&ldt, &ldt_info); - if (oldmode) - ldt.avl = 0; + if (old_ldt) + memcpy(new_ldt->entries, old_ldt->entries, oldsize * LDT_ENTRY_SIZE); + new_ldt->entries[ldt_info.entry_number] = ldt; + finalize_ldt_struct(new_ldt); - /* Install the new entry ... */ -install: - write_ldt_entry(mm->context.ldt, ldt_info.entry_number, &ldt); + install_ldt(mm, new_ldt); + free_ldt_struct(old_ldt); error = 0; out_unlock: diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 71d7849a07f7..f6b916387590 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -121,11 +121,11 @@ void __show_regs(struct pt_regs *regs, int all) void release_thread(struct task_struct *dead_task) { if (dead_task->mm) { - if (dead_task->mm->context.size) { + if (dead_task->mm->context.ldt) { pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n", dead_task->comm, dead_task->mm->context.ldt, - dead_task->mm->context.size); + dead_task->mm->context.ldt->size); BUG(); } } diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 206996c1669d..71820c42b6ce 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -93,8 +93,15 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) COPY(r15); #endif /* CONFIG_X86_64 */ +#ifdef CONFIG_X86_32 COPY_SEG_CPL3(cs); COPY_SEG_CPL3(ss); +#else /* !CONFIG_X86_32 */ + /* Kernel saves and restores only the CS segment register on signals, + * which is the bare minimum needed to allow mixed 32/64-bit code. + * App's signal handler can save/restore other segments if needed. */ + COPY_SEG_CPL3(cs); +#endif /* CONFIG_X86_32 */ get_user_ex(tmpflags, &sc->flags); regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); @@ -154,9 +161,8 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, #else /* !CONFIG_X86_32 */ put_user_ex(regs->flags, &sc->flags); put_user_ex(regs->cs, &sc->cs); - put_user_ex(0, &sc->__pad2); - put_user_ex(0, &sc->__pad1); - put_user_ex(regs->ss, &sc->ss); + put_user_ex(0, &sc->gs); + put_user_ex(0, &sc->fs); #endif /* CONFIG_X86_32 */ put_user_ex(fpstate, &sc->fpstate); @@ -451,19 +457,9 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, regs->sp = (unsigned long)frame; - /* - * Set up the CS and SS registers to run signal handlers in - * 64-bit mode, even if the handler happens to be interrupting - * 32-bit or 16-bit code. - * - * SS is subtle. In 64-bit mode, we don't need any particular - * SS descriptor, but we do need SS to be valid. It's possible - * that the old SS is entirely bogus -- this can happen if the - * signal we're trying to deliver is #GP or #SS caused by a bad - * SS value. - */ + /* Set up the CS register to run signal handlers in 64-bit mode, + even if the handler happens to be interrupting 32-bit code. */ regs->cs = __USER_CS; - regs->ss = __USER_DS; return 0; } diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 9b4d51d0c0d0..0ccb53a9fcd9 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -5,6 +5,7 @@ #include <linux/mm.h> #include <linux/ptrace.h> #include <asm/desc.h> +#include <asm/mmu_context.h> unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs) { @@ -27,13 +28,14 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re struct desc_struct *desc; unsigned long base; - seg &= ~7UL; + seg >>= 3; mutex_lock(&child->mm->context.lock); - if (unlikely((seg >> 3) >= child->mm->context.size)) + if (unlikely(!child->mm->context.ldt || + seg >= child->mm->context.ldt->size)) addr = -1L; /* bogus selector, access would fault */ else { - desc = child->mm->context.ldt + seg; + desc = &child->mm->context.ldt->entries[seg]; base = get_desc_base(desc); /* 16-bit code segment? */ diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c index dc0a84a6f309..9e8bf13572e6 100644 --- a/arch/x86/kvm/mtrr.c +++ b/arch/x86/kvm/mtrr.c @@ -672,16 +672,16 @@ u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) if (iter.mtrr_disabled) return mtrr_disabled_type(); + /* not contained in any MTRRs. */ + if (type == -1) + return mtrr_default_type(mtrr_state); + /* * We just check one page, partially covered by MTRRs is * impossible. */ WARN_ON(iter.partial_map); - /* not contained in any MTRRs. */ - if (type == -1) - return mtrr_default_type(mtrr_state); - return type; } EXPORT_SYMBOL_GPL(kvm_mtrr_get_guest_memory_type); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5ef2560075bf..8f0f6eca69da 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2105,7 +2105,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (guest_cpuid_has_tsc_adjust(vcpu)) { if (!msr_info->host_initiated) { s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr; - kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true); + adjust_tsc_offset_guest(vcpu, adj); } vcpu->arch.ia32_tsc_adjust_msr = data; } @@ -6327,6 +6327,7 @@ static void process_smi_save_state_64(struct kvm_vcpu *vcpu, char *buf) static void process_smi(struct kvm_vcpu *vcpu) { struct kvm_segment cs, ds; + struct desc_ptr dt; char buf[512]; u32 cr0; @@ -6359,6 +6360,10 @@ static void process_smi(struct kvm_vcpu *vcpu) kvm_x86_ops->set_cr4(vcpu, 0); + /* Undocumented: IDT limit is set to zero on entry to SMM. */ + dt.address = dt.size = 0; + kvm_x86_ops->set_idt(vcpu, &dt); + __kvm_set_dr(vcpu, 7, DR7_FIXED_1); cs.selector = (vcpu->arch.smbase >> 4) & 0xffff; diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index f37e84ab49f3..3d8f2e421466 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -29,7 +29,6 @@ #include <asm/uaccess.h> #include <asm/traps.h> -#include <asm/desc.h> #include <asm/user.h> #include <asm/fpu/internal.h> @@ -181,7 +180,7 @@ void math_emulate(struct math_emu_info *info) math_abort(FPU_info, SIGILL); } - code_descriptor = LDT_DESCRIPTOR(FPU_CS); + code_descriptor = FPU_get_ldt_descriptor(FPU_CS); if (SEG_D_SIZE(code_descriptor)) { /* The above test may be wrong, the book is not clear */ /* Segmented 32 bit protected mode */ diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h index 9ccecb61a4fa..5e044d506b7a 100644 --- a/arch/x86/math-emu/fpu_system.h +++ b/arch/x86/math-emu/fpu_system.h @@ -16,9 +16,24 @@ #include <linux/kernel.h> #include <linux/mm.h> -/* s is always from a cpu register, and the cpu does bounds checking - * during register load --> no further bounds checks needed */ -#define LDT_DESCRIPTOR(s) (((struct desc_struct *)current->mm->context.ldt)[(s) >> 3]) +#include <asm/desc.h> +#include <asm/mmu_context.h> + +static inline struct desc_struct FPU_get_ldt_descriptor(unsigned seg) +{ + static struct desc_struct zero_desc; + struct desc_struct ret = zero_desc; + +#ifdef CONFIG_MODIFY_LDT_SYSCALL + seg >>= 3; + mutex_lock(¤t->mm->context.lock); + if (current->mm->context.ldt && seg < current->mm->context.ldt->size) + ret = current->mm->context.ldt->entries[seg]; + mutex_unlock(¤t->mm->context.lock); +#endif + return ret; +} + #define SEG_D_SIZE(x) ((x).b & (3 << 21)) #define SEG_G_BIT(x) ((x).b & (1 << 23)) #define SEG_GRANULARITY(x) (((x).b & (1 << 23)) ? 4096 : 1) diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c index 6ef5e99380f9..8300db71c2a6 100644 --- a/arch/x86/math-emu/get_address.c +++ b/arch/x86/math-emu/get_address.c @@ -20,7 +20,6 @@ #include <linux/stddef.h> #include <asm/uaccess.h> -#include <asm/desc.h> #include "fpu_system.h" #include "exception.h" @@ -158,7 +157,7 @@ static long pm_address(u_char FPU_modrm, u_char segment, addr->selector = PM_REG_(segment); } - descriptor = LDT_DESCRIPTOR(PM_REG_(segment)); + descriptor = FPU_get_ldt_descriptor(addr->selector); base_address = SEG_BASE_ADDR(descriptor); address = base_address + offset; limit = base_address diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 579a8fd74be0..be2e7a2b10d7 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -269,7 +269,7 @@ static void emit_bpf_tail_call(u8 **pprog) EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */ offsetof(struct bpf_array, map.max_entries)); EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */ -#define OFFSET1 44 /* number of bytes to jump */ +#define OFFSET1 47 /* number of bytes to jump */ EMIT2(X86_JBE, OFFSET1); /* jbe out */ label1 = cnt; @@ -278,15 +278,15 @@ static void emit_bpf_tail_call(u8 **pprog) */ EMIT2_off32(0x8B, 0x85, -STACKSIZE + 36); /* mov eax, dword ptr [rbp - 516] */ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ -#define OFFSET2 33 +#define OFFSET2 36 EMIT2(X86_JA, OFFSET2); /* ja out */ label2 = cnt; EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ EMIT2_off32(0x89, 0x85, -STACKSIZE + 36); /* mov dword ptr [rbp - 516], eax */ /* prog = array->prog[index]; */ - EMIT4(0x48, 0x8D, 0x44, 0xD6); /* lea rax, [rsi + rdx * 8 + 0x50] */ - EMIT1(offsetof(struct bpf_array, prog)); + EMIT4_off32(0x48, 0x8D, 0x84, 0xD6, /* lea rax, [rsi + rdx * 8 + offsetof(...)] */ + offsetof(struct bpf_array, prog)); EMIT3(0x48, 0x8B, 0x00); /* mov rax, qword ptr [rax] */ /* if (prog == NULL) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index cfba30f27392..e4308fe6afe8 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -972,6 +972,11 @@ u64 efi_mem_attributes(unsigned long phys_addr) static int __init arch_parse_efi_cmdline(char *str) { + if (!str) { + pr_warn("need at least one option\n"); + return -EINVAL; + } + if (parse_option_str(str, "old_map")) set_bit(EFI_OLD_MEMMAP, &efi.flags); if (parse_option_str(str, "debug")) diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 0d7dd1f5ac36..9ab52791fed5 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -22,6 +22,7 @@ #include <asm/fpu/internal.h> #include <asm/debugreg.h> #include <asm/cpu.h> +#include <asm/mmu_context.h> #ifdef CONFIG_X86_32 __visible unsigned long saved_context_ebx; @@ -153,7 +154,7 @@ static void fix_processor_context(void) syscall_init(); /* This sets MSR_*STAR and related */ #endif load_TR_desc(); /* This does ltr */ - load_LDT(¤t->active_mm->context); /* This does lldt */ + load_mm_ldt(current->active_mm); /* This does lldt */ fpu__resume_cpu(); } diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 7322755f337a..4b6e29ac0968 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -13,13 +13,13 @@ CFLAGS_mmu.o := $(nostackp) obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ time.o xen-asm.o xen-asm_$(BITS).o \ grant-table.o suspend.o platform-pci-unplug.o \ - p2m.o + p2m.o apic.o obj-$(CONFIG_EVENT_TRACING) += trace.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o -obj-$(CONFIG_XEN_DOM0) += apic.o vga.o +obj-$(CONFIG_XEN_DOM0) += vga.o obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o obj-$(CONFIG_XEN_EFI) += efi.o diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0b95c9b8283f..11d6fb4e8483 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -483,6 +483,7 @@ static void set_aliased_prot(void *v, pgprot_t prot) pte_t pte; unsigned long pfn; struct page *page; + unsigned char dummy; ptep = lookup_address((unsigned long)v, &level); BUG_ON(ptep == NULL); @@ -492,6 +493,32 @@ static void set_aliased_prot(void *v, pgprot_t prot) pte = pfn_pte(pfn, prot); + /* + * Careful: update_va_mapping() will fail if the virtual address + * we're poking isn't populated in the page tables. We don't + * need to worry about the direct map (that's always in the page + * tables), but we need to be careful about vmap space. In + * particular, the top level page table can lazily propagate + * entries between processes, so if we've switched mms since we + * vmapped the target in the first place, we might not have the + * top-level page table entry populated. + * + * We disable preemption because we want the same mm active when + * we probe the target and when we issue the hypercall. We'll + * have the same nominal mm, but if we're a kernel thread, lazy + * mm dropping could change our pgd. + * + * Out of an abundance of caution, this uses __get_user() to fault + * in the target address just in case there's some obscure case + * in which the target address isn't readable. + */ + + preempt_disable(); + + pagefault_disable(); /* Avoid warnings due to being atomic. */ + __get_user(dummy, (unsigned char __user __force *)v); + pagefault_enable(); + if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) BUG(); @@ -503,6 +530,8 @@ static void set_aliased_prot(void *v, pgprot_t prot) BUG(); } else kmap_flush_unused(); + + preempt_enable(); } static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries) @@ -510,6 +539,17 @@ static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries) const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE; int i; + /* + * We need to mark the all aliases of the LDT pages RO. We + * don't need to call vm_flush_aliases(), though, since that's + * only responsible for flushing aliases out the TLBs, not the + * page tables, and Xen will flush the TLB for us if needed. + * + * To avoid confusing future readers: none of this is necessary + * to load the LDT. The hypervisor only checks this when the + * LDT is faulted in due to subsequent descriptor access. + */ + for(i = 0; i < entries; i += entries_per_page) set_aliased_prot(ldt + i, PAGE_KERNEL_RO); } diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index c20fe29e65f4..2292721b1d10 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -101,17 +101,15 @@ struct dom0_vga_console_info; #ifdef CONFIG_XEN_DOM0 void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size); -void __init xen_init_apic(void); #else static inline void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) { } -static inline void __init xen_init_apic(void) -{ -} #endif +void __init xen_init_apic(void); + #ifdef CONFIG_XEN_EFI extern void xen_efi_init(void); #else |