summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2011-09-18 14:01:26 +0200
committerIngo Molnar <mingo@elte.hu>2011-09-18 14:01:39 +0200
commitbfa322c48dc69bfdaee10faf3bd8dbc23b39a21c (patch)
tree95360c5d253115003080264d878f3c0f907f2978 /arch/x86
parent88ebc08ea9f721d1345d5414288a308ea42ac458 (diff)
parent003f6c9df54970d8b19578d195b3e2b398cdbde2 (diff)
downloadlinux-bfa322c48dc69bfdaee10faf3bd8dbc23b39a21c.tar.bz2
Merge branch 'linus' into sched/core
Merge reason: We are queueing up a dependent patch. Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/ia32/ia32entry.S2
-rw-r--r--arch/x86/include/asm/desc.h4
-rw-r--r--arch/x86/include/asm/irq_vectors.h4
-rw-r--r--arch/x86/include/asm/paravirt_types.h6
-rw-r--r--arch/x86/include/asm/ptrace.h19
-rw-r--r--arch/x86/include/asm/pvclock.h2
-rw-r--r--arch/x86/include/asm/traps.h2
-rw-r--r--arch/x86/include/asm/unistd_64.h4
-rw-r--r--arch/x86/include/asm/vsyscall.h6
-rw-r--r--arch/x86/include/asm/xen/page.h4
-rw-r--r--arch/x86/kernel/Makefile13
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c1
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c2
-rw-r--r--arch/x86/kernel/cpu/perf_event.c3
-rw-r--r--arch/x86/kernel/entry_32.S8
-rw-r--r--arch/x86/kernel/entry_64.S1
-rw-r--r--arch/x86/kernel/paravirt.c4
-rw-r--r--arch/x86/kernel/step.c2
-rw-r--r--arch/x86/kernel/syscall_table_32.S2
-rw-r--r--arch/x86/kernel/traps.c6
-rw-r--r--arch/x86/kernel/vmlinux.lds.S41
-rw-r--r--arch/x86/kernel/vsyscall_64.c90
-rw-r--r--arch/x86/kernel/vsyscall_emu_64.S36
-rw-r--r--arch/x86/kernel/vsyscall_trace.h29
-rw-r--r--arch/x86/kvm/Kconfig3
-rw-r--r--arch/x86/mm/fault.c15
-rw-r--r--arch/x86/pci/acpi.c14
-rw-r--r--arch/x86/platform/mrst/mrst.c4
-rw-r--r--arch/x86/platform/olpc/olpc.c4
-rw-r--r--arch/x86/vdso/vdso.S1
-rw-r--r--arch/x86/vdso/vdso32/sysenter.S2
-rw-r--r--arch/x86/xen/Makefile2
-rw-r--r--arch/x86/xen/enlighten.c8
-rw-r--r--arch/x86/xen/mmu.c16
-rw-r--r--arch/x86/xen/setup.c19
-rw-r--r--arch/x86/xen/smp.c14
-rw-r--r--arch/x86/xen/time.c5
-rw-r--r--arch/x86/xen/xen-asm_32.S8
38 files changed, 258 insertions, 148 deletions
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index a0e866d233ee..54edb207ff3a 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -672,7 +672,7 @@ ia32_sys_call_table:
.quad sys32_vm86_warning /* vm86 */
.quad quiet_ni_syscall /* query_module */
.quad sys_poll
- .quad compat_sys_nfsservctl
+ .quad quiet_ni_syscall /* old nfsservctl */
.quad sys_setresgid16 /* 170 */
.quad sys_getresgid16
.quad sys_prctl
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 7b439d9aea2a..41935fadfdfc 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -27,8 +27,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
desc->base2 = (info->base_addr & 0xff000000) >> 24;
/*
- * Don't allow setting of the lm bit. It is useless anyway
- * because 64bit system calls require __USER_CS:
+ * Don't allow setting of the lm bit. It would confuse
+ * user_64bit_mode and would get overridden by sysret anyway.
*/
desc->l = 0;
}
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index f9a320984a10..7e50f06393aa 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -17,7 +17,6 @@
* Vectors 0 ... 31 : system traps and exceptions - hardcoded events
* Vectors 32 ... 127 : device interrupts
* Vector 128 : legacy int80 syscall interface
- * Vector 204 : legacy x86_64 vsyscall emulation
* Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts
* Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts
*
@@ -51,9 +50,6 @@
#ifdef CONFIG_X86_32
# define SYSCALL_VECTOR 0x80
#endif
-#ifdef CONFIG_X86_64
-# define VSYSCALL_EMU_VECTOR 0xcc
-#endif
/*
* Vectors 0x30-0x3f are used for ISA interrupts.
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 2c7652163111..8e8b9a4987ee 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -41,6 +41,7 @@
#include <asm/desc_defs.h>
#include <asm/kmap_types.h>
+#include <asm/pgtable_types.h>
struct page;
struct thread_struct;
@@ -63,6 +64,11 @@ struct paravirt_callee_save {
struct pv_info {
unsigned int kernel_rpl;
int shared_kernel_pmd;
+
+#ifdef CONFIG_X86_64
+ u16 extra_user_64bit_cs; /* __USER_CS if none */
+#endif
+
int paravirt_enabled;
const char *name;
};
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 94e7618fcac8..35664547125b 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -131,6 +131,9 @@ struct pt_regs {
#ifdef __KERNEL__
#include <linux/init.h>
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt_types.h>
+#endif
struct cpuinfo_x86;
struct task_struct;
@@ -187,6 +190,22 @@ static inline int v8086_mode(struct pt_regs *regs)
#endif
}
+#ifdef CONFIG_X86_64
+static inline bool user_64bit_mode(struct pt_regs *regs)
+{
+#ifndef CONFIG_PARAVIRT
+ /*
+ * On non-paravirt systems, this is the only long mode CPL 3
+ * selector. We do not allow long mode selectors in the LDT.
+ */
+ return regs->cs == __USER_CS;
+#else
+ /* Headers are too twisted for this to go in paravirt.h. */
+ return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
+#endif
+}
+#endif
+
/*
* X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode
* when it traps. The previous stack will be directly underneath the saved
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index a518c0a45044..c59cc97fe6c1 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -44,7 +44,7 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
: "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
#elif defined(__x86_64__)
__asm__ (
- "mul %[mul_frac] ; shrd $32, %[hi], %[lo]"
+ "mulq %[mul_frac] ; shrd $32, %[hi], %[lo]"
: [lo]"=a"(product),
[hi]"=d"(tmp)
: "0"(delta),
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 2bae0a513b40..0012d0902c5f 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -40,7 +40,6 @@ asmlinkage void alignment_check(void);
asmlinkage void machine_check(void);
#endif /* CONFIG_X86_MCE */
asmlinkage void simd_coprocessor_error(void);
-asmlinkage void emulate_vsyscall(void);
dotraplinkage void do_divide_error(struct pt_regs *, long);
dotraplinkage void do_debug(struct pt_regs *, long);
@@ -67,7 +66,6 @@ dotraplinkage void do_alignment_check(struct pt_regs *, long);
dotraplinkage void do_machine_check(struct pt_regs *, long);
#endif
dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long);
-dotraplinkage void do_emulate_vsyscall(struct pt_regs *, long);
#ifdef CONFIG_X86_32
dotraplinkage void do_iret_error(struct pt_regs *, long);
#endif
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 705bf139288c..201040573444 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -414,7 +414,7 @@ __SYSCALL(__NR_query_module, sys_ni_syscall)
__SYSCALL(__NR_quotactl, sys_quotactl)
#define __NR_nfsservctl 180
-__SYSCALL(__NR_nfsservctl, sys_nfsservctl)
+__SYSCALL(__NR_nfsservctl, sys_ni_syscall)
/* reserved for LiS/STREAMS */
#define __NR_getpmsg 181
@@ -681,6 +681,8 @@ __SYSCALL(__NR_syncfs, sys_syncfs)
__SYSCALL(__NR_sendmmsg, sys_sendmmsg)
#define __NR_setns 308
__SYSCALL(__NR_setns, sys_setns)
+#define __NR_getcpu 309
+__SYSCALL(__NR_getcpu, sys_getcpu)
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index 60107072c28b..eaea1d31f753 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -27,6 +27,12 @@ extern struct timezone sys_tz;
extern void map_vsyscall(void);
+/*
+ * Called on instruction fetch fault in vsyscall page.
+ * Returns true if handled.
+ */
+extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
+
#endif /* __KERNEL__ */
#endif /* _ASM_X86_VSYSCALL_H */
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 64a619d47d34..7ff4669580cf 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -39,7 +39,7 @@ typedef struct xpaddr {
((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE))
extern unsigned long *machine_to_phys_mapping;
-extern unsigned int machine_to_phys_order;
+extern unsigned long machine_to_phys_nr;
extern unsigned long get_phys_to_machine(unsigned long pfn);
extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
@@ -87,7 +87,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
if (xen_feature(XENFEAT_auto_translated_physmap))
return mfn;
- if (unlikely((mfn >> machine_to_phys_order) != 0)) {
+ if (unlikely(mfn >= machine_to_phys_nr)) {
pfn = ~0;
goto try_override;
}
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 04105574c8e9..82f2912155a5 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -17,19 +17,6 @@ CFLAGS_REMOVE_ftrace.o = -pg
CFLAGS_REMOVE_early_printk.o = -pg
endif
-#
-# vsyscalls (which work on the user stack) should have
-# no stack-protector checks:
-#
-nostackp := $(call cc-option, -fno-stack-protector)
-CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
-CFLAGS_hpet.o := $(nostackp)
-CFLAGS_paravirt.o := $(nostackp)
-GCOV_PROFILE_vsyscall_64.o := n
-GCOV_PROFILE_hpet.o := n
-GCOV_PROFILE_tsc.o := n
-GCOV_PROFILE_paravirt.o := n
-
obj-y := process_$(BITS).o signal.o entry_$(BITS).o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time.o ioport.o ldt.o dumpstack.o
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index adc66c3a1fef..34b18594e724 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -207,7 +207,6 @@ static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_ri
((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
APIC_DM_INIT;
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
- mdelay(10);
val = (1UL << UVH_IPI_INT_SEND_SHFT) |
(phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 08119a37e53c..6b96110bb0c3 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -149,7 +149,6 @@ struct set_mtrr_data {
*/
static int mtrr_rendezvous_handler(void *info)
{
-#ifdef CONFIG_SMP
struct set_mtrr_data *data = info;
/*
@@ -171,7 +170,6 @@ static int mtrr_rendezvous_handler(void *info)
} else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) {
mtrr_if->set_all();
}
-#endif
return 0;
}
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 4ee3abf20ed6..cfa62ec090ec 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1900,6 +1900,9 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
perf_callchain_store(entry, regs->ip);
+ if (!current->mm)
+ return;
+
if (perf_callchain_user32(regs, entry))
return;
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 5c1a91974918..f3f6f5344001 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -54,6 +54,7 @@
#include <asm/ftrace.h>
#include <asm/irq_vectors.h>
#include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
#include <linux/elf-em.h>
@@ -873,12 +874,7 @@ ENTRY(simd_coprocessor_error)
661: pushl_cfi $do_general_protection
662:
.section .altinstructions,"a"
- .balign 4
- .long 661b
- .long 663f
- .word X86_FEATURE_XMM
- .byte 662b-661b
- .byte 664f-663f
+ altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f
.previous
.section .altinstr_replacement,"ax"
663: pushl $do_simd_coprocessor_error
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e13329d800c8..6419bb05ecd5 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1111,7 +1111,6 @@ zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
zeroentry coprocessor_error do_coprocessor_error
errorentry alignment_check do_alignment_check
zeroentry simd_coprocessor_error do_simd_coprocessor_error
-zeroentry emulate_vsyscall do_emulate_vsyscall
/* Reload gs selector with exception handling */
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 613a7931ecc1..d90272e6bc40 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -307,6 +307,10 @@ struct pv_info pv_info = {
.paravirt_enabled = 0,
.kernel_rpl = 0,
.shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
+
+#ifdef CONFIG_X86_64
+ .extra_user_64bit_cs = __USER_CS,
+#endif
};
struct pv_init_ops pv_init_ops = {
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 7977f0cfe339..c346d1161488 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -74,7 +74,7 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
#ifdef CONFIG_X86_64
case 0x40 ... 0x4f:
- if (regs->cs != __USER_CS)
+ if (!user_64bit_mode(regs))
/* 32-bit mode: register increment */
return 0;
/* 64-bit mode: REX prefix */
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index fbb0a045a1a2..bc19be332bc9 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -168,7 +168,7 @@ ENTRY(sys_call_table)
.long ptregs_vm86
.long sys_ni_syscall /* Old sys_query_module */
.long sys_poll
- .long sys_nfsservctl
+ .long sys_ni_syscall /* Old nfsservctl */
.long sys_setresgid16 /* 170 */
.long sys_getresgid16
.long sys_prctl
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 9682ec50180c..6913369c234c 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -872,12 +872,6 @@ void __init trap_init(void)
set_bit(SYSCALL_VECTOR, used_vectors);
#endif
-#ifdef CONFIG_X86_64
- BUG_ON(test_bit(VSYSCALL_EMU_VECTOR, used_vectors));
- set_system_intr_gate(VSYSCALL_EMU_VECTOR, &emulate_vsyscall);
- set_bit(VSYSCALL_EMU_VECTOR, used_vectors);
-#endif
-
/*
* Should be a barrier for any external CPU state:
*/
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 4aa9c54a9b76..0f703f10901a 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -71,7 +71,6 @@ PHDRS {
text PT_LOAD FLAGS(5); /* R_E */
data PT_LOAD FLAGS(6); /* RW_ */
#ifdef CONFIG_X86_64
- user PT_LOAD FLAGS(5); /* R_E */
#ifdef CONFIG_SMP
percpu PT_LOAD FLAGS(6); /* RW_ */
#endif
@@ -154,44 +153,16 @@ SECTIONS
#ifdef CONFIG_X86_64
-#define VSYSCALL_ADDR (-10*1024*1024)
-
-#define VLOAD_OFFSET (VSYSCALL_ADDR - __vsyscall_0 + LOAD_OFFSET)
-#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
-
-#define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0)
-#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
-
- . = ALIGN(4096);
- __vsyscall_0 = .;
-
- . = VSYSCALL_ADDR;
- .vsyscall : AT(VLOAD(.vsyscall)) {
- *(.vsyscall_0)
-
- . = 1024;
- *(.vsyscall_1)
-
- . = 2048;
- *(.vsyscall_2)
-
- . = 4096; /* Pad the whole page. */
- } :user =0xcc
- . = ALIGN(__vsyscall_0 + PAGE_SIZE, PAGE_SIZE);
-
-#undef VSYSCALL_ADDR
-#undef VLOAD_OFFSET
-#undef VLOAD
-#undef VVIRT_OFFSET
-#undef VVIRT
-
+ . = ALIGN(PAGE_SIZE);
__vvar_page = .;
.vvar : AT(ADDR(.vvar) - LOAD_OFFSET) {
+ /* work around gold bug 13023 */
+ __vvar_beginning_hack = .;
- /* Place all vvars at the offsets in asm/vvar.h. */
-#define EMIT_VVAR(name, offset) \
- . = offset; \
+ /* Place all vvars at the offsets in asm/vvar.h. */
+#define EMIT_VVAR(name, offset) \
+ . = __vvar_beginning_hack + offset; \
*(.vvar_ ## name)
#define __VVAR_KERNEL_LDS
#include <asm/vvar.h>
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index dda7dff9cef7..18ae83dd1cd7 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -18,9 +18,6 @@
* use the vDSO.
*/
-/* Disable profiling for userspace code: */
-#define DISABLE_BRANCH_PROFILING
-
#include <linux/time.h>
#include <linux/init.h>
#include <linux/kernel.h>
@@ -50,12 +47,36 @@
#include <asm/vgtod.h>
#include <asm/traps.h>
+#define CREATE_TRACE_POINTS
+#include "vsyscall_trace.h"
+
DEFINE_VVAR(int, vgetcpu_mode);
DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
{
.lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
};
+static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
+
+static int __init vsyscall_setup(char *str)
+{
+ if (str) {
+ if (!strcmp("emulate", str))
+ vsyscall_mode = EMULATE;
+ else if (!strcmp("native", str))
+ vsyscall_mode = NATIVE;
+ else if (!strcmp("none", str))
+ vsyscall_mode = NONE;
+ else
+ return -EINVAL;
+
+ return 0;
+ }
+
+ return -EINVAL;
+}
+early_param("vsyscall", vsyscall_setup);
+
void update_vsyscall_tz(void)
{
unsigned long flags;
@@ -100,7 +121,7 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
level, tsk->comm, task_pid_nr(tsk),
- message, regs->ip - 2, regs->cs,
+ message, regs->ip, regs->cs,
regs->sp, regs->ax, regs->si, regs->di);
}
@@ -118,46 +139,39 @@ static int addr_to_vsyscall_nr(unsigned long addr)
return nr;
}
-void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
+bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
{
struct task_struct *tsk;
unsigned long caller;
int vsyscall_nr;
long ret;
- local_irq_enable();
-
/*
- * Real 64-bit user mode code has cs == __USER_CS. Anything else
- * is bogus.
+ * No point in checking CS -- the only way to get here is a user mode
+ * trap to a high address, which means that we're in 64-bit user code.
*/
- if (regs->cs != __USER_CS) {
- /*
- * If we trapped from kernel mode, we might as well OOPS now
- * instead of returning to some random address and OOPSing
- * then.
- */
- BUG_ON(!user_mode(regs));
- /* Compat mode and non-compat 32-bit CS should both segfault. */
- warn_bad_vsyscall(KERN_WARNING, regs,
- "illegal int 0xcc from 32-bit mode");
- goto sigsegv;
+ WARN_ON_ONCE(address != regs->ip);
+
+ if (vsyscall_mode == NONE) {
+ warn_bad_vsyscall(KERN_INFO, regs,
+ "vsyscall attempted with vsyscall=none");
+ return false;
}
- /*
- * x86-ism here: regs->ip points to the instruction after the int 0xcc,
- * and int 0xcc is two bytes long.
- */
- vsyscall_nr = addr_to_vsyscall_nr(regs->ip - 2);
+ vsyscall_nr = addr_to_vsyscall_nr(address);
+
+ trace_emulate_vsyscall(vsyscall_nr);
+
if (vsyscall_nr < 0) {
warn_bad_vsyscall(KERN_WARNING, regs,
- "illegal int 0xcc (exploit attempt?)");
+ "misaligned vsyscall (exploit attempt or buggy program) -- look up the vsyscall kernel parameter if you need a workaround");
goto sigsegv;
}
if (get_user(caller, (unsigned long __user *)regs->sp) != 0) {
- warn_bad_vsyscall(KERN_WARNING, regs, "int 0xcc with bad stack (exploit attempt?)");
+ warn_bad_vsyscall(KERN_WARNING, regs,
+ "vsyscall with bad stack (exploit attempt?)");
goto sigsegv;
}
@@ -202,13 +216,11 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
regs->ip = caller;
regs->sp += 8;
- local_irq_disable();
- return;
+ return true;
sigsegv:
- regs->ip -= 2; /* The faulting instruction should be the int 0xcc. */
force_sig(SIGSEGV, current);
- local_irq_disable();
+ return true;
}
/*
@@ -256,15 +268,21 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
void __init map_vsyscall(void)
{
- extern char __vsyscall_0;
- unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
+ extern char __vsyscall_page;
+ unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
extern char __vvar_page;
unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page);
- /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */
- __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
+ __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall,
+ vsyscall_mode == NATIVE
+ ? PAGE_KERNEL_VSYSCALL
+ : PAGE_KERNEL_VVAR);
+ BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_FIRST_PAGE) !=
+ (unsigned long)VSYSCALL_START);
+
__set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR);
- BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != (unsigned long)VVAR_ADDRESS);
+ BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) !=
+ (unsigned long)VVAR_ADDRESS);
}
static int __init vsyscall_init(void)
diff --git a/arch/x86/kernel/vsyscall_emu_64.S b/arch/x86/kernel/vsyscall_emu_64.S
index ffa845eae5ca..c9596a9af159 100644
--- a/arch/x86/kernel/vsyscall_emu_64.S
+++ b/arch/x86/kernel/vsyscall_emu_64.S
@@ -7,21 +7,31 @@
*/
#include <linux/linkage.h>
+
#include <asm/irq_vectors.h>
+#include <asm/page_types.h>
+#include <asm/unistd_64.h>
+
+__PAGE_ALIGNED_DATA
+ .globl __vsyscall_page
+ .balign PAGE_SIZE, 0xcc
+ .type __vsyscall_page, @object
+__vsyscall_page:
+
+ mov $__NR_gettimeofday, %rax
+ syscall
+ ret
-/* The unused parts of the page are filled with 0xcc by the linker script. */
+ .balign 1024, 0xcc
+ mov $__NR_time, %rax
+ syscall
+ ret
-.section .vsyscall_0, "a"
-ENTRY(vsyscall_0)
- int $VSYSCALL_EMU_VECTOR
-END(vsyscall_0)
+ .balign 1024, 0xcc
+ mov $__NR_getcpu, %rax
+ syscall
+ ret
-.section .vsyscall_1, "a"
-ENTRY(vsyscall_1)
- int $VSYSCALL_EMU_VECTOR
-END(vsyscall_1)
+ .balign 4096, 0xcc
-.section .vsyscall_2, "a"
-ENTRY(vsyscall_2)
- int $VSYSCALL_EMU_VECTOR
-END(vsyscall_2)
+ .size __vsyscall_page, 4096
diff --git a/arch/x86/kernel/vsyscall_trace.h b/arch/x86/kernel/vsyscall_trace.h
new file mode 100644
index 000000000000..a8b2edec54fe
--- /dev/null
+++ b/arch/x86/kernel/vsyscall_trace.h
@@ -0,0 +1,29 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM vsyscall
+
+#if !defined(__VSYSCALL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __VSYSCALL_TRACE_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(emulate_vsyscall,
+
+ TP_PROTO(int nr),
+
+ TP_ARGS(nr),
+
+ TP_STRUCT__entry(__field(int, nr)),
+
+ TP_fast_assign(
+ __entry->nr = nr;
+ ),
+
+ TP_printk("nr = %d", __entry->nr)
+);
+
+#endif
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../arch/x86/kernel
+#define TRACE_INCLUDE_FILE vsyscall_trace
+#include <trace/define_trace.h>
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 988724b236b6..ff5790d8e990 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -22,6 +22,8 @@ config KVM
depends on HAVE_KVM
# for device assignment:
depends on PCI
+ # for TASKSTATS/TASK_DELAY_ACCT:
+ depends on NET
select PREEMPT_NOTIFIERS
select MMU_NOTIFIER
select ANON_INODES
@@ -31,6 +33,7 @@ config KVM
select KVM_ASYNC_PF
select USER_RETURN_NOTIFIER
select KVM_MMIO
+ select TASKSTATS
select TASK_DELAY_ACCT
---help---
Support hosting fully virtualized guest machines using hardware
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 4d09df054e39..0d17c8c50acd 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -17,6 +17,7 @@
#include <asm/traps.h> /* dotraplinkage, ... */
#include <asm/pgalloc.h> /* pgd_*(), ... */
#include <asm/kmemcheck.h> /* kmemcheck_*(), ... */
+#include <asm/vsyscall.h>
/*
* Page fault error code bits:
@@ -105,7 +106,7 @@ check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr,
* but for now it's good enough to assume that long
* mode only uses well known segments or kernel.
*/
- return (!user_mode(regs)) || (regs->cs == __USER_CS);
+ return (!user_mode(regs) || user_64bit_mode(regs));
#endif
case 0x60:
/* 0x64 thru 0x67 are valid prefixes in all modes. */
@@ -720,6 +721,18 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
if (is_errata100(regs, address))
return;
+#ifdef CONFIG_X86_64
+ /*
+ * Instruction fetch faults in the vsyscall page might need
+ * emulation.
+ */
+ if (unlikely((error_code & PF_INSTR) &&
+ ((address & ~0xfff) == VSYSCALL_START))) {
+ if (emulate_vsyscall(regs, address))
+ return;
+ }
+#endif
+
if (unlikely(show_unhandled_signals))
show_signal_msg(regs, error_code, address, tsk);
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index ae3cb23cd89b..039d91315bc5 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -360,6 +360,20 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
}
}
+ /* After the PCI-E bus has been walked and all devices discovered,
+ * configure any settings of the fabric that might be necessary.
+ */
+ if (bus) {
+ struct pci_bus *child;
+ list_for_each_entry(child, &bus->children, node) {
+ struct pci_dev *self = child->self;
+ if (!self)
+ continue;
+
+ pcie_bus_configure_settings(child, self->pcie_mpss);
+ }
+ }
+
if (!bus)
kfree(sd);
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index 7000e74b3087..58425adc22c6 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -689,7 +689,9 @@ static int __init sfi_parse_devs(struct sfi_table_header *table)
irq_attr.trigger = 1;
irq_attr.polarity = 1;
io_apic_set_pci_routing(NULL, pentry->irq, &irq_attr);
- }
+ } else
+ pentry->irq = 0; /* No irq */
+
switch (pentry->type) {
case SFI_DEV_TYPE_IPC:
/* ID as IRQ is a hack that will go away */
diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c
index 8b9940e78e2f..7cce722667b8 100644
--- a/arch/x86/platform/olpc/olpc.c
+++ b/arch/x86/platform/olpc/olpc.c
@@ -161,13 +161,13 @@ restart:
if (inbuf && inlen) {
/* write data to EC */
for (i = 0; i < inlen; i++) {
+ pr_devel("olpc-ec: sending cmd arg 0x%x\n", inbuf[i]);
+ outb(inbuf[i], 0x68);
if (wait_on_ibf(0x6c, 0)) {
printk(KERN_ERR "olpc-ec: timeout waiting for"
" EC accept data!\n");
goto err;
}
- pr_devel("olpc-ec: sending cmd arg 0x%x\n", inbuf[i]);
- outb(inbuf[i], 0x68);
}
}
if (outbuf && outlen) {
diff --git a/arch/x86/vdso/vdso.S b/arch/x86/vdso/vdso.S
index 1b979c12ba85..01f5e3b4613c 100644
--- a/arch/x86/vdso/vdso.S
+++ b/arch/x86/vdso/vdso.S
@@ -9,6 +9,7 @@ __PAGE_ALIGNED_DATA
vdso_start:
.incbin "arch/x86/vdso/vdso.so"
vdso_end:
+ .align PAGE_SIZE /* extra data here leaks to userspace. */
.previous
diff --git a/arch/x86/vdso/vdso32/sysenter.S b/arch/x86/vdso/vdso32/sysenter.S
index e2800affa754..e354bceee0e0 100644
--- a/arch/x86/vdso/vdso32/sysenter.S
+++ b/arch/x86/vdso/vdso32/sysenter.S
@@ -43,7 +43,7 @@ __kernel_vsyscall:
.space 7,0x90
/* 14: System call restart point is here! (SYSENTER_RETURN-2) */
- jmp .Lenter_kernel
+ int $0x80
/* 16: System call normal return point is here! */
VDSO32_SYSENTER_RETURN: /* Symbol used by sysenter.c via vdso32-syms.h */
pop %ebp
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 3326204e251f..add2c2d729ce 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -15,7 +15,7 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
grant-table.o suspend.o platform-pci-unplug.o \
p2m.o
-obj-$(CONFIG_FTRACE) += trace.o
+obj-$(CONFIG_EVENT_TRACING) += trace.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 974a528458a0..2d69617950f7 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -77,8 +77,8 @@ EXPORT_SYMBOL_GPL(xen_domain_type);
unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
EXPORT_SYMBOL(machine_to_phys_mapping);
-unsigned int machine_to_phys_order;
-EXPORT_SYMBOL(machine_to_phys_order);
+unsigned long machine_to_phys_nr;
+EXPORT_SYMBOL(machine_to_phys_nr);
struct start_info *xen_start_info;
EXPORT_SYMBOL_GPL(xen_start_info);
@@ -951,6 +951,10 @@ static const struct pv_info xen_info __initconst = {
.paravirt_enabled = 1,
.shared_kernel_pmd = 0,
+#ifdef CONFIG_X86_64
+ .extra_user_64bit_cs = FLAT_USER_CS64,
+#endif
+
.name = "Xen",
};
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index f987bde77c49..20a614275064 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1713,15 +1713,19 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
void __init xen_setup_machphys_mapping(void)
{
struct xen_machphys_mapping mapping;
- unsigned long machine_to_phys_nr_ents;
if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
machine_to_phys_mapping = (unsigned long *)mapping.v_start;
- machine_to_phys_nr_ents = mapping.max_mfn + 1;
+ machine_to_phys_nr = mapping.max_mfn + 1;
} else {
- machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
+ machine_to_phys_nr = MACH2PHYS_NR_ENTRIES;
}
- machine_to_phys_order = fls(machine_to_phys_nr_ents - 1);
+#ifdef CONFIG_X86_32
+ if ((machine_to_phys_mapping + machine_to_phys_nr)
+ < machine_to_phys_mapping)
+ machine_to_phys_nr = (unsigned long *)NULL
+ - machine_to_phys_mapping;
+#endif
}
#ifdef CONFIG_X86_64
@@ -1916,6 +1920,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
# endif
#else
case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
+ case VVAR_PAGE:
#endif
case FIX_TEXT_POKE0:
case FIX_TEXT_POKE1:
@@ -1956,7 +1961,8 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
#ifdef CONFIG_X86_64
/* Replicate changes to map the vsyscall page into the user
pagetable vsyscall mapping. */
- if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) {
+ if ((idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) ||
+ idx == VVAR_PAGE) {
unsigned long vaddr = __fix_to_virt(idx);
set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
}
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index df118a825f39..c3b8d440873c 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -184,6 +184,19 @@ static unsigned long __init xen_set_identity(const struct e820entry *list,
PFN_UP(start_pci), PFN_DOWN(last));
return identity;
}
+
+static unsigned long __init xen_get_max_pages(void)
+{
+ unsigned long max_pages = MAX_DOMAIN_PAGES;
+ domid_t domid = DOMID_SELF;
+ int ret;
+
+ ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
+ if (ret > 0)
+ max_pages = ret;
+ return min(max_pages, MAX_DOMAIN_PAGES);
+}
+
/**
* machine_specific_memory_setup - Hook for machine specific memory setup.
**/
@@ -292,6 +305,12 @@ char * __init xen_memory_setup(void)
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+ extra_limit = xen_get_max_pages();
+ if (extra_limit >= max_pfn)
+ extra_pages = extra_limit - max_pfn;
+ else
+ extra_pages = 0;
+
extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820);
/*
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index b4533a86d7e4..d4fc6d454f8d 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -32,6 +32,7 @@
#include <xen/page.h>
#include <xen/events.h>
+#include <xen/hvc-console.h>
#include "xen-ops.h"
#include "mmu.h"
@@ -207,6 +208,15 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
unsigned cpu;
unsigned int i;
+ if (skip_ioapic_setup) {
+ char *m = (max_cpus == 0) ?
+ "The nosmp parameter is incompatible with Xen; " \
+ "use Xen dom0_max_vcpus=1 parameter" :
+ "The noapic parameter is incompatible with Xen";
+
+ xen_raw_printk(m);
+ panic(m);
+ }
xen_init_lock_cpu(0);
smp_store_cpu_info(0);
@@ -521,8 +531,6 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
native_smp_prepare_cpus(max_cpus);
WARN_ON(xen_smp_intr_init(0));
- if (!xen_have_vector_callback)
- return;
xen_init_lock_cpu(0);
xen_init_spinlocks();
}
@@ -546,6 +554,8 @@ static void xen_hvm_cpu_die(unsigned int cpu)
void __init xen_hvm_smp_init(void)
{
+ if (!xen_have_vector_callback)
+ return;
smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
smp_ops.cpu_up = xen_hvm_cpu_up;
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 5158c505bef9..163b4679556e 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -168,9 +168,10 @@ cycle_t xen_clocksource_read(void)
struct pvclock_vcpu_time_info *src;
cycle_t ret;
- src = &get_cpu_var(xen_vcpu)->time;
+ preempt_disable_notrace();
+ src = &__get_cpu_var(xen_vcpu)->time;
ret = pvclock_clocksource_read(src);
- put_cpu_var(xen_vcpu);
+ preempt_enable_notrace();
return ret;
}
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index 22a2093b5862..b040b0e518ca 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -113,11 +113,13 @@ xen_iret_start_crit:
/*
* If there's something pending, mask events again so we can
- * jump back into xen_hypervisor_callback
+ * jump back into xen_hypervisor_callback. Otherwise do not
+ * touch XEN_vcpu_info_mask.
*/
- sete XEN_vcpu_info_mask(%eax)
+ jne 1f
+ movb $1, XEN_vcpu_info_mask(%eax)
- popl %eax
+1: popl %eax
/*
* From this point on the registers are restored and the stack