From b332828c39326b1dca617f387dd15d12e81cd5f0 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:43:10 +0530 Subject: hw-breakpoints: prepare the code for Hardware Breakpoint interfaces The generic hardware breakpoint interface provides an abstraction of hardware breakpoints in front of specific arch implementations for both kernel and user side breakpoints. This includes execution breakpoints and read/write breakpoints, also known as "watchpoints". This patch introduces header files containing constants, structure definitions and declaration of functions used by the hardware breakpoint core and x86 specific code. It also introduces an array based storage for the debug-register values in 'struct thread_struct', while modifying all users of debugreg member in the structure. [ Impact: add headers for new hardware breakpoint interface ] Original-patch-by: Alan Stern Signed-off-by: K.Prasad Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker --- arch/x86/include/asm/a.out-core.h | 8 +++--- arch/x86/include/asm/debugreg.h | 29 +++++++++++++++++++ arch/x86/include/asm/hw_breakpoint.h | 55 ++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/processor.h | 8 +++--- arch/x86/kernel/process.c | 16 +++++------ arch/x86/kernel/ptrace.c | 16 +++++------ arch/x86/power/cpu_32.c | 8 +++--- arch/x86/power/cpu_64.c | 8 +++--- 8 files changed, 116 insertions(+), 32 deletions(-) create mode 100644 arch/x86/include/asm/hw_breakpoint.h (limited to 'arch/x86') diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h index bb70e397aa84..fc4685dd6e4d 100644 --- a/arch/x86/include/asm/a.out-core.h +++ b/arch/x86/include/asm/a.out-core.h @@ -32,10 +32,10 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) >> PAGE_SHIFT; dump->u_dsize -= dump->u_tsize; dump->u_ssize = 0; - dump->u_debugreg[0] = current->thread.debugreg0; - dump->u_debugreg[1] = current->thread.debugreg1; - dump->u_debugreg[2] = current->thread.debugreg2; - dump->u_debugreg[3] = current->thread.debugreg3; + dump->u_debugreg[0] = current->thread.debugreg[0]; + dump->u_debugreg[1] = current->thread.debugreg[1]; + dump->u_debugreg[2] = current->thread.debugreg[2]; + dump->u_debugreg[3] = current->thread.debugreg[3]; dump->u_debugreg[4] = 0; dump->u_debugreg[5] = 0; dump->u_debugreg[6] = current->thread.debugreg6; diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 3ea6f37be9e2..23439fbb1d0e 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -18,6 +18,7 @@ #define DR_TRAP1 (0x2) /* db1 */ #define DR_TRAP2 (0x4) /* db2 */ #define DR_TRAP3 (0x8) /* db3 */ +#define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3) #define DR_STEP (0x4000) /* single-step */ #define DR_SWITCH (0x8000) /* task switch */ @@ -49,6 +50,8 @@ #define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */ #define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */ +#define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */ +#define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */ #define DR_ENABLE_SIZE 2 /* 2 enable bits per register */ #define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */ @@ -67,4 +70,30 @@ #define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ #define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ +/* + * HW breakpoint additions + */ +#ifdef __KERNEL__ + +/* For process management */ +extern void flush_thread_hw_breakpoint(struct task_struct *tsk); +extern int copy_thread_hw_breakpoint(struct task_struct *tsk, + struct task_struct *child, unsigned long clone_flags); + +/* For CPU management */ +extern void load_debug_registers(void); +static inline void hw_breakpoint_disable(void) +{ + /* Zero the control register for HW Breakpoint */ + set_debugreg(0UL, 7); + + /* Zero-out the individual HW breakpoint address registers */ + set_debugreg(0UL, 0); + set_debugreg(0UL, 1); + set_debugreg(0UL, 2); + set_debugreg(0UL, 3); +} + +#endif /* __KERNEL__ */ + #endif /* _ASM_X86_DEBUGREG_H */ diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h new file mode 100644 index 000000000000..1acb4d45de70 --- /dev/null +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -0,0 +1,55 @@ +#ifndef _I386_HW_BREAKPOINT_H +#define _I386_HW_BREAKPOINT_H + +#ifdef __KERNEL__ +#define __ARCH_HW_BREAKPOINT_H + +struct arch_hw_breakpoint { + char *name; /* Contains name of the symbol to set bkpt */ + unsigned long address; + u8 len; + u8 type; +}; + +#include +#include + +/* Available HW breakpoint length encodings */ +#define HW_BREAKPOINT_LEN_1 0x40 +#define HW_BREAKPOINT_LEN_2 0x44 +#define HW_BREAKPOINT_LEN_4 0x4c +#define HW_BREAKPOINT_LEN_EXECUTE 0x40 + +#ifdef CONFIG_X86_64 +#define HW_BREAKPOINT_LEN_8 0x48 +#endif + +/* Available HW breakpoint type encodings */ + +/* trigger on instruction execute */ +#define HW_BREAKPOINT_EXECUTE 0x80 +/* trigger on memory write */ +#define HW_BREAKPOINT_WRITE 0x81 +/* trigger on memory read or write */ +#define HW_BREAKPOINT_RW 0x83 + +/* Total number of available HW breakpoint registers */ +#define HBP_NUM 4 + +extern struct hw_breakpoint *hbp_kernel[HBP_NUM]; +DECLARE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]); +extern unsigned int hbp_user_refcount[HBP_NUM]; + +extern void arch_install_thread_hw_breakpoint(struct task_struct *tsk); +extern void arch_uninstall_thread_hw_breakpoint(void); +extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len); +extern int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, + struct task_struct *tsk); +extern void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk); +extern void arch_flush_thread_hw_breakpoint(struct task_struct *tsk); +extern void arch_update_kernel_hw_breakpoint(void *); +extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, + unsigned long val, void *data); +#endif /* __KERNEL__ */ +#endif /* _I386_HW_BREAKPOINT_H */ + diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 0b2fab0051e0..448b34a8e393 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -29,6 +29,7 @@ struct mm_struct; #include #include +#define HBP_NUM 4 /* * Default implementation of macro that returns current * instruction pointer ("program counter"). @@ -431,12 +432,11 @@ struct thread_struct { unsigned long fs; unsigned long gs; /* Hardware debugging registers: */ - unsigned long debugreg0; - unsigned long debugreg1; - unsigned long debugreg2; - unsigned long debugreg3; + unsigned long debugreg[HBP_NUM]; unsigned long debugreg6; unsigned long debugreg7; + /* Hardware breakpoint info */ + struct hw_breakpoint *hbp[HBP_NUM]; /* Fault info: */ unsigned long cr2; unsigned long trap_no; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index fb5dfb891f0f..291527cb438a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -106,10 +106,10 @@ void flush_thread(void) clear_tsk_thread_flag(tsk, TIF_DEBUG); - tsk->thread.debugreg0 = 0; - tsk->thread.debugreg1 = 0; - tsk->thread.debugreg2 = 0; - tsk->thread.debugreg3 = 0; + tsk->thread.debugreg[0] = 0; + tsk->thread.debugreg[1] = 0; + tsk->thread.debugreg[2] = 0; + tsk->thread.debugreg[3] = 0; tsk->thread.debugreg6 = 0; tsk->thread.debugreg7 = 0; memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); @@ -194,10 +194,10 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, update_debugctlmsr(next->debugctlmsr); if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { - set_debugreg(next->debugreg0, 0); - set_debugreg(next->debugreg1, 1); - set_debugreg(next->debugreg2, 2); - set_debugreg(next->debugreg3, 3); + set_debugreg(next->debugreg[0], 0); + set_debugreg(next->debugreg[1], 1); + set_debugreg(next->debugreg[2], 2); + set_debugreg(next->debugreg[3], 3); /* no 4 and 5 */ set_debugreg(next->debugreg6, 6); set_debugreg(next->debugreg7, 7); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 09ecbde91c13..313be40be55a 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -471,10 +471,10 @@ static int genregs_set(struct task_struct *target, static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) { switch (n) { - case 0: return child->thread.debugreg0; - case 1: return child->thread.debugreg1; - case 2: return child->thread.debugreg2; - case 3: return child->thread.debugreg3; + case 0: return child->thread.debugreg[0]; + case 1: return child->thread.debugreg[1]; + case 2: return child->thread.debugreg[2]; + case 3: return child->thread.debugreg[3]; case 6: return child->thread.debugreg6; case 7: return child->thread.debugreg7; } @@ -493,10 +493,10 @@ static int ptrace_set_debugreg(struct task_struct *child, return -EIO; switch (n) { - case 0: child->thread.debugreg0 = data; break; - case 1: child->thread.debugreg1 = data; break; - case 2: child->thread.debugreg2 = data; break; - case 3: child->thread.debugreg3 = data; break; + case 0: child->thread.debugreg[0] = data; break; + case 1: child->thread.debugreg[1] = data; break; + case 2: child->thread.debugreg[2] = data; break; + case 3: child->thread.debugreg[3] = data; break; case 6: if ((data & ~0xffffffffUL) != 0) diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c index ce702c5b3a2c..519913948003 100644 --- a/arch/x86/power/cpu_32.c +++ b/arch/x86/power/cpu_32.c @@ -84,10 +84,10 @@ static void fix_processor_context(void) * Now maybe reload the debug registers */ if (current->thread.debugreg7) { - set_debugreg(current->thread.debugreg0, 0); - set_debugreg(current->thread.debugreg1, 1); - set_debugreg(current->thread.debugreg2, 2); - set_debugreg(current->thread.debugreg3, 3); + set_debugreg(current->thread.debugreg[0], 0); + set_debugreg(current->thread.debugreg[1], 1); + set_debugreg(current->thread.debugreg[2], 2); + set_debugreg(current->thread.debugreg[3], 3); /* no 4 and 5 */ set_debugreg(current->thread.debugreg6, 6); set_debugreg(current->thread.debugreg7, 7); diff --git a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu_64.c index 5343540f2607..1e3bdcc959ff 100644 --- a/arch/x86/power/cpu_64.c +++ b/arch/x86/power/cpu_64.c @@ -163,10 +163,10 @@ static void fix_processor_context(void) * Now maybe reload the debug registers */ if (current->thread.debugreg7){ - loaddebug(¤t->thread, 0); - loaddebug(¤t->thread, 1); - loaddebug(¤t->thread, 2); - loaddebug(¤t->thread, 3); + set_debugreg(current->thread.debugreg[0], 0); + set_debugreg(current->thread.debugreg[1], 1); + set_debugreg(current->thread.debugreg[2], 2); + set_debugreg(current->thread.debugreg[3], 3); /* no 4 and 5 */ loaddebug(¤t->thread, 6); loaddebug(¤t->thread, 7); -- cgit v1.2.3 From 0067f1297241ea567f2b22a455519752d70fcca9 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:43:57 +0530 Subject: hw-breakpoints: x86 architecture implementation of Hardware Breakpoint interfaces This patch introduces the arch-specific implementation of the generic hardware breakpoints in kernel/hw_breakpoint.c inside x86 specific directories. It contains functions which help to validate and serve requests using Hardware Breakpoint registers on x86 processors. [ fweisbec@gmail.com: fix conflict against kmemcheck ] Original-patch-by: Alan Stern Signed-off-by: K.Prasad Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker --- arch/x86/Kconfig | 1 + arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/hw_breakpoint.c | 382 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 384 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/hw_breakpoint.c (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index df9e885eee14..3033375ed6bc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -46,6 +46,7 @@ config X86 select HAVE_KERNEL_GZIP select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA + select HAVE_HW_BREAKPOINT config ARCH_DEFCONFIG string diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 77df4d654ff9..cbc781829173 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -36,7 +36,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o obj-y += bootflag.o e820.o obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o -obj-y += alternative.o i8253.o pci-nommu.o +obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o obj-y += tsc.o io_delay.o rtc.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c new file mode 100644 index 000000000000..4867c9f3b5fb --- /dev/null +++ b/arch/x86/kernel/hw_breakpoint.c @@ -0,0 +1,382 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) 2007 Alan Stern + * Copyright (C) 2009 IBM Corporation + */ + +/* + * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, + * using the CPU's debug registers. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* Unmasked kernel DR7 value */ +static unsigned long kdr7; + +/* + * Masks for the bits corresponding to registers DR0 - DR3 in DR7 register. + * Used to clear and verify the status of bits corresponding to DR0 - DR3 + */ +static const unsigned long dr7_masks[HBP_NUM] = { + 0x000f0003, /* LEN0, R/W0, G0, L0 */ + 0x00f0000c, /* LEN1, R/W1, G1, L1 */ + 0x0f000030, /* LEN2, R/W2, G2, L2 */ + 0xf00000c0 /* LEN3, R/W3, G3, L3 */ +}; + + +/* + * Encode the length, type, Exact, and Enable bits for a particular breakpoint + * as stored in debug register 7. + */ +static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) +{ + unsigned long bp_info; + + bp_info = (len | type) & 0xf; + bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE); + bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)) | + DR_GLOBAL_SLOWDOWN; + return bp_info; +} + +void arch_update_kernel_hw_breakpoint(void *unused) +{ + struct hw_breakpoint *bp; + int i, cpu = get_cpu(); + unsigned long temp_kdr7 = 0; + + /* Don't allow debug exceptions while we update the registers */ + set_debugreg(0UL, 7); + + for (i = hbp_kernel_pos; i < HBP_NUM; i++) { + per_cpu(this_hbp_kernel[i], cpu) = bp = hbp_kernel[i]; + if (bp) { + temp_kdr7 |= encode_dr7(i, bp->info.len, bp->info.type); + set_debugreg(bp->info.address, i); + } + } + + /* No need to set DR6. Update the debug registers with kernel-space + * breakpoint values from kdr7 and user-space requests from the + * current process + */ + kdr7 = temp_kdr7; + set_debugreg(kdr7 | current->thread.debugreg7, 7); + put_cpu_no_resched(); +} + +/* + * Install the thread breakpoints in their debug registers. + */ +void arch_install_thread_hw_breakpoint(struct task_struct *tsk) +{ + struct thread_struct *thread = &(tsk->thread); + + switch (hbp_kernel_pos) { + case 4: + set_debugreg(thread->debugreg[3], 3); + case 3: + set_debugreg(thread->debugreg[2], 2); + case 2: + set_debugreg(thread->debugreg[1], 1); + case 1: + set_debugreg(thread->debugreg[0], 0); + default: + break; + } + + /* No need to set DR6 */ + set_debugreg((kdr7 | thread->debugreg7), 7); +} + +/* + * Install the debug register values for just the kernel, no thread. + */ +void arch_uninstall_thread_hw_breakpoint() +{ + /* Clear the user-space portion of debugreg7 by setting only kdr7 */ + set_debugreg(kdr7, 7); + +} + +static int get_hbp_len(u8 hbp_len) +{ + unsigned int len_in_bytes = 0; + + switch (hbp_len) { + case HW_BREAKPOINT_LEN_1: + len_in_bytes = 1; + break; + case HW_BREAKPOINT_LEN_2: + len_in_bytes = 2; + break; + case HW_BREAKPOINT_LEN_4: + len_in_bytes = 4; + break; +#ifdef CONFIG_X86_64 + case HW_BREAKPOINT_LEN_8: + len_in_bytes = 8; + break; +#endif + } + return len_in_bytes; +} + +/* + * Check for virtual address in user space. + */ +int arch_check_va_in_userspace(unsigned long va, u8 hbp_len) +{ + unsigned int len; + + len = get_hbp_len(hbp_len); + + return (va <= TASK_SIZE - len); +} + +/* + * Check for virtual address in kernel space. + */ +int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) +{ + unsigned int len; + + len = get_hbp_len(hbp_len); + + return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); +} + +/* + * Store a breakpoint's encoded address, length, and type. + */ +static int arch_store_info(struct hw_breakpoint *bp, struct task_struct *tsk) +{ + /* + * User-space requests will always have the address field populated + * Symbol names from user-space are rejected + */ + if (tsk && bp->info.name) + return -EINVAL; + /* + * For kernel-addresses, either the address or symbol name can be + * specified. + */ + if (bp->info.name) + bp->info.address = (unsigned long) + kallsyms_lookup_name(bp->info.name); + if (bp->info.address) + return 0; + return -EINVAL; +} + +/* + * Validate the arch-specific HW Breakpoint register settings + */ +int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, + struct task_struct *tsk) +{ + unsigned int align; + int ret = -EINVAL; + + switch (bp->info.type) { + /* + * Ptrace-refactoring code + * For now, we'll allow instruction breakpoint only for user-space + * addresses + */ + case HW_BREAKPOINT_EXECUTE: + if ((!arch_check_va_in_userspace(bp->info.address, + bp->info.len)) && + bp->info.len != HW_BREAKPOINT_LEN_EXECUTE) + return ret; + break; + case HW_BREAKPOINT_WRITE: + break; + case HW_BREAKPOINT_RW: + break; + default: + return ret; + } + + switch (bp->info.len) { + case HW_BREAKPOINT_LEN_1: + align = 0; + break; + case HW_BREAKPOINT_LEN_2: + align = 1; + break; + case HW_BREAKPOINT_LEN_4: + align = 3; + break; +#ifdef CONFIG_X86_64 + case HW_BREAKPOINT_LEN_8: + align = 7; + break; +#endif + default: + return ret; + } + + if (bp->triggered) + ret = arch_store_info(bp, tsk); + + if (ret < 0) + return ret; + /* + * Check that the low-order bits of the address are appropriate + * for the alignment implied by len. + */ + if (bp->info.address & align) + return -EINVAL; + + /* Check that the virtual address is in the proper range */ + if (tsk) { + if (!arch_check_va_in_userspace(bp->info.address, bp->info.len)) + return -EFAULT; + } else { + if (!arch_check_va_in_kernelspace(bp->info.address, + bp->info.len)) + return -EFAULT; + } + return 0; +} + +void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk) +{ + struct thread_struct *thread = &(tsk->thread); + struct hw_breakpoint *bp = thread->hbp[pos]; + + thread->debugreg7 &= ~dr7_masks[pos]; + if (bp) { + thread->debugreg[pos] = bp->info.address; + thread->debugreg7 |= encode_dr7(pos, bp->info.len, + bp->info.type); + } else + thread->debugreg[pos] = 0; +} + +void arch_flush_thread_hw_breakpoint(struct task_struct *tsk) +{ + int i; + struct thread_struct *thread = &(tsk->thread); + + thread->debugreg7 = 0; + for (i = 0; i < HBP_NUM; i++) + thread->debugreg[i] = 0; +} + +/* + * Handle debug exception notifications. + * + * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below. + * + * NOTIFY_DONE returned if one of the following conditions is true. + * i) When the causative address is from user-space and the exception + * is a valid one, i.e. not triggered as a result of lazy debug register + * switching + * ii) When there are more bits than trap set in DR6 register (such + * as BD, BS or BT) indicating that more than one debug condition is + * met and requires some more action in do_debug(). + * + * NOTIFY_STOP returned for all other cases + * + */ +int __kprobes hw_breakpoint_handler(struct die_args *args) +{ + int i, cpu, rc = NOTIFY_STOP; + struct hw_breakpoint *bp; + /* The DR6 value is stored in args->err */ + unsigned long dr7, dr6 = args->err; + + /* Do an early return if no trap bits are set in DR6 */ + if ((dr6 & DR_TRAP_BITS) == 0) + return NOTIFY_DONE; + + /* Lazy debug register switching */ + if (!test_tsk_thread_flag(current, TIF_DEBUG)) + arch_uninstall_thread_hw_breakpoint(); + + get_debugreg(dr7, 7); + /* Disable breakpoints during exception handling */ + set_debugreg(0UL, 7); + /* + * Assert that local interrupts are disabled + * Reset the DRn bits in the virtualized register value. + * The ptrace trigger routine will add in whatever is needed. + */ + current->thread.debugreg6 &= ~DR_TRAP_BITS; + cpu = get_cpu(); + + /* Handle all the breakpoints that were triggered */ + for (i = 0; i < HBP_NUM; ++i) { + if (likely(!(dr6 & (DR_TRAP0 << i)))) + continue; + /* + * Find the corresponding hw_breakpoint structure and + * invoke its triggered callback. + */ + if (i >= hbp_kernel_pos) + bp = per_cpu(this_hbp_kernel[i], cpu); + else { + bp = current->thread.hbp[i]; + if (bp) + rc = NOTIFY_DONE; + } + /* + * bp can be NULL due to lazy debug register switching + * or due to the delay between updates of hbp_kernel_pos + * and this_hbp_kernel. + */ + if (!bp) + continue; + + (bp->triggered)(bp, args->regs); + } + if (dr6 & (~DR_TRAP_BITS)) + rc = NOTIFY_DONE; + + set_debugreg(dr7, 7); + put_cpu_no_resched(); + return rc; +} + +/* + * Handle debug exception notifications. + */ +int __kprobes hw_breakpoint_exceptions_notify( + struct notifier_block *unused, unsigned long val, void *data) +{ + if (val != DIE_DEBUG) + return NOTIFY_DONE; + + return hw_breakpoint_handler(data); +} -- cgit v1.2.3 From 08d68323d1f0c34452e614263b212ca556dae47f Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:44:08 +0530 Subject: hw-breakpoints: modifying generic debug exception to use thread-specific debug registers This patch modifies the breakpoint exception handler code to use the new abstract debug register names. [ fweisbec@gmail.com: fix conflict against kmemcheck ] [ Impact: refactor and cleanup x86 debug exception handler ] Original-patch-by: Alan Stern Signed-off-by: K.Prasad Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/traps.c | 69 +++++++++++++++++-------------------------------- 1 file changed, 24 insertions(+), 45 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a1d288327ff0..de9913247dd0 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -529,73 +529,52 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; - unsigned long condition; + unsigned long dr6; int si_code; - get_debugreg(condition, 6); + get_debugreg(dr6, 6); + /* DR6 may or may not be cleared by the CPU */ + set_debugreg(0, 6); /* * The processor cleared BTF, so don't mark that we need it set. */ clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); tsk->thread.debugctlmsr = 0; - if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, + /* Store the virtualized DR6 value */ + tsk->thread.debugreg6 = dr6; + + if (notify_die(DIE_DEBUG, "debug", regs, dr6, error_code, SIGTRAP) == NOTIFY_STOP) return; /* It's safe to allow irq's after DR6 has been saved */ preempt_conditional_sti(regs); - /* Mask out spurious debug traps due to lazy DR7 setting */ - if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { - if (!tsk->thread.debugreg7) - goto clear_dr7; + if (regs->flags & X86_VM_MASK) { + handle_vm86_trap((struct kernel_vm86_regs *) regs, + error_code, 1); + return; } -#ifdef CONFIG_X86_32 - if (regs->flags & X86_VM_MASK) - goto debug_vm86; -#endif - - /* Save debug status register where ptrace can see it */ - tsk->thread.debugreg6 = condition; - /* - * Single-stepping through TF: make sure we ignore any events in - * kernel space (but re-enable TF when returning to user mode). + * Single-stepping through system calls: ignore any exceptions in + * kernel space, but re-enable TF when returning to user mode. + * + * We already checked v86 mode above, so we can check for kernel mode + * by just checking the CPL of CS. */ - if (condition & DR_STEP) { - if (!user_mode(regs)) - goto clear_TF_reenable; + if ((dr6 & DR_STEP) && !user_mode(regs)) { + tsk->thread.debugreg6 &= ~DR_STEP; + set_tsk_thread_flag(tsk, TIF_SINGLESTEP); + regs->flags &= ~X86_EFLAGS_TF; } - - si_code = get_si_code(condition); - /* Ok, finally something we can handle */ - send_sigtrap(tsk, regs, error_code, si_code); - - /* - * Disable additional traps. They'll be re-enabled when - * the signal is delivered. - */ -clear_dr7: - set_debugreg(0, 7); + si_code = get_si_code(tsk->thread.debugreg6); + if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS)) + send_sigtrap(tsk, regs, error_code, si_code); preempt_conditional_cli(regs); - return; -#ifdef CONFIG_X86_32 -debug_vm86: - /* reenable preemption: handle_vm86_trap() might sleep */ - dec_preempt_count(); - handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); - conditional_cli(regs); - return; -#endif - -clear_TF_reenable: - set_tsk_thread_flag(tsk, TIF_SINGLESTEP); - regs->flags &= ~X86_EFLAGS_TF; - preempt_conditional_cli(regs); return; } -- cgit v1.2.3 From 1e3500666f7c5daaadadb8431a2927cdbbdb7dd4 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:44:26 +0530 Subject: hw-breakpoints: use wrapper routines around debug registers in processor related functions This patch enables the use of wrapper routines to access the debug/breakpoint registers on cpu management. The hardcoded debug registers save and restore operations for threads breakpoints are replaced by wrappers. And now that we handle the kernel breakpoints too, we also need to handle them on cpu hotplug operations. [ Impact: adapt new hardware breakpoint api to cpu hotplug ] Original-patch-by: Alan Stern Signed-off-by: K.Prasad Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/smpboot.c | 3 +++ arch/x86/power/cpu_32.c | 13 +++---------- arch/x86/power/cpu_64.c | 12 +++--------- 3 files changed, 9 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 58d24ef917d8..2b2652d205c0 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include @@ -326,6 +327,7 @@ notrace static void __cpuinit start_secondary(void *unused) setup_secondary_clock(); wmb(); + load_debug_registers(); cpu_idle(); } @@ -1250,6 +1252,7 @@ void cpu_disable_common(void) remove_cpu_from_maps(cpu); unlock_vector_lock(); fixup_irqs(); + hw_breakpoint_disable(); } int native_cpu_disable(void) diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c index 519913948003..2bc3b016de90 100644 --- a/arch/x86/power/cpu_32.c +++ b/arch/x86/power/cpu_32.c @@ -13,6 +13,7 @@ #include #include #include +#include static struct saved_context saved_context; @@ -48,6 +49,7 @@ static void __save_processor_state(struct saved_context *ctxt) ctxt->cr2 = read_cr2(); ctxt->cr3 = read_cr3(); ctxt->cr4 = read_cr4_safe(); + hw_breakpoint_disable(); } /* Needed by apm.c */ @@ -83,16 +85,7 @@ static void fix_processor_context(void) /* * Now maybe reload the debug registers */ - if (current->thread.debugreg7) { - set_debugreg(current->thread.debugreg[0], 0); - set_debugreg(current->thread.debugreg[1], 1); - set_debugreg(current->thread.debugreg[2], 2); - set_debugreg(current->thread.debugreg[3], 3); - /* no 4 and 5 */ - set_debugreg(current->thread.debugreg6, 6); - set_debugreg(current->thread.debugreg7, 7); - } - + load_debug_registers(); } static void __restore_processor_state(struct saved_context *ctxt) diff --git a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu_64.c index 1e3bdcc959ff..46866a13a93a 100644 --- a/arch/x86/power/cpu_64.c +++ b/arch/x86/power/cpu_64.c @@ -16,6 +16,7 @@ #include #include #include +#include static void fix_processor_context(void); @@ -71,6 +72,7 @@ static void __save_processor_state(struct saved_context *ctxt) ctxt->cr3 = read_cr3(); ctxt->cr4 = read_cr4(); ctxt->cr8 = read_cr8(); + hw_breakpoint_disable(); } void save_processor_state(void) @@ -162,13 +164,5 @@ static void fix_processor_context(void) /* * Now maybe reload the debug registers */ - if (current->thread.debugreg7){ - set_debugreg(current->thread.debugreg[0], 0); - set_debugreg(current->thread.debugreg[1], 1); - set_debugreg(current->thread.debugreg[2], 2); - set_debugreg(current->thread.debugreg[3], 3); - /* no 4 and 5 */ - loaddebug(¤t->thread, 6); - loaddebug(¤t->thread, 7); - } + load_debug_registers(); } -- cgit v1.2.3 From 66cb5917295958652ff6ba36d83f98f2379c46b4 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:44:55 +0530 Subject: hw-breakpoints: use the new wrapper routines to access debug registers in process/thread code This patch enables the use of abstract debug registers in process-handling routines, according to the new hardware breakpoint Api. [ Impact: adapt thread breakpoints handling code to the new breakpoint Api ] Original-patch-by: Alan Stern Signed-off-by: K.Prasad Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/process.c | 22 ++++++---------------- arch/x86/kernel/process_32.c | 28 ++++++++++++++++++++++++++++ arch/x86/kernel/process_64.c | 31 +++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 291527cb438a..19a686c401b5 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include unsigned long idle_halt; EXPORT_SYMBOL(idle_halt); @@ -46,6 +48,8 @@ void free_thread_xstate(struct task_struct *tsk) kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); tsk->thread.xstate = NULL; } + if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) + flush_thread_hw_breakpoint(tsk); WARN(tsk->thread.ds_ctx, "leaking DS context\n"); } @@ -106,12 +110,8 @@ void flush_thread(void) clear_tsk_thread_flag(tsk, TIF_DEBUG); - tsk->thread.debugreg[0] = 0; - tsk->thread.debugreg[1] = 0; - tsk->thread.debugreg[2] = 0; - tsk->thread.debugreg[3] = 0; - tsk->thread.debugreg6 = 0; - tsk->thread.debugreg7 = 0; + if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) + flush_thread_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* * Forget coprocessor state.. @@ -193,16 +193,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, else if (next->debugctlmsr != prev->debugctlmsr) update_debugctlmsr(next->debugctlmsr); - if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { - set_debugreg(next->debugreg[0], 0); - set_debugreg(next->debugreg[1], 1); - set_debugreg(next->debugreg[2], 2); - set_debugreg(next->debugreg[3], 3); - /* no 4 and 5 */ - set_debugreg(next->debugreg6, 6); - set_debugreg(next->debugreg7, 7); - } - if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ test_tsk_thread_flag(next_p, TIF_NOTSC)) { /* prev and next are different */ diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index b5e4bfef4472..297ffff2ffc2 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -61,6 +61,8 @@ #include #include #include +#include +#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -265,7 +267,13 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, task_user_gs(p) = get_user_gs(regs); + p->thread.io_bitmap_ptr = NULL; tsk = current; + err = -ENOMEM; + if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) + if (copy_thread_hw_breakpoint(tsk, p, clone_flags)) + goto out; + if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, IO_BITMAP_BYTES, GFP_KERNEL); @@ -285,10 +293,13 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, err = do_set_thread_area(p, -1, (struct user_desc __user *)childregs->si, 0); +out: if (err && p->thread.io_bitmap_ptr) { kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; } + if (err) + flush_thread_hw_breakpoint(p); clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); p->thread.ds_ctx = NULL; @@ -427,6 +438,23 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) lazy_load_gs(next->gs); percpu_write(current_task, next_p); + /* + * There's a problem with moving the arch_install_thread_hw_breakpoint() + * call before current is updated. Suppose a kernel breakpoint is + * triggered in between the two, the hw-breakpoint handler will see that + * the 'current' task does not have TIF_DEBUG flag set and will think it + * is leftover from an old task (lazy switching) and will erase it. Then + * until the next context switch, no user-breakpoints will be installed. + * + * The real problem is that it's impossible to update both current and + * physical debug registers at the same instant, so there will always be + * a window in which they disagree and a breakpoint might get triggered. + * Since we use lazy switching, we are forced to assume that a + * disagreement means that current is correct and the exception is due + * to lazy debug register switching. + */ + if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG))) + arch_install_thread_hw_breakpoint(next_p); return prev_p; } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 5a1a1de292ec..f7b276d4b3fb 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -55,6 +55,8 @@ #include #include #include +#include +#include asmlinkage extern void ret_from_fork(void); @@ -248,6 +250,8 @@ void release_thread(struct task_struct *dead_task) BUG(); } } + if (unlikely(dead_task->thread.debugreg7)) + flush_thread_hw_breakpoint(dead_task); } static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr) @@ -303,12 +307,18 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.fs = me->thread.fs; p->thread.gs = me->thread.gs; + p->thread.io_bitmap_ptr = NULL; savesegment(gs, p->thread.gsindex); savesegment(fs, p->thread.fsindex); savesegment(es, p->thread.es); savesegment(ds, p->thread.ds); + err = -ENOMEM; + if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG))) + if (copy_thread_hw_breakpoint(me, p, clone_flags)) + goto out; + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -347,6 +357,9 @@ out: kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; } + if (err) + flush_thread_hw_breakpoint(p); + return err; } @@ -492,6 +505,24 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ if (tsk_used_math(next_p) && next_p->fpu_counter > 5) math_state_restore(); + /* + * There's a problem with moving the arch_install_thread_hw_breakpoint() + * call before current is updated. Suppose a kernel breakpoint is + * triggered in between the two, the hw-breakpoint handler will see that + * the 'current' task does not have TIF_DEBUG flag set and will think it + * is leftover from an old task (lazy switching) and will erase it. Then + * until the next context switch, no user-breakpoints will be installed. + * + * The real problem is that it's impossible to update both current and + * physical debug registers at the same instant, so there will always be + * a window in which they disagree and a breakpoint might get triggered. + * Since we use lazy switching, we are forced to assume that a + * disagreement means that current is correct and the exception is due + * to lazy debug register switching. + */ + if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG))) + arch_install_thread_hw_breakpoint(next_p); + return prev_p; } -- cgit v1.2.3 From da0cdc14f5f7e0faee6b2393fefed056cdb17146 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:45:03 +0530 Subject: hw-breakpoints: modify signal handling code to refrain from re-enabling HW Breakpoints This patch disables re-enabling of Hardware Breakpoint registers through the signal handling code. This is now done during from hw_breakpoint_handler(). Original-patch-by: Alan Stern Signed-off-by: K.Prasad Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/signal.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 14425166b8e3..f33d2e0ef095 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -800,15 +800,6 @@ static void do_signal(struct pt_regs *regs) signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { - /* - * Re-enable any watchpoints before delivering the - * signal to user space. The processor register will - * have been cleared if the watchpoint triggered - * inside the kernel. - */ - if (current->thread.debugreg7) - set_debugreg(current->thread.debugreg7, 7); - /* Whee! Actually deliver the signal. */ if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { /* -- cgit v1.2.3 From 72f674d203cd230426437cdcf7dd6f681dad8b0d Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:45:48 +0530 Subject: hw-breakpoints: modify Ptrace routines to access breakpoint registers This patch modifies the ptrace code to use the new wrapper routines around the debug/breakpoint registers. [ Impact: adapt x86 ptrace to the new breakpoint Api ] Original-patch-by: Alan Stern Signed-off-by: K.Prasad Signed-off-by: Maneesh Soni Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/ptrace.c | 231 +++++++++++++++++++++++++++++------------------ 1 file changed, 141 insertions(+), 90 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 313be40be55a..b457f78b7dbf 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -34,6 +34,7 @@ #include #include #include +#include #include @@ -136,11 +137,6 @@ static int set_segment_reg(struct task_struct *task, return 0; } -static unsigned long debugreg_addr_limit(struct task_struct *task) -{ - return TASK_SIZE - 3; -} - #else /* CONFIG_X86_64 */ #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) @@ -265,15 +261,6 @@ static int set_segment_reg(struct task_struct *task, return 0; } -static unsigned long debugreg_addr_limit(struct task_struct *task) -{ -#ifdef CONFIG_IA32_EMULATION - if (test_tsk_thread_flag(task, TIF_IA32)) - return IA32_PAGE_OFFSET - 3; -#endif - return TASK_SIZE_MAX - 7; -} - #endif /* CONFIG_X86_32 */ static unsigned long get_flags(struct task_struct *task) @@ -464,95 +451,159 @@ static int genregs_set(struct task_struct *target, } /* - * This function is trivial and will be inlined by the compiler. - * Having it separates the implementation details of debug - * registers from the interface details of ptrace. + * Decode the length and type bits for a particular breakpoint as + * stored in debug register 7. Return the "enabled" status. */ -static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) +static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, + unsigned *type) { - switch (n) { - case 0: return child->thread.debugreg[0]; - case 1: return child->thread.debugreg[1]; - case 2: return child->thread.debugreg[2]; - case 3: return child->thread.debugreg[3]; - case 6: return child->thread.debugreg6; - case 7: return child->thread.debugreg7; - } - return 0; + int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); + + *len = (bp_info & 0xc) | 0x40; + *type = (bp_info & 0x3) | 0x80; + return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; } -static int ptrace_set_debugreg(struct task_struct *child, - int n, unsigned long data) +static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) { + struct thread_struct *thread = &(current->thread); int i; - if (unlikely(n == 4 || n == 5)) - return -EIO; + /* + * Store in the virtual DR6 register the fact that the breakpoint + * was hit so the thread's debugger will see it. + */ + for (i = 0; i < hbp_kernel_pos; i++) + /* + * We will check bp->info.address against the address stored in + * thread's hbp structure and not debugreg[i]. This is to ensure + * that the corresponding bit for 'i' in DR7 register is enabled + */ + if (bp->info.address == thread->hbp[i]->info.address) + break; - if (n < 4 && unlikely(data >= debugreg_addr_limit(child))) - return -EIO; + thread->debugreg6 |= (DR_TRAP0 << i); +} - switch (n) { - case 0: child->thread.debugreg[0] = data; break; - case 1: child->thread.debugreg[1] = data; break; - case 2: child->thread.debugreg[2] = data; break; - case 3: child->thread.debugreg[3] = data; break; +/* + * Handle ptrace writes to debug register 7. + */ +static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) +{ + struct thread_struct *thread = &(tsk->thread); + unsigned long old_dr7 = thread->debugreg7; + int i, orig_ret = 0, rc = 0; + int enabled, second_pass = 0; + unsigned len, type; + struct hw_breakpoint *bp; + + data &= ~DR_CONTROL_RESERVED; +restore: + /* + * Loop through all the hardware breakpoints, making the + * appropriate changes to each. + */ + for (i = 0; i < HBP_NUM; i++) { + enabled = decode_dr7(data, i, &len, &type); + bp = thread->hbp[i]; + + if (!enabled) { + if (bp) { + /* Don't unregister the breakpoints right-away, + * unless all register_user_hw_breakpoint() + * requests have succeeded. This prevents + * any window of opportunity for debug + * register grabbing by other users. + */ + if (!second_pass) + continue; + unregister_user_hw_breakpoint(tsk, bp); + kfree(bp); + } + continue; + } + if (!bp) { + rc = -ENOMEM; + bp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); + if (bp) { + bp->info.address = thread->debugreg[i]; + bp->triggered = ptrace_triggered; + bp->info.len = len; + bp->info.type = type; + rc = register_user_hw_breakpoint(tsk, bp); + if (rc) + kfree(bp); + } + } else + rc = modify_user_hw_breakpoint(tsk, bp); + if (rc) + break; + } + /* + * Make a second pass to free the remaining unused breakpoints + * or to restore the original breakpoints if an error occurred. + */ + if (!second_pass) { + second_pass = 1; + if (rc < 0) { + orig_ret = rc; + data = old_dr7; + } + goto restore; + } + return ((orig_ret < 0) ? orig_ret : rc); +} - case 6: - if ((data & ~0xffffffffUL) != 0) - return -EIO; - child->thread.debugreg6 = data; - break; +/* + * Handle PTRACE_PEEKUSR calls for the debug register area. + */ +unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) +{ + struct thread_struct *thread = &(tsk->thread); + unsigned long val = 0; + + if (n < HBP_NUM) + val = thread->debugreg[n]; + else if (n == 6) + val = thread->debugreg6; + else if (n == 7) + val = thread->debugreg7; + return val; +} - case 7: - /* - * Sanity-check data. Take one half-byte at once with - * check = (val >> (16 + 4*i)) & 0xf. It contains the - * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits - * 2 and 3 are LENi. Given a list of invalid values, - * we do mask |= 1 << invalid_value, so that - * (mask >> check) & 1 is a correct test for invalid - * values. - * - * R/Wi contains the type of the breakpoint / - * watchpoint, LENi contains the length of the watched - * data in the watchpoint case. - * - * The invalid values are: - * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit] - * - R/Wi == 0x10 (break on I/O reads or writes), so - * mask |= 0x4444. - * - R/Wi == 0x00 && LENi != 0x00, so we have mask |= - * 0x1110. - * - * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54. - * - * See the Intel Manual "System Programming Guide", - * 15.2.4 - * - * Note that LENi == 0x10 is defined on x86_64 in long - * mode (i.e. even for 32-bit userspace software, but - * 64-bit kernel), so the x86_64 mask value is 0x5454. - * See the AMD manual no. 24593 (AMD64 System Programming) - */ -#ifdef CONFIG_X86_32 -#define DR7_MASK 0x5f54 -#else -#define DR7_MASK 0x5554 -#endif - data &= ~DR_CONTROL_RESERVED; - for (i = 0; i < 4; i++) - if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1) - return -EIO; - child->thread.debugreg7 = data; - if (data) - set_tsk_thread_flag(child, TIF_DEBUG); - else - clear_tsk_thread_flag(child, TIF_DEBUG); - break; +/* + * Handle PTRACE_POKEUSR calls for the debug register area. + */ +int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) +{ + struct thread_struct *thread = &(tsk->thread); + int rc = 0; + + /* There are no DR4 or DR5 registers */ + if (n == 4 || n == 5) + return -EIO; + + if (n == 6) { + tsk->thread.debugreg6 = val; + goto ret_path; } + if (n < HBP_NUM) { + if (thread->hbp[n]) { + if (arch_check_va_in_userspace(val, + thread->hbp[n]->info.len) == 0) { + rc = -EIO; + goto ret_path; + } + thread->hbp[n]->info.address = val; + } + thread->debugreg[n] = val; + } + /* All that's left is DR7 */ + if (n == 7) + rc = ptrace_write_dr7(tsk, val); - return 0; +ret_path: + return rc; } /* -- cgit v1.2.3 From 17f557e5b5d43a2af66c969f6560ac7105020672 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:46:03 +0530 Subject: hw-breakpoints: cleanup HW Breakpoint registers before kexec This patch disables Hardware breakpoints before doing a 'kexec' on the machine so that the cpu doesn't keep debug registers values which would be out of sync for the new image. Original-patch-by: Alan Stern Signed-off-by: K.Prasad Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/machine_kexec_32.c | 2 ++ arch/x86/kernel/machine_kexec_64.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index c1c429d00130..c843f8406da2 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -25,6 +25,7 @@ #include #include #include +#include static void set_idt(void *newidt, __u16 limit) { @@ -202,6 +203,7 @@ void machine_kexec(struct kimage *image) /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); + hw_breakpoint_disable(); if (image->preserve_context) { #ifdef CONFIG_X86_IO_APIC diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 84c3bf209e98..4a8bb82248ae 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -18,6 +18,7 @@ #include #include #include +#include static int init_one_level2_page(struct kimage *image, pgd_t *pgd, unsigned long addr) @@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image) /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); + hw_breakpoint_disable(); if (image->preserve_context) { #ifdef CONFIG_X86_IO_APIC -- cgit v1.2.3 From 62edab9056a6cf0c9207339c8892c923a5217e45 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:47:06 +0530 Subject: hw-breakpoints: reset bits in dr6 after the corresponding exception is handled This patch resets the bit in dr6 after the corresponding exception is handled in code, so that we keep a clean track of the current virtual debug status register. [ Impact: keep track of breakpoints triggering completion ] Signed-off-by: K.Prasad Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/hw_breakpoint.c | 13 +++++++++++-- arch/x86/kernel/kgdb.c | 6 ++++++ arch/x86/kernel/kprobes.c | 9 ++++++++- arch/x86/kernel/traps.c | 4 ++-- arch/x86/mm/kmmio.c | 8 +++++++- 5 files changed, 34 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 4867c9f3b5fb..69451473dbd2 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -314,8 +314,12 @@ int __kprobes hw_breakpoint_handler(struct die_args *args) { int i, cpu, rc = NOTIFY_STOP; struct hw_breakpoint *bp; - /* The DR6 value is stored in args->err */ - unsigned long dr7, dr6 = args->err; + unsigned long dr7, dr6; + unsigned long *dr6_p; + + /* The DR6 value is pointed by args->err */ + dr6_p = (unsigned long *)ERR_PTR(args->err); + dr6 = *dr6_p; /* Do an early return if no trap bits are set in DR6 */ if ((dr6 & DR_TRAP_BITS) == 0) @@ -351,6 +355,11 @@ int __kprobes hw_breakpoint_handler(struct die_args *args) if (bp) rc = NOTIFY_DONE; } + /* + * Reset the 'i'th TRAP bit in dr6 to denote completion of + * exception handling + */ + (*dr6_p) &= ~(DR_TRAP0 << i); /* * bp can be NULL due to lazy debug register switching * or due to the delay between updates of hbp_kernel_pos diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index b1f4dffb919e..f820b73c7f28 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -43,6 +43,7 @@ #include #include +#include #include #include @@ -434,6 +435,11 @@ single_step_cont(struct pt_regs *regs, struct die_args *args) "resuming...\n"); kgdb_arch_handle_exception(args->trapnr, args->signr, args->err, "c", "", regs); + /* + * Reset the BS bit in dr6 (pointed by args->err) to + * denote completion of processing + */ + (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; return NOTIFY_STOP; } diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7b5169d2b000..b5b1848c5336 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -54,6 +54,7 @@ #include #include #include +#include void jprobe_return_end(void); @@ -967,8 +968,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, ret = NOTIFY_STOP; break; case DIE_DEBUG: - if (post_kprobe_handler(args->regs)) + if (post_kprobe_handler(args->regs)) { + /* + * Reset the BS bit in dr6 (pointed by args->err) to + * denote completion of processing + */ + (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; ret = NOTIFY_STOP; + } break; case DIE_GPF: /* diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index de9913247dd0..124a4d5a95b2 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -545,8 +545,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) /* Store the virtualized DR6 value */ tsk->thread.debugreg6 = dr6; - if (notify_die(DIE_DEBUG, "debug", regs, dr6, error_code, - SIGTRAP) == NOTIFY_STOP) + if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, + SIGTRAP) == NOTIFY_STOP) return; /* It's safe to allow irq's after DR6 has been saved */ diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 16ccbd77917f..11a4ad4d6253 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -540,8 +540,14 @@ kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) struct die_args *arg = args; if (val == DIE_DEBUG && (arg->err & DR_STEP)) - if (post_kmmio_handler(arg->err, arg->regs) == 1) + if (post_kmmio_handler(arg->err, arg->regs) == 1) { + /* + * Reset the BS bit in dr6 (pointed by args->err) to + * denote completion of processing + */ + (*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP; return NOTIFY_STOP; + } return NOTIFY_DONE; } -- cgit v1.2.3 From 4555835b707d5c778ee1c9076670bc99b1eeaf61 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 17 Jun 2009 14:44:19 +0530 Subject: x86: hw_breakpoint.c arch_check_va_in_kernelspace and hw_breakpoint_handler should be static arch_check_va_in_kernelspace() and hw_breakpoint_handler() is used only by same file so it should be static. Also fixed non-ANSI function declaration of function 'arch_uninstall_thread_hw_breakpoint' Fixed following sparse warnings : arch/x86/kernel/hw_breakpoint.c:124:42: warning: non-ANSI function declaration of function 'arch_uninstall_thread_hw_breakpoint' arch/x86/kernel/hw_breakpoint.c:169:5: warning: symbol 'arch_check_va_in_kernelspace' was not declared. Should it be static? arch/x86/kernel/hw_breakpoint.c:313:15: warning: symbol 'hw_breakpoint_handler' was not declared. Should it be static? Signed-off-by: Jaswinder Singh Rajput Cc: Alan Stern Cc: "K.Prasad" Cc: Frederic Weisbecker LKML-Reference: <1245230059.2662.4.camel@ht.satnam> Signed-off-by: Ingo Molnar --- arch/x86/kernel/hw_breakpoint.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 51d959528b1d..9316a9de4de3 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -121,7 +121,7 @@ void arch_install_thread_hw_breakpoint(struct task_struct *tsk) /* * Install the debug register values for just the kernel, no thread. */ -void arch_uninstall_thread_hw_breakpoint() +void arch_uninstall_thread_hw_breakpoint(void) { /* Clear the user-space portion of debugreg7 by setting only kdr7 */ set_debugreg(kdr7, 7); @@ -166,7 +166,7 @@ int arch_check_va_in_userspace(unsigned long va, u8 hbp_len) /* * Check for virtual address in kernel space. */ -int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) +static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) { unsigned int len; @@ -310,7 +310,7 @@ void arch_flush_thread_hw_breakpoint(struct task_struct *tsk) * NOTIFY_STOP returned for all other cases * */ -int __kprobes hw_breakpoint_handler(struct die_args *args) +static int __kprobes hw_breakpoint_handler(struct die_args *args) { int i, cpu, rc = NOTIFY_STOP; struct hw_breakpoint *bp; -- cgit v1.2.3 From 9d22b536609abf0d64648f99518676ea58245e3b Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 1 Jul 2009 19:52:30 +0530 Subject: x86: Mark ptrace_get_debugreg() as static This sparse warning: arch/x86/kernel/ptrace.c:560:15: warning: symbol 'ptrace_get_debugreg' was not declared. Should it be static? triggers because ptrace_get_debugreg() is global but is only used in a single .c file. change ptrace_get_debugreg() to static to fix that - this also addresses the sparse warning. Signed-off-by: Jaswinder Singh Rajput Cc: Steven Rostedt LKML-Reference: <1246458150.6940.19.camel@hpdv5.satnam> Signed-off-by: Ingo Molnar --- arch/x86/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index b457f78b7dbf..cabdabce3cb2 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -557,7 +557,7 @@ restore: /* * Handle PTRACE_PEEKUSR calls for the debug register area. */ -unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) +static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) { struct thread_struct *thread = &(tsk->thread); unsigned long val = 0; -- cgit v1.2.3 From 39fe05e58c5e448601ce46e6b03900d5bf31c4b0 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 12 Aug 2009 11:16:12 +0800 Subject: x86, hpet: Disable per-cpu hpet timer if ARAT is supported If CPU support always running local APIC timer, per-cpu hpet timer could be disabled, which is useless and wasteful in such case. Let's leave the timers to others. The effect is that we reserve less timers. Signed-off-by: Shaohua Li Cc: venkatesh.pallipadi@intel.com LKML-Reference: <20090812031612.GA10062@sli10-desk.sh.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index dedc2bddf7a5..5969e1078fc2 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -584,6 +584,8 @@ static void hpet_msi_capability_lookup(unsigned int start_timer) unsigned int num_timers_used = 0; int i; + if (boot_cpu_has(X86_FEATURE_ARAT)) + return; id = hpet_readl(HPET_ID); num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT); @@ -872,10 +874,8 @@ int __init hpet_enable(void) if (id & HPET_ID_LEGSUP) { hpet_legacy_clockevent_register(); - hpet_msi_capability_lookup(2); return 1; } - hpet_msi_capability_lookup(0); return 0; out_nohpet: @@ -908,9 +908,17 @@ static __init int hpet_late_init(void) if (!hpet_virt_address) return -ENODEV; + if (hpet_readl(HPET_ID) & HPET_ID_LEGSUP) + hpet_msi_capability_lookup(2); + else + hpet_msi_capability_lookup(0); + hpet_reserve_platform_timers(hpet_readl(HPET_ID)); hpet_print_config(); + if (boot_cpu_has(X86_FEATURE_ARAT)) + return 0; + for_each_online_cpu(cpu) { hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu); } -- cgit v1.2.3 From 5946fa3d5cdeb846a647a1900026af9f8b08c8b5 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 19 Aug 2009 08:44:24 +0100 Subject: x86, hpet: Simplify the HPET code On 64-bits, using unsigned long when unsigned int suffices needlessly creates larger code (due to the need for REX prefixes), and most of the logic in hpet.c really doesn't need 64-bit operations. At once this avoids the need for a couple of type casts. Signed-off-by: Jan Beulich Cc: Shaohua Li Cc: Venkatesh Pallipadi LKML-Reference: <4A8BC9780200007800010832@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hpet.h | 2 +- arch/x86/kernel/hpet.c | 45 +++++++++++++++++++++++---------------------- 2 files changed, 24 insertions(+), 23 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 1c22cb05ad6a..65847c578b70 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -69,7 +69,7 @@ extern int hpet_force_user; extern int is_hpet_enabled(void); extern int hpet_enable(void); extern void hpet_disable(void); -extern unsigned long hpet_readl(unsigned long a); +extern unsigned int hpet_readl(unsigned int a); extern void force_hpet_resume(void); extern void hpet_msi_unmask(unsigned int irq); diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 5969e1078fc2..ba575f0f2e34 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -47,12 +47,12 @@ struct hpet_dev { char name[10]; }; -unsigned long hpet_readl(unsigned long a) +inline unsigned int hpet_readl(unsigned int a) { return readl(hpet_virt_address + a); } -static inline void hpet_writel(unsigned long d, unsigned long a) +static inline void hpet_writel(unsigned int d, unsigned int a) { writel(d, hpet_virt_address + a); } @@ -167,7 +167,7 @@ do { \ static void hpet_reserve_msi_timers(struct hpet_data *hd); -static void hpet_reserve_platform_timers(unsigned long id) +static void hpet_reserve_platform_timers(unsigned int id) { struct hpet __iomem *hpet = hpet_virt_address; struct hpet_timer __iomem *timer = &hpet->hpet_timers[2]; @@ -205,7 +205,7 @@ static void hpet_reserve_platform_timers(unsigned long id) } #else -static void hpet_reserve_platform_timers(unsigned long id) { } +static void hpet_reserve_platform_timers(unsigned int id) { } #endif /* @@ -246,7 +246,7 @@ static void hpet_reset_counter(void) static void hpet_start_counter(void) { - unsigned long cfg = hpet_readl(HPET_CFG); + unsigned int cfg = hpet_readl(HPET_CFG); cfg |= HPET_CFG_ENABLE; hpet_writel(cfg, HPET_CFG); } @@ -271,7 +271,7 @@ static void hpet_resume_counter(void) static void hpet_enable_legacy_int(void) { - unsigned long cfg = hpet_readl(HPET_CFG); + unsigned int cfg = hpet_readl(HPET_CFG); cfg |= HPET_CFG_LEGACY; hpet_writel(cfg, HPET_CFG); @@ -314,7 +314,7 @@ static int hpet_setup_msi_irq(unsigned int irq); static void hpet_set_mode(enum clock_event_mode mode, struct clock_event_device *evt, int timer) { - unsigned long cfg, cmp, now; + unsigned int cfg, cmp, now; uint64_t delta; switch (mode) { @@ -323,7 +323,7 @@ static void hpet_set_mode(enum clock_event_mode mode, delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult; delta >>= evt->shift; now = hpet_readl(HPET_COUNTER); - cmp = now + (unsigned long) delta; + cmp = now + (unsigned int) delta; cfg = hpet_readl(HPET_Tn_CFG(timer)); /* Make sure we use edge triggered interrupts */ cfg &= ~HPET_TN_LEVEL; @@ -339,7 +339,7 @@ static void hpet_set_mode(enum clock_event_mode mode, * (See AMD-8111 HyperTransport I/O Hub Data Sheet, * Publication # 24674) */ - hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer)); + hpet_writel((unsigned int) delta, HPET_Tn_CMP(timer)); hpet_start_counter(); hpet_print_config(); break; @@ -387,9 +387,9 @@ static int hpet_next_event(unsigned long delta, * what we wrote hit the chip before we compare it to the * counter. */ - WARN_ON_ONCE((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt); + WARN_ON_ONCE(hpet_readl(HPET_Tn_CMP(timer)) != cnt); - return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; + return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; } static void hpet_legacy_set_mode(enum clock_event_mode mode, @@ -415,7 +415,7 @@ static struct hpet_dev *hpet_devs; void hpet_msi_unmask(unsigned int irq) { struct hpet_dev *hdev = get_irq_data(irq); - unsigned long cfg; + unsigned int cfg; /* unmask it */ cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); @@ -425,7 +425,7 @@ void hpet_msi_unmask(unsigned int irq) void hpet_msi_mask(unsigned int irq) { - unsigned long cfg; + unsigned int cfg; struct hpet_dev *hdev = get_irq_data(irq); /* mask it */ @@ -600,7 +600,7 @@ static void hpet_msi_capability_lookup(unsigned int start_timer) for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) { struct hpet_dev *hdev = &hpet_devs[num_timers_used]; - unsigned long cfg = hpet_readl(HPET_Tn_CFG(i)); + unsigned int cfg = hpet_readl(HPET_Tn_CFG(i)); /* Only consider HPET timer with MSI support */ if (!(cfg & HPET_TN_FSB_CAP)) @@ -815,7 +815,7 @@ static int hpet_clocksource_register(void) */ int __init hpet_enable(void) { - unsigned long id; + unsigned int id; int i; if (!is_hpet_capable()) @@ -933,7 +933,7 @@ fs_initcall(hpet_late_init); void hpet_disable(void) { if (is_hpet_capable()) { - unsigned long cfg = hpet_readl(HPET_CFG); + unsigned int cfg = hpet_readl(HPET_CFG); if (hpet_legacy_int_enabled) { cfg &= ~HPET_CFG_LEGACY; @@ -973,8 +973,8 @@ static int hpet_prev_update_sec; static struct rtc_time hpet_alarm_time; static unsigned long hpet_pie_count; static u32 hpet_t1_cmp; -static unsigned long hpet_default_delta; -static unsigned long hpet_pie_delta; +static u32 hpet_default_delta; +static u32 hpet_pie_delta; static unsigned long hpet_pie_limit; static rtc_irq_handler irq_handler; @@ -1025,7 +1025,8 @@ EXPORT_SYMBOL_GPL(hpet_unregister_irq_handler); */ int hpet_rtc_timer_init(void) { - unsigned long cfg, cnt, delta, flags; + unsigned int cfg, cnt, delta; + unsigned long flags; if (!is_hpet_enabled()) return 0; @@ -1035,7 +1036,7 @@ int hpet_rtc_timer_init(void) clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; clc >>= hpet_clockevent.shift + DEFAULT_RTC_SHIFT; - hpet_default_delta = (unsigned long) clc; + hpet_default_delta = clc; } if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit) @@ -1121,7 +1122,7 @@ int hpet_set_periodic_freq(unsigned long freq) clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; do_div(clc, freq); clc >>= hpet_clockevent.shift; - hpet_pie_delta = (unsigned long) clc; + hpet_pie_delta = clc; } return 1; } @@ -1135,7 +1136,7 @@ EXPORT_SYMBOL_GPL(hpet_rtc_dropped_irq); static void hpet_rtc_timer_reinit(void) { - unsigned long cfg, delta; + unsigned int cfg, delta; int lost_ints = -1; if (unlikely(!hpet_rtc_flags)) { -- cgit v1.2.3 From eb13296cfaf6c699566473669a96a38a90562384 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 13 Aug 2009 16:34:13 -0400 Subject: x86: Instruction decoder API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add x86 instruction decoder to arch-specific libraries. This decoder can decode x86 instructions used in kernel into prefix, opcode, modrm, sib, displacement and immediates. This can also show the length of instructions. This version introduces instruction attributes for decoding instructions. The instruction attribute tables are generated from the opcode map file (x86-opcode-map.txt) by the generator script(gen-insn-attr-x86.awk). Currently, the opcode maps are based on opcode maps in Intel(R) 64 and IA-32 Architectures Software Developers Manual Vol.2: Appendix.A, and consist of below two types of opcode tables. 1-byte/2-bytes/3-bytes opcodes, which has 256 elements, are written as below; Table: table-name Referrer: escaped-name opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] (or) opcode: escape # escaped-name EndTable Group opcodes, which has 8 elements, are written as below; GrpTable: GrpXXX reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] EndTable These opcode maps include a few SSE and FP opcodes (for setup), because those opcodes are used in the kernel. Signed-off-by: Masami Hiramatsu Signed-off-by: Jim Keniston Acked-by: H. Peter Anvin Cc: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Ingo Molnar Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: PrzemysÅ‚aw PaweÅ‚czyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090813203413.31965.49709.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker --- arch/x86/include/asm/inat.h | 188 +++++++++ arch/x86/include/asm/inat_types.h | 29 ++ arch/x86/include/asm/insn.h | 143 +++++++ arch/x86/lib/Makefile | 13 + arch/x86/lib/inat.c | 78 ++++ arch/x86/lib/insn.c | 464 ++++++++++++++++++++++ arch/x86/lib/x86-opcode-map.txt | 719 +++++++++++++++++++++++++++++++++++ arch/x86/tools/gen-insn-attr-x86.awk | 314 +++++++++++++++ 8 files changed, 1948 insertions(+) create mode 100644 arch/x86/include/asm/inat.h create mode 100644 arch/x86/include/asm/inat_types.h create mode 100644 arch/x86/include/asm/insn.h create mode 100644 arch/x86/lib/inat.c create mode 100644 arch/x86/lib/insn.c create mode 100644 arch/x86/lib/x86-opcode-map.txt create mode 100644 arch/x86/tools/gen-insn-attr-x86.awk (limited to 'arch/x86') diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h new file mode 100644 index 000000000000..2866fddd1848 --- /dev/null +++ b/arch/x86/include/asm/inat.h @@ -0,0 +1,188 @@ +#ifndef _ASM_X86_INAT_H +#define _ASM_X86_INAT_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#include + +/* + * Internal bits. Don't use bitmasks directly, because these bits are + * unstable. You should use checking functions. + */ + +#define INAT_OPCODE_TABLE_SIZE 256 +#define INAT_GROUP_TABLE_SIZE 8 + +/* Legacy instruction prefixes */ +#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ +#define INAT_PFX_REPNE 2 /* 0xF2 */ /* LPFX2 */ +#define INAT_PFX_REPE 3 /* 0xF3 */ /* LPFX3 */ +#define INAT_PFX_LOCK 4 /* 0xF0 */ +#define INAT_PFX_CS 5 /* 0x2E */ +#define INAT_PFX_DS 6 /* 0x3E */ +#define INAT_PFX_ES 7 /* 0x26 */ +#define INAT_PFX_FS 8 /* 0x64 */ +#define INAT_PFX_GS 9 /* 0x65 */ +#define INAT_PFX_SS 10 /* 0x36 */ +#define INAT_PFX_ADDRSZ 11 /* 0x67 */ + +#define INAT_LPREFIX_MAX 3 + +/* Immediate size */ +#define INAT_IMM_BYTE 1 +#define INAT_IMM_WORD 2 +#define INAT_IMM_DWORD 3 +#define INAT_IMM_QWORD 4 +#define INAT_IMM_PTR 5 +#define INAT_IMM_VWORD32 6 +#define INAT_IMM_VWORD 7 + +/* Legacy prefix */ +#define INAT_PFX_OFFS 0 +#define INAT_PFX_BITS 4 +#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1) +#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS) +/* Escape opcodes */ +#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS) +#define INAT_ESC_BITS 2 +#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1) +#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS) +/* Group opcodes (1-16) */ +#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS) +#define INAT_GRP_BITS 5 +#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1) +#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS) +/* Immediates */ +#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS) +#define INAT_IMM_BITS 3 +#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS) +/* Flags */ +#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS) +#define INAT_REXPFX (1 << INAT_FLAG_OFFS) +#define INAT_MODRM (1 << (INAT_FLAG_OFFS + 1)) +#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 2)) +#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 3)) +#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 4)) +#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 5)) +/* Attribute making macros for attribute tables */ +#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) +#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) +#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM) +#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS) + +/* Attribute search APIs */ +extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); +extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, + insn_byte_t last_pfx, + insn_attr_t esc_attr); +extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, + insn_byte_t last_pfx, + insn_attr_t esc_attr); + +/* Attribute checking functions */ +static inline int inat_is_prefix(insn_attr_t attr) +{ + return attr & INAT_PFX_MASK; +} + +static inline int inat_is_address_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ; +} + +static inline int inat_is_operand_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ; +} + +static inline int inat_last_prefix_id(insn_attr_t attr) +{ + if ((attr & INAT_PFX_MASK) > INAT_LPREFIX_MAX) + return 0; + else + return attr & INAT_PFX_MASK; +} + +static inline int inat_is_escape(insn_attr_t attr) +{ + return attr & INAT_ESC_MASK; +} + +static inline int inat_escape_id(insn_attr_t attr) +{ + return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS; +} + +static inline int inat_is_group(insn_attr_t attr) +{ + return attr & INAT_GRP_MASK; +} + +static inline int inat_group_id(insn_attr_t attr) +{ + return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS; +} + +static inline int inat_group_common_attribute(insn_attr_t attr) +{ + return attr & ~INAT_GRP_MASK; +} + +static inline int inat_has_immediate(insn_attr_t attr) +{ + return attr & INAT_IMM_MASK; +} + +static inline int inat_immediate_size(insn_attr_t attr) +{ + return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS; +} + +static inline int inat_is_rex_prefix(insn_attr_t attr) +{ + return attr & INAT_REXPFX; +} + +static inline int inat_has_modrm(insn_attr_t attr) +{ + return attr & INAT_MODRM; +} + +static inline int inat_is_force64(insn_attr_t attr) +{ + return attr & INAT_FORCE64; +} + +static inline int inat_has_second_immediate(insn_attr_t attr) +{ + return attr & INAT_SCNDIMM; +} + +static inline int inat_has_moffset(insn_attr_t attr) +{ + return attr & INAT_MOFFSET; +} + +static inline int inat_has_variant(insn_attr_t attr) +{ + return attr & INAT_VARIANT; +} + +#endif diff --git a/arch/x86/include/asm/inat_types.h b/arch/x86/include/asm/inat_types.h new file mode 100644 index 000000000000..cb3c20ce39cf --- /dev/null +++ b/arch/x86/include/asm/inat_types.h @@ -0,0 +1,29 @@ +#ifndef _ASM_X86_INAT_TYPES_H +#define _ASM_X86_INAT_TYPES_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ + +/* Instruction attributes */ +typedef unsigned int insn_attr_t; +typedef unsigned char insn_byte_t; +typedef signed int insn_value_t; + +#endif diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h new file mode 100644 index 000000000000..12b4e3751d3f --- /dev/null +++ b/arch/x86/include/asm/insn.h @@ -0,0 +1,143 @@ +#ifndef _ASM_X86_INSN_H +#define _ASM_X86_INSN_H +/* + * x86 instruction analysis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2009 + */ + +/* insn_attr_t is defined in inat.h */ +#include + +struct insn_field { + union { + insn_value_t value; + insn_byte_t bytes[4]; + }; + /* !0 if we've run insn_get_xxx() for this field */ + unsigned char got; + unsigned char nbytes; +}; + +struct insn { + struct insn_field prefixes; /* + * Prefixes + * prefixes.bytes[3]: last prefix + */ + struct insn_field rex_prefix; /* REX prefix */ + struct insn_field opcode; /* + * opcode.bytes[0]: opcode1 + * opcode.bytes[1]: opcode2 + * opcode.bytes[2]: opcode3 + */ + struct insn_field modrm; + struct insn_field sib; + struct insn_field displacement; + union { + struct insn_field immediate; + struct insn_field moffset1; /* for 64bit MOV */ + struct insn_field immediate1; /* for 64bit imm or off16/32 */ + }; + union { + struct insn_field moffset2; /* for 64bit MOV */ + struct insn_field immediate2; /* for 64bit imm or seg16 */ + }; + + insn_attr_t attr; + unsigned char opnd_bytes; + unsigned char addr_bytes; + unsigned char length; + unsigned char x86_64; + + const insn_byte_t *kaddr; /* kernel address of insn to analyze */ + const insn_byte_t *next_byte; +}; + +#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) +#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) +#define X86_MODRM_RM(modrm) ((modrm) & 0x07) + +#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6) +#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3) +#define X86_SIB_BASE(sib) ((sib) & 0x07) + +#define X86_REX_W(rex) ((rex) & 8) +#define X86_REX_R(rex) ((rex) & 4) +#define X86_REX_X(rex) ((rex) & 2) +#define X86_REX_B(rex) ((rex) & 1) + +/* The last prefix is needed for two-byte and three-byte opcodes */ +static inline insn_byte_t insn_last_prefix(struct insn *insn) +{ + return insn->prefixes.bytes[3]; +} + +extern void insn_init(struct insn *insn, const void *kaddr, int x86_64); +extern void insn_get_prefixes(struct insn *insn); +extern void insn_get_opcode(struct insn *insn); +extern void insn_get_modrm(struct insn *insn); +extern void insn_get_sib(struct insn *insn); +extern void insn_get_displacement(struct insn *insn); +extern void insn_get_immediate(struct insn *insn); +extern void insn_get_length(struct insn *insn); + +/* Attribute will be determined after getting ModRM (for opcode groups) */ +static inline void insn_get_attribute(struct insn *insn) +{ + insn_get_modrm(insn); +} + +/* Instruction uses RIP-relative addressing */ +extern int insn_rip_relative(struct insn *insn); + +/* Init insn for kernel text */ +static inline void kernel_insn_init(struct insn *insn, const void *kaddr) +{ +#ifdef CONFIG_X86_64 + insn_init(insn, kaddr, 1); +#else /* CONFIG_X86_32 */ + insn_init(insn, kaddr, 0); +#endif +} + +/* Offset of each field from kaddr */ +static inline int insn_offset_rex_prefix(struct insn *insn) +{ + return insn->prefixes.nbytes; +} +static inline int insn_offset_opcode(struct insn *insn) +{ + return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes; +} +static inline int insn_offset_modrm(struct insn *insn) +{ + return insn_offset_opcode(insn) + insn->opcode.nbytes; +} +static inline int insn_offset_sib(struct insn *insn) +{ + return insn_offset_modrm(insn) + insn->modrm.nbytes; +} +static inline int insn_offset_displacement(struct insn *insn) +{ + return insn_offset_sib(insn) + insn->sib.nbytes; +} +static inline int insn_offset_immediate(struct insn *insn) +{ + return insn_offset_displacement(insn) + insn->displacement.nbytes; +} + +#endif /* _ASM_X86_INSN_H */ diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 07c31899c9c2..c77f8a7c531d 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -2,12 +2,25 @@ # Makefile for x86 specific library files. # +inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk +inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt +quiet_cmd_inat_tables = GEN $@ + cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ + +$(obj)/inat-tables.c: $(inat_tables_script) $(inat_tables_maps) + $(call cmd,inat_tables) + +$(obj)/inat.o: $(obj)/inat-tables.c + +clean-files := inat-tables.c + obj-$(CONFIG_SMP) := msr.o lib-y := delay.o lib-y += thunk_$(BITS).o lib-y += usercopy_$(BITS).o getuser.o putuser.o lib-y += memcpy_$(BITS).o +lib-y += insn.o inat.o ifeq ($(CONFIG_X86_32),y) obj-y += atomic64_32.o diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c new file mode 100644 index 000000000000..054656a01dfd --- /dev/null +++ b/arch/x86/lib/inat.c @@ -0,0 +1,78 @@ +/* + * x86 instruction attribute tables + * + * Written by Masami Hiramatsu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#include + +/* Attribute tables are generated from opcode map */ +#include "inat-tables.c" + +/* Attribute search APIs */ +insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) +{ + return inat_primary_table[opcode]; +} + +insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx, + insn_attr_t esc_attr) +{ + const insn_attr_t *table; + insn_attr_t lpfx_attr; + int n, m = 0; + + n = inat_escape_id(esc_attr); + if (last_pfx) { + lpfx_attr = inat_get_opcode_attribute(last_pfx); + m = inat_last_prefix_id(lpfx_attr); + } + table = inat_escape_tables[n][0]; + if (!table) + return 0; + if (inat_has_variant(table[opcode]) && m) { + table = inat_escape_tables[n][m]; + if (!table) + return 0; + } + return table[opcode]; +} + +insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, + insn_attr_t grp_attr) +{ + const insn_attr_t *table; + insn_attr_t lpfx_attr; + int n, m = 0; + + n = inat_group_id(grp_attr); + if (last_pfx) { + lpfx_attr = inat_get_opcode_attribute(last_pfx); + m = inat_last_prefix_id(lpfx_attr); + } + table = inat_group_tables[n][0]; + if (!table) + return inat_group_common_attribute(grp_attr); + if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) { + table = inat_escape_tables[n][m]; + if (!table) + return inat_group_common_attribute(grp_attr); + } + return table[X86_MODRM_REG(modrm)] | + inat_group_common_attribute(grp_attr); +} + diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c new file mode 100644 index 000000000000..dfd56a30053f --- /dev/null +++ b/arch/x86/lib/insn.c @@ -0,0 +1,464 @@ +/* + * x86 instruction analysis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2002, 2004, 2009 + */ + +#include +#include +#include + +#define get_next(t, insn) \ + ({t r; r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) + +#define peek_next(t, insn) \ + ({t r; r = *(t*)insn->next_byte; r; }) + +/** + * insn_init() - initialize struct insn + * @insn: &struct insn to be initialized + * @kaddr: address (in kernel memory) of instruction (or copy thereof) + * @x86_64: !0 for 64-bit kernel or 64-bit app + */ +void insn_init(struct insn *insn, const void *kaddr, int x86_64) +{ + memset(insn, 0, sizeof(*insn)); + insn->kaddr = kaddr; + insn->next_byte = kaddr; + insn->x86_64 = x86_64 ? 1 : 0; + insn->opnd_bytes = 4; + if (x86_64) + insn->addr_bytes = 8; + else + insn->addr_bytes = 4; +} + +/** + * insn_get_prefixes - scan x86 instruction prefix bytes + * @insn: &struct insn containing instruction + * + * Populates the @insn->prefixes bitmap, and updates @insn->next_byte + * to point to the (first) opcode. No effect if @insn->prefixes.got + * is already set. + */ +void insn_get_prefixes(struct insn *insn) +{ + struct insn_field *prefixes = &insn->prefixes; + insn_attr_t attr; + insn_byte_t b, lb; + int i, nb; + + if (prefixes->got) + return; + + nb = 0; + lb = 0; + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + while (inat_is_prefix(attr)) { + /* Skip if same prefix */ + for (i = 0; i < nb; i++) + if (prefixes->bytes[i] == b) + goto found; + if (nb == 4) + /* Invalid instruction */ + break; + prefixes->bytes[nb++] = b; + if (inat_is_address_size_prefix(attr)) { + /* address size switches 2/4 or 4/8 */ + if (insn->x86_64) + insn->addr_bytes ^= 12; + else + insn->addr_bytes ^= 6; + } else if (inat_is_operand_size_prefix(attr)) { + /* oprand size switches 2/4 */ + insn->opnd_bytes ^= 6; + } +found: + prefixes->nbytes++; + insn->next_byte++; + lb = b; + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + } + /* Set the last prefix */ + if (lb && lb != insn->prefixes.bytes[3]) { + if (unlikely(insn->prefixes.bytes[3])) { + /* Swap the last prefix */ + b = insn->prefixes.bytes[3]; + for (i = 0; i < nb; i++) + if (prefixes->bytes[i] == lb) + prefixes->bytes[i] = b; + } + insn->prefixes.bytes[3] = lb; + } + + if (insn->x86_64) { + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + if (inat_is_rex_prefix(attr)) { + insn->rex_prefix.value = b; + insn->rex_prefix.nbytes = 1; + insn->next_byte++; + if (X86_REX_W(b)) + /* REX.W overrides opnd_size */ + insn->opnd_bytes = 8; + } + } + insn->rex_prefix.got = 1; + prefixes->got = 1; + return; +} + +/** + * insn_get_opcode - collect opcode(s) + * @insn: &struct insn containing instruction + * + * Populates @insn->opcode, updates @insn->next_byte to point past the + * opcode byte(s), and set @insn->attr (except for groups). + * If necessary, first collects any preceding (prefix) bytes. + * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got + * is already 1. + */ +void insn_get_opcode(struct insn *insn) +{ + struct insn_field *opcode = &insn->opcode; + insn_byte_t op, pfx; + if (opcode->got) + return; + if (!insn->prefixes.got) + insn_get_prefixes(insn); + + /* Get first opcode */ + op = get_next(insn_byte_t, insn); + opcode->bytes[0] = op; + opcode->nbytes = 1; + insn->attr = inat_get_opcode_attribute(op); + while (inat_is_escape(insn->attr)) { + /* Get escaped opcode */ + op = get_next(insn_byte_t, insn); + opcode->bytes[opcode->nbytes++] = op; + pfx = insn_last_prefix(insn); + insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); + } + opcode->got = 1; +} + +/** + * insn_get_modrm - collect ModRM byte, if any + * @insn: &struct insn containing instruction + * + * Populates @insn->modrm and updates @insn->next_byte to point past the + * ModRM byte, if any. If necessary, first collects the preceding bytes + * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. + */ +void insn_get_modrm(struct insn *insn) +{ + struct insn_field *modrm = &insn->modrm; + insn_byte_t pfx, mod; + if (modrm->got) + return; + if (!insn->opcode.got) + insn_get_opcode(insn); + + if (inat_has_modrm(insn->attr)) { + mod = get_next(insn_byte_t, insn); + modrm->value = mod; + modrm->nbytes = 1; + if (inat_is_group(insn->attr)) { + pfx = insn_last_prefix(insn); + insn->attr = inat_get_group_attribute(mod, pfx, + insn->attr); + } + } + + if (insn->x86_64 && inat_is_force64(insn->attr)) + insn->opnd_bytes = 8; + modrm->got = 1; +} + + +/** + * insn_rip_relative() - Does instruction use RIP-relative addressing mode? + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * ModRM byte. No effect if @insn->x86_64 is 0. + */ +int insn_rip_relative(struct insn *insn) +{ + struct insn_field *modrm = &insn->modrm; + + if (!insn->x86_64) + return 0; + if (!modrm->got) + insn_get_modrm(insn); + /* + * For rip-relative instructions, the mod field (top 2 bits) + * is zero and the r/m field (bottom 3 bits) is 0x5. + */ + return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); +} + +/** + * insn_get_sib() - Get the SIB byte of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * ModRM byte. + */ +void insn_get_sib(struct insn *insn) +{ + insn_byte_t modrm; + + if (insn->sib.got) + return; + if (!insn->modrm.got) + insn_get_modrm(insn); + if (insn->modrm.nbytes) { + modrm = (insn_byte_t)insn->modrm.value; + if (insn->addr_bytes != 2 && + X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { + insn->sib.value = get_next(insn_byte_t, insn); + insn->sib.nbytes = 1; + } + } + insn->sib.got = 1; +} + + +/** + * insn_get_displacement() - Get the displacement of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * SIB byte. + * Displacement value is sign-expanded. + */ +void insn_get_displacement(struct insn *insn) +{ + insn_byte_t mod, rm, base; + + if (insn->displacement.got) + return; + if (!insn->sib.got) + insn_get_sib(insn); + if (insn->modrm.nbytes) { + /* + * Interpreting the modrm byte: + * mod = 00 - no displacement fields (exceptions below) + * mod = 01 - 1-byte displacement field + * mod = 10 - displacement field is 4 bytes, or 2 bytes if + * address size = 2 (0x67 prefix in 32-bit mode) + * mod = 11 - no memory operand + * + * If address size = 2... + * mod = 00, r/m = 110 - displacement field is 2 bytes + * + * If address size != 2... + * mod != 11, r/m = 100 - SIB byte exists + * mod = 00, SIB base = 101 - displacement field is 4 bytes + * mod = 00, r/m = 101 - rip-relative addressing, displacement + * field is 4 bytes + */ + mod = X86_MODRM_MOD(insn->modrm.value); + rm = X86_MODRM_RM(insn->modrm.value); + base = X86_SIB_BASE(insn->sib.value); + if (mod == 3) + goto out; + if (mod == 1) { + insn->displacement.value = get_next(char, insn); + insn->displacement.nbytes = 1; + } else if (insn->addr_bytes == 2) { + if ((mod == 0 && rm == 6) || mod == 2) { + insn->displacement.value = + get_next(short, insn); + insn->displacement.nbytes = 2; + } + } else { + if ((mod == 0 && rm == 5) || mod == 2 || + (mod == 0 && base == 5)) { + insn->displacement.value = get_next(int, insn); + insn->displacement.nbytes = 4; + } + } + } +out: + insn->displacement.got = 1; +} + +/* Decode moffset16/32/64 */ +static void __get_moffset(struct insn *insn) +{ + switch (insn->addr_bytes) { + case 2: + insn->moffset1.value = get_next(short, insn); + insn->moffset1.nbytes = 2; + break; + case 4: + insn->moffset1.value = get_next(int, insn); + insn->moffset1.nbytes = 4; + break; + case 8: + insn->moffset1.value = get_next(int, insn); + insn->moffset1.nbytes = 4; + insn->moffset2.value = get_next(int, insn); + insn->moffset2.nbytes = 4; + break; + } + insn->moffset1.got = insn->moffset2.got = 1; +} + +/* Decode imm v32(Iz) */ +static void __get_immv32(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate.value = get_next(short, insn); + insn->immediate.nbytes = 2; + break; + case 4: + case 8: + insn->immediate.value = get_next(int, insn); + insn->immediate.nbytes = 4; + break; + } +} + +/* Decode imm v64(Iv/Ov) */ +static void __get_immv(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate1.value = get_next(short, insn); + insn->immediate1.nbytes = 2; + break; + case 4: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + break; + case 8: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + insn->immediate2.value = get_next(int, insn); + insn->immediate2.nbytes = 4; + break; + } + insn->immediate1.got = insn->immediate2.got = 1; +} + +/* Decode ptr16:16/32(Ap) */ +static void __get_immptr(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate1.value = get_next(short, insn); + insn->immediate1.nbytes = 2; + break; + case 4: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + break; + case 8: + /* ptr16:64 is not exist (no segment) */ + return; + } + insn->immediate2.value = get_next(unsigned short, insn); + insn->immediate2.nbytes = 2; + insn->immediate1.got = insn->immediate2.got = 1; +} + +/** + * insn_get_immediate() - Get the immediates of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * displacement bytes. + * Basically, most of immediates are sign-expanded. Unsigned-value can be + * get by bit masking with ((1 << (nbytes * 8)) - 1) + */ +void insn_get_immediate(struct insn *insn) +{ + if (insn->immediate.got) + return; + if (!insn->displacement.got) + insn_get_displacement(insn); + + if (inat_has_moffset(insn->attr)) { + __get_moffset(insn); + goto done; + } + + if (!inat_has_immediate(insn->attr)) + /* no immediates */ + goto done; + + switch (inat_immediate_size(insn->attr)) { + case INAT_IMM_BYTE: + insn->immediate.value = get_next(char, insn); + insn->immediate.nbytes = 1; + break; + case INAT_IMM_WORD: + insn->immediate.value = get_next(short, insn); + insn->immediate.nbytes = 2; + break; + case INAT_IMM_DWORD: + insn->immediate.value = get_next(int, insn); + insn->immediate.nbytes = 4; + break; + case INAT_IMM_QWORD: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + insn->immediate2.value = get_next(int, insn); + insn->immediate2.nbytes = 4; + break; + case INAT_IMM_PTR: + __get_immptr(insn); + break; + case INAT_IMM_VWORD32: + __get_immv32(insn); + break; + case INAT_IMM_VWORD: + __get_immv(insn); + break; + default: + break; + } + if (inat_has_second_immediate(insn->attr)) { + insn->immediate2.value = get_next(char, insn); + insn->immediate2.nbytes = 1; + } +done: + insn->immediate.got = 1; +} + +/** + * insn_get_length() - Get the length of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * immediates bytes. + */ +void insn_get_length(struct insn *insn) +{ + if (insn->length) + return; + if (!insn->immediate.got) + insn_get_immediate(insn); + insn->length = (unsigned char)((unsigned long)insn->next_byte + - (unsigned long)insn->kaddr); +} diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt new file mode 100644 index 000000000000..083dd59dd74b --- /dev/null +++ b/arch/x86/lib/x86-opcode-map.txt @@ -0,0 +1,719 @@ +# x86 Opcode Maps +# +# +# Table: table-name +# Referrer: escaped-name +# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] +# (or) +# opcode: escape # escaped-name +# EndTable +# +# +# GrpTable: GrpXXX +# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] +# EndTable +# + +Table: one byte opcode +Referrer: +# 0x00 - 0x0f +00: ADD Eb,Gb +01: ADD Ev,Gv +02: ADD Gb,Eb +03: ADD Gv,Ev +04: ADD AL,Ib +05: ADD rAX,Iz +06: PUSH ES (i64) +07: POP ES (i64) +08: OR Eb,Gb +09: OR Ev,Gv +0a: OR Gb,Eb +0b: OR Gv,Ev +0c: OR AL,Ib +0d: OR rAX,Iz +0e: PUSH CS (i64) +0f: escape # 2-byte escape +# 0x10 - 0x1f +10: ADC Eb,Gb +11: ADC Ev,Gv +12: ADC Gb,Eb +13: ADC Gv,Ev +14: ADC AL,Ib +15: ADC rAX,Iz +16: PUSH SS (i64) +17: POP SS (i64) +18: SBB Eb,Gb +19: SBB Ev,Gv +1a: SBB Gb,Eb +1b: SBB Gv,Ev +1c: SBB AL,Ib +1d: SBB rAX,Iz +1e: PUSH DS (i64) +1f: POP DS (i64) +# 0x20 - 0x2f +20: AND Eb,Gb +21: AND Ev,Gv +22: AND Gb,Eb +23: AND Gv,Ev +24: AND AL,Ib +25: AND rAx,Iz +26: SEG=ES (Prefix) +27: DAA (i64) +28: SUB Eb,Gb +29: SUB Ev,Gv +2a: SUB Gb,Eb +2b: SUB Gv,Ev +2c: SUB AL,Ib +2d: SUB rAX,Iz +2e: SEG=CS (Prefix) +2f: DAS (i64) +# 0x30 - 0x3f +30: XOR Eb,Gb +31: XOR Ev,Gv +32: XOR Gb,Eb +33: XOR Gv,Ev +34: XOR AL,Ib +35: XOR rAX,Iz +36: SEG=SS (Prefix) +37: AAA (i64) +38: CMP Eb,Gb +39: CMP Ev,Gv +3a: CMP Gb,Eb +3b: CMP Gv,Ev +3c: CMP AL,Ib +3d: CMP rAX,Iz +3e: SEG=DS (Prefix) +3f: AAS (i64) +# 0x40 - 0x4f +40: INC eAX (i64) | REX (o64) +41: INC eCX (i64) | REX.B (o64) +42: INC eDX (i64) | REX.X (o64) +43: INC eBX (i64) | REX.XB (o64) +44: INC eSP (i64) | REX.R (o64) +45: INC eBP (i64) | REX.RB (o64) +46: INC eSI (i64) | REX.RX (o64) +47: INC eDI (i64) | REX.RXB (o64) +48: DEC eAX (i64) | REX.W (o64) +49: DEC eCX (i64) | REX.WB (o64) +4a: DEC eDX (i64) | REX.WX (o64) +4b: DEC eBX (i64) | REX.WXB (o64) +4c: DEC eSP (i64) | REX.WR (o64) +4d: DEC eBP (i64) | REX.WRB (o64) +4e: DEC eSI (i64) | REX.WRX (o64) +4f: DEC eDI (i64) | REX.WRXB (o64) +# 0x50 - 0x5f +50: PUSH rAX/r8 (d64) +51: PUSH rCX/r9 (d64) +52: PUSH rDX/r10 (d64) +53: PUSH rBX/r11 (d64) +54: PUSH rSP/r12 (d64) +55: PUSH rBP/r13 (d64) +56: PUSH rSI/r14 (d64) +57: PUSH rDI/r15 (d64) +58: POP rAX/r8 (d64) +59: POP rCX/r9 (d64) +5a: POP rDX/r10 (d64) +5b: POP rBX/r11 (d64) +5c: POP rSP/r12 (d64) +5d: POP rBP/r13 (d64) +5e: POP rSI/r14 (d64) +5f: POP rDI/r15 (d64) +# 0x60 - 0x6f +60: PUSHA/PUSHAD (i64) +61: POPA/POPAD (i64) +62: BOUND Gv,Ma (i64) +63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) +64: SEG=FS (Prefix) +65: SEG=GS (Prefix) +66: Operand-Size (Prefix) +67: Address-Size (Prefix) +68: PUSH Iz (d64) +69: IMUL Gv,Ev,Iz +6a: PUSH Ib (d64) +6b: IMUL Gv,Ev,Ib +6c: INS/INSB Yb,DX +6d: INS/INSW/INSD Yz,DX +6e: OUTS/OUTSB DX,Xb +6f: OUTS/OUTSW/OUTSD DX,Xz +# 0x70 - 0x7f +70: JO Jb +71: JNO Jb +72: JB/JNAE/JC Jb +73: JNB/JAE/JNC Jb +74: JZ/JE Jb +75: JNZ/JNE Jb +76: JBE/JNA Jb +77: JNBE/JA Jb +78: JS Jb +79: JNS Jb +7a: JP/JPE Jb +7b: JNP/JPO Jb +7c: JL/JNGE Jb +7d: JNL/JGE Jb +7e: JLE/JNG Jb +7f: JNLE/JG Jb +# 0x80 - 0x8f +80: Grp1 Eb,Ib (1A) +81: Grp1 Ev,Iz (1A) +82: Grp1 Eb,Ib (1A),(i64) +83: Grp1 Ev,Ib (1A) +84: TEST Eb,Gb +85: TEST Ev,Gv +86: XCHG Eb,Gb +87: XCHG Ev,Gv +88: MOV Eb,Gb +89: MOV Ev,Gv +8a: MOV Gb,Eb +8b: MOV Gv,Ev +8c: MOV Ev,Sw +8d: LEA Gv,M +8e: MOV Sw,Ew +8f: Grp1A (1A) | POP Ev (d64) +# 0x90 - 0x9f +90: NOP | PAUSE (F3) | XCHG r8,rAX +91: XCHG rCX/r9,rAX +92: XCHG rDX/r10,rAX +93: XCHG rBX/r11,rAX +94: XCHG rSP/r12,rAX +95: XCHG rBP/r13,rAX +96: XCHG rSI/r14,rAX +97: XCHG rDI/r15,rAX +98: CBW/CWDE/CDQE +99: CWD/CDQ/CQO +9a: CALLF Ap (i64) +9b: FWAIT/WAIT +9c: PUSHF/D/Q Fv (d64) +9d: POPF/D/Q Fv (d64) +9e: SAHF +9f: LAHF +# 0xa0 - 0xaf +a0: MOV AL,Ob +a1: MOV rAX,Ov +a2: MOV Ob,AL +a3: MOV Ov,rAX +a4: MOVS/B Xb,Yb +a5: MOVS/W/D/Q Xv,Yv +a6: CMPS/B Xb,Yb +a7: CMPS/W/D Xv,Yv +a8: TEST AL,Ib +a9: TEST rAX,Iz +aa: STOS/B Yb,AL +ab: STOS/W/D/Q Yv,rAX +ac: LODS/B AL,Xb +ad: LODS/W/D/Q rAX,Xv +ae: SCAS/B AL,Yb +af: SCAS/W/D/Q rAX,Xv +# 0xb0 - 0xbf +b0: MOV AL/R8L,Ib +b1: MOV CL/R9L,Ib +b2: MOV DL/R10L,Ib +b3: MOV BL/R11L,Ib +b4: MOV AH/R12L,Ib +b5: MOV CH/R13L,Ib +b6: MOV DH/R14L,Ib +b7: MOV BH/R15L,Ib +b8: MOV rAX/r8,Iv +b9: MOV rCX/r9,Iv +ba: MOV rDX/r10,Iv +bb: MOV rBX/r11,Iv +bc: MOV rSP/r12,Iv +bd: MOV rBP/r13,Iv +be: MOV rSI/r14,Iv +bf: MOV rDI/r15,Iv +# 0xc0 - 0xcf +c0: Grp2 Eb,Ib (1A) +c1: Grp2 Ev,Ib (1A) +c2: RETN Iw (f64) +c3: RETN +c4: LES Gz,Mp (i64) +c5: LDS Gz,Mp (i64) +c6: Grp11 Eb,Ib (1A) +c7: Grp11 Ev,Iz (1A) +c8: ENTER Iw,Ib +c9: LEAVE (d64) +ca: RETF Iw +cb: RETF +cc: INT3 +cd: INT Ib +ce: INTO (i64) +cf: IRET/D/Q +# 0xd0 - 0xdf +d0: Grp2 Eb,1 (1A) +d1: Grp2 Ev,1 (1A) +d2: Grp2 Eb,CL (1A) +d3: Grp2 Ev,CL (1A) +d4: AAM Ib (i64) +d5: AAD Ib (i64) +d6: +d7: XLAT/XLATB +d8: ESC +d9: ESC +da: ESC +db: ESC +dc: ESC +dd: ESC +de: ESC +df: ESC +# 0xe0 - 0xef +e0: LOOPNE/LOOPNZ Jb (f64) +e1: LOOPE/LOOPZ Jb (f64) +e2: LOOP Jb (f64) +e3: JrCXZ Jb (f64) +e4: IN AL,Ib +e5: IN eAX,Ib +e6: OUT Ib,AL +e7: OUT Ib,eAX +e8: CALL Jz (f64) +e9: JMP-near Jz (f64) +ea: JMP-far Ap (i64) +eb: JMP-short Jb (f64) +ec: IN AL,DX +ed: IN eAX,DX +ee: OUT DX,AL +ef: OUT DX,eAX +# 0xf0 - 0xff +f0: LOCK (Prefix) +f1: +f2: REPNE (Prefix) +f3: REP/REPE (Prefix) +f4: HLT +f5: CMC +f6: Grp3_1 Eb (1A) +f7: Grp3_2 Ev (1A) +f8: CLC +f9: STC +fa: CLI +fb: STI +fc: CLD +fd: STD +fe: Grp4 (1A) +ff: Grp5 (1A) +EndTable + +Table: 2-byte opcode # First Byte is 0x0f +Referrer: 2-byte escape +# 0x0f 0x00-0x0f +00: Grp6 (1A) +01: Grp7 (1A) +02: LAR Gv,Ew +03: LSL Gv,Ew +04: +05: SYSCALL (o64) +06: CLTS +07: SYSRET (o64) +08: INVD +09: WBINVD +0a: +0b: UD2 (1B) +0c: +0d: NOP Ev +0e: +0f: +# 0x0f 0x10-0x1f +10: +11: +12: +13: +14: +15: +16: +17: +18: Grp16 (1A) +19: +1a: +1b: +1c: +1d: +1e: +1f: NOP Ev +# 0x0f 0x20-0x2f +20: MOV Rd,Cd +21: MOV Rd,Dd +22: MOV Cd,Rd +23: MOV Dd,Rd +24: +25: +26: +27: +28: movaps Vps,Wps | movapd Vpd,Wpd (66) +29: movaps Wps,Vps | movapd Wpd,Vpd (66) +2a: +2b: +2c: +2d: +2e: +2f: +# 0x0f 0x30-0x3f +30: WRMSR +31: RDTSC +32: RDMSR +33: RDPMC +34: SYSENTER +35: SYSEXIT +36: +37: GETSEC +38: escape # 3-byte escape 1 +39: +3a: escape # 3-byte escape 2 +3b: +3c: +3d: +3e: +3f: +# 0x0f 0x40-0x4f +40: CMOVO Gv,Ev +41: CMOVNO Gv,Ev +42: CMOVB/C/NAE Gv,Ev +43: CMOVAE/NB/NC Gv,Ev +44: CMOVE/Z Gv,Ev +45: CMOVNE/NZ Gv,Ev +46: CMOVBE/NA Gv,Ev +47: CMOVA/NBE Gv,Ev +48: CMOVS Gv,Ev +49: CMOVNS Gv,Ev +4a: CMOVP/PE Gv,Ev +4b: CMOVNP/PO Gv,Ev +4c: CMOVL/NGE Gv,Ev +4d: CMOVNL/GE Gv,Ev +4e: CMOVLE/NG Gv,Ev +4f: CMOVNLE/G Gv,Ev +# 0x0f 0x50-0x5f +50: +51: +52: +53: +54: +55: +56: +57: +58: +59: +5a: +5b: +5c: +5d: +5e: +5f: +# 0x0f 0x60-0x6f +60: +61: +62: +63: +64: +65: +66: +67: +68: +69: +6a: +6b: +6c: +6d: +6e: +6f: +# 0x0f 0x70-0x7f +70: +71: Grp12 (1A) +72: Grp13 (1A) +73: Grp14 (1A) +74: +75: +76: +77: +78: VMREAD Ed/q,Gd/q +79: VMWRITE Gd/q,Ed/q +7a: +7b: +7c: +7d: +7e: +7f: +# 0x0f 0x80-0x8f +80: JO Jz (f64) +81: JNO Jz (f64) +82: JB/JNAE/JC Jz (f64) +83: JNB/JAE/JNC Jz (f64) +84: JZ/JE Jz (f64) +85: JNZ/JNE Jz (f64) +86: JBE/JNA Jz (f64) +87: JNBE/JA Jz (f64) +88: JS Jz (f64) +89: JNS Jz (f64) +8a: JP/JPE Jz (f64) +8b: JNP/JPO Jz (f64) +8c: JL/JNGE Jz (f64) +8d: JNL/JGE Jz (f64) +8e: JLE/JNG Jz (f64) +8f: JNLE/JG Jz (f64) +# 0x0f 0x90-0x9f +90: SETO Eb +91: SETNO Eb +92: SETB/C/NAE Eb +93: SETAE/NB/NC Eb +94: SETE/Z Eb +95: SETNE/NZ Eb +96: SETBE/NA Eb +97: SETA/NBE Eb +98: SETS Eb +99: SETNS Eb +9a: SETP/PE Eb +9b: SETNP/PO Eb +9c: SETL/NGE Eb +9d: SETNL/GE Eb +9e: SETLE/NG Eb +9f: SETNLE/G Eb +# 0x0f 0xa0-0xaf +a0: PUSH FS (d64) +a1: POP FS (d64) +a2: CPUID +a3: BT Ev,Gv +a4: SHLD Ev,Gv,Ib +a5: SHLD Ev,Gv,CL +a6: +a7: GrpRNG +a8: PUSH GS (d64) +a9: POP GS (d64) +aa: RSM +ab: BTS Ev,Gv +ac: SHRD Ev,Gv,Ib +ad: SHRD Ev,Gv,CL +ae: Grp15 (1A),(1C) +af: IMUL Gv,Ev +# 0x0f 0xb0-0xbf +b0: CMPXCHG Eb,Gb +b1: CMPXCHG Ev,Gv +b2: LSS Gv,Mp +b3: BTR Ev,Gv +b4: LFS Gv,Mp +b5: LGS Gv,Mp +b6: MOVZX Gv,Eb +b7: MOVZX Gv,Ew +b8: JMPE | POPCNT Gv,Ev (F3) +b9: Grp10 (1A) +ba: Grp8 Ev,Ib (1A) +bb: BTC Ev,Gv +bc: BSF Gv,Ev +bd: BSR Gv,Ev +be: MOVSX Gv,Eb +bf: MOVSX Gv,Ew +# 0x0f 0xc0-0xcf +c0: XADD Eb,Gb +c1: XADD Ev,Gv +c2: +c3: movnti Md/q,Gd/q +c4: +c5: +c6: +c7: Grp9 (1A) +c8: BSWAP RAX/EAX/R8/R8D +c9: BSWAP RCX/ECX/R9/R9D +ca: BSWAP RDX/EDX/R10/R10D +cb: BSWAP RBX/EBX/R11/R11D +cc: BSWAP RSP/ESP/R12/R12D +cd: BSWAP RBP/EBP/R13/R13D +ce: BSWAP RSI/ESI/R14/R14D +cf: BSWAP RDI/EDI/R15/R15D +# 0x0f 0xd0-0xdf +d0: +d1: +d2: +d3: +d4: +d5: +d6: +d7: +d8: +d9: +da: +db: +dc: +dd: +de: +df: +# 0x0f 0xe0-0xef +e0: +e1: +e2: +e3: +e4: +e5: +e6: +e7: +e8: +e9: +ea: +eb: +ec: +ed: +ee: +ef: +# 0x0f 0xf0-0xff +f0: +f1: +f2: +f3: +f4: +f5: +f6: +f7: +f8: +f9: +fa: +fb: +fc: +fd: +fe: +ff: +EndTable + +Table: 3-byte opcode 1 +Referrer: 3-byte escape 1 +80: INVEPT Gd/q,Mdq (66) +81: INVPID Gd/q,Mdq (66) +f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2) +f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2) +EndTable + +Table: 3-byte opcode 2 +Referrer: 3-byte escape 2 +# all opcode is for SSE +EndTable + +GrpTable: Grp1 +0: ADD +1: OR +2: ADC +3: SBB +4: AND +5: SUB +6: XOR +7: CMP +EndTable + +GrpTable: Grp1A +0: POP +EndTable + +GrpTable: Grp2 +0: ROL +1: ROR +2: RCL +3: RCR +4: SHL/SAL +5: SHR +6: +7: SAR +EndTable + +GrpTable: Grp3_1 +0: TEST Eb,Ib +1: +2: NOT Eb +3: NEG Eb +4: MUL AL,Eb +5: IMUL AL,Eb +6: DIV AL,Eb +7: IDIV AL,Eb +EndTable + +GrpTable: Grp3_2 +0: TEST Ev,Iz +1: +2: NOT Ev +3: NEG Ev +4: MUL rAX,Ev +5: IMUL rAX,Ev +6: DIV rAX,Ev +7: IDIV rAX,Ev +EndTable + +GrpTable: Grp4 +0: INC Eb +1: DEC Eb +EndTable + +GrpTable: Grp5 +0: INC Ev +1: DEC Ev +2: CALLN Ev (f64) +3: CALLF Ep +4: JMPN Ev (f64) +5: JMPF Ep +6: PUSH Ev (d64) +7: +EndTable + +GrpTable: Grp6 +0: SLDT Rv/Mw +1: STR Rv/Mw +2: LLDT Ew +3: LTR Ew +4: VERR Ew +5: VERW Ew +EndTable + +GrpTable: Grp7 +0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) +1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001) +2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) +3: LIDT Ms +4: SMSW Mw/Rv +5: +6: LMSW Ew +7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) +EndTable + +GrpTable: Grp8 +4: BT +5: BTS +6: BTR +7: BTC +EndTable + +GrpTable: Grp9 +1: CMPXCHG8B/16B Mq/Mdq +6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) +7: VMPTRST Mq +EndTable + +GrpTable: Grp10 +EndTable + +GrpTable: Grp11 +0: MOV +EndTable + +GrpTable: Grp12 +EndTable + +GrpTable: Grp13 +EndTable + +GrpTable: Grp14 +EndTable + +GrpTable: Grp15 +0: fxsave +1: fxstor +2: ldmxcsr +3: stmxcsr +4: XSAVE +5: XRSTOR | lfence (11B) +6: mfence (11B) +7: clflush | sfence (11B) +EndTable + +GrpTable: Grp16 +0: prefetch NTA +1: prefetch T0 +2: prefetch T1 +3: prefetch T2 +EndTable + +GrpTable: GrpRNG +0: xstore-rng +1: xcrypt-ecb +2: xcrypt-cbc +4: xcrypt-cfb +5: xcrypt-ofb +EndTable diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk new file mode 100644 index 000000000000..93b62c92d044 --- /dev/null +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -0,0 +1,314 @@ +#!/bin/awk -f +# gen-insn-attr-x86.awk: Instruction attribute table generator +# Written by Masami Hiramatsu +# +# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c + +BEGIN { + print "/* x86 opcode map generated from x86-opcode-map.txt */" + print "/* Do not change this code. */" + ggid = 1 + geid = 1 + + opnd_expr = "^[[:alpha:]]" + ext_expr = "^\\(" + sep_expr = "^\\|$" + group_expr = "^Grp[[:alnum:]]+" + + imm_expr = "^[IJAO][[:lower:]]" + imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" + imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)" + imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)" + imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)" + imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" + imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" + imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)" + imm_flag["Ob"] = "INAT_MOFFSET" + imm_flag["Ov"] = "INAT_MOFFSET" + + modrm_expr = "^([CDEGMNPQRSUVW][[:lower:]]+|NTA|T[012])" + force64_expr = "\\([df]64\\)" + rex_expr = "^REX(\\.[XRWB]+)*" + fpu_expr = "^ESC" # TODO + + lprefix1_expr = "\\(66\\)" + delete lptable1 + lprefix2_expr = "\\(F2\\)" + delete lptable2 + lprefix3_expr = "\\(F3\\)" + delete lptable3 + max_lprefix = 4 + + prefix_expr = "\\(Prefix\\)" + prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" + prefix_num["REPNE"] = "INAT_PFX_REPNE" + prefix_num["REP/REPE"] = "INAT_PFX_REPE" + prefix_num["LOCK"] = "INAT_PFX_LOCK" + prefix_num["SEG=CS"] = "INAT_PFX_CS" + prefix_num["SEG=DS"] = "INAT_PFX_DS" + prefix_num["SEG=ES"] = "INAT_PFX_ES" + prefix_num["SEG=FS"] = "INAT_PFX_FS" + prefix_num["SEG=GS"] = "INAT_PFX_GS" + prefix_num["SEG=SS"] = "INAT_PFX_SS" + prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" + + delete table + delete etable + delete gtable + eid = -1 + gid = -1 +} + +function semantic_error(msg) { + print "Semantic error at " NR ": " msg > "/dev/stderr" + exit 1 +} + +function debug(msg) { + print "DEBUG: " msg +} + +function array_size(arr, i,c) { + c = 0 + for (i in arr) + c++ + return c +} + +/^Table:/ { + print "/* " $0 " */" +} + +/^Referrer:/ { + if (NF == 1) { + # primary opcode table + tname = "inat_primary_table" + eid = -1 + } else { + # escape opcode table + ref = "" + for (i = 2; i <= NF; i++) + ref = ref $i + eid = escape[ref] + tname = sprintf("inat_escape_table_%d", eid) + } +} + +/^GrpTable:/ { + print "/* " $0 " */" + if (!($2 in group)) + semantic_error("No group: " $2 ) + gid = group[$2] + tname = "inat_group_table_" gid +} + +function print_table(tbl,name,fmt,n) +{ + print "const insn_attr_t " name " = {" + for (i = 0; i < n; i++) { + id = sprintf(fmt, i) + if (tbl[id]) + print " [" id "] = " tbl[id] "," + } + print "};" +} + +/^EndTable/ { + if (gid != -1) { + # print group tables + if (array_size(table) != 0) { + print_table(table, tname "[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,0] = tname + } + if (array_size(lptable1) != 0) { + print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,1] = tname "_1" + } + if (array_size(lptable2) != 0) { + print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,2] = tname "_2" + } + if (array_size(lptable3) != 0) { + print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,3] = tname "_3" + } + } else { + # print primary/escaped tables + if (array_size(table) != 0) { + print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,0] = tname + } + if (array_size(lptable1) != 0) { + print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,1] = tname "_1" + } + if (array_size(lptable2) != 0) { + print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,2] = tname "_2" + } + if (array_size(lptable3) != 0) { + print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,3] = tname "_3" + } + } + print "" + delete table + delete lptable1 + delete lptable2 + delete lptable3 + gid = -1 + eid = -1 +} + +function add_flags(old,new) { + if (old && new) + return old " | " new + else if (old) + return old + else + return new +} + +# convert operands to flags. +function convert_operands(opnd, i,imm,mod) +{ + imm = null + mod = null + for (i in opnd) { + i = opnd[i] + if (match(i, imm_expr) == 1) { + if (!imm_flag[i]) + semantic_error("Unknown imm opnd: " i) + if (imm) { + if (i != "Ib") + semantic_error("Second IMM error") + imm = add_flags(imm, "INAT_SCNDIMM") + } else + imm = imm_flag[i] + } else if (match(i, modrm_expr)) + mod = "INAT_MODRM" + } + return add_flags(imm, mod) +} + +/^[0-9a-f]+\:/ { + if (NR == 1) + next + # get index + idx = "0x" substr($1, 1, index($1,":") - 1) + if (idx in table) + semantic_error("Redefine " idx " in " tname) + + # check if escaped opcode + if ("escape" == $2) { + if ($3 != "#") + semantic_error("No escaped name") + ref = "" + for (i = 4; i <= NF; i++) + ref = ref $i + if (ref in escape) + semantic_error("Redefine escape (" ref ")") + escape[ref] = geid + geid++ + table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")" + next + } + + variant = null + # converts + i = 2 + while (i <= NF) { + opcode = $(i++) + delete opnds + ext = null + flags = null + opnd = null + # parse one opcode + if (match($i, opnd_expr)) { + opnd = $i + split($(i++), opnds, ",") + flags = convert_operands(opnds) + } + if (match($i, ext_expr)) + ext = $(i++) + if (match($i, sep_expr)) + i++ + else if (i < NF) + semantic_error($i " is not a separator") + + # check if group opcode + if (match(opcode, group_expr)) { + if (!(opcode in group)) { + group[opcode] = ggid + ggid++ + } + flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")") + } + # check force(or default) 64bit + if (match(ext, force64_expr)) + flags = add_flags(flags, "INAT_FORCE64") + + # check REX prefix + if (match(opcode, rex_expr)) + flags = add_flags(flags, "INAT_REXPFX") + + # check coprocessor escape : TODO + if (match(opcode, fpu_expr)) + flags = add_flags(flags, "INAT_MODRM") + + # check prefixes + if (match(ext, prefix_expr)) { + if (!prefix_num[opcode]) + semantic_error("Unknown prefix: " opcode) + flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")") + } + if (length(flags) == 0) + continue + # check if last prefix + if (match(ext, lprefix1_expr)) { + lptable1[idx] = add_flags(lptable1[idx],flags) + variant = "INAT_VARIANT" + } else if (match(ext, lprefix2_expr)) { + lptable2[idx] = add_flags(lptable2[idx],flags) + variant = "INAT_VARIANT" + } else if (match(ext, lprefix3_expr)) { + lptable3[idx] = add_flags(lptable3[idx],flags) + variant = "INAT_VARIANT" + } else { + table[idx] = add_flags(table[idx],flags) + } + } + if (variant) + table[idx] = add_flags(table[idx],variant) +} + +END { + # print escape opcode map's array + print "/* Escape opcode map array */" + print "const insn_attr_t const *inat_escape_tables[INAT_ESC_MAX + 1]" \ + "[INAT_LPREFIX_MAX + 1] = {" + for (i = 0; i < geid; i++) + for (j = 0; j < max_lprefix; j++) + if (etable[i,j]) + print " ["i"]["j"] = "etable[i,j]"," + print "};\n" + # print group opcode map's array + print "/* Group opcode map array */" + print "const insn_attr_t const *inat_group_tables[INAT_GRP_MAX + 1]"\ + "[INAT_LPREFIX_MAX + 1] = {" + for (i = 0; i < ggid; i++) + for (j = 0; j < max_lprefix; j++) + if (gtable[i,j]) + print " ["i"]["j"] = "gtable[i,j]"," + print "};" +} -- cgit v1.2.3 From ca0e9badd1a39fecdd235f4bf1481b9da756e27b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 13 Aug 2009 16:34:21 -0400 Subject: x86: X86 instruction decoder build-time selftest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a user-space selftest of x86 instruction decoder at kernel build time. When CONFIG_X86_DECODER_SELFTEST=y, Kbuild builds a test harness of x86 instruction decoder and performs it after building vmlinux. The test compares the results of objdump and x86 instruction decoder code and check there are no differences. Signed-off-by: Masami Hiramatsu Signed-off-by: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: PrzemysÅ‚aw PaweÅ‚czyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090813203421.31965.29006.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker --- arch/x86/Kconfig.debug | 9 ++++ arch/x86/Makefile | 3 ++ arch/x86/tools/Makefile | 15 ++++++ arch/x86/tools/distill.awk | 42 ++++++++++++++++ arch/x86/tools/test_get_len.c | 113 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 182 insertions(+) create mode 100644 arch/x86/tools/Makefile create mode 100644 arch/x86/tools/distill.awk create mode 100644 arch/x86/tools/test_get_len.c (limited to 'arch/x86') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index d105f29bb6bb..7d0b681a132b 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -186,6 +186,15 @@ config X86_DS_SELFTEST config HAVE_MMIOTRACE_SUPPORT def_bool y +config X86_DECODER_SELFTEST + bool "x86 instruction decoder selftest" + depends on DEBUG_KERNEL + ---help--- + Perform x86 instruction decoder selftests at build time. + This option is useful for checking the sanity of x86 instruction + decoder code. + If unsure, say "N". + # # IO delay types: # diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1b68659c41b4..5fe16bfd15ac 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -154,6 +154,9 @@ all: bzImage KBUILD_IMAGE := $(boot)/bzImage bzImage: vmlinux +ifeq ($(CONFIG_X86_DECODER_SELFTEST),y) + $(Q)$(MAKE) $(build)=arch/x86/tools posttest +endif $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@ diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile new file mode 100644 index 000000000000..3dd626b99dc8 --- /dev/null +++ b/arch/x86/tools/Makefile @@ -0,0 +1,15 @@ +PHONY += posttest +quiet_cmd_posttest = TEST $@ + cmd_posttest = $(OBJDUMP) -d $(objtree)/vmlinux | awk -f $(srctree)/arch/x86/tools/distill.awk | $(obj)/test_get_len + +posttest: $(obj)/test_get_len vmlinux + $(call cmd,posttest) + +hostprogs-y := test_get_len + +# -I needed for generated C source and C source which in the kernel tree. +HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ + +# Dependancies are also needed. +$(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c + diff --git a/arch/x86/tools/distill.awk b/arch/x86/tools/distill.awk new file mode 100644 index 000000000000..d433619bb866 --- /dev/null +++ b/arch/x86/tools/distill.awk @@ -0,0 +1,42 @@ +#!/bin/awk -f +# Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len +# Distills the disassembly as follows: +# - Removes all lines except the disassembled instructions. +# - For instructions that exceed 1 line (7 bytes), crams all the hex bytes +# into a single line. +# - Remove bad(or prefix only) instructions + +BEGIN { + prev_addr = "" + prev_hex = "" + prev_mnemonic = "" + bad_expr = "(\\(bad\\)|^rex|^.byte|^rep(z|nz)$|^lock$|^es$|^cs$|^ss$|^ds$|^fs$|^gs$|^data(16|32)$|^addr(16|32|64))" + fwait_expr = "^9b " + fwait_str="9b\tfwait" +} + +/^ *[0-9a-f]+:/ { + if (split($0, field, "\t") < 3) { + # This is a continuation of the same insn. + prev_hex = prev_hex field[2] + } else { + # Skip bad instructions + if (match(prev_mnemonic, bad_expr)) + prev_addr = "" + # Split fwait from other f* instructions + if (match(prev_hex, fwait_expr) && prev_mnemonic != "fwait") { + printf "%s\t%s\n", prev_addr, fwait_str + sub(fwait_expr, "", prev_hex) + } + if (prev_addr != "") + printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic + prev_addr = field[1] + prev_hex = field[2] + prev_mnemonic = field[3] + } +} + +END { + if (prev_addr != "") + printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic +} diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c new file mode 100644 index 000000000000..1e81adb2d8a9 --- /dev/null +++ b/arch/x86/tools/test_get_len.c @@ -0,0 +1,113 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2009 + */ + +#include +#include +#include +#include + +#ifdef __x86_64__ +#define CONFIG_X86_64 +#else +#define CONFIG_X86_32 +#endif +#define unlikely(cond) (cond) + +#include +#include +#include + +/* + * Test of instruction analysis in general and insn_get_length() in + * particular. See if insn_get_length() and the disassembler agree + * on the length of each instruction in an elf disassembly. + * + * Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len + */ + +const char *prog; + +static void usage(void) +{ + fprintf(stderr, "Usage: objdump -d a.out | awk -f distill.awk |" + " ./test_get_len\n"); + exit(1); +} + +static void malformed_line(const char *line, int line_nr) +{ + fprintf(stderr, "%s: malformed line %d:\n%s", prog, line_nr, line); + exit(3); +} + +#define BUFSIZE 256 + +int main(int argc, char **argv) +{ + char line[BUFSIZE]; + unsigned char insn_buf[16]; + struct insn insn; + int insns = 0; + + prog = argv[0]; + if (argc > 1) + usage(); + + while (fgets(line, BUFSIZE, stdin)) { + char copy[BUFSIZE], *s, *tab1, *tab2; + int nb = 0; + unsigned int b; + + insns++; + memset(insn_buf, 0, 16); + strcpy(copy, line); + tab1 = strchr(copy, '\t'); + if (!tab1) + malformed_line(line, insns); + s = tab1 + 1; + s += strspn(s, " "); + tab2 = strchr(s, '\t'); + if (!tab2) + malformed_line(line, insns); + *tab2 = '\0'; /* Characters beyond tab2 aren't examined */ + while (s < tab2) { + if (sscanf(s, "%x", &b) == 1) { + insn_buf[nb++] = (unsigned char) b; + s += 3; + } else + break; + } + /* Decode an instruction */ +#ifdef __x86_64__ + insn_init(&insn, insn_buf, 1); +#else + insn_init(&insn, insn_buf, 0); +#endif + insn_get_length(&insn); + if (insn.length != nb) { + fprintf(stderr, "Error: %s", line); + fprintf(stderr, "Error: objdump says %d bytes, but " + "insn_get_length() says %d (attr:%x)\n", nb, + insn.length, insn.attr); + exit(2); + } + } + fprintf(stderr, "Succeed: decoded and checked %d instructions\n", + insns); + return 0; +} -- cgit v1.2.3 From b46b3d70c9c017d7c4ec49f7f3ffd0af5a622277 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 13 Aug 2009 16:34:28 -0400 Subject: kprobes: Checks probe address is instruction boudary on x86 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ensure safeness of inserting kprobes by checking whether the specified address is at the first byte of an instruction on x86. This is done by decoding probed function from its head to the probe point. Signed-off-by: Masami Hiramatsu Acked-by: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: Jim Keniston Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: PrzemysÅ‚aw PaweÅ‚czyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090813203428.31965.21939.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/kprobes.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7b5169d2b000..aa15f3e1f64b 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -48,12 +48,14 @@ #include #include #include +#include #include #include #include #include #include +#include void jprobe_return_end(void); @@ -244,6 +246,75 @@ retry: } } +/* Recover the probed instruction at addr for further analysis. */ +static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) +{ + struct kprobe *kp; + kp = get_kprobe((void *)addr); + if (!kp) + return -EINVAL; + + /* + * Basically, kp->ainsn.insn has an original instruction. + * However, RIP-relative instruction can not do single-stepping + * at different place, fix_riprel() tweaks the displacement of + * that instruction. In that case, we can't recover the instruction + * from the kp->ainsn.insn. + * + * On the other hand, kp->opcode has a copy of the first byte of + * the probed instruction, which is overwritten by int3. And + * the instruction at kp->addr is not modified by kprobes except + * for the first byte, we can recover the original instruction + * from it and kp->opcode. + */ + memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + buf[0] = kp->opcode; + return 0; +} + +/* Dummy buffers for kallsyms_lookup */ +static char __dummy_buf[KSYM_NAME_LEN]; + +/* Check if paddr is at an instruction boundary */ +static int __kprobes can_probe(unsigned long paddr) +{ + int ret; + unsigned long addr, offset = 0; + struct insn insn; + kprobe_opcode_t buf[MAX_INSN_SIZE]; + + if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf)) + return 0; + + /* Decode instructions */ + addr = paddr - offset; + while (addr < paddr) { + kernel_insn_init(&insn, (void *)addr); + insn_get_opcode(&insn); + + /* + * Check if the instruction has been modified by another + * kprobe, in which case we replace the breakpoint by the + * original instruction in our buffer. + */ + if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { + ret = recover_probed_instruction(buf, addr); + if (ret) + /* + * Another debugging subsystem might insert + * this breakpoint. In that case, we can't + * recover it. + */ + return 0; + kernel_insn_init(&insn, buf); + } + insn_get_length(&insn); + addr += insn.length; + } + + return (addr == paddr); +} + /* * Returns non-zero if opcode modifies the interrupt flag. */ @@ -359,6 +430,8 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p) int __kprobes arch_prepare_kprobe(struct kprobe *p) { + if (!can_probe((unsigned long)p->addr)) + return -EILSEQ; /* insn: must be on special executable page on x86. */ p->ainsn.insn = get_insn_slot(); if (!p->ainsn.insn) -- cgit v1.2.3 From 89ae465b0ee470f7d3f8a1c61353445c3acbbe2a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 13 Aug 2009 16:34:36 -0400 Subject: kprobes: Cleanup fix_riprel() using insn decoder on x86 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cleanup fix_riprel() in arch/x86/kernel/kprobes.c by using the new x86 instruction decoder instead of using comparisons with raw ad hoc numeric opcodes. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: Jim Keniston Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: PrzemysÅ‚aw PaweÅ‚czyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090813203436.31965.34374.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/kprobes.c | 128 +++++++++------------------------------------- 1 file changed, 23 insertions(+), 105 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index aa15f3e1f64b..16ae9610f6ff 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -108,50 +108,6 @@ static const u32 twobyte_is_boostable[256 / 32] = { /* ----------------------------------------------- */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ }; -static const u32 onebyte_has_modrm[256 / 32] = { - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ - /* ----------------------------------------------- */ - W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */ - W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */ - W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */ - W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */ - W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */ - W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */ - W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */ - W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */ - W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ - W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */ - W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */ - W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */ - W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */ - W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ - W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */ - W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) /* f0 */ - /* ----------------------------------------------- */ - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ -}; -static const u32 twobyte_has_modrm[256 / 32] = { - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ - /* ----------------------------------------------- */ - W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */ - W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */ - W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */ - W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */ - W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */ - W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */ - W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */ - W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */ - W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */ - W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */ - W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */ - W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */ - W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */ - W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */ - W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */ - W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* ff */ - /* ----------------------------------------------- */ - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ -}; #undef W struct kretprobe_blackpoint kretprobe_blacklist[] = { @@ -348,68 +304,30 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) static void __kprobes fix_riprel(struct kprobe *p) { #ifdef CONFIG_X86_64 - u8 *insn = p->ainsn.insn; - s64 disp; - int need_modrm; - - /* Skip legacy instruction prefixes. */ - while (1) { - switch (*insn) { - case 0x66: - case 0x67: - case 0x2e: - case 0x3e: - case 0x26: - case 0x64: - case 0x65: - case 0x36: - case 0xf0: - case 0xf3: - case 0xf2: - ++insn; - continue; - } - break; - } + struct insn insn; + kernel_insn_init(&insn, p->ainsn.insn); - /* Skip REX instruction prefix. */ - if (is_REX_prefix(insn)) - ++insn; - - if (*insn == 0x0f) { - /* Two-byte opcode. */ - ++insn; - need_modrm = test_bit(*insn, - (unsigned long *)twobyte_has_modrm); - } else - /* One-byte opcode. */ - need_modrm = test_bit(*insn, - (unsigned long *)onebyte_has_modrm); - - if (need_modrm) { - u8 modrm = *++insn; - if ((modrm & 0xc7) == 0x05) { - /* %rip+disp32 addressing mode */ - /* Displacement follows ModRM byte. */ - ++insn; - /* - * The copied instruction uses the %rip-relative - * addressing mode. Adjust the displacement for the - * difference between the original location of this - * instruction and the location of the copy that will - * actually be run. The tricky bit here is making sure - * that the sign extension happens correctly in this - * calculation, since we need a signed 32-bit result to - * be sign-extended to 64 bits when it's added to the - * %rip value and yield the same 64-bit result that the - * sign-extension of the original signed 32-bit - * displacement would have given. - */ - disp = (u8 *) p->addr + *((s32 *) insn) - - (u8 *) p->ainsn.insn; - BUG_ON((s64) (s32) disp != disp); /* Sanity check. */ - *(s32 *)insn = (s32) disp; - } + if (insn_rip_relative(&insn)) { + s64 newdisp; + u8 *disp; + insn_get_displacement(&insn); + /* + * The copied instruction uses the %rip-relative addressing + * mode. Adjust the displacement for the difference between + * the original location of this instruction and the location + * of the copy that will actually be run. The tricky bit here + * is making sure that the sign extension happens correctly in + * this calculation, since we need a signed 32-bit result to + * be sign-extended to 64 bits when it's added to the %rip + * value and yield the same 64-bit result that the sign- + * extension of the original signed 32-bit displacement would + * have given. + */ + newdisp = (u8 *) p->addr + (s64) insn.displacement.value - + (u8 *) p->ainsn.insn; + BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ + disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn); + *(s32 *) disp = (s32) newdisp; } #endif } -- cgit v1.2.3 From b1cf540f0e5278ecfe8532557e547d833ed269d7 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 13 Aug 2009 16:34:44 -0400 Subject: x86: Add pt_regs register and stack access APIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add following APIs for accessing registers and stack entries from pt_regs. These APIs are required by kprobes-based event tracer on ftrace. Some other debugging tools might be able to use it too. - regs_query_register_offset(const char *name) Query the offset of "name" register. - regs_query_register_name(unsigned int offset) Query the name of register by its offset. - regs_get_register(struct pt_regs *regs, unsigned int offset) Get the value of a register by its offset. - regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) Check the address is in the kernel stack. - regs_get_kernel_stack_nth(struct pt_regs *reg, unsigned int nth) Get Nth entry of the kernel stack. (N >= 0) - regs_get_argument_nth(struct pt_regs *reg, unsigned int nth) Get Nth argument at function call. (N >= 0) Signed-off-by: Masami Hiramatsu Cc: linux-arch@vger.kernel.org Cc: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: Jim Keniston Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: PrzemysÅ‚aw PaweÅ‚czyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090813203444.31965.26374.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker --- arch/x86/include/asm/ptrace.h | 62 +++++++++++++++++++++++ arch/x86/kernel/ptrace.c | 112 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 174 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 0f0d908349aa..a3d49dd7d26e 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -7,6 +7,7 @@ #ifdef __KERNEL__ #include +#include #endif #ifndef __ASSEMBLY__ @@ -216,6 +217,67 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs) return regs->sp; } +/* Query offset/name of register from its name/offset */ +extern int regs_query_register_offset(const char *name); +extern const char *regs_query_register_name(unsigned int offset); +#define MAX_REG_OFFSET (offsetof(struct pt_regs, ss)) + +/** + * regs_get_register() - get register value from its offset + * @regs: pt_regs from which register value is gotten. + * @offset: offset number of the register. + * + * regs_get_register returns the value of a register whose offset from @regs + * is @offset. The @offset is the offset of the register in struct pt_regs. + * If @offset is bigger than MAX_REG_OFFSET, this returns 0. + */ +static inline unsigned long regs_get_register(struct pt_regs *regs, + unsigned int offset) +{ + if (unlikely(offset > MAX_REG_OFFSET)) + return 0; + return *(unsigned long *)((unsigned long)regs + offset); +} + +/** + * regs_within_kernel_stack() - check the address in the stack + * @regs: pt_regs which contains kernel stack pointer. + * @addr: address which is checked. + * + * regs_within_kenel_stack() checks @addr is within the kernel stack page(s). + * If @addr is within the kernel stack, it returns true. If not, returns false. + */ +static inline int regs_within_kernel_stack(struct pt_regs *regs, + unsigned long addr) +{ + return ((addr & ~(THREAD_SIZE - 1)) == + (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))); +} + +/** + * regs_get_kernel_stack_nth() - get Nth entry of the stack + * @regs: pt_regs which contains kernel stack pointer. + * @n: stack entry number. + * + * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which + * is specifined by @regs. If the @n th entry is NOT in the kernel stack, + * this returns 0. + */ +static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, + unsigned int n) +{ + unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); + addr += n; + if (regs_within_kernel_stack(regs, (unsigned long)addr)) + return *addr; + else + return 0; +} + +/* Get Nth argument at function call */ +extern unsigned long regs_get_argument_nth(struct pt_regs *regs, + unsigned int n); + /* * These are defined as per linux/ptrace.h, which see. */ diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 8d7d5c9c1be3..a33a17d5d5c8 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -49,6 +49,118 @@ enum x86_regset { REGSET_IOPERM32, }; +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} +#define REG_OFFSET_END {.name = NULL, .offset = 0} + +static const struct pt_regs_offset regoffset_table[] = { +#ifdef CONFIG_X86_64 + REG_OFFSET_NAME(r15), + REG_OFFSET_NAME(r14), + REG_OFFSET_NAME(r13), + REG_OFFSET_NAME(r12), + REG_OFFSET_NAME(r11), + REG_OFFSET_NAME(r10), + REG_OFFSET_NAME(r9), + REG_OFFSET_NAME(r8), +#endif + REG_OFFSET_NAME(bx), + REG_OFFSET_NAME(cx), + REG_OFFSET_NAME(dx), + REG_OFFSET_NAME(si), + REG_OFFSET_NAME(di), + REG_OFFSET_NAME(bp), + REG_OFFSET_NAME(ax), +#ifdef CONFIG_X86_32 + REG_OFFSET_NAME(ds), + REG_OFFSET_NAME(es), + REG_OFFSET_NAME(fs), + REG_OFFSET_NAME(gs), +#endif + REG_OFFSET_NAME(orig_ax), + REG_OFFSET_NAME(ip), + REG_OFFSET_NAME(cs), + REG_OFFSET_NAME(flags), + REG_OFFSET_NAME(sp), + REG_OFFSET_NAME(ss), + REG_OFFSET_END, +}; + +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; +} + +/** + * regs_query_register_name() - query register name from its offset + * @offset: the offset of a register in struct pt_regs. + * + * regs_query_register_name() returns the name of a register from its + * offset in struct pt_regs. If the @offset is invalid, this returns NULL; + */ +const char *regs_query_register_name(unsigned int offset) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (roff->offset == offset) + return roff->name; + return NULL; +} + +static const int arg_offs_table[] = { +#ifdef CONFIG_X86_32 + [0] = offsetof(struct pt_regs, ax), + [1] = offsetof(struct pt_regs, dx), + [2] = offsetof(struct pt_regs, cx) +#else /* CONFIG_X86_64 */ + [0] = offsetof(struct pt_regs, di), + [1] = offsetof(struct pt_regs, si), + [2] = offsetof(struct pt_regs, dx), + [3] = offsetof(struct pt_regs, cx), + [4] = offsetof(struct pt_regs, r8), + [5] = offsetof(struct pt_regs, r9) +#endif +}; + +/** + * regs_get_argument_nth() - get Nth argument at function call + * @regs: pt_regs which contains registers at function entry. + * @n: argument number. + * + * regs_get_argument_nth() returns @n th argument of a function call. + * Since usually the kernel stack will be changed right after function entry, + * you must use this at function entry. If the @n th entry is NOT in the + * kernel stack or pt_regs, this returns 0. + */ +unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n) +{ + if (n < ARRAY_SIZE(arg_offs_table)) + return *((unsigned long *)regs + arg_offs_table[n]); + else { + /* + * The typical case: arg n is on the stack. + * (Note: stack[0] = return address, so skip it) + */ + n -= ARRAY_SIZE(arg_offs_table); + return regs_get_kernel_stack_nth(regs, 1 + n); + } +} + /* * does not yet catch signals sent when the child dies. * in exit.c or in signal.c. -- cgit v1.2.3 From 8d7d14fb27818eb08ebedf9f4a6e286970fe9977 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 21 Aug 2009 15:43:07 -0400 Subject: x86: Fix x86 instruction decoder selftest to check only .text MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix x86 instruction decoder selftest to check only .text because other sections (e.g. .notes) will have random bytes which don't need to be checked. Signed-off-by: Masami Hiramatsu Cc: Jim Keniston Cc: H. Peter Anvin Cc: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Ingo Molnar Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: PrzemysÅ‚aw PaweÅ‚czyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090821194307.12478.76938.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker --- arch/x86/tools/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile index 3dd626b99dc8..95e9cc4bcd94 100644 --- a/arch/x86/tools/Makefile +++ b/arch/x86/tools/Makefile @@ -1,6 +1,6 @@ PHONY += posttest quiet_cmd_posttest = TEST $@ - cmd_posttest = $(OBJDUMP) -d $(objtree)/vmlinux | awk -f $(srctree)/arch/x86/tools/distill.awk | $(obj)/test_get_len + cmd_posttest = $(OBJDUMP) -d -j .text $(objtree)/vmlinux | awk -f $(srctree)/arch/x86/tools/distill.awk | $(obj)/test_get_len posttest: $(obj)/test_get_len vmlinux $(call cmd,posttest) -- cgit v1.2.3 From 69d991f32152283cbc373136fa45bbb152b32048 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 21 Aug 2009 15:43:16 -0400 Subject: x86: Check awk features before generating inat-tables.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check some awk mandatory features to generate inat-tables.c that old mawk doesn't support. Signed-off-by: Masami Hiramatsu Cc: Jim Keniston Cc: H. Peter Anvin Cc: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Ingo Molnar Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: PrzemysÅ‚aw PaweÅ‚czyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090821194316.12478.57394.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker --- arch/x86/tools/gen-insn-attr-x86.awk | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index 93b62c92d044..19ba096b7dd1 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -4,7 +4,25 @@ # # Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c +# Awk implementation sanity check +function check_awk_implement() { + if (!match("abc", "[[:lower:]]+")) + return "Your awk doesn't support charactor-class." + if (sprintf("%x", 0) != "0") + return "Your awk has a printf-format problem." + return "" +} + BEGIN { + # Implementation error checking + awkchecked = check_awk_implement() + if (awkchecked != "") { + print "Error: " awkchecked > "/dev/stderr" + print "Please try to use gawk." > "/dev/stderr" + exit 1 + } + + # Setup generating tables print "/* x86 opcode map generated from x86-opcode-map.txt */" print "/* Do not change this code. */" ggid = 1 @@ -293,6 +311,8 @@ function convert_operands(opnd, i,imm,mod) } END { + if (awkchecked != "") + exit 1 # print escape opcode map's array print "/* Escape opcode map array */" print "const insn_attr_t const *inat_escape_tables[INAT_ESC_MAX + 1]" \ -- cgit v1.2.3 From 24851d2447830e6cba4c4b641cb73e713f312373 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 26 Aug 2009 23:38:30 +0200 Subject: tracing/kprobes: Dump the culprit kprobe in case of kprobe recursion Kprobes can enter into a probing recursion, ie: a kprobe that does an endless loop because one of its core mechanism function used during probing is also probed itself. This patch helps pinpointing the kprobe that raised such recursion by dumping it and raising a BUG instead of a warning (we also disarm the kprobe to try avoiding recursion in BUG itself). Having a BUG instead of a warning stops the stacktrace in the right place and doesn't pollute the logs with hundreds of traces that eventually end up in a stack overflow. Signed-off-by: Frederic Weisbecker Cc: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli --- arch/x86/kernel/kprobes.c | 8 ++++++-- include/linux/kprobes.h | 2 ++ kernel/kprobes.c | 7 +++++++ 3 files changed, 15 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 16ae9610f6ff..ecee3d23fef8 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -490,9 +490,13 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, /* A probe has been hit in the codepath leading up * to, or just after, single-stepping of a probed * instruction. This entire codepath should strictly - * reside in .kprobes.text section. Raise a warning - * to highlight this peculiar case. + * reside in .kprobes.text section. + * Raise a BUG or we'll continue in an endless + * reentering loop and eventually a stack overflow. */ + arch_disarm_kprobe(p); + dump_kprobe(p); + BUG(); } default: /* impossible cases */ diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index bcd9c07848be..87eb79c9dd60 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -296,6 +296,8 @@ void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head); int disable_kprobe(struct kprobe *kp); int enable_kprobe(struct kprobe *kp); +void dump_kprobe(struct kprobe *kp); + #else /* !CONFIG_KPROBES: */ static inline int kprobes_built_in(void) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index ef177d653b2c..f72e96c25a38 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1141,6 +1141,13 @@ static void __kprobes kill_kprobe(struct kprobe *p) arch_remove_kprobe(p); } +void __kprobes dump_kprobe(struct kprobe *kp) +{ + printk(KERN_WARNING "Dumping kprobe:\n"); + printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n", + kp->symbol_name, kp->addr, kp->offset); +} + /* Module notifier call back, checking kprobes on the module */ static int __kprobes kprobes_module_callback(struct notifier_block *nb, unsigned long val, void *data) -- cgit v1.2.3 From c8bc6f3c806f1fcbfdbf0b1ff6c52dba59192d3b Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 4 Aug 2009 12:07:09 -0700 Subject: x86: arch specific support for remapping HPET MSIs x86 arch support for remapping HPET MSI's by associating the HPET timer block with the interrupt-remapping HW unit and setting up appropriate irq_chip Signed-off-by: Suresh Siddha Cc: Venkatesh Pallipadi Cc: David Woodhouse Cc: Jesse Barnes Cc: Jay Fenlason LKML-Reference: <20090804190729.630510000@intel.com> Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/hpet.h | 5 +++-- arch/x86/kernel/acpi/boot.c | 1 + arch/x86/kernel/apic/io_apic.c | 49 +++++++++++++++++++++++++++++++++++------- arch/x86/kernel/hpet.c | 3 ++- 4 files changed, 47 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 65847c578b70..5d89fd2a3690 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -65,6 +65,7 @@ /* hpet memory map physical address */ extern unsigned long hpet_address; extern unsigned long force_hpet_address; +extern u8 hpet_blockid; extern int hpet_force_user; extern int is_hpet_enabled(void); extern int hpet_enable(void); @@ -78,9 +79,9 @@ extern void hpet_msi_write(unsigned int irq, struct msi_msg *msg); extern void hpet_msi_read(unsigned int irq, struct msi_msg *msg); #ifdef CONFIG_PCI_MSI -extern int arch_setup_hpet_msi(unsigned int irq); +extern int arch_setup_hpet_msi(unsigned int irq, unsigned int id); #else -static inline int arch_setup_hpet_msi(unsigned int irq) +static inline int arch_setup_hpet_msi(unsigned int irq, unsigned int id) { return -EINVAL; } diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 6b8ca3a0285d..eae642b0f345 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -624,6 +624,7 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table) } hpet_address = hpet_tbl->address.address; + hpet_blockid = hpet_tbl->sequence; /* * Some broken BIOSes advertise HPET at 0x0. We really do not diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index d2ed6c5ddc80..d9c6f14d3b32 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3254,7 +3254,8 @@ void destroy_irq(unsigned int irq) * MSI message composition */ #ifdef CONFIG_PCI_MSI -static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) +static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, + struct msi_msg *msg, u8 hpet_id) { struct irq_cfg *cfg; int err; @@ -3288,7 +3289,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms irte.dest_id = IRTE_DEST(dest); /* Set source-id of interrupt request */ - set_msi_sid(&irte, pdev); + if (pdev) + set_msi_sid(&irte, pdev); + else + set_hpet_sid(&irte, hpet_id); modify_irte(irq, &irte); @@ -3453,7 +3457,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) int ret; struct msi_msg msg; - ret = msi_compose_msg(dev, irq, &msg); + ret = msi_compose_msg(dev, irq, &msg, -1); if (ret < 0) return ret; @@ -3586,7 +3590,7 @@ int arch_setup_dmar_msi(unsigned int irq) int ret; struct msi_msg msg; - ret = msi_compose_msg(NULL, irq, &msg); + ret = msi_compose_msg(NULL, irq, &msg, -1); if (ret < 0) return ret; dmar_msi_write(irq, &msg); @@ -3626,6 +3630,19 @@ static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) #endif /* CONFIG_SMP */ +static struct irq_chip ir_hpet_msi_type = { + .name = "IR-HPET_MSI", + .unmask = hpet_msi_unmask, + .mask = hpet_msi_mask, +#ifdef CONFIG_INTR_REMAP + .ack = ir_ack_apic_edge, +#ifdef CONFIG_SMP + .set_affinity = ir_set_msi_irq_affinity, +#endif +#endif + .retrigger = ioapic_retrigger_irq, +}; + static struct irq_chip hpet_msi_type = { .name = "HPET_MSI", .unmask = hpet_msi_unmask, @@ -3637,20 +3654,36 @@ static struct irq_chip hpet_msi_type = { .retrigger = ioapic_retrigger_irq, }; -int arch_setup_hpet_msi(unsigned int irq) +int arch_setup_hpet_msi(unsigned int irq, unsigned int id) { int ret; struct msi_msg msg; struct irq_desc *desc = irq_to_desc(irq); - ret = msi_compose_msg(NULL, irq, &msg); + if (intr_remapping_enabled) { + struct intel_iommu *iommu = map_hpet_to_ir(id); + int index; + + if (!iommu) + return -1; + + index = alloc_irte(iommu, irq, 1); + if (index < 0) + return -1; + } + + ret = msi_compose_msg(NULL, irq, &msg, id); if (ret < 0) return ret; hpet_msi_write(irq, &msg); desc->status |= IRQ_MOVE_PCNTXT; - set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq, - "edge"); + if (irq_remapped(irq)) + set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type, + handle_edge_irq, "edge"); + else + set_irq_chip_and_handler_name(irq, &hpet_msi_type, + handle_edge_irq, "edge"); return 0; } diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index ba575f0f2e34..7f024ff47d1d 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -33,6 +33,7 @@ * HPET address is set in acpi/boot.c, when an ACPI entry exists */ unsigned long hpet_address; +u8 hpet_blockid; /* OS timer block num */ #ifdef CONFIG_PCI_MSI static unsigned long hpet_num_timers; #endif @@ -467,7 +468,7 @@ static int hpet_msi_next_event(unsigned long delta, static int hpet_setup_msi_irq(unsigned int irq) { - if (arch_setup_hpet_msi(irq)) { + if (arch_setup_hpet_msi(irq, hpet_blockid)) { destroy_irq(irq); return -EINVAL; } -- cgit v1.2.3 From e9afe9e1b3fdbd56cca53959a2519e70db9c8095 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 27 Aug 2009 13:22:58 -0400 Subject: kprobes/x86: Call BUG() when reentering probe into KPROBES_HIT_SS Call BUG() when a probe have been hit on the way of kprobe processing path, because that kind of probes are currently unrecoverable (recovering it will cause an infinite loop and stack overflow). The original code seems to assume that it's caused by an int3 which another subsystem inserted on out-of-line singlestep buffer if the hitting probe is same as current probe. However, in that case, int3-hitting-address is on the out-of-line buffer and should be different from first (current) int3 address. Thus, I decided to remove the code. I also removes arch_disarm_kprobe() because it will involve other stuffs in text_poke(). Signed-off-by: Masami Hiramatsu Acked-by: Ananth N Mavinakayanahalli Cc: Ingo Molnar LKML-Reference: <20090827172258.8246.61889.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/kprobes.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index ecee3d23fef8..e0fb615ba1e9 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -482,22 +482,16 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, kcb->kprobe_status = KPROBE_REENTER; break; case KPROBE_HIT_SS: - if (p == kprobe_running()) { - regs->flags &= ~X86_EFLAGS_TF; - regs->flags |= kcb->kprobe_saved_flags; - return 0; - } else { - /* A probe has been hit in the codepath leading up - * to, or just after, single-stepping of a probed - * instruction. This entire codepath should strictly - * reside in .kprobes.text section. - * Raise a BUG or we'll continue in an endless - * reentering loop and eventually a stack overflow. - */ - arch_disarm_kprobe(p); - dump_kprobe(p); - BUG(); - } + /* A probe has been hit in the codepath leading up to, or just + * after, single-stepping of a probed instruction. This entire + * codepath should strictly reside in .kprobes.text section. + * Raise a BUG or we'll continue in an endless reentering loop + * and eventually a stack overflow. + */ + printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n", + p->addr); + dump_kprobe(p); + BUG(); default: /* impossible cases */ WARN_ON(1); -- cgit v1.2.3 From f5ad31158d60946b9fd18c8a79c283a6bc432430 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 27 Aug 2009 13:23:04 -0400 Subject: kprobes/x86-64: Allow to reenter probe on post_handler Allow to reenter probe on the post_handler of another probe on x86-64, because x86-64 already allows reentering int3. In that case, reentered probe just increases kp.nmissed and returns. Signed-off-by: Masami Hiramatsu Acked-by: Ananth N Mavinakayanahalli Cc: Ingo Molnar LKML-Reference: <20090827172304.8246.4822.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/kprobes.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index e0fb615ba1e9..c5f1f117e0c0 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -463,17 +463,6 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, { switch (kcb->kprobe_status) { case KPROBE_HIT_SSDONE: -#ifdef CONFIG_X86_64 - /* TODO: Provide re-entrancy from post_kprobes_handler() and - * avoid exception stack corruption while single-stepping on - * the instruction of the new probe. - */ - arch_disarm_kprobe(p); - regs->ip = (unsigned long)p->addr; - reset_current_kprobe(); - preempt_enable_no_resched(); - break; -#endif case KPROBE_HIT_ACTIVE: save_previous_kprobe(kcb); set_current_kprobe(p, regs, kcb); -- cgit v1.2.3 From 62c9295f9dd250ea1bb2c8078642a275a9ce82f8 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 27 Aug 2009 13:23:11 -0400 Subject: kprobes/x86: Fix to add __kprobes to in-kernel fault handing functions Add __kprobes to the functions which handle in-kernel fixable page faults. Since kprobes can cause those in-kernel page faults by accessing kprobe data structures, probing those fault functions will cause fault-int3-loop (do_page_fault has already been marked as __kprobes). Signed-off-by: Masami Hiramatsu Acked-by: Ananth N Mavinakayanahalli Cc: Ingo Molnar LKML-Reference: <20090827172311.8246.92725.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker --- arch/x86/mm/fault.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index bfae139182ff..c322e59f2d10 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -38,7 +38,8 @@ enum x86_pf_error_code { * Returns 0 if mmiotrace is disabled, or if the fault is not * handled by mmiotrace: */ -static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) +static inline int __kprobes +kmmio_fault(struct pt_regs *regs, unsigned long addr) { if (unlikely(is_kmmio_active())) if (kmmio_handler(regs, addr) == 1) @@ -46,7 +47,7 @@ static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) return 0; } -static inline int notify_page_fault(struct pt_regs *regs) +static inline int __kprobes notify_page_fault(struct pt_regs *regs) { int ret = 0; @@ -239,7 +240,7 @@ void vmalloc_sync_all(void) * * Handle a fault on the vmalloc or module mapping area */ -static noinline int vmalloc_fault(unsigned long address) +static noinline __kprobes int vmalloc_fault(unsigned long address) { unsigned long pgd_paddr; pmd_t *pmd_k; @@ -361,7 +362,7 @@ void vmalloc_sync_all(void) * * This assumes no large pages in there. */ -static noinline int vmalloc_fault(unsigned long address) +static noinline __kprobes int vmalloc_fault(unsigned long address) { pgd_t *pgd, *pgd_ref; pud_t *pud, *pud_ref; @@ -858,7 +859,7 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte) * There are no security implications to leaving a stale TLB when * increasing the permissions on a page. */ -static noinline int +static noinline __kprobes int spurious_fault(unsigned long error_code, unsigned long address) { pgd_t *pgd; -- cgit v1.2.3 From 8222d718b3ad3ae49c48f69ae4b6a1128c9a92cf Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 27 Aug 2009 13:23:25 -0400 Subject: kprobes/x86-64: Fix to move common_interrupt to .kprobes.text Since nmi, debug and int3 returns to irq_return inside common_interrupt, probing this function will cause int3-loop, so it should be marked as __kprobes. Signed-off-by: Masami Hiramatsu Acked-by: Ananth N Mavinakayanahalli Cc: Ingo Molnar LKML-Reference: <20090827172325.8246.40000.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/entry_64.S | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c251be745107..36e2ef5cc83f 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -809,6 +809,10 @@ END(interrupt) call \func .endm +/* + * Interrupt entry/exit should be protected against kprobes + */ + .pushsection .kprobes.text, "ax" /* * The interrupt stubs push (~vector+0x80) onto the stack and * then jump to common_interrupt. @@ -947,6 +951,10 @@ ENTRY(retint_kernel) CFI_ENDPROC END(common_interrupt) +/* + * End of kprobes section + */ + .popsection /* * APIC interrupts. -- cgit v1.2.3 From 50a482fbd96943516b7a2783900e8fe61a6425e7 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 28 Aug 2009 18:13:19 -0400 Subject: x86: Allow x86-32 instruction decoder selftest on x86-64 Pass $(CONFIG_64BIT) to the x86 insn decoder selftest in case we are decoding 32bit code on x86-64, which will happen when building kernel with ARCH=i386 on x86-64. Signed-off-by: Masami Hiramatsu Cc: Jim Keniston Cc: Ingo Molnar LKML-Reference: <20090828221319.8778.88508.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker --- arch/x86/tools/Makefile | 2 +- arch/x86/tools/test_get_len.c | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile index 95e9cc4bcd94..1bd006c81564 100644 --- a/arch/x86/tools/Makefile +++ b/arch/x86/tools/Makefile @@ -1,6 +1,6 @@ PHONY += posttest quiet_cmd_posttest = TEST $@ - cmd_posttest = $(OBJDUMP) -d -j .text $(objtree)/vmlinux | awk -f $(srctree)/arch/x86/tools/distill.awk | $(obj)/test_get_len + cmd_posttest = $(OBJDUMP) -d -j .text $(objtree)/vmlinux | awk -f $(srctree)/arch/x86/tools/distill.awk | $(obj)/test_get_len $(CONFIG_64BIT) posttest: $(obj)/test_get_len vmlinux $(call cmd,posttest) diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c index 1e81adb2d8a9..a3273f4244d5 100644 --- a/arch/x86/tools/test_get_len.c +++ b/arch/x86/tools/test_get_len.c @@ -45,7 +45,7 @@ const char *prog; static void usage(void) { fprintf(stderr, "Usage: objdump -d a.out | awk -f distill.awk |" - " ./test_get_len\n"); + " %s [y|n](64bit flag)\n", prog); exit(1); } @@ -63,11 +63,15 @@ int main(int argc, char **argv) unsigned char insn_buf[16]; struct insn insn; int insns = 0; + int x86_64 = 0; prog = argv[0]; - if (argc > 1) + if (argc > 2) usage(); + if (argc == 2 && argv[1][0] == 'y') + x86_64 = 1; + while (fgets(line, BUFSIZE, stdin)) { char copy[BUFSIZE], *s, *tab1, *tab2; int nb = 0; @@ -93,11 +97,7 @@ int main(int argc, char **argv) break; } /* Decode an instruction */ -#ifdef __x86_64__ - insn_init(&insn, insn_buf, 1); -#else - insn_init(&insn, insn_buf, 0); -#endif + insn_init(&insn, insn_buf, x86_64); insn_get_length(&insn); if (insn.length != nb) { fprintf(stderr, "Error: %s", line); -- cgit v1.2.3 From 70069577323e6f72b845166724f34b9858134437 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 28 Aug 2009 18:13:26 -0400 Subject: x86: Remove unused config macros from instruction decoder selftest Remove dummy definitions of CONFIG_X86_64 and CONFIG_X86_32 because those macros are not used in the instruction decoder anymore. Signed-off-by: Masami Hiramatsu Cc: Jim Keniston Cc: Ingo Molnar LKML-Reference: <20090828221326.8778.70723.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker --- arch/x86/tools/test_get_len.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c index a3273f4244d5..376d33852191 100644 --- a/arch/x86/tools/test_get_len.c +++ b/arch/x86/tools/test_get_len.c @@ -21,11 +21,6 @@ #include #include -#ifdef __x86_64__ -#define CONFIG_X86_64 -#else -#define CONFIG_X86_32 -#endif #define unlikely(cond) (cond) #include -- cgit v1.2.3 From f12b4f546b4e327d5620a544a2bddab68de66027 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 8 Sep 2009 12:32:46 -0400 Subject: x86: Add MMX support for instruction decoder Add MMX/SSE instructions to x86 opcode maps, since some of those instructions are used in the kernel. This also fixes failures in the x86 instruction decoder seftest. Signed-off-by: Masami Hiramatsu Cc: Jim Keniston Cc: H. Peter Anvin Cc: Sam Ravnborg Cc: Frederic Weisbecker Cc: Ingo Molnar LKML-Reference: <20090908163246.23516.78835.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Frederic Weisbecker --- arch/x86/lib/x86-opcode-map.txt | 307 ++++++++++++++++++++++++++-------------- 1 file changed, 200 insertions(+), 107 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 083dd59dd74b..59e20d5c2a52 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -310,14 +310,14 @@ Referrer: 2-byte escape 0e: 0f: # 0x0f 0x10-0x1f -10: -11: -12: -13: -14: -15: -16: -17: +10: movups Vps,Wps | movss Vss,Wss (F3) | movupd Vpd,Wpd (66) | movsd Vsd,Wsd (F2) +11: movups Wps,Vps | movss Wss,Vss (F3) | movupd Wpd,Vpd (66) | movsd Wsd,Vsd (F2) +12: movlps Vq,Mq | movlpd Vq,Mq (66) | movhlps Vq,Uq | movddup Vq,Wq (F2) | movsldup Vq,Wq (F3) +13: mpvlps Mq,Vq | movlpd Mq,Vq (66) +14: unpcklps Vps,Wq | unpcklpd Vpd,Wq (66) +15: unpckhps Vps,Wq | unpckhpd Vpd,Wq (66) +16: movhps Vq,Mq | movhpd Vq,Mq (66) | movlsps Vq,Uq | movshdup Vq,Wq (F3) +17: movhps Mq,Vq | movhpd Mq,Vq (66) 18: Grp16 (1A) 19: 1a: @@ -337,12 +337,12 @@ Referrer: 2-byte escape 27: 28: movaps Vps,Wps | movapd Vpd,Wpd (66) 29: movaps Wps,Vps | movapd Wpd,Vpd (66) -2a: -2b: -2c: -2d: -2e: -2f: +2a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2) +2b: movntps Mps,Vps | movntpd Mpd,Vpd (66) +2c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2) +2d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2) +2e: ucomiss Vss,Wss | ucomisd Vsd,Wsd (66) +2f: comiss Vss,Wss | comisd Vsd,Wsd (66) # 0x0f 0x30-0x3f 30: WRMSR 31: RDTSC @@ -378,56 +378,56 @@ Referrer: 2-byte escape 4e: CMOVLE/NG Gv,Ev 4f: CMOVNLE/G Gv,Ev # 0x0f 0x50-0x5f -50: -51: -52: -53: -54: -55: -56: -57: -58: -59: -5a: -5b: -5c: -5d: -5e: -5f: +50: movmskps Gd/q,Ups | movmskpd Gd/q,Upd (66) +51: sqrtps Vps,Wps | sqrtss Vss,Wss (F3) | sqrtpd Vpd,Wpd (66) | sqrtsd Vsd,Wsd (F2) +52: rsqrtps Vps,Wps | rsqrtss Vss,Wss (F3) +53: rcpps Vps,Wps | rcpss Vss,Wss (F3) +54: andps Vps,Wps | andpd Vpd,Wpd (66) +55: andnps Vps,Wps | andnpd Vpd,Wpd (66) +56: orps Vps,Wps | orpd Vpd,Wpd (66) +57: xorps Vps,Wps | xorpd Vpd,Wpd (66) +58: addps Vps,Wps | addss Vss,Wss (F3) | addpd Vpd,Wpd (66) | addsd Vsd,Wsd (F2) +59: mulps Vps,Wps | mulss Vss,Wss (F3) | mulpd Vpd,Wpd (66) | mulsd Vsd,Wsd (F2) +5a: cvtps2pd Vpd,Wps | cvtss2sd Vsd,Wss (F3) | cvtpd2ps Vps,Wpd (66) | cvtsd2ss Vsd,Wsd (F2) +5b: cvtdq2ps Vps,Wdq | cvtps2dq Vdq,Wps (66) | cvttps2dq Vdq,Wps (F3) +5c: subps Vps,Wps | subss Vss,Wss (F3) | subpd Vpd,Wpd (66) | subsd Vsd,Wsd (F2) +5d: minps Vps,Wps | minss Vss,Wss (F3) | minpd Vpd,Wpd (66) | minsd Vsd,Wsd (F2) +5e: divps Vps,Wps | divss Vss,Wss (F3) | divpd Vpd,Wpd (66) | divsd Vsd,Wsd (F2) +5f: maxps Vps,Wps | maxss Vss,Wss (F3) | maxpd Vpd,Wpd (66) | maxsd Vsd,Wsd (F2) # 0x0f 0x60-0x6f -60: -61: -62: -63: -64: -65: -66: -67: -68: -69: -6a: -6b: -6c: -6d: -6e: -6f: +60: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66) +61: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66) +62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66) +63: packsswb Pq,Qq | packsswb Vdq,Wdq (66) +64: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66) +65: pcmpgtw Pq,Qq | pcmpgtw(66) Vdq,Wdq +66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66) +67: packuswb Pq,Qq | packuswb(66) Vdq,Wdq +68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66) +69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66) +6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66) +6b: packssdw Pq,Qd | packssdw Vdq,Wdq (66) +6c: punpcklqdq Vdq,Wdq (66) +6d: punpckhqdq Vdq,Wdq (66) +6e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66) +6f: movq Pq,Qq | movdqa Vdq,Wdq (66) | movdqu Vdq,Wdq (F3) # 0x0f 0x70-0x7f -70: +70: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66) | pshufhw Vdq,Wdq,Ib (F3) | pshuflw VdqWdq,Ib (F2) 71: Grp12 (1A) 72: Grp13 (1A) 73: Grp14 (1A) -74: -75: -76: -77: +74: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66) +75: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66) +76: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66) +77: emms 78: VMREAD Ed/q,Gd/q 79: VMWRITE Gd/q,Ed/q 7a: 7b: -7c: -7d: -7e: -7f: +7c: haddps(F2) Vps,Wps | haddpd(66) Vpd,Wpd +7d: hsubps(F2) Vps,Wps | hsubpd(66) Vpd,Wpd +7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66) | movq Vq,Wq (F3) +7f: movq Qq,Pq | movdqa Wdq,Vdq (66) | movdqu Wdq,Vdq (F3) # 0x0f 0x80-0x8f 80: JO Jz (f64) 81: JNO Jz (f64) @@ -499,11 +499,11 @@ bf: MOVSX Gv,Ew # 0x0f 0xc0-0xcf c0: XADD Eb,Gb c1: XADD Ev,Gv -c2: +c2: cmpps Vps,Wps,Ib | cmpss Vss,Wss,Ib (F3) | cmppd Vpd,Wpd,Ib (66) | cmpsd Vsd,Wsd,Ib (F2) c3: movnti Md/q,Gd/q -c4: -c5: -c6: +c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66) +c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66) +c6: shufps Vps,Wps,Ib | shufpd Vpd,Wpd,Ib (66) c7: Grp9 (1A) c8: BSWAP RAX/EAX/R8/R8D c9: BSWAP RCX/ECX/R9/R9D @@ -514,60 +514,131 @@ cd: BSWAP RBP/EBP/R13/R13D ce: BSWAP RSI/ESI/R14/R14D cf: BSWAP RDI/EDI/R15/R15D # 0x0f 0xd0-0xdf -d0: -d1: -d2: -d3: -d4: -d5: -d6: -d7: -d8: -d9: -da: -db: -dc: -dd: -de: -df: +d0: addsubps Vps,Wps (F2) | addsubpd Vpd,Wpd (66) +d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66) +d2: psrld Pq,Qq | psrld Vdq,Wdq (66) +d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66) +d4: paddq Pq,Qq | paddq Vdq,Wdq (66) +d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66) +d6: movq Wq,Vq (66) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) +d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66) +d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66) +d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66) +da: pminub Pq,Qq | pminub Vdq,Wdq (66) +db: pand Pq,Qq | pand Vdq,Wdq (66) +dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66) +dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66) +de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66) +df: pandn Pq,Qq | pandn Vdq,Wdq (66) # 0x0f 0xe0-0xef -e0: -e1: -e2: -e3: -e4: -e5: -e6: -e7: -e8: -e9: -ea: -eb: -ec: -ed: -ee: -ef: +e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66) +e1: psraw Pq,Qq | psraw Vdq,Wdq (66) +e2: psrad Pq,Qq | psrad Vdq,Wdq (66) +e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66) +e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66) +e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66) +e6: cvtpd2dq Vdq,Wpd (F2) | cvttpd2dq Vdq,Wpd (66) | cvtdq2pd Vpd,Wdq (F3) +e7: movntq Mq,Pq | movntdq Mdq,Vdq (66) +e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66) +e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66) +ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66) +eb: por Pq,Qq | por Vdq,Wdq (66) +ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66) +ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66) +ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66) +ef: pxor Pq,Qq | pxor Vdq,Wdq (66) # 0x0f 0xf0-0xff -f0: -f1: -f2: -f3: -f4: -f5: -f6: -f7: -f8: -f9: -fa: -fb: -fc: -fd: -fe: +f0: lddqu Vdq,Mdq (F2) +f1: psllw Pq,Qq | psllw Vdq,Wdq (66) +f2: pslld Pq,Qq | pslld Vdq,Wdq (66) +f3: psllq Pq,Qq | psllq Vdq,Wdq (66) +f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66) +f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66) +f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66) +f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66) +f8: psubb Pq,Qq | psubb Vdq,Wdq (66) +f9: psubw Pq,Qq | psubw Vdq,Wdq (66) +fa: psubd Pq,Qq | psubd Vdq,Wdq (66) +fb: psubq Pq,Qq | psubq Vdq,Wdq (66) +fc: paddb Pq,Qq | paddb Vdq,Wdq (66) +fd: paddw Pq,Qq | paddw Vdq,Wdq (66) +fe: paddd Pq,Qq | paddd Vdq,Wdq (66) ff: EndTable Table: 3-byte opcode 1 Referrer: 3-byte escape 1 +# 0x0f 0x38 0x00-0x0f +00: pshufb Pq,Qq | pshufb Vdq,Wdq (66) +01: phaddw Pq,Qq | phaddw Vdq,Wdq (66) +02: phaddd Pq,Qq | phaddd Vdq,Wdq (66) +03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66) +04: pmaddubsw Pq,Qq | pmaddubsw (66)Vdq,Wdq +05: phsubw Pq,Qq | phsubw Vdq,Wdq (66) +06: phsubd Pq,Qq | phsubd Vdq,Wdq (66) +07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66) +08: psignb Pq,Qq | psignb Vdq,Wdq (66) +09: psignw Pq,Qq | psignw Vdq,Wdq (66) +0a: psignd Pq,Qq | psignd Vdq,Wdq (66) +0b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66) +0c: +0d: +0e: +0f: +# 0x0f 0x38 0x10-0x1f +10: pblendvb Vdq,Wdq (66) +11: +12: +13: +14: blendvps Vdq,Wdq (66) +15: blendvpd Vdq,Wdq (66) +16: +17: ptest Vdq,Wdq (66) +18: +19: +1a: +1b: +1c: pabsb Pq,Qq | pabsb Vdq,Wdq (66) +1d: pabsw Pq,Qq | pabsw Vdq,Wdq (66) +1e: pabsd Pq,Qq | pabsd Vdq,Wdq (66) +1f: +# 0x0f 0x38 0x20-0x2f +20: pmovsxbw Vdq,Udq/Mq (66) +21: pmovsxbd Vdq,Udq/Md (66) +22: pmovsxbq Vdq,Udq/Mw (66) +23: pmovsxwd Vdq,Udq/Mq (66) +24: pmovsxwq Vdq,Udq/Md (66) +25: pmovsxdq Vdq,Udq/Mq (66) +26: +27: +28: pmuldq Vdq,Wdq (66) +29: pcmpeqq Vdq,Wdq (66) +2a: movntdqa Vdq,Mdq (66) +2b: packusdw Vdq,Wdq (66) +2c: +2d: +2e: +2f: +# 0x0f 0x38 0x30-0x3f +30: pmovzxbw Vdq,Udq/Mq (66) +31: pmovzxbd Vdq,Udq/Md (66) +32: pmovzxbq Vdq,Udq/Mw (66) +33: pmovzxwd Vdq,Udq/Mq (66) +34: pmovzxwq Vdq,Udq/Md (66) +35: pmovzxdq Vdq,Udq/Mq (66) +36: +37: pcmpgtq Vdq,Wdq (66) +38: pminsb Vdq,Wdq (66) +39: pminsd Vdq,Wdq (66) +3a: pminuw Vdq,Wdq (66) +3b: pminud Vdq,Wdq (66) +3c: pmaxsb Vdq,Wdq (66) +3d: pmaxsd Vdq,Wdq (66) +3e: pmaxuw Vdq,Wdq (66) +3f: pmaxud Vdq,Wdq (66) +# 0x0f 0x38 0x4f-0xff +40: pmulld Vdq,Wdq (66) +41: phminposuw Vdq,Wdq (66) 80: INVEPT Gd/q,Mdq (66) 81: INVPID Gd/q,Mdq (66) f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2) @@ -576,7 +647,29 @@ EndTable Table: 3-byte opcode 2 Referrer: 3-byte escape 2 -# all opcode is for SSE +# 0x0f 0x3a 0x00-0xff +08: roundps Vdq,Wdq,Ib (66) +09: roundpd Vdq,Wdq,Ib (66) +0a: roundss Vss,Wss,Ib (66) +0b: roundsd Vsd,Wsd,Ib (66) +0c: blendps Vdq,Wdq,Ib (66) +0d: blendpd Vdq,Wdq,Ib (66) +0e: pblendw Vdq,Wdq,Ib (66) +0f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66) +14: pextrb Rd/Mb,Vdq,Ib (66) +15: pextrw Rd/Mw,Vdq,Ib (66) +16: pextrd/pextrq Ed/q,Vdq,Ib (66) +17: extractps Ed,Vdq,Ib (66) +20: pinsrb Vdq,Rd/q/Mb,Ib (66) +21: insertps Vdq,Udq/Md,Ib (66) +22: pinsrd/pinsrq Vdq,Ed/q,Ib (66) +40: dpps Vdq,Wdq,Ib (66) +41: dppd Vdq,Wdq,Ib (66) +42: mpsadbw Vdq,Wdq,Ib (66) +60: pcmpestrm Vdq,Wdq,Ib (66) +61: pcmpestri Vdq,Wdq,Ib (66) +62: pcmpistrm Vdq,Wdq,Ib (66) +63: pcmpistri Vdq,Wdq,Ib (66) EndTable GrpTable: Grp1 -- cgit v1.2.3 From a00e817f42663941ea0aa5f85a9d1c4f8b212839 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 8 Sep 2009 12:47:55 -0400 Subject: kprobes/x86-32: Move irq-exit functions to kprobes section Move irq-exit functions to .kprobes.text section to protect against kprobes recursion. When I ran kprobe stress test on x86-32, I found below symbols cause unrecoverable recursive probing: ret_from_exception ret_from_intr check_userspace restore_all restore_all_notrace restore_nocheck irq_return And also, I found some interrupt/exception entry points that cause similar problems. This patch moves those symbols (including their container functions) to .kprobes.text section to prevent any kprobes probing. Signed-off-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Ananth N Mavinakayanahalli Cc: Jim Keniston Cc: Ingo Molnar LKML-Reference: <20090908164755.24050.81182.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/entry_32.S | 24 ++++++++++++++++++++++++ kernel/kprobes.c | 2 ++ 2 files changed, 26 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c097e7d607c6..beb30da203d6 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -333,6 +333,10 @@ ENTRY(ret_from_fork) CFI_ENDPROC END(ret_from_fork) +/* + * Interrupt exit functions should be protected against kprobes + */ + .pushsection .kprobes.text, "ax" /* * Return to user mode is not as complex as all this looks, * but we want the default path for a system call return to @@ -383,6 +387,10 @@ need_resched: END(resume_kernel) #endif CFI_ENDPROC +/* + * End of kprobes section + */ + .popsection /* SYSENTER_RETURN points to after the "sysenter" instruction in the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ @@ -513,6 +521,10 @@ sysexit_audit: PTGS_TO_GS_EX ENDPROC(ia32_sysenter_target) +/* + * syscall stub including irq exit should be protected against kprobes + */ + .pushsection .kprobes.text, "ax" # system call handler stub ENTRY(system_call) RING0_INT_FRAME # can't unwind into user space anyway @@ -705,6 +717,10 @@ syscall_badsys: jmp resume_userspace END(syscall_badsys) CFI_ENDPROC +/* + * End of kprobes section + */ + .popsection /* * System calls that need a pt_regs pointer. @@ -814,6 +830,10 @@ common_interrupt: ENDPROC(common_interrupt) CFI_ENDPROC +/* + * Irq entries should be protected against kprobes + */ + .pushsection .kprobes.text, "ax" #define BUILD_INTERRUPT3(name, nr, fn) \ ENTRY(name) \ RING0_INT_FRAME; \ @@ -980,6 +1000,10 @@ ENTRY(spurious_interrupt_bug) jmp error_code CFI_ENDPROC END(spurious_interrupt_bug) +/* + * End of kprobes section + */ + .popsection ENTRY(kernel_thread_helper) pushl $0 # fake return address for unwinder diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 3267d90bc9d6..00d01b0f9fee 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -91,6 +91,8 @@ static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) static struct kprobe_blackpoint kprobe_blacklist[] = { {"preempt_schedule",}, {"native_get_debugreg",}, + {"irq_entries_start",}, + {"common_interrupt",}, {NULL} /* Terminator */ }; -- cgit v1.2.3 From ad5cafcdb09c57008c990edd309c0a563b09f238 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 10 Sep 2009 19:53:06 -0400 Subject: x86/ptrace: Fix regs_get_argument_nth() to add correct offset Fix regs_get_argument_nth() to add correct offset bytes. Because offset_of() returns offset in byte, the offset should be added to char * instead of unsigned long *. Signed-off-by: Masami Hiramatsu Acked-by: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi LKML-Reference: <20090910235306.22412.31613.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index a33a17d5d5c8..caffb6809452 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -150,7 +150,7 @@ static const int arg_offs_table[] = { unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n) { if (n < ARRAY_SIZE(arg_offs_table)) - return *((unsigned long *)regs + arg_offs_table[n]); + return *(unsigned long *)((char *)regs + arg_offs_table[n]); else { /* * The typical case: arg n is on the stack. -- cgit v1.2.3 From b8a4754147d61f5359a765a3afd3eb03012aa052 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 30 Jul 2009 11:10:02 +0200 Subject: x86, msr: Unify rdmsr_on_cpus/wrmsr_on_cpus Since rdmsr_on_cpus and wrmsr_on_cpus are almost identical, unify them into a common __rwmsr_on_cpus helper thus avoiding code duplication. While at it, convert cpumask_t's to const struct cpumask *. Signed-off-by: Borislav Petkov Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr.h | 4 ++-- arch/x86/lib/msr.c | 46 +++++++++++++++++++--------------------------- 2 files changed, 21 insertions(+), 29 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 7e2b6ba962ff..9a00219b331a 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -247,8 +247,8 @@ do { \ #ifdef CONFIG_SMP int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); -void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); -void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); +void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs); +void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs); int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index 33a1e3ca22d8..41628b104b9e 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c @@ -71,14 +71,9 @@ int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) } EXPORT_SYMBOL(wrmsr_on_cpu); -/* rdmsr on a bunch of CPUs - * - * @mask: which CPUs - * @msr_no: which MSR - * @msrs: array of MSR values - * - */ -void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) +static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no, + struct msr *msrs, + void (*msr_func) (void *info)) { struct msr_info rv; int this_cpu; @@ -92,11 +87,23 @@ void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) this_cpu = get_cpu(); if (cpumask_test_cpu(this_cpu, mask)) - __rdmsr_on_cpu(&rv); + msr_func(&rv); - smp_call_function_many(mask, __rdmsr_on_cpu, &rv, 1); + smp_call_function_many(mask, msr_func, &rv, 1); put_cpu(); } + +/* rdmsr on a bunch of CPUs + * + * @mask: which CPUs + * @msr_no: which MSR + * @msrs: array of MSR values + * + */ +void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) +{ + __rwmsr_on_cpus(mask, msr_no, msrs, __rdmsr_on_cpu); +} EXPORT_SYMBOL(rdmsr_on_cpus); /* @@ -107,24 +114,9 @@ EXPORT_SYMBOL(rdmsr_on_cpus); * @msrs: array of MSR values * */ -void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) +void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) { - struct msr_info rv; - int this_cpu; - - memset(&rv, 0, sizeof(rv)); - - rv.off = cpumask_first(mask); - rv.msrs = msrs; - rv.msr_no = msr_no; - - this_cpu = get_cpu(); - - if (cpumask_test_cpu(this_cpu, mask)) - __wrmsr_on_cpu(&rv); - - smp_call_function_many(mask, __wrmsr_on_cpu, &rv, 1); - put_cpu(); + __rwmsr_on_cpus(mask, msr_no, msrs, __wrmsr_on_cpu); } EXPORT_SYMBOL(wrmsr_on_cpus); -- cgit v1.2.3 From 9f0cf4adb6aa0bfccf675c938124e68f7f06349d Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 26 Sep 2009 14:33:01 +0200 Subject: x86: Use __builtin_object_size() to validate the buffer size for copy_from_user() gcc (4.x) supports the __builtin_object_size() builtin, which reports the size of an object that a pointer point to, when known at compile time. If the buffer size is not known at compile time, a constant -1 is returned. This patch uses this feature to add a sanity check to copy_from_user(); if the target buffer is known to be smaller than the copy size, the copy is aborted and a WARNing is emitted in memory debug mode. These extra checks compile away when the object size is not known, or if both the buffer size and the copy length are constants. Signed-off-by: Arjan van de Ven LKML-Reference: <20090926143301.2c396b94@infradead.org> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess_32.h | 19 ++++++++++++++++++- arch/x86/include/asm/uaccess_64.h | 19 ++++++++++++++++++- arch/x86/kernel/x8664_ksyms_64.c | 2 +- arch/x86/lib/copy_user_64.S | 4 ++-- arch/x86/lib/usercopy_32.c | 4 ++-- include/linux/compiler-gcc4.h | 2 ++ include/linux/compiler.h | 4 ++++ 7 files changed, 47 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 632fb44b4cb5..582d6aef7417 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -187,9 +187,26 @@ __copy_from_user_inatomic_nocache(void *to, const void __user *from, unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n); -unsigned long __must_check copy_from_user(void *to, +unsigned long __must_check _copy_from_user(void *to, const void __user *from, unsigned long n); + +static inline unsigned long __must_check copy_from_user(void *to, + const void __user *from, + unsigned long n) +{ + int sz = __compiletime_object_size(to); + int ret = -EFAULT; + + if (likely(sz == -1 || sz >= n)) + ret = _copy_from_user(to, from, n); +#ifdef CONFIG_DEBUG_VM + else + WARN(1, "Buffer overflow detected!\n"); +#endif + return ret; +} + long __must_check strncpy_from_user(char *dst, const char __user *src, long count); long __must_check __strncpy_from_user(char *dst, diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index db24b215fc50..ce6fec7ce38d 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -21,10 +21,27 @@ copy_user_generic(void *to, const void *from, unsigned len); __must_check unsigned long copy_to_user(void __user *to, const void *from, unsigned len); __must_check unsigned long -copy_from_user(void *to, const void __user *from, unsigned len); +_copy_from_user(void *to, const void __user *from, unsigned len); __must_check unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len); +static inline unsigned long __must_check copy_from_user(void *to, + const void __user *from, + unsigned long n) +{ + int sz = __compiletime_object_size(to); + int ret = -EFAULT; + + if (likely(sz == -1 || sz >= n)) + ret = _copy_from_user(to, from, n); +#ifdef CONFIG_DEBUG_VM + else + WARN(1, "Buffer overflow detected!\n"); +#endif + return ret; +} + + static __always_inline __must_check int __copy_from_user(void *dst, const void __user *src, unsigned size) { diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 3909e3ba5ce3..a0cdd8cc1d67 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -30,7 +30,7 @@ EXPORT_SYMBOL(__put_user_8); EXPORT_SYMBOL(copy_user_generic); EXPORT_SYMBOL(__copy_user_nocache); -EXPORT_SYMBOL(copy_from_user); +EXPORT_SYMBOL(_copy_from_user); EXPORT_SYMBOL(copy_to_user); EXPORT_SYMBOL(__copy_from_user_inatomic); diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 6ba0f7bb85ea..4be3c415b3e9 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -78,7 +78,7 @@ ENTRY(copy_to_user) ENDPROC(copy_to_user) /* Standard copy_from_user with segment limit checking */ -ENTRY(copy_from_user) +ENTRY(_copy_from_user) CFI_STARTPROC GET_THREAD_INFO(%rax) movq %rsi,%rcx @@ -88,7 +88,7 @@ ENTRY(copy_from_user) jae bad_from_user ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string CFI_ENDPROC -ENDPROC(copy_from_user) +ENDPROC(_copy_from_user) ENTRY(copy_user_generic) CFI_STARTPROC diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 1f118d462acc..8498684e45b0 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -874,7 +874,7 @@ EXPORT_SYMBOL(copy_to_user); * data to the requested size using zero bytes. */ unsigned long -copy_from_user(void *to, const void __user *from, unsigned long n) +_copy_from_user(void *to, const void __user *from, unsigned long n) { if (access_ok(VERIFY_READ, from, n)) n = __copy_from_user(to, from, n); @@ -882,4 +882,4 @@ copy_from_user(void *to, const void __user *from, unsigned long n) memset(to, 0, n); return n; } -EXPORT_SYMBOL(copy_from_user); +EXPORT_SYMBOL(_copy_from_user); diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 450fa597c94d..a3aef5d55dba 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -37,3 +37,5 @@ #define __cold __attribute__((__cold__)) #endif + +#define __compiletime_object_size(obj) __builtin_object_size(obj, 0) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 04fb5135b4e1..8e54108688f9 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -266,6 +266,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) #endif +/* Compile time object size, -1 for unknown */ +#ifndef __compiletime_object_size +# define __compiletime_object_size(obj) -1 +#endif /* * Prevent the compiler from merging or refetching accesses. The compiler * is also forbidden from reordering successive instances of ACCESS_ONCE(), -- cgit v1.2.3 From ff60fab71bb3b4fdbf8caf57ff3739ffd0887396 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 28 Sep 2009 14:21:22 +0200 Subject: x86: Use __builtin_memset and __builtin_memcpy for memset/memcpy GCC provides reasonable memset/memcpy functions itself, with __builtin_memset and __builtin_memcpy. For the "unknown" cases, it'll fall back to our current existing functions, but for fixed size versions it'll inline something smart. Quite often that will be the same as we have now, but sometimes it can do something smarter (for example, if the code then sets the first member of a struct, it can do a shorter memset). In addition, and this is more important, gcc knows which registers and such are not clobbered (while for our asm version it pretty much acts like a compiler barrier), so for various cases it can avoid reloading values. The effect on codesize is shown below on my typical laptop .config: text data bss dec hex filename 5605675 2041100 6525148 14171923 d83f13 vmlinux.before 5595849 2041668 6525148 14162665 d81ae9 vmlinux.after Due to some not-so-good behavior in the gcc 3.x series, this change is only done for GCC 4.x and above. Signed-off-by: Arjan van de Ven LKML-Reference: <20090928142122.6fc57e9c@infradead.org> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/string_32.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h index ae907e617181..3d3e8353ee5c 100644 --- a/arch/x86/include/asm/string_32.h +++ b/arch/x86/include/asm/string_32.h @@ -177,10 +177,15 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len) */ #ifndef CONFIG_KMEMCHECK + +#if (__GNUC__ >= 4) +#define memcpy(t, f, n) __builtin_memcpy(t, f, n) +#else #define memcpy(t, f, n) \ (__builtin_constant_p((n)) \ ? __constant_memcpy((t), (f), (n)) \ : __memcpy((t), (f), (n))) +#endif #else /* * kmemcheck becomes very happy if we use the REP instructions unconditionally, @@ -316,11 +321,15 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern, : __memset_generic((s), (c), (count))) #define __HAVE_ARCH_MEMSET +#if (__GNUC__ >= 4) +#define memset(s, c, count) __builtin_memset(s, c, count) +#else #define memset(s, c, count) \ (__builtin_constant_p(c) \ ? __constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \ (count)) \ : __memset((s), (c), (count))) +#endif /* * find the first occurrence of byte 'c', or 1 past the area if none -- cgit v1.2.3 From 4a3127693001c61a21d1ce680db6340623f52e93 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 30 Sep 2009 13:05:23 +0200 Subject: x86: Turn the copy_from_user check into an (optional) compile time warning A previous patch added the buffer size check to copy_from_user(). One of the things learned from analyzing the result of the previous patch is that in general, gcc is really good at proving that the code contains sufficient security checks to not need to do a runtime check. But that for those cases where gcc could not prove this, there was a relatively high percentage of real security issues. This patch turns the case of "gcc cannot prove" into a compile time warning, as long as a sufficiently new gcc is in use that supports this. The objective is that these warnings will trigger developers checking new cases out before a security hole enters a linux kernel release. Signed-off-by: Arjan van de Ven Cc: Linus Torvalds Cc: "David S. Miller" Cc: James Morris Cc: Jan Beulich LKML-Reference: <20090930130523.348ae6c4@infradead.org> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess_32.h | 12 +++++++++--- arch/x86/lib/usercopy_32.c | 6 ++++++ include/linux/compiler-gcc4.h | 3 +++ include/linux/compiler.h | 4 ++++ 4 files changed, 22 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 582d6aef7417..952f9e793c3e 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -191,6 +191,13 @@ unsigned long __must_check _copy_from_user(void *to, const void __user *from, unsigned long n); + +extern void copy_from_user_overflow(void) +#ifdef CONFIG_DEBUG_STACKOVERFLOW + __compiletime_warning("copy_from_user() buffer size is not provably correct") +#endif +; + static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) @@ -200,10 +207,9 @@ static inline unsigned long __must_check copy_from_user(void *to, if (likely(sz == -1 || sz >= n)) ret = _copy_from_user(to, from, n); -#ifdef CONFIG_DEBUG_VM else - WARN(1, "Buffer overflow detected!\n"); -#endif + copy_from_user_overflow(); + return ret; } diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 8498684e45b0..e218d5df85ff 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -883,3 +883,9 @@ _copy_from_user(void *to, const void __user *from, unsigned long n) return n; } EXPORT_SYMBOL(_copy_from_user); + +void copy_from_user_overflow(void) +{ + WARN(1, "Buffer overflow detected!\n"); +} +EXPORT_SYMBOL(copy_from_user_overflow); diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index a3aef5d55dba..f1709c1f9eae 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -39,3 +39,6 @@ #endif #define __compiletime_object_size(obj) __builtin_object_size(obj, 0) +#if __GNUC_MINOR__ >= 4 +#define __compiletime_warning(message) __attribute__((warning(message))) +#endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 8e54108688f9..950356311f12 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -270,6 +270,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #ifndef __compiletime_object_size # define __compiletime_object_size(obj) -1 #endif +#ifndef __compiletime_warning +# define __compiletime_warning(message) +#endif + /* * Prevent the compiler from merging or refetching accesses. The compiler * is also forbidden from reordering successive instances of ACCESS_ONCE(), -- cgit v1.2.3 From 7c68af6e32c73992bad24107311f3433c89016e2 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sat, 19 Sep 2009 09:40:22 +0300 Subject: core, x86: Add user return notifiers Add a general per-cpu notifier that is called whenever the kernel is about to return to userspace. The notifier uses a thread_info flag and existing checks, so there is no impact on user return or context switch fast paths. This will be used initially to speed up KVM task switching by lazily updating MSRs. Signed-off-by: Avi Kivity LKML-Reference: <1253342422-13811-1-git-send-email-avi@redhat.com> Signed-off-by: H. Peter Anvin --- arch/Kconfig | 10 ++++++++ arch/x86/Kconfig | 1 + arch/x86/include/asm/thread_info.h | 7 ++++-- arch/x86/kernel/process.c | 2 ++ arch/x86/kernel/signal.c | 3 +++ include/linux/user-return-notifier.h | 42 ++++++++++++++++++++++++++++++++ kernel/user-return-notifier.c | 46 ++++++++++++++++++++++++++++++++++++ 7 files changed, 109 insertions(+), 2 deletions(-) create mode 100644 include/linux/user-return-notifier.h create mode 100644 kernel/user-return-notifier.c (limited to 'arch/x86') diff --git a/arch/Kconfig b/arch/Kconfig index 7f418bbc261a..4e312fffbfd7 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -83,6 +83,13 @@ config KRETPROBES def_bool y depends on KPROBES && HAVE_KRETPROBES +config USER_RETURN_NOTIFIER + bool + depends on HAVE_USER_RETURN_NOTIFIER + help + Provide a kernel-internal notification when a cpu is about to + switch to user mode. + config HAVE_IOREMAP_PROT bool @@ -126,4 +133,7 @@ config HAVE_DMA_API_DEBUG config HAVE_DEFAULT_NO_SPIN_MUTEXES bool +config HAVE_USER_RETURN_NOTIFIER + bool + source "kernel/gcov/Kconfig" diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8da93745c087..1df175d15aa8 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -50,6 +50,7 @@ config X86 select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA select HAVE_ARCH_KMEMCHECK + select HAVE_USER_RETURN_NOTIFIER config OUTPUT_FORMAT string diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index d27d0a2fec4c..375c917c37d2 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -83,6 +83,7 @@ struct thread_info { #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ +#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* 32bit process */ #define TIF_FORK 18 /* ret_from_fork */ @@ -107,6 +108,7 @@ struct thread_info { #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) +#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) @@ -142,13 +144,14 @@ struct thread_info { /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ - (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME) + (_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \ + _TIF_USER_RETURN_NOTIFY) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC) -#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW +#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) #define PREEMPT_ACTIVE 0x10000000 diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 5284cd2b5776..e51b056fc88f 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -224,6 +225,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, */ memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); } + propagate_user_return_notify(prev_p, next_p); } int sys_fork(struct pt_regs *regs) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 6a44a76055ad..c49f90f7957a 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -872,6 +873,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) if (current->replacement_session_keyring) key_replace_session_keyring(); } + if (thread_info_flags & _TIF_USER_RETURN_NOTIFY) + fire_user_return_notifiers(); #ifdef CONFIG_X86_32 clear_thread_flag(TIF_IRET); diff --git a/include/linux/user-return-notifier.h b/include/linux/user-return-notifier.h new file mode 100644 index 000000000000..b6ac056291d7 --- /dev/null +++ b/include/linux/user-return-notifier.h @@ -0,0 +1,42 @@ +#ifndef _LINUX_USER_RETURN_NOTIFIER_H +#define _LINUX_USER_RETURN_NOTIFIER_H + +#ifdef CONFIG_USER_RETURN_NOTIFIER + +#include +#include + +struct user_return_notifier { + void (*on_user_return)(struct user_return_notifier *urn); + struct hlist_node link; +}; + + +void user_return_notifier_register(struct user_return_notifier *urn); +void user_return_notifier_unregister(struct user_return_notifier *urn); + +static inline void propagate_user_return_notify(struct task_struct *prev, + struct task_struct *next) +{ + if (test_tsk_thread_flag(prev, TIF_USER_RETURN_NOTIFY)) { + clear_tsk_thread_flag(prev, TIF_USER_RETURN_NOTIFY); + set_tsk_thread_flag(next, TIF_USER_RETURN_NOTIFY); + } +} + +void fire_user_return_notifiers(void); + +#else + +struct user_return_notifier {}; + +static inline void propagate_user_return_notify(struct task_struct *prev, + struct task_struct *next) +{ +} + +static inline void fire_user_return_notifiers(void) {} + +#endif + +#endif diff --git a/kernel/user-return-notifier.c b/kernel/user-return-notifier.c new file mode 100644 index 000000000000..530ccb816513 --- /dev/null +++ b/kernel/user-return-notifier.c @@ -0,0 +1,46 @@ + +#include +#include +#include +#include + +static DEFINE_PER_CPU(struct hlist_head, return_notifier_list); + +#define URN_LIST_HEAD per_cpu(return_notifier_list, raw_smp_processor_id()) + +/* + * Request a notification when the current cpu returns to userspace. Must be + * called in atomic context. The notifier will also be called in atomic + * context. + */ +void user_return_notifier_register(struct user_return_notifier *urn) +{ + set_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY); + hlist_add_head(&urn->link, &URN_LIST_HEAD); +} +EXPORT_SYMBOL_GPL(user_return_notifier_register); + +/* + * Removes a registered user return notifier. Must be called from atomic + * context, and from the same cpu registration occured in. + */ +void user_return_notifier_unregister(struct user_return_notifier *urn) +{ + hlist_del(&urn->link); + if (hlist_empty(&URN_LIST_HEAD)) + clear_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY); +} +EXPORT_SYMBOL_GPL(user_return_notifier_unregister); + +/* Calls registered user return notifiers */ +void fire_user_return_notifiers(void) +{ + struct user_return_notifier *urn; + struct hlist_node *tmp1, *tmp2; + struct hlist_head *head; + + head = &get_cpu_var(return_notifier_list); + hlist_for_each_entry_safe(urn, tmp1, tmp2, head, link) + urn->on_user_return(urn); + put_cpu_var(); +} -- cgit v1.2.3 From 63312b6a6faae3f2e5577f2b001e3b504f10a2aa Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 2 Oct 2009 07:50:50 -0700 Subject: x86: Add a Kconfig option to turn the copy_from_user warnings into errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For automated testing it is useful to have the option to turn the warnings on copy_from_user() etc checks into errors: In function ‘copy_from_user’, inlined from ‘fd_copyin’ at drivers/block/floppy.c:3080, inlined from ‘fd_ioctl’ at drivers/block/floppy.c:3503: linux/arch/x86/include/asm/uaccess_32.h:213: error: call to ‘copy_from_user_overflow’ declared with attribute error: copy_from_user buffer size is not provably correct Signed-off-by: Arjan van de Ven Cc: Linus Torvalds Cc: Andrew Morton LKML-Reference: <20091002075050.4e9f7641@infradead.org> Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.debug | 14 ++++++++++++++ arch/x86/include/asm/uaccess_32.h | 4 +++- include/linux/compiler-gcc4.h | 1 + include/linux/compiler.h | 3 +++ 4 files changed, 21 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index d105f29bb6bb..1bd2e36f1538 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -287,4 +287,18 @@ config OPTIMIZE_INLINING If unsure, say N. +config DEBUG_STRICT_USER_COPY_CHECKS + bool "Strict copy size checks" + depends on DEBUG_KERNEL + ---help--- + Enabling this option turns a certain set of sanity checks for user + copy operations into compile time failures. + + The copy_from_user() etc checks are there to help test if there + are sufficient security checks on the length argument of + the copy operation, by having gcc prove that the argument is + within bounds. + + If unsure, or if you run an older (pre 4.4) gcc, say N. + endmenu diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 952f9e793c3e..0c9825e97f36 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -193,7 +193,9 @@ unsigned long __must_check _copy_from_user(void *to, extern void copy_from_user_overflow(void) -#ifdef CONFIG_DEBUG_STACKOVERFLOW +#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS + __compiletime_error("copy_from_user() buffer size is not provably correct") +#else __compiletime_warning("copy_from_user() buffer size is not provably correct") #endif ; diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index f1709c1f9eae..77542c57e20a 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -41,4 +41,5 @@ #define __compiletime_object_size(obj) __builtin_object_size(obj, 0) #if __GNUC_MINOR__ >= 4 #define __compiletime_warning(message) __attribute__((warning(message))) +#define __compiletime_error(message) __attribute__((error(message))) #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 950356311f12..88fd4b673cb4 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -273,6 +273,9 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #ifndef __compiletime_warning # define __compiletime_warning(message) #endif +#ifndef __compiletime_error +# define __compiletime_error(message) +#endif /* * Prevent the compiler from merging or refetching accesses. The compiler -- cgit v1.2.3 From 98059e3463383b18fd79181179cd539b74846b47 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Thu, 1 Oct 2009 17:11:10 +0200 Subject: x86: AMD Geode LX optimizations Add CPU optimizations for AMD Geode LX. Signed-off-by: Matteo Croce LKML-Reference: <40101cc30910010811v5d15ff4cx9dd57c9cc9b4b045@mail.gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig.cpu | 2 +- arch/x86/Makefile_32.cpu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 527519b8a9f9..979de294710d 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -406,7 +406,7 @@ config X86_CMPXCHG64 # generates cmov. config X86_CMOV def_bool y - depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM) + depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) config X86_MINIMUM_CPU_FAMILY int diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index 30e9a264f69d..cbf0776dbec1 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu @@ -41,7 +41,7 @@ cflags-$(CONFIG_X86_ELAN) += -march=i486 # Geode GX1 support cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx - +cflags-$(CONFIG_MGEODE_LX) += $(call cc-option,-march=geode,-march=pentium-mmx) # add at the end to overwrite eventual tuning options from earlier # cpu entries cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686)) -- cgit v1.2.3 From c0b11d3af164947c71e2491912c5b8418900dafb Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 25 Sep 2009 11:20:38 -0700 Subject: x86: Add VIA processor instructions in opcodes decoder Add VIA processor's Padlock instructions(MONTMUL, XSHA1, XSHA256) as parts of the kernel may use them. This fixes the following crash in opcodes decoder selftests: make[2]: `scripts/unifdef' is up to date. TEST posttest Error: c145cf71: f3 0f a6 d0 repz xsha256 Error: objdump says 4 bytes, but insn_get_length() says 3 (attr:0) make[1]: *** [posttest] Error 2 make: *** [bzImage] Error 2 Reported-by: Ingo Molnar Signed-off-by: Masami Hiramatsu Acked-by: Steven Rostedt Acked-by: Ingo Molnar Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H. Peter Anvin Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Tom Zanussi LKML-Reference: <20090925182037.10157.3180.stgit@omoto> Signed-off-by: Frederic Weisbecker --- arch/x86/lib/x86-opcode-map.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 59e20d5c2a52..78a0daf12e15 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -469,7 +469,7 @@ a2: CPUID a3: BT Ev,Gv a4: SHLD Ev,Gv,Ib a5: SHLD Ev,Gv,CL -a6: +a6: GrpPDLK a7: GrpRNG a8: PUSH GS (d64) a9: POP GS (d64) @@ -803,6 +803,12 @@ GrpTable: Grp16 3: prefetch T2 EndTable +GrpTable: GrpPDLK +0: MONTMUL +1: XSHA1 +2: XSHA2 +EndTable + GrpTable: GrpRNG 0: xstore-rng 1: xcrypt-ecb -- cgit v1.2.3 From 30ed1a79f5bf271d33e782afee3323582dcc621e Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sat, 3 Oct 2009 19:48:22 +0900 Subject: this_cpu: Implement X86 optimized this_cpu operations Basically the existing percpu ops can be used for this_cpu variants that allow operations also on dynamically allocated percpu data. However, we do not pass a reference to a percpu variable in. Instead a dynamically or statically allocated percpu variable is provided. Preempt, the non preempt and the irqsafe operations generate the same code. It will always be possible to have the requires per cpu atomicness in a single RMW instruction with segment override on x86. 64 bit this_cpu operations are not supported on 32 bit. Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/x86/include/asm/percpu.h | 78 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index b65a36defeb7..8b5ec19bdef4 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -153,6 +153,84 @@ do { \ #define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val) #define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val) +#define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define __this_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) + +#define __this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) +#define __this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) +#define __this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) +#define __this_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val) +#define __this_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val) +#define __this_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val) +#define __this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) +#define __this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) +#define __this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) +#define __this_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val) +#define __this_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) +#define __this_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) +#define __this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) +#define __this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) +#define __this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) + +#define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define this_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) +#define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) +#define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) +#define this_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val) +#define this_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val) +#define this_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val) +#define this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) +#define this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) +#define this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) +#define this_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val) +#define this_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) +#define this_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) +#define this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) +#define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) +#define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) + +#define irqsafe_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val) +#define irqsafe_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val) +#define irqsafe_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val) +#define irqsafe_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) +#define irqsafe_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) +#define irqsafe_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) +#define irqsafe_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val) +#define irqsafe_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) +#define irqsafe_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) +#define irqsafe_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) +#define irqsafe_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) +#define irqsafe_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) + +/* + * Per cpu atomic 64 bit operations are only available under 64 bit. + * 32 bit must fall back to generic operations. + */ +#ifdef CONFIG_X86_64 +#define __this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define __this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) +#define __this_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val) +#define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) +#define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) +#define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) + +#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) +#define this_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val) +#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) +#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) +#define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) + +#define irqsafe_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val) +#define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) +#define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) +#define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) + +#endif + /* This is not atomic against other CPUs -- CPU preemption needs to be off */ #define x86_test_and_clear_bit_percpu(bit, var) \ ({ \ -- cgit v1.2.3 From d6c304055b3cecd4ca865769ac7cea97a320727b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 7 Oct 2009 21:43:22 +0200 Subject: x86, msr: Remove the bkl from msr_open() Remove the big kernel lock from msr_open() as it doesn't protect anything there. The only racy event that can happen here is a concurrent cpu shutdown. So let's look at what could be racy during/after the above event: - The cpu_online() check is racy, but the bkl doesn't help about that anyway it disables preemption but we may be chcking another cpu than the current one. Also the cpu can still become offlined between open and read calls. - The cpu_data(cpu) returns a safe pointer too. It won't be released on cpu offlining. But some fields can be changed from arch/x86/kernel/smpboot.c:remove_siblinginfo() : - phys_proc_id - cpu_core_id Those are not read from msr_open(). What we are checking is the x86_capability that is left untouched on offlining. So this removal looks safe. Signed-off-by: Frederic Weisbecker Cc: John Kacur Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Sven-Thorsten Dietrich LKML-Reference: <1254944602-7382-1-git-send-email-fweisbec@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/msr.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 7dd950094178..c00610963238 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -174,21 +174,17 @@ static int msr_open(struct inode *inode, struct file *file) { unsigned int cpu = iminor(file->f_path.dentry->d_inode); struct cpuinfo_x86 *c = &cpu_data(cpu); - int ret = 0; - lock_kernel(); cpu = iminor(file->f_path.dentry->d_inode); - if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { - ret = -ENXIO; /* No such CPU */ - goto out; - } + if (cpu >= nr_cpu_ids || !cpu_online(cpu)) + return -ENXIO; /* No such CPU */ + c = &cpu_data(cpu); if (!cpu_has(c, X86_FEATURE_MSR)) - ret = -EIO; /* MSR not supported */ -out: - unlock_kernel(); - return ret; + return -EIO; /* MSR not supported */ + + return 0; } /* -- cgit v1.2.3 From 170a0bc3808909d8ea0f3f9c725c6565efe7f9c4 Mon Sep 17 00:00:00 2001 From: John Kacur Date: Wed, 7 Oct 2009 20:19:32 +0200 Subject: x86, cpuid: Remove the bkl from cpuid_open() Most of the variables are local to the function. It IS possible that for struct cpuinfo_x86 *c c could point to the same area. However, this is used read only. Signed-off-by: John Kacur LKML-Reference: Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpuid.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index b07af8861244..ef6928418c8f 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -118,8 +118,6 @@ static int cpuid_open(struct inode *inode, struct file *file) struct cpuinfo_x86 *c; int ret = 0; - lock_kernel(); - cpu = iminor(file->f_path.dentry->d_inode); if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { ret = -ENXIO; /* No such CPU */ @@ -129,7 +127,6 @@ static int cpuid_open(struct inode *inode, struct file *file) if (c->cpuid_level < 0) ret = -EIO; /* CPUID not supported */ out: - unlock_kernel(); return ret; } -- cgit v1.2.3 From d0153ca35d344d9b640dc305031b0703ba3f30f0 Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Tue, 29 Sep 2009 10:25:24 -0700 Subject: x86, vmi: Mark VMI deprecated and schedule it for removal Add text in feature-removal.txt indicating that VMI will be removed in the 2.6.37 timeframe. Signed-off-by: Alok N Kataria Acked-by: Chris Wright LKML-Reference: <1254193238.13456.48.camel@ank32.eng.vmware.com> [ removed a bogus Kconfig change, marked (DEPRECATED) in Kconfig ] Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- Documentation/feature-removal-schedule.txt | 30 ++++++++++++++++++++++++++++++ arch/x86/Kconfig | 11 ++++++++++- arch/x86/kernel/vmi_32.c | 2 +- 3 files changed, 41 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 89a47b5aff07..04e6c819b28a 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -451,3 +451,33 @@ Why: OSS sound_core grabs all legacy minors (0-255) of SOUND_MAJOR will also allow making ALSA OSS emulation independent of sound_core. The dependency will be broken then too. Who: Tejun Heo + +---------------------------- + +What: Support for VMware's guest paravirtuliazation technique [VMI] will be + dropped. +When: 2.6.37 or earlier. +Why: With the recent innovations in CPU hardware acceleration technologies + from Intel and AMD, VMware ran a few experiments to compare these + techniques to guest paravirtualization technique on VMware's platform. + These hardware assisted virtualization techniques have outperformed the + performance benefits provided by VMI in most of the workloads. VMware + expects that these hardware features will be ubiquitous in a couple of + years, as a result, VMware has started a phased retirement of this + feature from the hypervisor. We will be removing this feature from the + Kernel too. Right now we are targeting 2.6.37 but can retire earlier if + technical reasons (read opportunity to remove major chunk of pvops) + arise. + + Please note that VMI has always been an optimization and non-VMI kernels + still work fine on VMware's platform. + Latest versions of VMware's product which support VMI are, + Workstation 7.0 and VSphere 4.0 on ESX side, future maintainence + releases for these products will continue supporting VMI. + + For more details about VMI retirement take a look at this, + http://blogs.vmware.com/guestosguide/2009/09/vmi-retirement.html + +Who: Alok N Kataria + +---------------------------- diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c876bace8fdc..07e01149e3bf 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -491,7 +491,7 @@ if PARAVIRT_GUEST source "arch/x86/xen/Kconfig" config VMI - bool "VMI Guest support" + bool "VMI Guest support (DEPRECATED)" select PARAVIRT depends on X86_32 ---help--- @@ -500,6 +500,15 @@ config VMI at the moment), by linking the kernel to a GPL-ed ROM module provided by the hypervisor. + As of September 2009, VMware has started a phased retirement + of this feature from VMware's products. Please see + feature-removal-schedule.txt for details. If you are + planning to enable this option, please note that you cannot + live migrate a VMI enabled VM to a future VMware product, + which doesn't support VMI. So if you expect your kernel to + seamlessly migrate to newer VMware products, keep this + disabled. + config KVM_CLOCK bool "KVM paravirtualized clock" select PARAVIRT diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 31e6f6cfe53e..d430e4c30193 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -648,7 +648,7 @@ static inline int __init activate_vmi(void) pv_info.paravirt_enabled = 1; pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK; - pv_info.name = "vmi"; + pv_info.name = "vmi [deprecated]"; pv_init_ops.patch = vmi_patch; -- cgit v1.2.3 From 5a943617ef52e9f79cd7cf437aad8870be27aabb Mon Sep 17 00:00:00 2001 From: John Kacur Date: Thu, 8 Oct 2009 17:20:15 +0200 Subject: x86, cpuid: Simplify the code in cpuid_open Peter picked up my patch for tip/x86/cpu that removes the bkl in cpuid_open. Ingo subsequently merged that into tip/master. This patch folds back in tglx's 55968ede164ae523692f00717f50cd926f1382a0 to my patch that removed the bkl. This simplifies the code, and makes it consistent with the changes to kill the bkl in msr.c as well. Originally-by: Thomas Gleixner Signed-off-by: John Kacur Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpuid.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index ef6928418c8f..48e8e6558b26 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -116,18 +116,16 @@ static int cpuid_open(struct inode *inode, struct file *file) { unsigned int cpu; struct cpuinfo_x86 *c; - int ret = 0; cpu = iminor(file->f_path.dentry->d_inode); - if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { - ret = -ENXIO; /* No such CPU */ - goto out; - } + if (cpu >= nr_cpu_ids || !cpu_online(cpu)) + return -ENXIO; /* No such CPU */ + c = &cpu_data(cpu); if (c->cpuid_level < 0) - ret = -EIO; /* CPUID not supported */ -out: - return ret; + return -EIO; /* CPUID not supported */ + + return 0; } /* -- cgit v1.2.3 From 04a705df47d1ea27ca2b066f24b1951c51792d0d Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 6 Oct 2009 16:42:08 +0200 Subject: perf_events: Check for filters on fixed counter events Intel fixed counters do not support all the filters possible with a generic counter. Thus, if a fixed counter event is passed but with certain filters set, then the fixed_mode_idx() function must fail and the event must be measured in a generic counter instead. Reject filters are: inv, edge, cnt-mask. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra LKML-Reference: <1254840129-6198-2-git-send-email-eranian@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event.h | 13 ++++++++++++- arch/x86/kernel/cpu/perf_event.c | 6 ++++++ 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index ad7ce3fd5065..8d9f8548a870 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -28,9 +28,20 @@ */ #define ARCH_PERFMON_EVENT_MASK 0xffff +/* + * filter mask to validate fixed counter events. + * the following filters disqualify for fixed counters: + * - inv + * - edge + * - cnt-mask + * The other filters are supported by fixed counters. + * The any-thread option is supported starting with v3. + */ +#define ARCH_PERFMON_EVENT_FILTER_MASK 0xff840000 + #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index b5801c311846..1d16bd69551e 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1349,6 +1349,12 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) if (!x86_pmu.num_events_fixed) return -1; + /* + * fixed counters do not take all possible filters + */ + if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK) + return -1; + if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) return X86_PMC_IDX_FIXED_INSTRUCTIONS; if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) -- cgit v1.2.3 From b690081d4d3f6a23541493f1682835c3cd5c54a1 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 6 Oct 2009 16:42:09 +0200 Subject: perf_events: Add event constraints support for Intel processors On some Intel processors, not all events can be measured in all counters. Some events can only be measured in one particular counter, for instance. Assigning an event to the wrong counter does not crash the machine but this yields bogus counts, i.e., silent error. This patch changes the event to counter assignment logic to take into account event constraints for Intel P6, Core and Nehalem processors. There is no contraints on Intel Atom. There are constraints on Intel Yonah (Core Duo) but they are not provided in this patch given that this processor is not yet supported by perf_events. As a result of the constraints, it is possible for some event groups to never actually be loaded onto the PMU if they contain two events which can only be measured on a single counter. That situation can be detected with the scaling information extracted with read(). Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra LKML-Reference: <1254840129-6198-3-git-send-email-eranian@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 109 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 105 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 1d16bd69551e..9c758548a0e6 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -77,6 +77,18 @@ struct cpu_hw_events { struct debug_store *ds; }; +struct event_constraint { + unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + int code; +}; + +#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) } +#define EVENT_CONSTRAINT_END { .code = 0, .idxmsk[0] = 0 } + +#define for_each_event_constraint(e, c) \ + for ((e) = (c); (e)->idxmsk[0]; (e)++) + + /* * struct x86_pmu - generic x86 pmu */ @@ -102,6 +114,7 @@ struct x86_pmu { u64 intel_ctrl; void (*enable_bts)(u64 config); void (*disable_bts)(void); + int (*get_event_idx)(struct hw_perf_event *hwc); }; static struct x86_pmu x86_pmu __read_mostly; @@ -110,6 +123,8 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; +static const struct event_constraint *event_constraint; + /* * Not sure about some of these */ @@ -155,6 +170,16 @@ static u64 p6_pmu_raw_event(u64 hw_event) return hw_event & P6_EVNTSEL_MASK; } +static const struct event_constraint intel_p6_event_constraints[] = +{ + EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ + EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ + EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ + EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ + EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ + EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ + EVENT_CONSTRAINT_END +}; /* * Intel PerfMon v3. Used on Core2 and later. @@ -170,6 +195,35 @@ static const u64 intel_perfmon_event_map[] = [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, }; +static const struct event_constraint intel_core_event_constraints[] = +{ + EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ + EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ + EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ + EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ + EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ + EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ + EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ + EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ + EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ + EVENT_CONSTRAINT_END +}; + +static const struct event_constraint intel_nehalem_event_constraints[] = +{ + EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ + EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ + EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ + EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ + EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ + EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */ + EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ + EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */ + EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */ + EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */ + EVENT_CONSTRAINT_END +}; + static u64 intel_pmu_event_map(int hw_event) { return intel_perfmon_event_map[hw_event]; @@ -932,6 +986,8 @@ static int __hw_perf_event_init(struct perf_event *event) */ hwc->config = ARCH_PERFMON_EVENTSEL_INT; + hwc->idx = -1; + /* * Count user and OS events unless requested not to. */ @@ -1365,6 +1421,45 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) return -1; } +/* + * generic counter allocator: get next free counter + */ +static int gen_get_event_idx(struct hw_perf_event *hwc) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int idx; + + idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events); + return idx == x86_pmu.num_events ? -1 : idx; +} + +/* + * intel-specific counter allocator: check event constraints + */ +static int intel_get_event_idx(struct hw_perf_event *hwc) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + const struct event_constraint *event_constraint; + int i, code; + + if (!event_constraint) + goto skip; + + code = hwc->config & 0xff; + + for_each_event_constraint(event_constraint, event_constraint) { + if (code == event_constraint->code) { + for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) { + if (!test_and_set_bit(i, cpuc->used_mask)) + return i; + } + return -1; + } + } +skip: + return gen_get_event_idx(hwc); +} + /* * Find a PMC slot for the freshly enabled / scheduled in event: */ @@ -1402,11 +1497,10 @@ static int x86_pmu_enable(struct perf_event *event) } else { idx = hwc->idx; /* Try to get the previous generic event again */ - if (test_and_set_bit(idx, cpuc->used_mask)) { + if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) { try_generic: - idx = find_first_zero_bit(cpuc->used_mask, - x86_pmu.num_events); - if (idx == x86_pmu.num_events) + idx = x86_pmu.get_event_idx(hwc); + if (idx == -1) return -EAGAIN; set_bit(idx, cpuc->used_mask); @@ -1883,6 +1977,7 @@ static struct x86_pmu p6_pmu = { */ .event_bits = 32, .event_mask = (1ULL << 32) - 1, + .get_event_idx = intel_get_event_idx, }; static struct x86_pmu intel_pmu = { @@ -1906,6 +2001,7 @@ static struct x86_pmu intel_pmu = { .max_period = (1ULL << 31) - 1, .enable_bts = intel_pmu_enable_bts, .disable_bts = intel_pmu_disable_bts, + .get_event_idx = intel_get_event_idx, }; static struct x86_pmu amd_pmu = { @@ -1926,6 +2022,7 @@ static struct x86_pmu amd_pmu = { .apic = 1, /* use highest bit to detect overflow */ .max_period = (1ULL << 47) - 1, + .get_event_idx = gen_get_event_idx, }; static int p6_pmu_init(void) @@ -1938,10 +2035,12 @@ static int p6_pmu_init(void) case 7: case 8: case 11: /* Pentium III */ + event_constraint = intel_p6_event_constraints; break; case 9: case 13: /* Pentium M */ + event_constraint = intel_p6_event_constraints; break; default: pr_cont("unsupported p6 CPU model %d ", @@ -2013,12 +2112,14 @@ static int intel_pmu_init(void) sizeof(hw_cache_event_ids)); pr_cont("Core2 events, "); + event_constraint = intel_core_event_constraints; break; default: case 26: memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + event_constraint = intel_nehalem_event_constraints; pr_cont("Nehalem/Corei7 events, "); break; case 28: -- cgit v1.2.3 From fe9081cc9bdabb0be953a39ad977cea14e35bce5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 8 Oct 2009 11:56:07 +0200 Subject: perf, x86: Add simple group validation Refuse to add events when the group wouldn't fit onto the PMU anymore. Naive implementation. Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: <1254911461.26976.239.camel@twins> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 90 ++++++++++++++++++++++++++++++---------- 1 file changed, 69 insertions(+), 21 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 9c758548a0e6..9961d845719d 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -114,7 +114,8 @@ struct x86_pmu { u64 intel_ctrl; void (*enable_bts)(u64 config); void (*disable_bts)(void); - int (*get_event_idx)(struct hw_perf_event *hwc); + int (*get_event_idx)(struct cpu_hw_events *cpuc, + struct hw_perf_event *hwc); }; static struct x86_pmu x86_pmu __read_mostly; @@ -523,7 +524,7 @@ static u64 intel_pmu_raw_event(u64 hw_event) #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL #define CORE_EVNTSEL_INV_MASK 0x00800000ULL -#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL +#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL #define CORE_EVNTSEL_MASK \ (CORE_EVNTSEL_EVENT_MASK | \ @@ -1390,8 +1391,7 @@ static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx) x86_pmu_enable_event(hwc, idx); } -static int -fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) +static int fixed_mode_idx(struct hw_perf_event *hwc) { unsigned int hw_event; @@ -1424,9 +1424,9 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) /* * generic counter allocator: get next free counter */ -static int gen_get_event_idx(struct hw_perf_event *hwc) +static int +gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx; idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events); @@ -1436,16 +1436,16 @@ static int gen_get_event_idx(struct hw_perf_event *hwc) /* * intel-specific counter allocator: check event constraints */ -static int intel_get_event_idx(struct hw_perf_event *hwc) +static int +intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); const struct event_constraint *event_constraint; int i, code; if (!event_constraint) goto skip; - code = hwc->config & 0xff; + code = hwc->config & CORE_EVNTSEL_EVENT_MASK; for_each_event_constraint(event_constraint, event_constraint) { if (code == event_constraint->code) { @@ -1457,26 +1457,22 @@ static int intel_get_event_idx(struct hw_perf_event *hwc) } } skip: - return gen_get_event_idx(hwc); + return gen_get_event_idx(cpuc, hwc); } -/* - * Find a PMC slot for the freshly enabled / scheduled in event: - */ -static int x86_pmu_enable(struct perf_event *event) +static int +x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; int idx; - idx = fixed_mode_idx(event, hwc); + idx = fixed_mode_idx(hwc); if (idx == X86_PMC_IDX_FIXED_BTS) { /* BTS is already occupied. */ if (test_and_set_bit(idx, cpuc->used_mask)) return -EAGAIN; hwc->config_base = 0; - hwc->event_base = 0; + hwc->event_base = 0; hwc->idx = idx; } else if (idx >= 0) { /* @@ -1499,17 +1495,33 @@ static int x86_pmu_enable(struct perf_event *event) /* Try to get the previous generic event again */ if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) { try_generic: - idx = x86_pmu.get_event_idx(hwc); + idx = x86_pmu.get_event_idx(cpuc, hwc); if (idx == -1) return -EAGAIN; set_bit(idx, cpuc->used_mask); hwc->idx = idx; } - hwc->config_base = x86_pmu.eventsel; - hwc->event_base = x86_pmu.perfctr; + hwc->config_base = x86_pmu.eventsel; + hwc->event_base = x86_pmu.perfctr; } + return idx; +} + +/* + * Find a PMC slot for the freshly enabled / scheduled in event: + */ +static int x86_pmu_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx; + + idx = x86_schedule_event(cpuc, hwc); + if (idx < 0) + return idx; + perf_events_lapic_init(); x86_pmu.disable(hwc, idx); @@ -2212,11 +2224,47 @@ static const struct pmu pmu = { .unthrottle = x86_pmu_unthrottle, }; +static int +validate_event(struct cpu_hw_events *cpuc, struct perf_event *event) +{ + struct hw_perf_event fake_event = event->hw; + + if (event->pmu != &pmu) + return 0; + + return x86_schedule_event(cpuc, &fake_event); +} + +static int validate_group(struct perf_event *event) +{ + struct perf_event *sibling, *leader = event->group_leader; + struct cpu_hw_events fake_pmu; + + memset(&fake_pmu, 0, sizeof(fake_pmu)); + + if (!validate_event(&fake_pmu, leader)) + return -ENOSPC; + + list_for_each_entry(sibling, &leader->sibling_list, group_entry) { + if (!validate_event(&fake_pmu, sibling)) + return -ENOSPC; + } + + if (!validate_event(&fake_pmu, event)) + return -ENOSPC; + + return 0; +} + const struct pmu *hw_perf_event_init(struct perf_event *event) { int err; err = __hw_perf_event_init(event); + if (!err) { + if (event->group_leader != event) + err = validate_group(event); + } if (err) { if (event->destroy) event->destroy(event); -- cgit v1.2.3 From f3834b9ef68067199486740b31f691afb14dbdf5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 9 Oct 2009 10:12:46 +0200 Subject: x86: Generate cmpxchg build failures Rework the x86 cmpxchg() implementation to generate build failures when used on improper types. Signed-off-by: Peter Zijlstra Acked-by: Linus Torvalds LKML-Reference: <1254771187.21044.22.camel@laptop> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cmpxchg_32.h | 218 ++++++++++++++--------------------- arch/x86/include/asm/cmpxchg_64.h | 234 +++++++++++++++----------------------- 2 files changed, 177 insertions(+), 275 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index 82ceb788a981..5371174cf5d0 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -8,14 +8,50 @@ * you need to test for the feature in boot_cpu_data. */ -#define xchg(ptr, v) \ - ((__typeof__(*(ptr)))__xchg((unsigned long)(v), (ptr), sizeof(*(ptr)))) +extern void __xchg_wrong_size(void); + +/* + * Note: no "lock" prefix even on SMP: xchg always implies lock anyway + * Note 2: xchg has side effect, so that attribute volatile is necessary, + * but generally the primitive is invalid, *ptr is output argument. --ANK + */ struct __xchg_dummy { unsigned long a[100]; }; #define __xg(x) ((struct __xchg_dummy *)(x)) +#define __xchg(x, ptr, size) \ +({ \ + __typeof(*(ptr)) __x = (x); \ + switch (size) { \ + case 1: \ + asm volatile("xchgb %b0,%1" \ + : "=q" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile("xchgw %w0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile("xchgl %0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + default: \ + __xchg_wrong_size(); \ + } \ + __x; \ +}) + +#define xchg(ptr, v) \ + __xchg((v), (ptr), sizeof(*ptr)) + /* * The semantics of XCHGCMP8B are a bit strange, this is why * there is a loop and the loading of %%eax and %%edx has to @@ -71,57 +107,63 @@ static inline void __set_64bit_var(unsigned long long *ptr, (unsigned int)((value) >> 32)) \ : __set_64bit(ptr, ll_low((value)), ll_high((value)))) -/* - * Note: no "lock" prefix even on SMP: xchg always implies lock anyway - * Note 2: xchg has side effect, so that attribute volatile is necessary, - * but generally the primitive is invalid, *ptr is output argument. --ANK - */ -static inline unsigned long __xchg(unsigned long x, volatile void *ptr, - int size) -{ - switch (size) { - case 1: - asm volatile("xchgb %b0,%1" - : "=q" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 2: - asm volatile("xchgw %w0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 4: - asm volatile("xchgl %0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - } - return x; -} +extern void __cmpxchg_wrong_size(void); /* * Atomic compare and exchange. Compare OLD with MEM, if identical, * store NEW in MEM. Return the initial value in MEM. Success is * indicated by comparing RETURN with OLD. */ +#define __raw_cmpxchg(ptr, old, new, size, lock) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) __old = (old); \ + __typeof__(*(ptr)) __new = (new); \ + switch (size) { \ + case 1: \ + asm volatile(lock "cmpxchgb %b1,%2" \ + : "=a"(__ret) \ + : "q"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile(lock "cmpxchgw %w1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile(lock "cmpxchgl %1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + default: \ + __cmpxchg_wrong_size(); \ + } \ + __ret; \ +}) + +#define __cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX) + +#define __sync_cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "lock; ") + +#define __cmpxchg_local(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "") #ifdef CONFIG_X86_CMPXCHG #define __HAVE_ARCH_CMPXCHG 1 -#define cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) -#define sync_cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr)))__sync_cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) -#define cmpxchg_local(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) + +#define cmpxchg(ptr, old, new) \ + __cmpxchg((ptr), (old), (new), sizeof(*ptr)) + +#define sync_cmpxchg(ptr, old, new) \ + __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr)) + +#define cmpxchg_local(ptr, old, new) \ + __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) #endif #ifdef CONFIG_X86_CMPXCHG64 @@ -133,94 +175,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, (unsigned long long)(n))) #endif -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile(LOCK_PREFIX "cmpxchgl %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} - -/* - * Always use locked operations when touching memory shared with a - * hypervisor, since the system may be SMP even if the guest kernel - * isn't. - */ -static inline unsigned long __sync_cmpxchg(volatile void *ptr, - unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile("lock; cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile("lock; cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile("lock; cmpxchgl %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} - -static inline unsigned long __cmpxchg_local(volatile void *ptr, - unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile("cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile("cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile("cmpxchgl %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} - static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long long old, unsigned long long new) diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 52de72e0de8c..485ae415faec 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -3,9 +3,6 @@ #include /* Provides LOCK_PREFIX */ -#define xchg(ptr, v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v), \ - (ptr), sizeof(*(ptr)))) - #define __xg(x) ((volatile long *)(x)) static inline void set_64bit(volatile unsigned long *ptr, unsigned long val) @@ -15,167 +12,118 @@ static inline void set_64bit(volatile unsigned long *ptr, unsigned long val) #define _set_64bit set_64bit +extern void __xchg_wrong_size(void); +extern void __cmpxchg_wrong_size(void); + /* * Note: no "lock" prefix even on SMP: xchg always implies lock anyway * Note 2: xchg has side effect, so that attribute volatile is necessary, * but generally the primitive is invalid, *ptr is output argument. --ANK */ -static inline unsigned long __xchg(unsigned long x, volatile void *ptr, - int size) -{ - switch (size) { - case 1: - asm volatile("xchgb %b0,%1" - : "=q" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 2: - asm volatile("xchgw %w0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 4: - asm volatile("xchgl %k0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 8: - asm volatile("xchgq %0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - } - return x; -} +#define __xchg(x, ptr, size) \ +({ \ + __typeof(*(ptr)) __x = (x); \ + switch (size) { \ + case 1: \ + asm volatile("xchgb %b0,%1" \ + : "=q" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile("xchgw %w0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile("xchgl %k0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 8: \ + asm volatile("xchgq %0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + default: \ + __xchg_wrong_size(); \ + } \ + __x; \ +}) + +#define xchg(ptr, v) \ + __xchg((v), (ptr), sizeof(*ptr)) + +#define __HAVE_ARCH_CMPXCHG 1 /* * Atomic compare and exchange. Compare OLD with MEM, if identical, * store NEW in MEM. Return the initial value in MEM. Success is * indicated by comparing RETURN with OLD. */ +#define __raw_cmpxchg(ptr, old, new, size, lock) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) __old = (old); \ + __typeof__(*(ptr)) __new = (new); \ + switch (size) { \ + case 1: \ + asm volatile(lock "cmpxchgb %b1,%2" \ + : "=a"(__ret) \ + : "q"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile(lock "cmpxchgw %w1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile(lock "cmpxchgl %k1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 8: \ + asm volatile(lock "cmpxchgq %1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + default: \ + __cmpxchg_wrong_size(); \ + } \ + __ret; \ +}) -#define __HAVE_ARCH_CMPXCHG 1 +#define __cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX) -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile(LOCK_PREFIX "cmpxchgl %k1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 8: - asm volatile(LOCK_PREFIX "cmpxchgq %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} +#define __sync_cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "lock; ") -/* - * Always use locked operations when touching memory shared with a - * hypervisor, since the system may be SMP even if the guest kernel - * isn't. - */ -static inline unsigned long __sync_cmpxchg(volatile void *ptr, - unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile("lock; cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile("lock; cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile("lock; cmpxchgl %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} +#define __cmpxchg_local(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "") -static inline unsigned long __cmpxchg_local(volatile void *ptr, - unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile("cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile("cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile("cmpxchgl %k1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 8: - asm volatile("cmpxchgq %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} +#define cmpxchg(ptr, old, new) \ + __cmpxchg((ptr), (old), (new), sizeof(*ptr)) + +#define sync_cmpxchg(ptr, old, new) \ + __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr)) + +#define cmpxchg_local(ptr, old, new) \ + __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) -#define cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), sizeof(*(ptr)))) #define cmpxchg64(ptr, o, n) \ ({ \ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ cmpxchg((ptr), (o), (n)); \ }) -#define cmpxchg_local(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) -#define sync_cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr)))__sync_cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) + #define cmpxchg64_local(ptr, o, n) \ ({ \ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ -- cgit v1.2.3 From e7ab0f7b50bc4688fb5cf65de5d42e3b882fb8d1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 9 Oct 2009 15:58:20 +0200 Subject: Revert "x86, timers: Check for pending timers after (device) interrupts" This reverts commit 9bcbdd9c58617f1301dd4f17c738bb9bc73aca70. The real bug producing LatencyTop latencies has been fixed in: f5dc375: sched: Update the clock of runqueue select_task_rq() selected And the commit being reverted here triggers local timer processing from every device IRQ. If device IRQs come in at a high frequency, this could cause a performance regression. The commit being reverted here purely 'fixed' the reported latency as a side effect, because CPUs were being moved out of idle more often. Acked-by: Peter Zijlstra Cc: Arjan van de Ven Cc: Frans Pop Cc: Linus Torvalds Cc: Mike Galbraith Cc: Thomas Gleixner LKML-Reference: <20091008064041.67219b13@infradead.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq.c | 2 -- arch/x86/kernel/smp.c | 1 - 2 files changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 391206199515..74656d1d4e30 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -244,7 +244,6 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) __func__, smp_processor_id(), vector, irq); } - run_local_timers(); irq_exit(); set_irq_regs(old_regs); @@ -269,7 +268,6 @@ void smp_generic_interrupt(struct pt_regs *regs) if (generic_interrupt_extension) generic_interrupt_extension(); - run_local_timers(); irq_exit(); set_irq_regs(old_regs); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index d915d956e66d..ec1de97600e7 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -198,7 +198,6 @@ void smp_reschedule_interrupt(struct pt_regs *regs) { ack_APIC_irq(); inc_irq_stat(irq_resched_count); - run_local_timers(); /* * KVM uses this interrupt to force a cpu out of guest mode */ -- cgit v1.2.3 From c5cca146aa03e1f60fb179df65f0dbaf17bc64ed Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Oct 2009 18:31:20 +0200 Subject: x86/amd-iommu: Workaround for erratum 63 There is an erratum for IOMMU hardware which documents undefined behavior when forwarding SMI requests from peripherals and the DTE of that peripheral has a sysmgt value of 01b. This problem caused weird IO_PAGE_FAULTS in my case. This patch implements the suggested workaround for that erratum into the AMD IOMMU driver. The erratum is documented with number 63. Cc: stable@kernel.org Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu.h | 1 + arch/x86/kernel/amd_iommu.c | 2 ++ arch/x86/kernel/amd_iommu_init.c | 22 ++++++++++++++++++++++ 3 files changed, 25 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index bdf96f119f06..9dbd4030f0cd 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -30,6 +30,7 @@ extern irqreturn_t amd_iommu_int_handler(int irq, void *data); extern void amd_iommu_flush_all_domains(void); extern void amd_iommu_flush_all_devices(void); extern void amd_iommu_shutdown(void); +extern void amd_iommu_apply_erratum_63(u16 devid); #else static inline int amd_iommu_init(void) { return -ENODEV; } static inline void amd_iommu_detect(void) { } diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 6c99f5037801..f95dfe526444 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1114,6 +1114,8 @@ static void __detach_device(struct protection_domain *domain, u16 devid) amd_iommu_dev_table[devid].data[1] = 0; amd_iommu_dev_table[devid].data[2] = 0; + amd_iommu_apply_erratum_63(devid); + /* decrease reference counter */ domain->dev_cnt -= 1; diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c1b17e97252e..498c8c79aaa5 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -509,6 +509,26 @@ static void set_dev_entry_bit(u16 devid, u8 bit) amd_iommu_dev_table[devid].data[i] |= (1 << _bit); } +static int get_dev_entry_bit(u16 devid, u8 bit) +{ + int i = (bit >> 5) & 0x07; + int _bit = bit & 0x1f; + + return (amd_iommu_dev_table[devid].data[i] & (1 << _bit)) >> _bit; +} + + +void amd_iommu_apply_erratum_63(u16 devid) +{ + int sysmgt; + + sysmgt = get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1) | + (get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2) << 1); + + if (sysmgt == 0x01) + set_dev_entry_bit(devid, DEV_ENTRY_IW); +} + /* Writes the specific IOMMU for a device into the rlookup table */ static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) { @@ -537,6 +557,8 @@ static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, if (flags & ACPI_DEVFLAG_LINT1) set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); + amd_iommu_apply_erratum_63(devid); + set_iommu_for_device(iommu, devid); } -- cgit v1.2.3 From a6f05a6a0a1713d5b019f096799d49226807d3df Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 8 Oct 2009 18:02:54 -0700 Subject: x86-64: make compat_start_thread() match start_thread() For no real good reason, compat_start_thread() was embedded inline in whereas the native start_thread() lives in process_*.c. Move compat_start_thread() to process_64.c, remove gratuitious differences, and fix a few items which mostly look like bit rot. In particular, compat_start_thread() didn't do free_thread_xstate(), which means it was hanging on to the xstate store area even when it was not needed. It was also not setting old_rsp, but it looks like that generally shouldn't matter for a 32-bit process. Note: compat_start_thread *has* to be a macro, since it is tested with start_thread_ia32() as the out of line function name. Signed-off-by: H. Peter Anvin Acked-by: Suresh Siddha --- arch/x86/include/asm/elf.h | 20 ++------------------ arch/x86/kernel/process_64.c | 23 ++++++++++++++++++++++- 2 files changed, 24 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 456a304b8172..8a024babe5e6 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -157,19 +157,6 @@ do { \ #define compat_elf_check_arch(x) elf_check_arch_ia32(x) -static inline void start_ia32_thread(struct pt_regs *regs, u32 ip, u32 sp) -{ - loadsegment(fs, 0); - loadsegment(ds, __USER32_DS); - loadsegment(es, __USER32_DS); - load_gs_index(0); - regs->ip = ip; - regs->sp = sp; - regs->flags = X86_EFLAGS_IF; - regs->cs = __USER32_CS; - regs->ss = __USER32_DS; -} - static inline void elf_common_init(struct thread_struct *t, struct pt_regs *regs, const u16 ds) { @@ -191,11 +178,8 @@ do { \ #define COMPAT_ELF_PLAT_INIT(regs, load_addr) \ elf_common_init(¤t->thread, regs, __USER_DS) -#define compat_start_thread(regs, ip, sp) \ -do { \ - start_ia32_thread(regs, ip, sp); \ - set_fs(USER_DS); \ -} while (0) +void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp); +#define compat_start_thread start_thread_ia32 #define COMPAT_SET_PERSONALITY(ex) \ do { \ diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ad535b683170..7cf0a6b6d4bb 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -356,7 +356,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) percpu_write(old_rsp, new_sp); regs->cs = __USER_CS; regs->ss = __USER_DS; - regs->flags = 0x200; + regs->flags = X86_EFLAGS_IF; set_fs(USER_DS); /* * Free the old FP and other extended state @@ -365,6 +365,27 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) } EXPORT_SYMBOL_GPL(start_thread); +#ifdef CONFIG_IA32_EMULATION +void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp) +{ + loadsegment(fs, 0); + loadsegment(ds, __USER32_DS); + loadsegment(es, __USER32_DS); + load_gs_index(0); + regs->ip = new_ip; + regs->sp = new_sp; + percpu_write(old_rsp, new_sp); + regs->cs = __USER32_CS; + regs->ss = __USER32_DS; + regs->flags = X86_EFLAGS_IF; + set_fs(USER_DS); + /* + * Free the old FP and other extended state + */ + free_thread_xstate(current); +} +#endif + /* * switch_to(x,y) should switch tasks from x to y. * -- cgit v1.2.3 From e634d8fc792c66c3d4ff45518c04848c1e28f221 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 9 Oct 2009 15:56:53 -0700 Subject: x86-64: merge the standard and compat start_thread() functions The only thing left that differs between the standard and compat start_thread functions is the actual segment numbers and the prototype, so have a single common function which contains the guts and two very small wrappers. Signed-off-by: H. Peter Anvin Acked-by: Suresh Siddha --- arch/x86/kernel/process_64.c | 39 +++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 22 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 7cf0a6b6d4bb..eb261c582a44 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -344,18 +344,20 @@ out: return err; } -void -start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) +static void +start_thread_common(struct pt_regs *regs, unsigned long new_ip, + unsigned long new_sp, + unsigned int _cs, unsigned int _ss, unsigned int _ds) { loadsegment(fs, 0); - loadsegment(es, 0); - loadsegment(ds, 0); + loadsegment(es, _ds); + loadsegment(ds, _ds); load_gs_index(0); regs->ip = new_ip; regs->sp = new_sp; percpu_write(old_rsp, new_sp); - regs->cs = __USER_CS; - regs->ss = __USER_DS; + regs->cs = _cs; + regs->ss = _ss; regs->flags = X86_EFLAGS_IF; set_fs(USER_DS); /* @@ -363,26 +365,19 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) */ free_thread_xstate(current); } -EXPORT_SYMBOL_GPL(start_thread); + +void +start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) +{ + start_thread_common(regs, new_ip, new_sp, + __USER_CS, __USER_DS, 0); +} #ifdef CONFIG_IA32_EMULATION void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp) { - loadsegment(fs, 0); - loadsegment(ds, __USER32_DS); - loadsegment(es, __USER32_DS); - load_gs_index(0); - regs->ip = new_ip; - regs->sp = new_sp; - percpu_write(old_rsp, new_sp); - regs->cs = __USER32_CS; - regs->ss = __USER32_DS; - regs->flags = X86_EFLAGS_IF; - set_fs(USER_DS); - /* - * Free the old FP and other extended state - */ - free_thread_xstate(current); + start_thread_common(regs, new_ip, new_sp, + __USER32_CS, __USER32_DS, __USER32_DS); } #endif -- cgit v1.2.3 From 3bb258bf430d29a24350fe4f44f8bf07b7b7a8f6 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 4 Oct 2009 17:53:29 -0700 Subject: ftrace.c: Add #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - Remove prefixes from pr_, use pr_fmt(fmt). No change in output. Signed-off-by: Joe Perches Acked-by: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <9b377eefae9e28c599dd4a17bdc81172965e9931.1254701151.git.joe@perches.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 9dbb527e1652..25e6f5fc4b1e 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -9,6 +9,8 @@ * the dangers of modifying code on the run. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -336,15 +338,15 @@ int __init ftrace_dyn_arch_init(void *data) switch (faulted) { case 0: - pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n"); + pr_info("converting mcount calls to 0f 1f 44 00 00\n"); memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE); break; case 1: - pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n"); + pr_info("converting mcount calls to 66 66 66 66 90\n"); memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE); break; case 2: - pr_info("ftrace: converting mcount calls to jmp . + 5\n"); + pr_info("converting mcount calls to jmp . + 5\n"); memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE); break; } -- cgit v1.2.3 From 3c355863fb32070a2800f41106519c5c3038623a Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 4 Oct 2009 17:53:40 -0700 Subject: testmmiotrace.c: Add and use pr_fmt(fmt) - Add #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt. - Strip MODULE_NAME from pr_s. - Remove MODULE_NAME definition. Signed-off-by: Joe Perches LKML-Reference: <3bb66cc7f85f77b9416902e1be7076f7e3f4ad48.1254701151.git.joe@perches.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/testmmiotrace.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index 427fd1b56df5..8565d944f7cf 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c @@ -1,12 +1,13 @@ /* * Written by Pekka Paalanen, 2008-2009 */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include -#define MODULE_NAME "testmmiotrace" - static unsigned long mmio_address; module_param(mmio_address, ulong, 0); MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB " @@ -30,7 +31,7 @@ static unsigned v32(unsigned i) static void do_write_test(void __iomem *p) { unsigned int i; - pr_info(MODULE_NAME ": write test.\n"); + pr_info("write test.\n"); mmiotrace_printk("Write test.\n"); for (i = 0; i < 256; i++) @@ -47,7 +48,7 @@ static void do_read_test(void __iomem *p) { unsigned int i; unsigned errs[3] = { 0 }; - pr_info(MODULE_NAME ": read test.\n"); + pr_info("read test.\n"); mmiotrace_printk("Read test.\n"); for (i = 0; i < 256; i++) @@ -68,7 +69,7 @@ static void do_read_test(void __iomem *p) static void do_read_far_test(void __iomem *p) { - pr_info(MODULE_NAME ": read far test.\n"); + pr_info("read far test.\n"); mmiotrace_printk("Read far test.\n"); ioread32(p + read_far); @@ -78,7 +79,7 @@ static void do_test(unsigned long size) { void __iomem *p = ioremap_nocache(mmio_address, size); if (!p) { - pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); + pr_err("could not ioremap, aborting.\n"); return; } mmiotrace_printk("ioremap returned %p.\n", p); @@ -94,24 +95,22 @@ static int __init init(void) unsigned long size = (read_far) ? (8 << 20) : (16 << 10); if (mmio_address == 0) { - pr_err(MODULE_NAME ": you have to use the module argument " - "mmio_address.\n"); - pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS" - " YOU REALLY KNOW WHAT YOU ARE DOING!\n"); + pr_err("you have to use the module argument mmio_address.\n"); + pr_err("DO NOT LOAD THIS MODULE UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!\n"); return -ENXIO; } - pr_warning(MODULE_NAME ": WARNING: mapping %lu kB @ 0x%08lx in PCI " - "address space, and writing 16 kB of rubbish in there.\n", - size >> 10, mmio_address); + pr_warning("WARNING: mapping %lu kB @ 0x%08lx in PCI address space, " + "and writing 16 kB of rubbish in there.\n", + size >> 10, mmio_address); do_test(size); - pr_info(MODULE_NAME ": All done.\n"); + pr_info("All done.\n"); return 0; } static void __exit cleanup(void) { - pr_debug(MODULE_NAME ": unloaded.\n"); + pr_debug("unloaded.\n"); } module_init(init); -- cgit v1.2.3 From fb2531953fd8855abdcf458459020fd382c5deca Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 7 Oct 2009 13:20:38 +0200 Subject: mce, edac: Use an atomic notifier for MCEs decoding Add an atomic notifier which ensures proper locking when conveying MCE info to EDAC for decoding. The actual notifier call overrides a default, negative priority notifier. Note: make sure we register the default decoder only once since mcheck_init() runs on each CPU. Signed-off-by: Borislav Petkov LKML-Reference: <20091003065752.GA8935@liondog.tnic> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 3 ++- arch/x86/kernel/cpu/mcheck/mce.c | 29 ++++++++++++++++++++--------- drivers/edac/edac_mce_amd.c | 21 ++++++++++++--------- 3 files changed, 34 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index f1363b72364f..227a72df6441 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -108,6 +108,8 @@ struct mce_log { #define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) #define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) +extern struct atomic_notifier_head x86_mce_decoder_chain; + #ifdef __KERNEL__ #include @@ -213,6 +215,5 @@ extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); void intel_init_thermal(struct cpuinfo_x86 *c); void mce_log_therm_throt_event(__u64 status); - #endif /* __KERNEL__ */ #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index b1598a9436d0..15ba9c972d7a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -85,18 +85,26 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait); static DEFINE_PER_CPU(struct mce, mces_seen); static int cpu_missing; -static void default_decode_mce(struct mce *m) +/* + * CPU/chipset specific EDAC code can register a notifier call here to print + * MCE errors in a human-readable form. + */ +ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); +EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); + +static int default_decode_mce(struct notifier_block *nb, unsigned long val, + void *data) { pr_emerg("No human readable MCE decoding support on this CPU type.\n"); pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); + + return NOTIFY_STOP; } -/* - * CPU/chipset specific EDAC code can register a callback here to print - * MCE errors in a human-readable form: - */ -void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce; -EXPORT_SYMBOL(x86_mce_decode_callback); +static struct notifier_block mce_dec_nb = { + .notifier_call = default_decode_mce, + .priority = -1, +}; /* MCA banks polled by the period polling timer for corrected events */ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { @@ -204,9 +212,9 @@ static void print_mce(struct mce *m) /* * Print out human-readable details about the MCE error, - * (if the CPU has an implementation for that): + * (if the CPU has an implementation for that) */ - x86_mce_decode_callback(m); + atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); } static void print_mce_head(void) @@ -1420,6 +1428,9 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c) mce_cpu_features(c); mce_init_timer(); INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); + + if (raw_smp_processor_id() == 0) + atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb); } /* diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c index 713ed7d37247..689cc6a6214d 100644 --- a/drivers/edac/edac_mce_amd.c +++ b/drivers/edac/edac_mce_amd.c @@ -3,7 +3,6 @@ static bool report_gart_errors; static void (*nb_bus_decoder)(int node_id, struct err_regs *regs); -static void (*orig_mce_callback)(struct mce *m); void amd_report_gart_errors(bool v) { @@ -363,8 +362,10 @@ static inline void amd_decode_err_code(unsigned int ec) pr_warning("Huh? Unknown MCE error 0x%x\n", ec); } -static void amd_decode_mce(struct mce *m) +static int amd_decode_mce(struct notifier_block *nb, unsigned long val, + void *data) { + struct mce *m = (struct mce *)data; struct err_regs regs; int node, ecc; @@ -420,20 +421,22 @@ static void amd_decode_mce(struct mce *m) } amd_decode_err_code(m->status & 0xffff); + + return NOTIFY_STOP; } +static struct notifier_block amd_mce_dec_nb = { + .notifier_call = amd_decode_mce, +}; + static int __init mce_amd_init(void) { /* * We can decode MCEs for Opteron and later CPUs: */ if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && - (boot_cpu_data.x86 >= 0xf)) { - /* safe the default decode mce callback */ - orig_mce_callback = x86_mce_decode_callback; - - x86_mce_decode_callback = amd_decode_mce; - } + (boot_cpu_data.x86 >= 0xf)) + atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb); return 0; } @@ -442,7 +445,7 @@ early_initcall(mce_amd_init); #ifdef MODULE static void __exit mce_amd_exit(void) { - x86_mce_decode_callback = orig_mce_callback; + atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb); } MODULE_DESCRIPTION("AMD MCE decoder"); -- cgit v1.2.3 From 494f6a9e12f5137d355d3ce3f5789ef148b642bc Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 7 Oct 2009 19:04:29 -0400 Subject: this_cpu: Use this_cpu_xx in nmi handling this_cpu_inc/dec reduces the number of instructions needed. Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/sparc/kernel/nmi.c | 8 ++++---- arch/x86/kernel/apic/nmi.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index b129611590a4..f30f4a1ead23 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -47,7 +47,7 @@ static DEFINE_PER_CPU(short, wd_enabled); static int endflag __initdata; static DEFINE_PER_CPU(unsigned int, last_irq_sum); -static DEFINE_PER_CPU(local_t, alert_counter); +static DEFINE_PER_CPU(long, alert_counter); static DEFINE_PER_CPU(int, nmi_touch); void touch_nmi_watchdog(void) @@ -112,13 +112,13 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) touched = 1; } if (!touched && __get_cpu_var(last_irq_sum) == sum) { - local_inc(&__get_cpu_var(alert_counter)); - if (local_read(&__get_cpu_var(alert_counter)) == 30 * nmi_hz) + __this_cpu_inc(per_cpu_var(alert_counter)); + if (__this_cpu_read(per_cpu_var(alert_counter)) == 30 * nmi_hz) die_nmi("BUG: NMI Watchdog detected LOCKUP", regs, panic_on_timeout); } else { __get_cpu_var(last_irq_sum) = sum; - local_set(&__get_cpu_var(alert_counter), 0); + __this_cpu_write(per_cpu_var(alert_counter), 0); } if (__get_cpu_var(wd_enabled)) { write_pic(picl_value(nmi_hz)); diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index 7ff61d6a188a..e631cc4416f7 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -360,7 +360,7 @@ void stop_apic_nmi_watchdog(void *unused) */ static DEFINE_PER_CPU(unsigned, last_irq_sum); -static DEFINE_PER_CPU(local_t, alert_counter); +static DEFINE_PER_CPU(long, alert_counter); static DEFINE_PER_CPU(int, nmi_touch); void touch_nmi_watchdog(void) @@ -437,8 +437,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) * Ayiee, looks like this CPU is stuck ... * wait a few IRQs (5 seconds) before doing the oops ... */ - local_inc(&__get_cpu_var(alert_counter)); - if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz) + __this_cpu_inc(per_cpu_var(alert_counter)); + if (__this_cpu_read(per_cpu_var(alert_counter)) == 5 * nmi_hz) /* * die_nmi will return ONLY if NOTIFY_STOP happens.. */ @@ -446,7 +446,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) regs, panic_on_timeout); } else { __get_cpu_var(last_irq_sum) = sum; - local_set(&__get_cpu_var(alert_counter), 0); + __this_cpu_write(per_cpu_var(alert_counter), 0); } /* see if the nmi watchdog went off */ -- cgit v1.2.3 From c03cb3149daed3e411657e3212d05ae27cf1a874 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 11 Oct 2009 10:33:02 -0700 Subject: x86: Relegate CONFIG_PAT and CONFIG_MTRR configurability to EMBEDDED MTRR and PAT support (which got added to CPUs over 10 years ago) are no longer really optional in that more and more things are depending on PAT just working, including various drivers and newer versions of X. (to not even speak of MTRR) Having this as a regular config option just no longer makes sense. This patch relegates CONFIG_X86_PAT to the EMBEDDED category so ultra-embedded can still disable it if they really need to. Also-Suggested-by: Roland Dreier Signed-off-by: Arjan van de Ven Cc: Linus Torvalds Cc: Henrique de Moraes Holschuh LKML-Reference: <20091011103302.62bded41@infradead.org> Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c876bace8fdc..a67363bbe825 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1321,7 +1321,9 @@ config MATH_EMULATION kernel, it won't hurt. config MTRR - bool "MTRR (Memory Type Range Register) support" + bool + default y + prompt "MTRR (Memory Type Range Register) support" if EMBEDDED ---help--- On Intel P6 family processors (Pentium Pro, Pentium II and later) the Memory Type Range Registers (MTRRs) may be used to control @@ -1387,7 +1389,8 @@ config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT config X86_PAT bool - prompt "x86 PAT support" + default y + prompt "x86 PAT support" if EMBEDDED depends on MTRR ---help--- Use PAT attributes to setup page level cache control. -- cgit v1.2.3 From 9a821b231644028f8e2a853eb33d1184e925b183 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 12 Oct 2009 12:59:29 +0100 Subject: x86: Move pci_iommu_init to rootfs_initcall() We want this to happen after the PCI quirks, which are now running at the very end of the fs_initcalls. This works around the BIOS problems which were originally addressed by commit db8be50c4307dac2b37305fc59c8dc0f978d09ea ('USB: Work around BIOS bugs by quiescing USB controllers earlier'), which was reverted in commit d93a8f829fe1d2f3002f2c6ddb553d12db420412. Signed-off-by: David Woodhouse --- arch/x86/kernel/pci-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 64b838eac18c..e0d9199d0eb9 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -311,7 +311,7 @@ void pci_iommu_shutdown(void) amd_iommu_shutdown(); } /* Must execute after PCI subsystem */ -fs_initcall(pci_iommu_init); +rootfs_initcall(pci_iommu_init); #ifdef CONFIG_PCI /* Many VIA bridges seem to corrupt data for DAC. Disable it here */ -- cgit v1.2.3 From 7a4b7e5e741fe0a72a517b0367a2659aa53f7c44 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 6 Oct 2009 16:32:43 +0100 Subject: x86: Fix Suspend to RAM freeze on Acer Aspire 1511Lmi laptop Move the trampoline and accessors back out of .cpuinit.* for the case of 64-bits+ACPI_SLEEP. This solves s2ram hangs reported in: http://bugzilla.kernel.org/show_bug.cgi?id=14279 Reported-and-bisected-by: Christian Casteyde Signed-off-by: Jan Beulich Cc: Cc: "Andrew Morton" Cc: "Rafael J. Wysocki" Signed-off-by: Ingo Molnar --- arch/x86/kernel/trampoline.c | 12 ++++++++++-- arch/x86/kernel/trampoline_64.S | 4 ++++ 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index 699f7eeb896a..cd022121cab6 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c @@ -3,8 +3,16 @@ #include #include +#if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP) +#define __trampinit +#define __trampinitdata +#else +#define __trampinit __cpuinit +#define __trampinitdata __cpuinitdata +#endif + /* ready for x86_64 and x86 */ -unsigned char *__cpuinitdata trampoline_base = __va(TRAMPOLINE_BASE); +unsigned char *__trampinitdata trampoline_base = __va(TRAMPOLINE_BASE); void __init reserve_trampoline_memory(void) { @@ -26,7 +34,7 @@ void __init reserve_trampoline_memory(void) * bootstrap into the page concerned. The caller * has made sure it's suitably aligned. */ -unsigned long __cpuinit setup_trampoline(void) +unsigned long __trampinit setup_trampoline(void) { memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE); return virt_to_phys(trampoline_base); diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S index 596d54c660a5..3af2dff58b21 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/kernel/trampoline_64.S @@ -32,8 +32,12 @@ #include #include +#ifdef CONFIG_ACPI_SLEEP +.section .rodata, "a", @progbits +#else /* We can free up the trampoline after bootup if cpu hotplug is not supported. */ __CPUINITRODATA +#endif .code16 ENTRY(trampoline_data) -- cgit v1.2.3 From ae24ffe5ecec17c956ac25371d7c2e12b4b36e53 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 12 Oct 2009 10:18:23 -0400 Subject: x86, 64-bit: Move K8 B step iret fixup to fault entry asm Move the handling of truncated %rip from an iret fault to the fault entry path. This allows x86-64 to use the standard search_extable() function. Signed-off-by: Brian Gerst Cc: Linus Torvalds Cc: Jan Beulich LKML-Reference: <1255357103-5418-1-git-send-email-brgerst@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess.h | 1 - arch/x86/kernel/entry_64.S | 11 ++++++++--- arch/x86/mm/extable.c | 31 ------------------------------- 3 files changed, 8 insertions(+), 35 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index d2c6c930b491..abd3e0ea762a 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -570,7 +570,6 @@ extern struct movsl_mask { #ifdef CONFIG_X86_32 # include "uaccess_32.h" #else -# define ARCH_HAS_SEARCH_EXTABLE # include "uaccess_64.h" #endif diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b5c061f8f358..af0f4b226dbe 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1491,12 +1491,17 @@ error_kernelspace: leaq irq_return(%rip),%rcx cmpq %rcx,RIP+8(%rsp) je error_swapgs - movl %ecx,%ecx /* zero extend */ - cmpq %rcx,RIP+8(%rsp) - je error_swapgs + movl %ecx,%eax /* zero extend */ + cmpq %rax,RIP+8(%rsp) + je bstep_iret cmpq $gs_change,RIP+8(%rsp) je error_swapgs jmp error_sti + +bstep_iret: + /* Fix truncated RIP */ + movq %rcx,RIP+8(%rsp) + je error_swapgs END(error_entry) diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 61b41ca3b5a2..d0474ad2a6e5 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -35,34 +35,3 @@ int fixup_exception(struct pt_regs *regs) return 0; } - -#ifdef CONFIG_X86_64 -/* - * Need to defined our own search_extable on X86_64 to work around - * a B stepping K8 bug. - */ -const struct exception_table_entry * -search_extable(const struct exception_table_entry *first, - const struct exception_table_entry *last, - unsigned long value) -{ - /* B stepping K8 bug */ - if ((value >> 32) == 0) - value |= 0xffffffffUL << 32; - - while (first <= last) { - const struct exception_table_entry *mid; - long diff; - - mid = (last - first) / 2 + first; - diff = mid->insn - value; - if (diff == 0) - return mid; - else if (diff < 0) - first = mid+1; - else - last = mid-1; - } - return NULL; -} -#endif -- cgit v1.2.3 From d1705c558c95418378b11a0be963fe1b3e2fa381 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 12 Oct 2009 11:32:31 -0700 Subject: x86: fix kernel panic on 32 bits when profiling Latest kernel has a kernel panic in booting on i386 machine when profile=2 setting in cmdline. It is due to 'sp' being incorrect in profile_pc(). BUG: unable to handle kernel NULL pointer dereference at 00000246 IP: [] profile_pc+0x2a/0x48 *pde = 00000000 Oops: 0000 [#1] SMP This differs from the original version by Alex Shi in that we use the kernel_stack_pointer() inline already defined in for this purpose, instead of #ifdef. Originally-by: Alex Shi Cc: "Chen, Tim C" Cc: "Rafael J. Wysocki" Cc: Andrew Morton Signed-off-by: H. Peter Anvin --- arch/x86/kernel/time.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index dcb00d278512..be2573448ed9 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -38,7 +38,8 @@ unsigned long profile_pc(struct pt_regs *regs) #ifdef CONFIG_FRAME_POINTER return *(unsigned long *)(regs->bp + sizeof(long)); #else - unsigned long *sp = (unsigned long *)regs->sp; + unsigned long *sp = + (unsigned long *)kernel_stack_pointer(regs); /* * Return address is either directly at stack pointer * or above a saved flags. Eflags has bits 22-31 zero, -- cgit v1.2.3 From ad8f4356af58f7ded6b4a5787c67c7cab51066b5 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 6 Oct 2009 07:04:52 -0700 Subject: x86: Don't use the strict copy checks when branch profiling is in use The branch profiling creates very complex code for each if statement, to the point that gcc has trouble even analyzing something as simple as if (count > 5) count = 5; This then means that causing an error on code that gcc cannot analyze for copy_from_user() and co is not very productive. This patch excludes the strict copy checks in the case of branch profiling being enabled. Signed-off-by: Arjan van de Ven Cc: Steven Rostedt LKML-Reference: <20091006070452.5e1fc119@infradead.org> Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 1bd2e36f1538..fb772b6a41ad 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -289,7 +289,7 @@ config OPTIMIZE_INLINING config DEBUG_STRICT_USER_COPY_CHECKS bool "Strict copy size checks" - depends on DEBUG_KERNEL + depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING ---help--- Enabling this option turns a certain set of sanity checks for user copy operations into compile time failures. -- cgit v1.2.3 From def3c5d0a34e4b09b3cea4435c17209ad347104d Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 12 Oct 2009 14:09:07 -0700 Subject: x86: use kernel_stack_pointer() in process_32.c The way to obtain a kernel-mode stack pointer from a struct pt_regs in 32-bit mode is "subtle": the stack doesn't actually contain the stack pointer, but rather the location where it would have been marks the actual previous stack frame. For clarity, use kernel_stack_pointer() instead of coding this weirdness explicitly. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/process_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4cf79567cdab..35e6fad73e0d 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -134,7 +134,7 @@ void __show_regs(struct pt_regs *regs, int all) ss = regs->ss & 0xffff; gs = get_user_gs(regs); } else { - sp = (unsigned long) (®s->sp); + sp = kernel_stack_pointer(regs); savesegment(ss, ss); savesegment(gs, gs); } -- cgit v1.2.3 From a343c75d338aa2afaea4a2a8e40de9e67b6fb4a7 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 12 Oct 2009 14:11:09 -0700 Subject: x86: use kernel_stack_pointer() in dumpstack.c The way to obtain a kernel-mode stack pointer from a struct pt_regs in 32-bit mode is "subtle": the stack doesn't actually contain the stack pointer, but rather the location where it would have been marks the actual previous stack frame. For clarity, use kernel_stack_pointer() instead of coding this weirdness explicitly. Furthermore, user_mode() is only valid when the process is known to not run in V86 mode. Use the safer user_mode_vm() instead. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/dumpstack.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 2d8a371d4339..b8ce165dde5d 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -268,11 +268,12 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) show_registers(regs); #ifdef CONFIG_X86_32 - sp = (unsigned long) (®s->sp); - savesegment(ss, ss); - if (user_mode(regs)) { + if (user_mode_vm(regs)) { sp = regs->sp; ss = regs->ss & 0xffff; + } else { + sp = kernel_stack_pointer(regs); + savesegment(ss, ss); } printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); print_symbol("%s", regs->ip); -- cgit v1.2.3 From 5ca6c0ca5dbf105d7b0ffdae2289519982189730 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 12 Oct 2009 14:12:18 -0700 Subject: x86: use kernel_stack_pointer() in kgdb.c The way to obtain a kernel-mode stack pointer from a struct pt_regs in 32-bit mode is "subtle": the stack doesn't actually contain the stack pointer, but rather the location where it would have been marks the actual previous stack frame. For clarity, use kernel_stack_pointer() instead of coding this weirdness explicitly. Signed-off-by: H. Peter Anvin Cc: Jason Wessel --- arch/x86/kernel/kgdb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 8d82a77a3f3b..3310d849abd2 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -88,7 +88,6 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) gdb_regs[GDB_SS] = __KERNEL_DS; gdb_regs[GDB_FS] = 0xFFFF; gdb_regs[GDB_GS] = 0xFFFF; - gdb_regs[GDB_SP] = (int)®s->sp; #else gdb_regs[GDB_R8] = regs->r8; gdb_regs[GDB_R9] = regs->r9; @@ -101,8 +100,8 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) gdb_regs32[GDB_PS] = regs->flags; gdb_regs32[GDB_CS] = regs->cs; gdb_regs32[GDB_SS] = regs->ss; - gdb_regs[GDB_SP] = regs->sp; #endif + gdb_regs[GDB_SP] = kernel_stack_pointer(regs); } /** -- cgit v1.2.3 From 98272ed0d2e6509fe7dc571e77956c99bf653bb6 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 12 Oct 2009 14:14:10 -0700 Subject: x86: use kernel_stack_pointer() in kprobes.c The way to obtain a kernel-mode stack pointer from a struct pt_regs in 32-bit mode is "subtle": the stack doesn't actually contain the stack pointer, but rather the location where it would have been marks the actual previous stack frame. For clarity, use kernel_stack_pointer() instead of coding this weirdness explicitly. Signed-off-by: H. Peter Anvin Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: "David S. Miller" Cc: Masami Hiramatsu --- arch/x86/kernel/kprobes.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7b5169d2b000..2ee4fa3a3f01 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -60,19 +60,7 @@ void jprobe_return_end(void); DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); -#ifdef CONFIG_X86_64 -#define stack_addr(regs) ((unsigned long *)regs->sp) -#else -/* - * "®s->sp" looks wrong, but it's correct for x86_32. x86_32 CPUs - * don't save the ss and esp registers if the CPU is already in kernel - * mode when it traps. So for kprobes, regs->sp and regs->ss are not - * the [nonexistent] saved stack pointer and ss register, but rather - * the top 8 bytes of the pre-int3 stack. So ®s->sp happens to - * point to the top of the pre-int3 stack. - */ -#define stack_addr(regs) ((unsigned long *)®s->sp) -#endif +#define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs)) #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ -- cgit v1.2.3 From 7a693d3f0d10f978ebdf3082c41404ab97106567 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 13 Oct 2009 08:16:30 +0200 Subject: perf_events, x86: Fix event constraints code There was namespace overlap due to a rename i did - this caused the following build warning, reported by Stephen Rothwell against linux-next x86_64 allmodconfig: arch/x86/kernel/cpu/perf_event.c: In function 'intel_get_event_idx': arch/x86/kernel/cpu/perf_event.c:1445: warning: 'event_constraint' is used uninitialized in this function This is a real bug not just a warning: fix it by renaming the global event-constraints table pointer to 'event_constraints'. Reported-by: Stephen Rothwell Cc: Stephane Eranian Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <20091013144223.369d616d.sfr@canb.auug.org.au> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 9961d845719d..2e20bca3cca1 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -124,7 +124,7 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; -static const struct event_constraint *event_constraint; +static const struct event_constraint *event_constraints; /* * Not sure about some of these @@ -1442,12 +1442,12 @@ intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) const struct event_constraint *event_constraint; int i, code; - if (!event_constraint) + if (!event_constraints) goto skip; code = hwc->config & CORE_EVNTSEL_EVENT_MASK; - for_each_event_constraint(event_constraint, event_constraint) { + for_each_event_constraint(event_constraint, event_constraints) { if (code == event_constraint->code) { for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) { if (!test_and_set_bit(i, cpuc->used_mask)) @@ -2047,12 +2047,12 @@ static int p6_pmu_init(void) case 7: case 8: case 11: /* Pentium III */ - event_constraint = intel_p6_event_constraints; + event_constraints = intel_p6_event_constraints; break; case 9: case 13: /* Pentium M */ - event_constraint = intel_p6_event_constraints; + event_constraints = intel_p6_event_constraints; break; default: pr_cont("unsupported p6 CPU model %d ", @@ -2124,14 +2124,14 @@ static int intel_pmu_init(void) sizeof(hw_cache_event_ids)); pr_cont("Core2 events, "); - event_constraint = intel_core_event_constraints; + event_constraints = intel_core_event_constraints; break; default: case 26: memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); - event_constraint = intel_nehalem_event_constraints; + event_constraints = intel_nehalem_event_constraints; pr_cont("Nehalem/Corei7 events, "); break; case 28: -- cgit v1.2.3 From a2e2725541fad72416326798c2d7fa4dafb7d337 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 12 Oct 2009 23:40:10 -0700 Subject: net: Introduce recvmmsg socket syscall MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Meaning receive multiple messages, reducing the number of syscalls and net stack entry/exit operations. Next patches will introduce mechanisms where protocols that want to optimize this operation will provide an unlocked_recvmsg operation. This takes into account comments made by: . Paul Moore: sock_recvmsg is called only for the first datagram, sock_recvmsg_nosec is used for the rest. . Caitlin Bestler: recvmmsg now has a struct timespec timeout, that works in the same fashion as the ppoll one. If the underlying protocol returns a datagram with MSG_OOB set, this will make recvmmsg return right away with as many datagrams (+ the OOB one) it has received so far. . Rémi Denis-Courmont & Steven Whitehouse: If we receive N < vlen datagrams and then recvmsg returns an error, recvmmsg will return the successfully received datagrams, store the error and return it in the next call. This paves the way for a subsequent optimization, sk_prot->unlocked_recvmsg, where we will be able to acquire the lock only at batch start and end, not at every underlying recvmsg call. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- arch/alpha/kernel/systbls.S | 1 + arch/arm/kernel/calls.S | 1 + arch/avr32/kernel/syscall_table.S | 1 + arch/blackfin/mach-common/entry.S | 1 + arch/ia64/kernel/entry.S | 1 + arch/microblaze/kernel/syscall_table.S | 1 + arch/mips/kernel/scall32-o32.S | 1 + arch/mips/kernel/scall64-64.S | 1 + arch/mips/kernel/scall64-n32.S | 1 + arch/mips/kernel/scall64-o32.S | 1 + arch/sh/kernel/syscalls_64.S | 1 + arch/sparc/kernel/systbls_32.S | 2 +- arch/sparc/kernel/systbls_64.S | 4 +- arch/x86/ia32/ia32entry.S | 1 + arch/x86/include/asm/unistd_32.h | 3 +- arch/x86/include/asm/unistd_64.h | 2 + arch/x86/kernel/syscall_table_32.S | 1 + arch/xtensa/include/asm/unistd.h | 4 +- include/linux/net.h | 1 + include/linux/socket.h | 10 ++ include/linux/syscalls.h | 4 + include/net/compat.h | 8 ++ kernel/sys_ni.c | 2 + net/compat.c | 33 ++++- net/socket.c | 225 +++++++++++++++++++++++++++------ 25 files changed, 261 insertions(+), 50 deletions(-) (limited to 'arch/x86') diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S index 95c9aef1c106..cda6b8b3d573 100644 --- a/arch/alpha/kernel/systbls.S +++ b/arch/alpha/kernel/systbls.S @@ -497,6 +497,7 @@ sys_call_table: .quad sys_signalfd .quad sys_ni_syscall .quad sys_eventfd + .quad sys_recvmmsg .size sys_call_table, . - sys_call_table .type sys_call_table, @object diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index fafce1b5c69f..f58c1156e779 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -374,6 +374,7 @@ CALL(sys_pwritev) CALL(sys_rt_tgsigqueueinfo) CALL(sys_perf_event_open) +/* 365 */ CALL(sys_recvmmsg) #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls #define syscalls_counted diff --git a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S index 7ee0057613b3..e76bad16b0f0 100644 --- a/arch/avr32/kernel/syscall_table.S +++ b/arch/avr32/kernel/syscall_table.S @@ -295,4 +295,5 @@ sys_call_table: .long sys_signalfd .long sys_ni_syscall /* 280, was sys_timerfd */ .long sys_eventfd + .long sys_recvmmsg .long sys_ni_syscall /* r8 is saturated at nr_syscalls */ diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S index 1e7cac23e25f..48692724b74c 100644 --- a/arch/blackfin/mach-common/entry.S +++ b/arch/blackfin/mach-common/entry.S @@ -1621,6 +1621,7 @@ ENTRY(_sys_call_table) .long _sys_pwritev .long _sys_rt_tgsigqueueinfo .long _sys_perf_event_open + .long _sys_recvmmsg /* 370 */ .rept NR_syscalls-(.-_sys_call_table)/4 .long _sys_ni_syscall diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index d0e7d37017b4..d75b872ca4dc 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1806,6 +1806,7 @@ sys_call_table: data8 sys_preadv data8 sys_pwritev // 1320 data8 sys_rt_tgsigqueueinfo + data8 sys_recvmmsg .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S index ecec19155135..c1ab1dc10898 100644 --- a/arch/microblaze/kernel/syscall_table.S +++ b/arch/microblaze/kernel/syscall_table.S @@ -371,3 +371,4 @@ ENTRY(sys_call_table) .long sys_ni_syscall .long sys_rt_tgsigqueueinfo /* 365 */ .long sys_perf_event_open + .long sys_recvmmsg diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index fd2a9bb620d6..17202bbe843f 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -583,6 +583,7 @@ einval: li v0, -ENOSYS sys sys_rt_tgsigqueueinfo 4 sys sys_perf_event_open 5 sys sys_accept4 4 + sys sys_recvmmsg 5 .endm /* We pre-compute the number of _instruction_ bytes needed to diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index 18bf7f32c5e4..a8a6c596eb04 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -420,4 +420,5 @@ sys_call_table: PTR sys_rt_tgsigqueueinfo PTR sys_perf_event_open PTR sys_accept4 + PTR sys_recvmmsg .size sys_call_table,.-sys_call_table diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 6ebc07976694..5154e64f7cfe 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -418,4 +418,5 @@ EXPORT(sysn32_call_table) PTR compat_sys_rt_tgsigqueueinfo /* 5295 */ PTR sys_perf_event_open PTR sys_accept4 + PTR compat_sys_recvmmsg .size sysn32_call_table,.-sysn32_call_table diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 9bbf9775e0bd..d0eff53d7cb9 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -538,4 +538,5 @@ sys_call_table: PTR compat_sys_rt_tgsigqueueinfo PTR sys_perf_event_open PTR sys_accept4 + PTR compat_sys_recvmmsg .size sys_call_table,.-sys_call_table diff --git a/arch/sh/kernel/syscalls_64.S b/arch/sh/kernel/syscalls_64.S index 5bfde6c77498..07d2aaea9ae8 100644 --- a/arch/sh/kernel/syscalls_64.S +++ b/arch/sh/kernel/syscalls_64.S @@ -391,3 +391,4 @@ sys_call_table: .long sys_pwritev .long sys_rt_tgsigqueueinfo .long sys_perf_event_open + .long sys_recvmmsg /* 365 */ diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 0f1658d37490..ceb1530f8aa6 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -82,5 +82,5 @@ sys_call_table: /*310*/ .long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate /*315*/ .long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1 /*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv -/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open +/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 009825f6e73c..f37bef747e60 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -83,7 +83,7 @@ sys_call_table32: /*310*/ .word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate .word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1 /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv - .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open + .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg #endif /* CONFIG_COMPAT */ @@ -158,4 +158,4 @@ sys_call_table: /*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate .word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1 /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv - .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open + .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 74619c4f9fda..11a6c79d5f46 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -832,4 +832,5 @@ ia32_sys_call_table: .quad compat_sys_pwritev .quad compat_sys_rt_tgsigqueueinfo /* 335 */ .quad sys_perf_event_open + .quad compat_sys_recvmmsg ia32_syscall_end: diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 6fb3c209a7e3..3baf379fa840 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -342,10 +342,11 @@ #define __NR_pwritev 334 #define __NR_rt_tgsigqueueinfo 335 #define __NR_perf_event_open 336 +#define __NR_recvmmsg 337 #ifdef __KERNEL__ -#define NR_syscalls 337 +#define NR_syscalls 338 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 8d3ad0adbc68..4843f7ba754a 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -661,6 +661,8 @@ __SYSCALL(__NR_pwritev, sys_pwritev) __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) #define __NR_perf_event_open 298 __SYSCALL(__NR_perf_event_open, sys_perf_event_open) +#define __NR_recvmmsg 299 +__SYSCALL(__NR_recvmmsg, sys_recvmmsg) #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 0157cd26d7cc..70c2125d55b9 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -336,3 +336,4 @@ ENTRY(sys_call_table) .long sys_pwritev .long sys_rt_tgsigqueueinfo /* 335 */ .long sys_perf_event_open + .long sys_recvmmsg diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h index c092c8fbb2cf..4e55dc763021 100644 --- a/arch/xtensa/include/asm/unistd.h +++ b/arch/xtensa/include/asm/unistd.h @@ -681,8 +681,10 @@ __SYSCALL(304, sys_signalfd, 3) __SYSCALL(305, sys_ni_syscall, 0) #define __NR_eventfd 306 __SYSCALL(306, sys_eventfd, 1) +#define __NR_recvmmsg 307 +__SYSCALL(307, sys_recvmmsg, 5) -#define __NR_syscall_count 307 +#define __NR_syscall_count 308 /* * sysxtensa syscall handler diff --git a/include/linux/net.h b/include/linux/net.h index 529a0931711d..b42bb60fe92f 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -41,6 +41,7 @@ #define SYS_SENDMSG 16 /* sys_sendmsg(2) */ #define SYS_RECVMSG 17 /* sys_recvmsg(2) */ #define SYS_ACCEPT4 18 /* sys_accept4(2) */ +#define SYS_RECVMMSG 19 /* sys_recvmmsg(2) */ typedef enum { SS_FREE = 0, /* not allocated */ diff --git a/include/linux/socket.h b/include/linux/socket.h index 3273a0c5043b..59966f12990c 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -65,6 +65,12 @@ struct msghdr { unsigned msg_flags; }; +/* For recvmmsg/sendmmsg */ +struct mmsghdr { + struct msghdr msg_hdr; + unsigned msg_len; +}; + /* * POSIX 1003.1g - ancillary data object information * Ancillary data consits of a sequence of pairs of @@ -312,6 +318,10 @@ extern int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uadd extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr); extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); +struct timespec; + +extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, + unsigned int flags, struct timespec *timeout); #endif #endif /* not kernel and not glibc */ #endif /* _LINUX_SOCKET_H */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a990ace1a838..714f063a3e6d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -25,6 +25,7 @@ struct linux_dirent64; struct list_head; struct msgbuf; struct msghdr; +struct mmsghdr; struct msqid_ds; struct new_utsname; struct nfsctl_arg; @@ -677,6 +678,9 @@ asmlinkage long sys_recv(int, void __user *, size_t, unsigned); asmlinkage long sys_recvfrom(int, void __user *, size_t, unsigned, struct sockaddr __user *, int __user *); asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags); +asmlinkage long sys_recvmmsg(int fd, struct mmsghdr __user *msg, + unsigned int vlen, unsigned flags, + struct timespec __user *timeout); asmlinkage long sys_socket(int, int, int); asmlinkage long sys_socketpair(int, int, int, int __user *); asmlinkage long sys_socketcall(int call, unsigned long __user *args); diff --git a/include/net/compat.h b/include/net/compat.h index 7c3002832d05..9679f05e9896 100644 --- a/include/net/compat.h +++ b/include/net/compat.h @@ -18,6 +18,11 @@ struct compat_msghdr { compat_uint_t msg_flags; }; +struct compat_mmsghdr { + struct compat_msghdr msg_hdr; + compat_uint_t msg_len; +}; + struct compat_cmsghdr { compat_size_t cmsg_len; compat_int_t cmsg_level; @@ -35,6 +40,9 @@ extern int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *); extern int verify_compat_iovec(struct msghdr *, struct iovec *, struct sockaddr *, int); extern asmlinkage long compat_sys_sendmsg(int,struct compat_msghdr __user *,unsigned); extern asmlinkage long compat_sys_recvmsg(int,struct compat_msghdr __user *,unsigned); +extern asmlinkage long compat_sys_recvmmsg(int, struct compat_mmsghdr __user *, + unsigned, unsigned, + struct timespec __user *); extern asmlinkage long compat_sys_getsockopt(int, int, int, char __user *, int __user *); extern int put_cmsg_compat(struct msghdr*, int, int, int, void *); diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index e06d0b8d1951..f050ba85d420 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -48,8 +48,10 @@ cond_syscall(sys_shutdown); cond_syscall(sys_sendmsg); cond_syscall(compat_sys_sendmsg); cond_syscall(sys_recvmsg); +cond_syscall(sys_recvmmsg); cond_syscall(compat_sys_recvmsg); cond_syscall(compat_sys_recvfrom); +cond_syscall(compat_sys_recvmmsg); cond_syscall(sys_socketcall); cond_syscall(sys_futex); cond_syscall(compat_sys_futex); diff --git a/net/compat.c b/net/compat.c index a407c3addbae..e13f5256fd20 100644 --- a/net/compat.c +++ b/net/compat.c @@ -727,10 +727,10 @@ EXPORT_SYMBOL(compat_mc_getsockopt); /* Argument list sizes for compat_sys_socketcall */ #define AL(x) ((x) * sizeof(u32)) -static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), +static unsigned char nas[20]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), - AL(4)}; + AL(4),AL(5)}; #undef AL asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags) @@ -755,13 +755,36 @@ asmlinkage long compat_sys_recvfrom(int fd, void __user *buf, size_t len, return sys_recvfrom(fd, buf, len, flags | MSG_CMSG_COMPAT, addr, addrlen); } +asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg, + unsigned vlen, unsigned int flags, + struct timespec __user *timeout) +{ + int datagrams; + struct timespec ktspec; + struct compat_timespec __user *utspec = + (struct compat_timespec __user *)timeout; + + if (get_user(ktspec.tv_sec, &utspec->tv_sec) || + get_user(ktspec.tv_nsec, &utspec->tv_nsec)) + return -EFAULT; + + datagrams = __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, + flags | MSG_CMSG_COMPAT, &ktspec); + if (datagrams > 0 && + (put_user(ktspec.tv_sec, &utspec->tv_sec) || + put_user(ktspec.tv_nsec, &utspec->tv_nsec))) + datagrams = -EFAULT; + + return datagrams; +} + asmlinkage long compat_sys_socketcall(int call, u32 __user *args) { int ret; u32 a[6]; u32 a0, a1; - if (call < SYS_SOCKET || call > SYS_ACCEPT4) + if (call < SYS_SOCKET || call > SYS_RECVMMSG) return -EINVAL; if (copy_from_user(a, args, nas[call])) return -EFAULT; @@ -823,6 +846,10 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) case SYS_RECVMSG: ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]); break; + case SYS_RECVMMSG: + ret = compat_sys_recvmmsg(a0, compat_ptr(a1), a[2], a[3], + compat_ptr(a[4])); + break; case SYS_ACCEPT4: ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]); break; diff --git a/net/socket.c b/net/socket.c index 807935693846..9dff31c9b799 100644 --- a/net/socket.c +++ b/net/socket.c @@ -683,10 +683,9 @@ void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, } EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops); -static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t size, int flags) { - int err; struct sock_iocb *si = kiocb_to_siocb(iocb); si->sock = sock; @@ -695,13 +694,17 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, si->size = size; si->flags = flags; - err = security_socket_recvmsg(sock, msg, size, flags); - if (err) - return err; - return sock->ops->recvmsg(iocb, sock, msg, size, flags); } +static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t size, int flags) +{ + int err = security_socket_recvmsg(sock, msg, size, flags); + + return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags); +} + int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { @@ -717,6 +720,21 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, return ret; } +static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, + size_t size, int flags) +{ + struct kiocb iocb; + struct sock_iocb siocb; + int ret; + + init_sync_kiocb(&iocb, NULL); + iocb.private = &siocb; + ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags); + if (-EIOCBQUEUED == ret) + ret = wait_on_sync_kiocb(&iocb); + return ret; +} + int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size, int flags) { @@ -1983,22 +2001,15 @@ out: return err; } -/* - * BSD recvmsg interface - */ - -SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, - unsigned int, flags) +static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, + struct msghdr *msg_sys, unsigned flags, int nosec) { struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; - struct socket *sock; struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; - struct msghdr msg_sys; unsigned long cmsg_ptr; int err, iov_size, total_len, len; - int fput_needed; /* kernel mode address */ struct sockaddr_storage addr; @@ -2008,27 +2019,23 @@ SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, int __user *uaddr_len; if (MSG_CMSG_COMPAT & flags) { - if (get_compat_msghdr(&msg_sys, msg_compat)) + if (get_compat_msghdr(msg_sys, msg_compat)) return -EFAULT; } - else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) + else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) return -EFAULT; - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (!sock) - goto out; - err = -EMSGSIZE; - if (msg_sys.msg_iovlen > UIO_MAXIOV) - goto out_put; + if (msg_sys->msg_iovlen > UIO_MAXIOV) + goto out; /* Check whether to allocate the iovec area */ err = -ENOMEM; - iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); - if (msg_sys.msg_iovlen > UIO_FASTIOV) { + iov_size = msg_sys->msg_iovlen * sizeof(struct iovec); + if (msg_sys->msg_iovlen > UIO_FASTIOV) { iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); if (!iov) - goto out_put; + goto out; } /* @@ -2036,46 +2043,47 @@ SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, * kernel msghdr to use the kernel address space) */ - uaddr = (__force void __user *)msg_sys.msg_name; + uaddr = (__force void __user *)msg_sys->msg_name; uaddr_len = COMPAT_NAMELEN(msg); if (MSG_CMSG_COMPAT & flags) { - err = verify_compat_iovec(&msg_sys, iov, + err = verify_compat_iovec(msg_sys, iov, (struct sockaddr *)&addr, VERIFY_WRITE); } else - err = verify_iovec(&msg_sys, iov, + err = verify_iovec(msg_sys, iov, (struct sockaddr *)&addr, VERIFY_WRITE); if (err < 0) goto out_freeiov; total_len = err; - cmsg_ptr = (unsigned long)msg_sys.msg_control; - msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); + cmsg_ptr = (unsigned long)msg_sys->msg_control; + msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; - err = sock_recvmsg(sock, &msg_sys, total_len, flags); + err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, + total_len, flags); if (err < 0) goto out_freeiov; len = err; if (uaddr != NULL) { err = move_addr_to_user((struct sockaddr *)&addr, - msg_sys.msg_namelen, uaddr, + msg_sys->msg_namelen, uaddr, uaddr_len); if (err < 0) goto out_freeiov; } - err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT), + err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT), COMPAT_FLAGS(msg)); if (err) goto out_freeiov; if (MSG_CMSG_COMPAT & flags) - err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, + err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr, &msg_compat->msg_controllen); else - err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, + err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr, &msg->msg_controllen); if (err) goto out_freeiov; @@ -2084,21 +2092,150 @@ SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, out_freeiov: if (iov != iovstack) sock_kfree_s(sock->sk, iov, iov_size); -out_put: +out: + return err; +} + +/* + * BSD recvmsg interface + */ + +SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, + unsigned int, flags) +{ + int fput_needed, err; + struct msghdr msg_sys; + struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed); + + if (!sock) + goto out; + + err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0); + fput_light(sock->file, fput_needed); out: return err; } -#ifdef __ARCH_WANT_SYS_SOCKETCALL +/* + * Linux recvmmsg interface + */ + +int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, + unsigned int flags, struct timespec *timeout) +{ + int fput_needed, err, datagrams; + struct socket *sock; + struct mmsghdr __user *entry; + struct msghdr msg_sys; + struct timespec end_time; + + if (timeout && + poll_select_set_timeout(&end_time, timeout->tv_sec, + timeout->tv_nsec)) + return -EINVAL; + + datagrams = 0; + + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (!sock) + return err; + + err = sock_error(sock->sk); + if (err) + goto out_put; + + entry = mmsg; + + while (datagrams < vlen) { + /* + * No need to ask LSM for more than the first datagram. + */ + err = __sys_recvmsg(sock, (struct msghdr __user *)entry, + &msg_sys, flags, datagrams); + if (err < 0) + break; + err = put_user(err, &entry->msg_len); + if (err) + break; + ++entry; + ++datagrams; + + if (timeout) { + ktime_get_ts(timeout); + *timeout = timespec_sub(end_time, *timeout); + if (timeout->tv_sec < 0) { + timeout->tv_sec = timeout->tv_nsec = 0; + break; + } + + /* Timeout, return less than vlen datagrams */ + if (timeout->tv_nsec == 0 && timeout->tv_sec == 0) + break; + } + + /* Out of band data, return right away */ + if (msg_sys.msg_flags & MSG_OOB) + break; + } + +out_put: + fput_light(sock->file, fput_needed); + if (err == 0) + return datagrams; + + if (datagrams != 0) { + /* + * We may return less entries than requested (vlen) if the + * sock is non block and there aren't enough datagrams... + */ + if (err != -EAGAIN) { + /* + * ... or if recvmsg returns an error after we + * received some datagrams, where we record the + * error to return on the next call or if the + * app asks about it using getsockopt(SO_ERROR). + */ + sock->sk->sk_err = -err; + } + + return datagrams; + } + + return err; +} + +SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, + unsigned int, vlen, unsigned int, flags, + struct timespec __user *, timeout) +{ + int datagrams; + struct timespec timeout_sys; + + if (!timeout) + return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL); + + if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys))) + return -EFAULT; + + datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys); + + if (datagrams > 0 && + copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys))) + datagrams = -EFAULT; + + return datagrams; +} + +#ifdef __ARCH_WANT_SYS_SOCKETCALL /* Argument list sizes for sys_socketcall */ #define AL(x) ((x) * sizeof(unsigned long)) -static const unsigned char nargs[19]={ +static const unsigned char nargs[20] = { AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), - AL(4) + AL(4),AL(5) }; #undef AL @@ -2118,7 +2255,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) int err; unsigned int len; - if (call < 1 || call > SYS_ACCEPT4) + if (call < 1 || call > SYS_RECVMMSG) return -EINVAL; len = nargs[call]; @@ -2196,6 +2333,10 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) case SYS_RECVMSG: err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); break; + case SYS_RECVMMSG: + err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3], + (struct timespec __user *)a[4]); + break; case SYS_ACCEPT4: err = sys_accept4(a0, (struct sockaddr __user *)a1, (int __user *)a[2], a[3]); -- cgit v1.2.3 From 71999d9862e667f1fd14f8fbfa0cce6d855bad3f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 12 Oct 2009 16:32:43 -0700 Subject: x86/paravirt: Use normal calling sequences for irq enable/disable Bastian Blank reported a boot crash with stackprotector enabled, and debugged it back to edx register corruption. For historical reasons irq enable/disable/save/restore had special calling sequences to make them more efficient. With the more recent introduction of higher-level and more general optimisations this is no longer necessary so we can just use the normal PVOP_ macros. This fixes some residual bugs in the old implementations which left edx liable to inadvertent clobbering. Also, fix some bugs in __PVOP_VCALLEESAVE which were revealed by actual use. Reported-by: Bastian Blank Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel Cc: Xen-devel LKML-Reference: <4AD3BC9B.7040501@goop.org> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/paravirt.h | 28 ++++------------------------ arch/x86/include/asm/paravirt_types.h | 10 ++++++---- 2 files changed, 10 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 8aebcc41041d..efb38994859c 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -840,42 +840,22 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock) static inline unsigned long __raw_local_save_flags(void) { - unsigned long f; - - asm volatile(paravirt_alt(PARAVIRT_CALL) - : "=a"(f) - : paravirt_type(pv_irq_ops.save_fl), - paravirt_clobber(CLBR_EAX) - : "memory", "cc"); - return f; + return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl); } static inline void raw_local_irq_restore(unsigned long f) { - asm volatile(paravirt_alt(PARAVIRT_CALL) - : "=a"(f) - : PV_FLAGS_ARG(f), - paravirt_type(pv_irq_ops.restore_fl), - paravirt_clobber(CLBR_EAX) - : "memory", "cc"); + PVOP_VCALLEE1(pv_irq_ops.restore_fl, f); } static inline void raw_local_irq_disable(void) { - asm volatile(paravirt_alt(PARAVIRT_CALL) - : - : paravirt_type(pv_irq_ops.irq_disable), - paravirt_clobber(CLBR_EAX) - : "memory", "eax", "cc"); + PVOP_VCALLEE0(pv_irq_ops.irq_disable); } static inline void raw_local_irq_enable(void) { - asm volatile(paravirt_alt(PARAVIRT_CALL) - : - : paravirt_type(pv_irq_ops.irq_enable), - paravirt_clobber(CLBR_EAX) - : "memory", "eax", "cc"); + PVOP_VCALLEE0(pv_irq_ops.irq_enable); } static inline unsigned long __raw_local_irq_save(void) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index dd0f5b32489d..9357473c8da0 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -494,10 +494,11 @@ int paravirt_disable_iospace(void); #define EXTRA_CLOBBERS #define VEXTRA_CLOBBERS #else /* CONFIG_X86_64 */ +/* [re]ax isn't an arg, but the return val */ #define PVOP_VCALL_ARGS \ unsigned long __edi = __edi, __esi = __esi, \ - __edx = __edx, __ecx = __ecx -#define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax + __edx = __edx, __ecx = __ecx, __eax = __eax +#define PVOP_CALL_ARGS PVOP_VCALL_ARGS #define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) #define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x)) @@ -509,6 +510,7 @@ int paravirt_disable_iospace(void); "=c" (__ecx) #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) +/* void functions are still allowed [re]ax for scratch */ #define PVOP_VCALLEE_CLOBBERS "=a" (__eax) #define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS @@ -583,8 +585,8 @@ int paravirt_disable_iospace(void); VEXTRA_CLOBBERS, \ pre, post, ##__VA_ARGS__) -#define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \ - ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ +#define __PVOP_VCALLEESAVE(op, pre, post, ...) \ + ____PVOP_VCALL(op.func, CLBR_RET_REG, \ PVOP_VCALLEE_CLOBBERS, , \ pre, post, ##__VA_ARGS__) -- cgit v1.2.3 From 8968f9d3dc23d9a1821d97c6f11e72a59382e56c Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Tue, 13 Oct 2009 16:19:41 +0900 Subject: perf_event, x86, mce: Use TRACE_EVENT() for MCE logging This approach is the first baby step towards solving many of the structural problems the x86 MCE logging code is having today: - It has a private ring-buffer implementation that has a number of limitations and has been historically fragile and buggy. - It is using a quirky /dev/mcelog ioctl driven ABI that is MCE specific. /dev/mcelog is not part of any larger logging framework and hence has remained on the fringes for many years. - The MCE logging code is still very unclean partly due to its ABI limitations. Fields are being reused for multiple purposes, and the whole message structure is limited and x86 specific to begin with. All in one, the x86 tree would like to move away from this private implementation of an event logging facility to a broader framework. By using perf events we gain the following advantages: - Multiple user-space agents can access MCE events. We can have an mcelog daemon running but also a system-wide tracer capturing important events in flight-recorder mode. - Sampling support: the kernel and the user-space call-chain of MCE events can be stored and analyzed as well. This way actual patterns of bad behavior can be matched to precisely what kind of activity happened in the kernel (and/or in the app) around that moment in time. - Coupling with other hardware and software events: the PMU can track a number of other anomalies - monitoring software might chose to monitor those plus the MCE events as well - in one coherent stream of events. - Discovery of MCE sources - tracepoints are enumerated and tools can act upon the existence (or non-existence) of various channels of MCE information. - Filtering support: we just subscribe to and act upon the events we are interested in. Then even on a per event source basis there's in-kernel filter expressions available that can restrict the amount of data that hits the event channel. - Arbitrary deep per cpu buffering of events - we can buffer 32 entries or we can buffer as much as we want, as long as we have the RAM. - An NMI-safe ring-buffer implementation - mappable to user-space. - Built-in support for timestamping of events, PID markers, CPU markers, etc. - A rich ABI accessible over system call interface. Per cpu, per task and per workload monitoring of MCE events can be done this way. The ABI itself has a nice, meaningful structure. - Extensible ABI: new fields can be added without breaking tooling. New tracepoints can be added as the hardware side evolves. There's various parsers that can be used. - Lots of scheduling/buffering/batching modes of operandi for MCE events. poll() support. mmap() support. read() support. You name it. - Rich tooling support: even without any MCE specific extensions added the 'perf' tool today offers various views of MCE data: perf report, perf stat, perf trace can all be used to view logged MCE events and perhaps correlate them to certain user-space usage patterns. But it can be used directly as well, for user-space agents and policy action in mcelog, etc. With this we hope to achieve significant code cleanup and feature improvements in the MCE code, and we hope to be able to drop the /dev/mcelog facility in the end. This patch is just a plain dumb dump of mce_log() records to the tracepoints / perf events framework - a first proof of concept step. Signed-off-by: Hidetoshi Seto Cc: Huang Ying Cc: Andi Kleen LKML-Reference: <4AD42A0D.7050104@jp.fujitsu.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 6 ++++ include/trace/events/mce.h | 69 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 include/trace/events/mce.h (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index b1598a9436d0..39caea3d8bc3 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -46,6 +46,9 @@ #include "mce-internal.h" +#define CREATE_TRACE_POINTS +#include + int mce_disabled __read_mostly; #define MISC_MCELOG_MINOR 227 @@ -141,6 +144,9 @@ void mce_log(struct mce *mce) { unsigned next, entry; + /* Emit the trace record: */ + trace_mce_record(mce); + mce->finished = 0; wmb(); for (;;) { diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h new file mode 100644 index 000000000000..7eee77895cb3 --- /dev/null +++ b/include/trace/events/mce.h @@ -0,0 +1,69 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mce + +#if !defined(_TRACE_MCE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MCE_H + +#include +#include +#include + +TRACE_EVENT(mce_record, + + TP_PROTO(struct mce *m), + + TP_ARGS(m), + + TP_STRUCT__entry( + __field( u64, mcgcap ) + __field( u64, mcgstatus ) + __field( u8, bank ) + __field( u64, status ) + __field( u64, addr ) + __field( u64, misc ) + __field( u64, ip ) + __field( u8, cs ) + __field( u64, tsc ) + __field( u64, walltime ) + __field( u32, cpu ) + __field( u32, cpuid ) + __field( u32, apicid ) + __field( u32, socketid ) + __field( u8, cpuvendor ) + ), + + TP_fast_assign( + __entry->mcgcap = m->mcgcap; + __entry->mcgstatus = m->mcgstatus; + __entry->bank = m->bank; + __entry->status = m->status; + __entry->addr = m->addr; + __entry->misc = m->misc; + __entry->ip = m->ip; + __entry->cs = m->cs; + __entry->tsc = m->tsc; + __entry->walltime = m->time; + __entry->cpu = m->extcpu; + __entry->cpuid = m->cpuid; + __entry->apicid = m->apicid; + __entry->socketid = m->socketid; + __entry->cpuvendor = m->cpuvendor; + ), + + TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC: %016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x", + __entry->cpu, + __entry->mcgcap, __entry->mcgstatus, + __entry->bank, __entry->status, + __entry->addr, __entry->misc, + __entry->cs, __entry->ip, + __entry->tsc, + __entry->cpuvendor, __entry->cpuid, + __entry->walltime, + __entry->socketid, + __entry->apicid) +); + +#endif /* _TRACE_MCE_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From 194ec34184869f0de1cf255c924fc5299e1b3d27 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 13 Oct 2009 16:33:50 -0400 Subject: function-graph/x86: Replace unbalanced ret with jmp The function graph tracer replaces the return address with a hook to trace the exit of the function call. This hook will finish by returning to the real location the function should return to. But the current implementation uses a ret to jump to the real return location. This causes a imbalance between calls and ret. That is the original function does a call, the ret goes to the handler and then the handler does a ret without a matching call. Although the function graph tracer itself still breaks the branch predictor by replacing the original ret, by using a second ret and causing an imbalance, it breaks the predictor even more. This patch replaces the ret with a jmp to keep the calls and ret balanced. I tested this on one box and it showed a 1.7% increase in performance. Another box only showed a small 0.3% increase. But no box that I tested this on showed a decrease in performance by making this change. Signed-off-by: Steven Rostedt Acked-by: Mathieu Desnoyers Cc: Frederic Weisbecker LKML-Reference: <20091013203425.042034383@goodmis.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 7 ++----- arch/x86/kernel/entry_64.S | 6 +++--- 2 files changed, 5 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c097e7d607c6..7d52e9da5e0c 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1185,17 +1185,14 @@ END(ftrace_graph_caller) .globl return_to_handler return_to_handler: - pushl $0 pushl %eax - pushl %ecx pushl %edx movl %ebp, %eax call ftrace_return_to_handler - movl %eax, 0xc(%esp) + movl %eax, %ecx popl %edx - popl %ecx popl %eax - ret + jmp *%ecx #endif .section .rodata,"a" diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b5c061f8f358..bd5bbddddf91 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -155,11 +155,11 @@ GLOBAL(return_to_handler) call ftrace_return_to_handler - movq %rax, 16(%rsp) + movq %rax, %rdi movq 8(%rsp), %rdx movq (%rsp), %rax - addq $16, %rsp - retq + addq $24, %rsp + jmp *%rdi #endif -- cgit v1.2.3 From 9844ab11c763bfed9f054c82366b19dcda66aca9 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 14 Oct 2009 00:07:03 +0400 Subject: x86, apic: Introduce the NOOP apic driver Introduce NOOP APIC driver. We should use it in case if apic was disabled due to hardware of software/firmware problems (including user requested to disable it case). The driver is attempting to catch any inappropriate apic operation call with warning issue. Also it is possible to use some apic operation like IPI calls, read/write without checking for apic presence which should make callers code easier. Signed-off-by: Cyrill Gorcunov Cc: yinghai@kernel.org Cc: macro@linux-mips.org LKML-Reference: <20091013201022.534682104@openvz.org> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 2 + arch/x86/kernel/apic/Makefile | 2 +- arch/x86/kernel/apic/apic_noop.c | 194 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 197 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/apic/apic_noop.c (limited to 'arch/x86') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 474d80d3e6cc..08a5f420e07b 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -488,6 +488,8 @@ static inline unsigned int read_apic_id(void) extern void default_setup_apic_routing(void); +extern struct apic apic_noop; + #ifdef CONFIG_X86_32 extern struct apic apic_default; diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index da7b7b9f8bd8..565c1bfc507d 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -2,7 +2,7 @@ # Makefile for local APIC drivers and for the IO-APIC code # -obj-$(CONFIG_X86_LOCAL_APIC) += apic.o probe_$(BITS).o ipi.o nmi.o +obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_SMP) += ipi.o diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c new file mode 100644 index 000000000000..0b93ec2fde0a --- /dev/null +++ b/arch/x86/kernel/apic/apic_noop.c @@ -0,0 +1,194 @@ +/* + * NOOP APIC driver. + * + * Does almost nothing and should be substituted by a real apic driver via + * probe routine. + * + * Though in case if apic is disabled (for some reason) we try + * to not uglify the caller's code and allow to call (some) apic routines + * like self-ipi, etc... and issue a warning if an operation is not allowed + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +/* + * some operations should never be reached with + * noop apic if it's not turned off, this mostly + * means the caller forgot to disable apic (or + * check the apic presence) before doing a call + */ +static void warn_apic_enabled(void) +{ + WARN_ONCE((cpu_has_apic || !disable_apic), + "APIC: Called for NOOP operation with apic enabled\n"); +} + +/* + * To check operations but do not bloat source code + */ +#define NOOP_FUNC(func) func { warn_apic_enabled(); } +#define NOOP_FUNC_RET(func, ret) func { warn_apic_enabled(); return ret; } + +NOOP_FUNC(static void noop_init_apic_ldr(void)) +NOOP_FUNC(static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector)) +NOOP_FUNC(static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)) +NOOP_FUNC(static void noop_send_IPI_allbutself(int vector)) +NOOP_FUNC(static void noop_send_IPI_all(int vector)) +NOOP_FUNC(static void noop_send_IPI_self(int vector)) +NOOP_FUNC_RET(static int noop_wakeup_secondary_cpu(int apicid, unsigned long start_eip), -1) +NOOP_FUNC(static void noop_apic_write(u32 reg, u32 v)) +NOOP_FUNC(void noop_apic_wait_icr_idle(void)) +NOOP_FUNC_RET(static u32 noop_safe_apic_wait_icr_idle(void), 0) +NOOP_FUNC_RET(static u64 noop_apic_icr_read(void), 0) +NOOP_FUNC(static void noop_apic_icr_write(u32 low, u32 id)) +NOOP_FUNC_RET(static physid_mask_t noop_ioapic_phys_id_map(physid_mask_t phys_map), phys_map) +NOOP_FUNC_RET(static int noop_cpu_to_logical_apicid(int cpu), 1) +NOOP_FUNC_RET(static int noop_default_phys_pkg_id(int cpuid_apic, int index_msb), 0) +NOOP_FUNC_RET(static unsigned int noop_get_apic_id(unsigned long x), 0) + +static int noop_probe(void) +{ + /* should not ever be enabled this way */ + return 0; +} + +static int noop_apic_id_registered(void) +{ + warn_apic_enabled(); + return physid_isset(read_apic_id(), phys_cpu_present_map); +} + +static const struct cpumask *noop_target_cpus(void) +{ + warn_apic_enabled(); + + /* only BSP here */ + return cpumask_of(0); +} + +static unsigned long noop_check_apicid_used(physid_mask_t bitmap, int apicid) +{ + warn_apic_enabled(); + return physid_isset(apicid, bitmap); +} + +static unsigned long noop_check_apicid_present(int bit) +{ + warn_apic_enabled(); + return physid_isset(bit, phys_cpu_present_map); +} + +static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask) +{ + warn_apic_enabled(); + if (cpu != 0) + pr_warning("APIC: Vector allocated for non-BSP cpu\n"); + cpumask_clear(retmask); + cpumask_set_cpu(cpu, retmask); +} + +int noop_apicid_to_node(int logical_apicid) +{ + warn_apic_enabled(); + + /* we're always on node 0 */ + return 0; +} + +static u32 noop_apic_read(u32 reg) +{ + /* + * noop-read is always safe until we have + * non-disabled unit + */ + WARN_ON_ONCE((cpu_has_apic && !disable_apic)); + return 0; +} + +struct apic apic_noop = { + .name = "noop", + .probe = noop_probe, + .acpi_madt_oem_check = NULL, + + .apic_id_registered = noop_apic_id_registered, + + .irq_delivery_mode = dest_LowestPrio, + /* logical delivery broadcast to all CPUs: */ + .irq_dest_mode = 1, + + .target_cpus = noop_target_cpus, + .disable_esr = 0, + .dest_logical = APIC_DEST_LOGICAL, + .check_apicid_used = noop_check_apicid_used, + .check_apicid_present = noop_check_apicid_present, + + .vector_allocation_domain = noop_vector_allocation_domain, + .init_apic_ldr = noop_init_apic_ldr, + + .ioapic_phys_id_map = noop_ioapic_phys_id_map, + .setup_apic_routing = NULL, + .multi_timer_check = NULL, + .apicid_to_node = noop_apicid_to_node, + + .cpu_to_logical_apicid = noop_cpu_to_logical_apicid, + .cpu_present_to_apicid = default_cpu_present_to_apicid, + .apicid_to_cpu_present = default_apicid_to_cpu_present, + + .setup_portio_remap = NULL, + .check_phys_apicid_present = default_check_phys_apicid_present, + .enable_apic_mode = NULL, + + .phys_pkg_id = noop_default_phys_pkg_id, + + .mps_oem_check = NULL, + + .get_apic_id = noop_get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = 0x0F << 24, + + .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + + .send_IPI_mask = noop_send_IPI_mask, + .send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself, + .send_IPI_allbutself = noop_send_IPI_allbutself, + .send_IPI_all = noop_send_IPI_all, + .send_IPI_self = noop_send_IPI_self, + + .wakeup_secondary_cpu = noop_wakeup_secondary_cpu, + + /* should be safe */ + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, + + .wait_for_init_deassert = NULL, + + .smp_callin_clear_local_apic = NULL, + .inquire_remote_apic = NULL, + + .read = noop_apic_read, + .write = noop_apic_write, + .icr_read = noop_apic_icr_read, + .icr_write = noop_apic_icr_write, + .wait_icr_idle = noop_apic_wait_icr_idle, + .safe_wait_icr_idle = noop_safe_apic_wait_icr_idle, +}; -- cgit v1.2.3 From a933c61829509eb27083146dda392132baa0969a Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 14 Oct 2009 00:07:04 +0400 Subject: x86, apic: Use apic noop driver In case if apic were disabled we may use the whole apic NOOP driver instead of sparse poking the some functions in apic driver. Also NOOP would catch any inappropriate apic operation calls (not just read/write). Signed-off-by: Cyrill Gorcunov Cc: yinghai@kernel.org Cc: macro@linux-mips.org LKML-Reference: <20091013201022.747817361@openvz.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 894aa97f0717..61a5628810da 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -241,28 +241,12 @@ static int modern_apic(void) } /* - * bare function to substitute write operation - * and it's _that_ fast :) - */ -static void native_apic_write_dummy(u32 reg, u32 v) -{ - WARN_ON_ONCE((cpu_has_apic || !disable_apic)); -} - -static u32 native_apic_read_dummy(u32 reg) -{ - WARN_ON_ONCE((cpu_has_apic && !disable_apic)); - return 0; -} - -/* - * right after this call apic->write/read doesn't do anything - * note that there is no restore operation it works one way + * right after this call apic become NOOP driven + * so apic->write/read doesn't do anything */ void apic_disable(void) { - apic->read = native_apic_read_dummy; - apic->write = native_apic_write_dummy; + apic = &apic_noop; } void native_apic_wait_icr_idle(void) -- cgit v1.2.3 From 2626eb2b2fd958dc0f683126aa84e93b939699a1 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 14 Oct 2009 00:07:05 +0400 Subject: x86, apic: Limit apic dumping, introduce new show_lapic= setup option In case if a system has a large number of cpus printing apics contents may consume a long time period. We limit such an output by 1 apic by default. But to have an ability to see all apics or some part of them we introduce "show_lapic" setup option which allow us to limit/unlimit the number of APICs being dumped. Example: apic=debug show_lapic=5, or apic=debug show_lapic=all Also move apic_verbosity checking upper that way so helper routines do not need to inspect it at all. Suggested-by: Yinghai Lu Signed-off-by: Cyrill Gorcunov Cc: yinghai@kernel.org Cc: macro@linux-mips.org LKML-Reference: <20091013201022.926793122@openvz.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 47 ++++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index dc69f28489f5..8c718c93d079 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1599,9 +1599,6 @@ __apicdebuginit(void) print_IO_APIC(void) struct irq_desc *desc; unsigned int irq; - if (apic_verbosity == APIC_QUIET) - return; - printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); for (i = 0; i < nr_ioapics; i++) printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", @@ -1708,9 +1705,6 @@ __apicdebuginit(void) print_APIC_field(int base) { int i; - if (apic_verbosity == APIC_QUIET) - return; - printk(KERN_DEBUG); for (i = 0; i < 8; i++) @@ -1724,9 +1718,6 @@ __apicdebuginit(void) print_local_APIC(void *dummy) unsigned int i, v, ver, maxlvt; u64 icr; - if (apic_verbosity == APIC_QUIET) - return; - printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", smp_processor_id(), hard_smp_processor_id()); v = apic_read(APIC_ID); @@ -1824,13 +1815,19 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk("\n"); } -__apicdebuginit(void) print_all_local_APICs(void) +__apicdebuginit(void) print_local_APICs(int maxcpu) { int cpu; + if (!maxcpu) + return; + preempt_disable(); - for_each_online_cpu(cpu) + for_each_online_cpu(cpu) { + if (cpu >= maxcpu) + break; smp_call_function_single(cpu, print_local_APIC, NULL, 1); + } preempt_enable(); } @@ -1839,7 +1836,7 @@ __apicdebuginit(void) print_PIC(void) unsigned int v; unsigned long flags; - if (apic_verbosity == APIC_QUIET || !nr_legacy_irqs) + if (!nr_legacy_irqs) return; printk(KERN_DEBUG "\nprinting PIC contents\n"); @@ -1866,21 +1863,41 @@ __apicdebuginit(void) print_PIC(void) printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); } -__apicdebuginit(int) print_all_ICs(void) +static int __initdata show_lapic = 1; +static __init int setup_show_lapic(char *arg) +{ + int num = -1; + + if (strcmp(arg, "all") == 0) { + show_lapic = CONFIG_NR_CPUS; + } else { + get_option(&arg, &num); + if (num >= 0) + show_lapic = num; + } + + return 1; +} +__setup("show_lapic=", setup_show_lapic); + +__apicdebuginit(int) print_ICs(void) { + if (apic_verbosity == APIC_QUIET) + return 0; + print_PIC(); /* don't print out if apic is not there */ if (!cpu_has_apic && !apic_from_smp_config()) return 0; - print_all_local_APICs(); + print_local_APICs(show_lapic); print_IO_APIC(); return 0; } -fs_initcall(print_all_ICs); +fs_initcall(print_ICs); /* Where if anywhere is the i8259 connect in external int mode */ -- cgit v1.2.3 From 6c2c502910247d2820cb630e7b28fb6bdecdbf45 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Wed, 30 Sep 2009 11:02:59 -0500 Subject: x86: SGI UV: Fix irq affinity for hub based interrupts This patch fixes handling of uv hub irq affinity. IRQs with ALL or NODE affinity can be routed to cpus other than their originally assigned cpu. Those with CPU affinity cannot be rerouted. Signed-off-by: Dimitri Sivanich LKML-Reference: <20090930160259.GA7822@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_irq.h | 15 ++++- arch/x86/kernel/apic/io_apic.c | 49 +++++++++++++-- arch/x86/kernel/uv_irq.c | 128 ++++++++++++++++++++++++++++++++++++--- drivers/misc/sgi-xp/xpc_uv.c | 5 +- 4 files changed, 180 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uv/uv_irq.h b/arch/x86/include/asm/uv/uv_irq.h index 9613c8c0b647..5397e1290952 100644 --- a/arch/x86/include/asm/uv/uv_irq.h +++ b/arch/x86/include/asm/uv/uv_irq.h @@ -25,12 +25,21 @@ struct uv_IO_APIC_route_entry { dest : 32; }; +enum { + UV_AFFINITY_ALL, + UV_AFFINITY_NODE, + UV_AFFINITY_CPU +}; + extern struct irq_chip uv_irq_chip; -extern int arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long); +extern int +arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long, int); extern void arch_disable_uv_irq(int, unsigned long); +extern int uv_set_irq_affinity(unsigned int, const struct cpumask *); -extern int uv_setup_irq(char *, int, int, unsigned long); -extern void uv_teardown_irq(unsigned int, int, unsigned long); +extern int uv_irq_2_mmr_info(int, unsigned long *, int *); +extern int uv_setup_irq(char *, int, int, unsigned long, int); +extern void uv_teardown_irq(unsigned int); #endif /* _ASM_X86_UV_UV_IRQ_H */ diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8c718c93d079..bb52e7f6e953 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3731,9 +3731,10 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) * on the specified blade to allow the sending of MSIs to the specified CPU. */ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, - unsigned long mmr_offset) + unsigned long mmr_offset, int restrict) { const struct cpumask *eligible_cpu = cpumask_of(cpu); + struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; int mmr_pnode; unsigned long mmr_value; @@ -3749,6 +3750,11 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, if (err != 0) return err; + if (restrict == UV_AFFINITY_CPU) + desc->status |= IRQ_NO_BALANCING; + else + desc->status |= IRQ_MOVE_PCNTXT; + spin_lock_irqsave(&vector_lock, flags); set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, irq_name); @@ -3777,11 +3783,10 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, * Disable the specified MMR located on the specified blade so that MSIs are * longer allowed to be sent. */ -void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset) +void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset) { unsigned long mmr_value; struct uv_IO_APIC_route_entry *entry; - int mmr_pnode; BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); @@ -3789,9 +3794,45 @@ void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset) entry = (struct uv_IO_APIC_route_entry *)&mmr_value; entry->mask = 1; - mmr_pnode = uv_blade_to_pnode(mmr_blade); uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); } + +int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) +{ + struct irq_desc *desc = irq_to_desc(irq); + struct irq_cfg *cfg = desc->chip_data; + unsigned int dest; + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + unsigned long mmr_offset; + unsigned mmr_pnode; + + dest = set_desc_affinity(desc, mask); + if (dest == BAD_APICID) + return -1; + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + + entry->vector = cfg->vector; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 0; + entry->dest = dest; + + /* Get previously stored MMR and pnode of hub sourcing interrupts */ + if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode)) + return -1; + + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); + + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + + return 0; +} #endif /* CONFIG_X86_64 */ int __init io_apic_get_redir_entries (int ioapic) diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c index aeef529917e4..9a83775ab0f3 100644 --- a/arch/x86/kernel/uv_irq.c +++ b/arch/x86/kernel/uv_irq.c @@ -9,10 +9,22 @@ */ #include +#include #include #include #include +#include + +/* MMR offset and pnode of hub sourcing interrupts for a given irq */ +struct uv_irq_2_mmr_pnode{ + struct rb_node list; + unsigned long offset; + int pnode; + int irq; +}; +static spinlock_t uv_irq_lock; +static struct rb_root uv_irq_root; static void uv_noop(unsigned int irq) { @@ -39,25 +51,106 @@ struct irq_chip uv_irq_chip = { .unmask = uv_noop, .eoi = uv_ack_apic, .end = uv_noop, + .set_affinity = uv_set_irq_affinity, }; +/* + * Add offset and pnode information of the hub sourcing interrupts to the + * rb tree for a specific irq. + */ +static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade) +{ + struct rb_node **link = &uv_irq_root.rb_node; + struct rb_node *parent = NULL; + struct uv_irq_2_mmr_pnode *n; + struct uv_irq_2_mmr_pnode *e; + unsigned long irqflags; + + n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL, + uv_blade_to_memory_nid(blade)); + if (!n) + return -ENOMEM; + + n->irq = irq; + n->offset = offset; + n->pnode = uv_blade_to_pnode(blade); + spin_lock_irqsave(&uv_irq_lock, irqflags); + /* Find the right place in the rbtree: */ + while (*link) { + parent = *link; + e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list); + + if (unlikely(irq == e->irq)) { + /* irq entry exists */ + e->pnode = uv_blade_to_pnode(blade); + e->offset = offset; + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + kfree(n); + return 0; + } + + if (irq < e->irq) + link = &(*link)->rb_left; + else + link = &(*link)->rb_right; + } + + /* Insert the node into the rbtree. */ + rb_link_node(&n->list, parent, link); + rb_insert_color(&n->list, &uv_irq_root); + + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + return 0; +} + +/* Retrieve offset and pnode information from the rb tree for a specific irq */ +int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode) +{ + struct uv_irq_2_mmr_pnode *e; + struct rb_node *n; + unsigned long irqflags; + + spin_lock_irqsave(&uv_irq_lock, irqflags); + n = uv_irq_root.rb_node; + while (n) { + e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); + + if (e->irq == irq) { + *offset = e->offset; + *pnode = e->pnode; + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + return 0; + } + + if (irq < e->irq) + n = n->rb_left; + else + n = n->rb_right; + } + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + return -1; +} + /* * Set up a mapping of an available irq and vector, and enable the specified * MMR that defines the MSI that is to be sent to the specified CPU when an * interrupt is raised. */ int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, - unsigned long mmr_offset) + unsigned long mmr_offset, int restrict) { - int irq; - int ret; + int irq, ret; + + irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade)); - irq = create_irq(); if (irq <= 0) return -EBUSY; - ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset); - if (ret != irq) + ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset, + restrict); + if (ret == irq) + uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade); + else destroy_irq(irq); return ret; @@ -71,9 +164,28 @@ EXPORT_SYMBOL_GPL(uv_setup_irq); * * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq(). */ -void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset) +void uv_teardown_irq(unsigned int irq) { - arch_disable_uv_irq(mmr_blade, mmr_offset); + struct uv_irq_2_mmr_pnode *e; + struct rb_node *n; + unsigned long irqflags; + + spin_lock_irqsave(&uv_irq_lock, irqflags); + n = uv_irq_root.rb_node; + while (n) { + e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); + if (e->irq == irq) { + arch_disable_uv_irq(e->pnode, e->offset); + rb_erase(n, &uv_irq_root); + kfree(e); + break; + } + if (irq < e->irq) + n = n->rb_left; + else + n = n->rb_right; + } + spin_unlock_irqrestore(&uv_irq_lock, irqflags); destroy_irq(irq); } EXPORT_SYMBOL_GPL(uv_teardown_irq); diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c index c76677afda1b..b5bbe59f9c57 100644 --- a/drivers/misc/sgi-xp/xpc_uv.c +++ b/drivers/misc/sgi-xp/xpc_uv.c @@ -106,7 +106,8 @@ xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name) int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); #if defined CONFIG_X86_64 - mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset); + mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset, + UV_AFFINITY_CPU); if (mq->irq < 0) { dev_err(xpc_part, "uv_setup_irq() returned error=%d\n", -mq->irq); @@ -136,7 +137,7 @@ static void xpc_release_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq) { #if defined CONFIG_X86_64 - uv_teardown_irq(mq->irq, mq->mmr_blade, mq->mmr_offset); + uv_teardown_irq(mq->irq); #elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV int mmr_pnode; -- cgit v1.2.3 From 9338ad6ffb70eca97f335d93c54943828c8b209e Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Tue, 13 Oct 2009 15:32:36 -0500 Subject: x86, apic: Move SGI UV functionality out of generic IO-APIC code Move UV specific functionality out of the generic IO-APIC code. Signed-off-by: Dimitri Sivanich LKML-Reference: <20091013203236.GD20543@sgi.com> [ Cleaned up the code some more in their new places. ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hw_irq.h | 29 ++++++-- arch/x86/include/asm/uv/uv_irq.h | 7 -- arch/x86/kernel/apic/io_apic.c | 140 ++------------------------------------- arch/x86/kernel/uv_irq.c | 123 ++++++++++++++++++++++++++++++++-- 4 files changed, 145 insertions(+), 154 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index ba180d93b08c..56f0877c9329 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -79,14 +79,31 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr, int ioapic, int ioapic_pin, int trigger, int polarity) { - irq_attr->ioapic = ioapic; - irq_attr->ioapic_pin = ioapic_pin; - irq_attr->trigger = trigger; - irq_attr->polarity = polarity; + irq_attr->ioapic = ioapic; + irq_attr->ioapic_pin = ioapic_pin; + irq_attr->trigger = trigger; + irq_attr->polarity = polarity; } -extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, - struct io_apic_irq_attr *irq_attr); +/* + * This is performance-critical, we want to do it O(1) + * + * Most irqs are mapped 1:1 with pins. + */ +struct irq_cfg { + struct irq_pin_list *irq_2_pin; + cpumask_var_t domain; + cpumask_var_t old_domain; + unsigned move_cleanup_count; + u8 vector; + u8 move_in_progress : 1; +}; + +extern struct irq_cfg *irq_cfg(unsigned int); +extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *); +extern void send_cleanup_vector(struct irq_cfg *); +extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *); +extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr); extern void setup_ioapic_dest(void); extern void enable_IO_APIC(void); diff --git a/arch/x86/include/asm/uv/uv_irq.h b/arch/x86/include/asm/uv/uv_irq.h index 5397e1290952..d6b17c760622 100644 --- a/arch/x86/include/asm/uv/uv_irq.h +++ b/arch/x86/include/asm/uv/uv_irq.h @@ -31,13 +31,6 @@ enum { UV_AFFINITY_CPU }; -extern struct irq_chip uv_irq_chip; - -extern int -arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long, int); -extern void arch_disable_uv_irq(int, unsigned long); -extern int uv_set_irq_affinity(unsigned int, const struct cpumask *); - extern int uv_irq_2_mmr_info(int, unsigned long *, int *); extern int uv_setup_irq(char *, int, int, unsigned long, int); extern void uv_teardown_irq(unsigned int); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index bb52e7f6e953..ce16b65cfdcc 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -60,8 +60,6 @@ #include #include #include -#include -#include #include @@ -140,20 +138,6 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node) return pin; } -/* - * This is performance-critical, we want to do it O(1) - * - * Most irqs are mapped 1:1 with pins. - */ -struct irq_cfg { - struct irq_pin_list *irq_2_pin; - cpumask_var_t domain; - cpumask_var_t old_domain; - unsigned move_cleanup_count; - u8 vector; - u8 move_in_progress : 1; -}; - /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ #ifdef CONFIG_SPARSE_IRQ static struct irq_cfg irq_cfgx[] = { @@ -209,7 +193,7 @@ int __init arch_early_irq_init(void) } #ifdef CONFIG_SPARSE_IRQ -static struct irq_cfg *irq_cfg(unsigned int irq) +struct irq_cfg *irq_cfg(unsigned int irq) { struct irq_cfg *cfg = NULL; struct irq_desc *desc; @@ -361,7 +345,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) /* end for move_irq_desc */ #else -static struct irq_cfg *irq_cfg(unsigned int irq) +struct irq_cfg *irq_cfg(unsigned int irq) { return irq < nr_irqs ? irq_cfgx + irq : NULL; } @@ -1237,8 +1221,7 @@ next: return err; } -static int -assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) +int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) { int err; unsigned long flags; @@ -2245,7 +2228,7 @@ static int ioapic_retrigger_irq(unsigned int irq) */ #ifdef CONFIG_SMP -static void send_cleanup_vector(struct irq_cfg *cfg) +void send_cleanup_vector(struct irq_cfg *cfg) { cpumask_var_t cleanup_mask; @@ -2289,15 +2272,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq } } -static int -assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); - /* * Either sets desc->affinity to a valid value, and returns * ->cpu_mask_to_apicid of that, or returns BAD_APICID and * leaves desc->affinity untouched. */ -static unsigned int +unsigned int set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) { struct irq_cfg *cfg; @@ -3725,116 +3705,6 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) } #endif /* CONFIG_HT_IRQ */ -#ifdef CONFIG_X86_UV -/* - * Re-target the irq to the specified CPU and enable the specified MMR located - * on the specified blade to allow the sending of MSIs to the specified CPU. - */ -int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, - unsigned long mmr_offset, int restrict) -{ - const struct cpumask *eligible_cpu = cpumask_of(cpu); - struct irq_desc *desc = irq_to_desc(irq); - struct irq_cfg *cfg; - int mmr_pnode; - unsigned long mmr_value; - struct uv_IO_APIC_route_entry *entry; - unsigned long flags; - int err; - - BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); - - cfg = irq_cfg(irq); - - err = assign_irq_vector(irq, cfg, eligible_cpu); - if (err != 0) - return err; - - if (restrict == UV_AFFINITY_CPU) - desc->status |= IRQ_NO_BALANCING; - else - desc->status |= IRQ_MOVE_PCNTXT; - - spin_lock_irqsave(&vector_lock, flags); - set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, - irq_name); - spin_unlock_irqrestore(&vector_lock, flags); - - mmr_value = 0; - entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - entry->vector = cfg->vector; - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->polarity = 0; - entry->trigger = 0; - entry->mask = 0; - entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); - - mmr_pnode = uv_blade_to_pnode(mmr_blade); - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); - - if (cfg->move_in_progress) - send_cleanup_vector(cfg); - - return irq; -} - -/* - * Disable the specified MMR located on the specified blade so that MSIs are - * longer allowed to be sent. - */ -void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset) -{ - unsigned long mmr_value; - struct uv_IO_APIC_route_entry *entry; - - BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); - - mmr_value = 0; - entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - entry->mask = 1; - - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); -} - -int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) -{ - struct irq_desc *desc = irq_to_desc(irq); - struct irq_cfg *cfg = desc->chip_data; - unsigned int dest; - unsigned long mmr_value; - struct uv_IO_APIC_route_entry *entry; - unsigned long mmr_offset; - unsigned mmr_pnode; - - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) - return -1; - - mmr_value = 0; - entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - - entry->vector = cfg->vector; - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->polarity = 0; - entry->trigger = 0; - entry->mask = 0; - entry->dest = dest; - - /* Get previously stored MMR and pnode of hub sourcing interrupts */ - if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode)) - return -1; - - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); - - if (cfg->move_in_progress) - send_cleanup_vector(cfg); - - return 0; -} -#endif /* CONFIG_X86_64 */ - int __init io_apic_get_redir_entries (int ioapic) { union IO_APIC_reg_01 reg_01; diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c index 9a83775ab0f3..61d805df4c91 100644 --- a/arch/x86/kernel/uv_irq.c +++ b/arch/x86/kernel/uv_irq.c @@ -18,13 +18,16 @@ /* MMR offset and pnode of hub sourcing interrupts for a given irq */ struct uv_irq_2_mmr_pnode{ - struct rb_node list; - unsigned long offset; - int pnode; - int irq; + struct rb_node list; + unsigned long offset; + int pnode; + int irq; }; -static spinlock_t uv_irq_lock; -static struct rb_root uv_irq_root; + +static spinlock_t uv_irq_lock; +static struct rb_root uv_irq_root; + +static int uv_set_irq_affinity(unsigned int, const struct cpumask *); static void uv_noop(unsigned int irq) { @@ -131,6 +134,114 @@ int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode) return -1; } +/* + * Re-target the irq to the specified CPU and enable the specified MMR located + * on the specified blade to allow the sending of MSIs to the specified CPU. + */ +static int +arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, + unsigned long mmr_offset, int restrict) +{ + const struct cpumask *eligible_cpu = cpumask_of(cpu); + struct irq_desc *desc = irq_to_desc(irq); + struct irq_cfg *cfg; + int mmr_pnode; + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + int err; + + BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != + sizeof(unsigned long)); + + cfg = irq_cfg(irq); + + err = assign_irq_vector(irq, cfg, eligible_cpu); + if (err != 0) + return err; + + if (restrict == UV_AFFINITY_CPU) + desc->status |= IRQ_NO_BALANCING; + else + desc->status |= IRQ_MOVE_PCNTXT; + + set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, + irq_name); + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + entry->vector = cfg->vector; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 0; + entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); + + mmr_pnode = uv_blade_to_pnode(mmr_blade); + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); + + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + + return irq; +} + +/* + * Disable the specified MMR located on the specified blade so that MSIs are + * longer allowed to be sent. + */ +static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset) +{ + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + + BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != + sizeof(unsigned long)); + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + entry->mask = 1; + + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); +} + +static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) +{ + struct irq_desc *desc = irq_to_desc(irq); + struct irq_cfg *cfg = desc->chip_data; + unsigned int dest; + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + unsigned long mmr_offset; + unsigned mmr_pnode; + + dest = set_desc_affinity(desc, mask); + if (dest == BAD_APICID) + return -1; + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + + entry->vector = cfg->vector; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 0; + entry->dest = dest; + + /* Get previously stored MMR and pnode of hub sourcing interrupts */ + if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode)) + return -1; + + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); + + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + + return 0; +} + /* * Set up a mapping of an available irq and vector, and enable the specified * MMR that defines the MSI that is to be sent to the specified CPU when an -- cgit v1.2.3 From c44fc770845163f8d9e573f37f92a7b7a7ade14e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 19 Sep 2009 06:50:42 +0200 Subject: tracing: Move syscalls metadata handling from arch to core Most of the syscalls metadata processing is done from arch. But these operations are mostly generic accross archs. Especially now that we have a common variable name that expresses the number of syscalls supported by an arch: NR_syscalls, the only remaining bits that need to reside in arch is the syscall nr to addr translation. v2: Compare syscalls symbols only after the "sys" prefix so that we avoid spurious mismatches with archs that have syscalls wrappers, in which case syscalls symbols have "SyS" prefixed aliases. (Reported by: Heiko Carstens) Signed-off-by: Frederic Weisbecker Acked-by: Heiko Carstens Cc: Ingo Molnar Cc: Steven Rostedt Cc: Li Zefan Cc: Masami Hiramatsu Cc: Jason Baron Cc: Lai Jiangshan Cc: Martin Schwidefsky Cc: Paul Mundt --- arch/s390/kernel/ftrace.c | 67 +-------------------------------- arch/x86/kernel/ftrace.c | 76 +------------------------------------- include/trace/syscall.h | 2 +- kernel/trace/trace_syscalls.c | 86 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 91 insertions(+), 140 deletions(-) (limited to 'arch/x86') diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 57bdcb1e3cdf..7c5752c3423d 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -206,73 +206,10 @@ out: #ifdef CONFIG_FTRACE_SYSCALLS -extern unsigned long __start_syscalls_metadata[]; -extern unsigned long __stop_syscalls_metadata[]; extern unsigned int sys_call_table[]; -static struct syscall_metadata **syscalls_metadata; - -struct syscall_metadata *syscall_nr_to_meta(int nr) -{ - if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) - return NULL; - - return syscalls_metadata[nr]; -} - -int syscall_name_to_nr(char *name) -{ - int i; - - if (!syscalls_metadata) - return -1; - for (i = 0; i < NR_syscalls; i++) - if (syscalls_metadata[i]) - if (!strcmp(syscalls_metadata[i]->name, name)) - return i; - return -1; -} - -void set_syscall_enter_id(int num, int id) -{ - syscalls_metadata[num]->enter_id = id; -} - -void set_syscall_exit_id(int num, int id) +unsigned long __init arch_syscall_addr(int nr) { - syscalls_metadata[num]->exit_id = id; -} - -static struct syscall_metadata *find_syscall_meta(unsigned long syscall) -{ - struct syscall_metadata *start; - struct syscall_metadata *stop; - char str[KSYM_SYMBOL_LEN]; - - start = (struct syscall_metadata *)__start_syscalls_metadata; - stop = (struct syscall_metadata *)__stop_syscalls_metadata; - kallsyms_lookup(syscall, NULL, NULL, NULL, str); - - for ( ; start < stop; start++) { - if (start->name && !strcmp(start->name + 3, str + 3)) - return start; - } - return NULL; -} - -static int __init arch_init_ftrace_syscalls(void) -{ - struct syscall_metadata *meta; - int i; - syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * NR_syscalls, - GFP_KERNEL); - if (!syscalls_metadata) - return -ENOMEM; - for (i = 0; i < NR_syscalls; i++) { - meta = find_syscall_meta((unsigned long)sys_call_table[i]); - syscalls_metadata[i] = meta; - } - return 0; + return (unsigned long)sys_call_table[nr]; } -arch_initcall(arch_init_ftrace_syscalls); #endif diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 25e6f5fc4b1e..5a1b9758fd62 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -470,82 +470,10 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, #ifdef CONFIG_FTRACE_SYSCALLS -extern unsigned long __start_syscalls_metadata[]; -extern unsigned long __stop_syscalls_metadata[]; extern unsigned long *sys_call_table; -static struct syscall_metadata **syscalls_metadata; - -static struct syscall_metadata *find_syscall_meta(unsigned long *syscall) -{ - struct syscall_metadata *start; - struct syscall_metadata *stop; - char str[KSYM_SYMBOL_LEN]; - - - start = (struct syscall_metadata *)__start_syscalls_metadata; - stop = (struct syscall_metadata *)__stop_syscalls_metadata; - kallsyms_lookup((unsigned long) syscall, NULL, NULL, NULL, str); - - for ( ; start < stop; start++) { - if (start->name && !strcmp(start->name, str)) - return start; - } - return NULL; -} - -struct syscall_metadata *syscall_nr_to_meta(int nr) -{ - if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) - return NULL; - - return syscalls_metadata[nr]; -} - -int syscall_name_to_nr(char *name) -{ - int i; - - if (!syscalls_metadata) - return -1; - - for (i = 0; i < NR_syscalls; i++) { - if (syscalls_metadata[i]) { - if (!strcmp(syscalls_metadata[i]->name, name)) - return i; - } - } - return -1; -} - -void set_syscall_enter_id(int num, int id) -{ - syscalls_metadata[num]->enter_id = id; -} - -void set_syscall_exit_id(int num, int id) +unsigned long __init arch_syscall_addr(int nr) { - syscalls_metadata[num]->exit_id = id; -} - -static int __init arch_init_ftrace_syscalls(void) -{ - int i; - struct syscall_metadata *meta; - unsigned long **psys_syscall_table = &sys_call_table; - - syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * - NR_syscalls, GFP_KERNEL); - if (!syscalls_metadata) { - WARN_ON(1); - return -ENOMEM; - } - - for (i = 0; i < NR_syscalls; i++) { - meta = find_syscall_meta(psys_syscall_table[i]); - syscalls_metadata[i] = meta; - } - return 0; + return (unsigned long)(&sys_call_table)[nr]; } -arch_initcall(arch_init_ftrace_syscalls); #endif diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 5dc283ba5ae0..e972f0a40f8d 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -33,7 +33,7 @@ struct syscall_metadata { }; #ifdef CONFIG_FTRACE_SYSCALLS -extern struct syscall_metadata *syscall_nr_to_meta(int nr); +extern unsigned long arch_syscall_addr(int nr); extern int syscall_name_to_nr(char *name); void set_syscall_enter_id(int num, int id); void set_syscall_exit_id(int num, int id); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 9fbce6c9d2e1..8bda4bff2286 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -14,6 +14,69 @@ static int sys_refcount_exit; static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); +extern unsigned long __start_syscalls_metadata[]; +extern unsigned long __stop_syscalls_metadata[]; + +static struct syscall_metadata **syscalls_metadata; + +static struct syscall_metadata *find_syscall_meta(unsigned long syscall) +{ + struct syscall_metadata *start; + struct syscall_metadata *stop; + char str[KSYM_SYMBOL_LEN]; + + + start = (struct syscall_metadata *)__start_syscalls_metadata; + stop = (struct syscall_metadata *)__stop_syscalls_metadata; + kallsyms_lookup(syscall, NULL, NULL, NULL, str); + + for ( ; start < stop; start++) { + /* + * Only compare after the "sys" prefix. Archs that use + * syscall wrappers may have syscalls symbols aliases prefixed + * with "SyS" instead of "sys", leading to an unwanted + * mismatch. + */ + if (start->name && !strcmp(start->name + 3, str + 3)) + return start; + } + return NULL; +} + +static struct syscall_metadata *syscall_nr_to_meta(int nr) +{ + if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) + return NULL; + + return syscalls_metadata[nr]; +} + +int syscall_name_to_nr(char *name) +{ + int i; + + if (!syscalls_metadata) + return -1; + + for (i = 0; i < NR_syscalls; i++) { + if (syscalls_metadata[i]) { + if (!strcmp(syscalls_metadata[i]->name, name)) + return i; + } + } + return -1; +} + +void set_syscall_enter_id(int num, int id) +{ + syscalls_metadata[num]->enter_id = id; +} + +void set_syscall_exit_id(int num, int id) +{ + syscalls_metadata[num]->exit_id = id; +} + enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags) { @@ -375,6 +438,29 @@ struct trace_event event_syscall_exit = { .trace = print_syscall_exit, }; +int __init init_ftrace_syscalls(void) +{ + struct syscall_metadata *meta; + unsigned long addr; + int i; + + syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * + NR_syscalls, GFP_KERNEL); + if (!syscalls_metadata) { + WARN_ON(1); + return -ENOMEM; + } + + for (i = 0; i < NR_syscalls; i++) { + addr = arch_syscall_addr(i); + meta = find_syscall_meta(addr); + syscalls_metadata[i] = meta; + } + + return 0; +} +core_initcall(init_ftrace_syscalls); + #ifdef CONFIG_EVENT_PROFILE static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); -- cgit v1.2.3 From 89ccf465abe6b20d804a63ae20307970c441369d Mon Sep 17 00:00:00 2001 From: Li Hong Date: Wed, 14 Oct 2009 18:50:39 +0800 Subject: x86, perf_event: Rename 'performance counter interrupt' In 'cdd6c482c9ff9c55475ee7392ec8f672eddb7be6', we renamed Performance Counters -> Performance Events. The name showed up in /proc/interrupts also needs a change. I use PMI (Performance monitoring interrupt) here, since it is the official name used in Intel's documents. Signed-off-by: Li Hong Cc: Andrew Morton Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20091014105039.GA22670@uhli> Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 391206199515..86fb2c8e065a 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -63,10 +63,10 @@ static int show_other_interrupts(struct seq_file *p, int prec) for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); seq_printf(p, " Spurious interrupts\n"); - seq_printf(p, "%*s: ", prec, "CNT"); + seq_printf(p, "%*s: ", prec, "PMI"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); - seq_printf(p, " Performance counter interrupts\n"); + seq_printf(p, " Performance monitoring interrupts\n"); seq_printf(p, "%*s: ", prec, "PND"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); -- cgit v1.2.3 From 799e2205ec65e174f752b558c62a92c4752df313 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 9 Oct 2009 12:16:40 +0200 Subject: sched: Disable SD_PREFER_LOCAL for MC/CPU domains Yanmin reported that both tbench and hackbench were significantly hurt by trying to keep tasks local on these domains, esp on small cache machines. So disable it in order to promote spreading outside of the cache domains. Reported-by: "Zhang, Yanmin" Signed-off-by: Peter Zijlstra CC: Mike Galbraith LKML-Reference: <1255083400.8802.15.camel@laptop> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/topology.h | 1 + include/linux/topology.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 25a92842dd99..d823c245f63b 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -143,6 +143,7 @@ extern unsigned long node_remap_size[]; | 1*SD_BALANCE_FORK \ | 0*SD_BALANCE_WAKE \ | 1*SD_WAKE_AFFINE \ + | 1*SD_PREFER_LOCAL \ | 0*SD_SHARE_CPUPOWER \ | 0*SD_POWERSAVINGS_BALANCE \ | 0*SD_SHARE_PKG_RESOURCES \ diff --git a/include/linux/topology.h b/include/linux/topology.h index fc0bf3edeb67..57e63579bfdd 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -129,7 +129,7 @@ int arch_update_cpu_topology(void); | 1*SD_BALANCE_FORK \ | 0*SD_BALANCE_WAKE \ | 1*SD_WAKE_AFFINE \ - | 1*SD_PREFER_LOCAL \ + | 0*SD_PREFER_LOCAL \ | 0*SD_SHARE_CPUPOWER \ | 1*SD_SHARE_PKG_RESOURCES \ | 0*SD_SERIALIZE \ @@ -162,7 +162,7 @@ int arch_update_cpu_topology(void); | 1*SD_BALANCE_FORK \ | 0*SD_BALANCE_WAKE \ | 1*SD_WAKE_AFFINE \ - | 1*SD_PREFER_LOCAL \ + | 0*SD_PREFER_LOCAL \ | 0*SD_SHARE_CPUPOWER \ | 0*SD_SHARE_PKG_RESOURCES \ | 0*SD_SERIALIZE \ -- cgit v1.2.3 From 7ec13187ef48b04bb7f6dfa266c7271a52d009c2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 14 Oct 2009 15:06:42 +0200 Subject: x86, apic: Fix prototype in hw_irq.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This warning: In file included from arch/x86/include/asm/ipi.h:23, from arch/x86/kernel/apic/apic_noop.c:27: arch/x86/include/asm/hw_irq.h:105: warning: ‘struct irq_desc’ declared inside parameter list arch/x86/include/asm/hw_irq.h:105: warning: its scope is only this definition or declaration, which is probably not what you want triggers because irq_desc is defined after hw_irq.h is included in irq.h. Since it's pointer reference only, a forward declaration of the type will solve the problem. LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hw_irq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 56f0877c9329..1984ce9a13d2 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -102,6 +102,8 @@ struct irq_cfg { extern struct irq_cfg *irq_cfg(unsigned int); extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *); extern void send_cleanup_vector(struct irq_cfg *); + +struct irq_desc; extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *); extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr); extern void setup_ioapic_dest(void); -- cgit v1.2.3 From ac06ea2cd06291e63951b51dd7c9a23e6a1f2683 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 10 Oct 2009 09:35:48 +0200 Subject: x86: Remove BKL from microcode cycle_lock_kernel() in microcode_open() is a worthless exercise as there is nothing to wait for. Remove it. Signed-off-by: Thomas Gleixner LKML-Reference: <20091010153349.196074920@linutronix.de> --- arch/x86/kernel/microcode_core.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 378e9a8f1bf8..2bcad3926edb 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -73,7 +73,6 @@ #include #include #include -#include #include #include #include @@ -201,7 +200,6 @@ static int do_microcode_update(const void __user *buf, size_t size) static int microcode_open(struct inode *unused1, struct file *unused2) { - cycle_kernel_lock(); return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; } -- cgit v1.2.3 From 05d86412eab6a18cf57697474cc4f8fbfcd6936f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 9 Oct 2009 19:02:20 +0200 Subject: x86: Remove BKL from apm_32 The lock/unlock kernel pair in do_open() got there with the BKL push down and protects nothing. Remove it. Replace the lock/unlock kernel in the ioctl code with a mutex to protect standbys_pending and suspends_pending. Signed-off-by: Thomas Gleixner LKML-Reference: <20091010153349.365236337@linutronix.de> --- arch/x86/kernel/apm_32.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 151ace69a5aa..b5b6b23bce53 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -204,7 +204,6 @@ #include #include -#include #include #include #include @@ -403,6 +402,7 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); static struct apm_user *user_list; static DEFINE_SPINLOCK(user_list_lock); +static DEFINE_MUTEX(apm_mutex); /* * Set up a segment that references the real mode segment 0x40 @@ -1531,7 +1531,7 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg) return -EPERM; switch (cmd) { case APM_IOC_STANDBY: - lock_kernel(); + mutex_lock(&apm_mutex); if (as->standbys_read > 0) { as->standbys_read--; as->standbys_pending--; @@ -1540,10 +1540,10 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg) queue_event(APM_USER_STANDBY, as); if (standbys_pending <= 0) standby(); - unlock_kernel(); + mutex_unlock(&apm_mutex); break; case APM_IOC_SUSPEND: - lock_kernel(); + mutex_lock(&apm_mutex); if (as->suspends_read > 0) { as->suspends_read--; as->suspends_pending--; @@ -1552,13 +1552,14 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg) queue_event(APM_USER_SUSPEND, as); if (suspends_pending <= 0) { ret = suspend(1); + mutex_unlock(&apm_mutex); } else { as->suspend_wait = 1; + mutex_unlock(&apm_mutex); wait_event_interruptible(apm_suspend_waitqueue, as->suspend_wait == 0); ret = as->suspend_result; } - unlock_kernel(); return ret; default: return -ENOTTY; @@ -1608,12 +1609,10 @@ static int do_open(struct inode *inode, struct file *filp) { struct apm_user *as; - lock_kernel(); as = kmalloc(sizeof(*as), GFP_KERNEL); if (as == NULL) { printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n", sizeof(*as)); - unlock_kernel(); return -ENOMEM; } as->magic = APM_BIOS_MAGIC; @@ -1635,7 +1634,6 @@ static int do_open(struct inode *inode, struct file *filp) user_list = as; spin_unlock(&user_list_lock); filp->private_data = as; - unlock_kernel(); return 0; } -- cgit v1.2.3 From e47938b1faaf9e9041ae842a878901001ce20ea1 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Wed, 14 Oct 2009 09:16:30 -0500 Subject: x86: UV RTC: Fix early expiry handling Tune/fix early timer expiry handling and return correct early timeout value for set_next_event. Signed-off-by: Dimitri Sivanich LKML-Reference: <20091014141630.GB11048@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/uv_time.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c index 583f11d5c480..ec14889628e0 100644 --- a/arch/x86/kernel/uv_time.c +++ b/arch/x86/kernel/uv_time.c @@ -123,7 +123,10 @@ static int uv_setup_intr(int cpu, u64 expires) /* Initialize comparator value */ uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires); - return (expires < uv_read_rtc(NULL) && !uv_intr_pending(pnode)); + if (uv_read_rtc(NULL) <= expires) + return 0; + + return !uv_intr_pending(pnode); } /* @@ -223,6 +226,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires) next_cpu = head->next_cpu; *t = expires; + /* Will this one be next to go off? */ if (next_cpu < 0 || bcpu == next_cpu || expires < head->cpu[next_cpu].expires) { @@ -231,7 +235,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires) *t = ULLONG_MAX; uv_rtc_find_next_timer(head, pnode); spin_unlock_irqrestore(&head->lock, flags); - return 1; + return -ETIME; } } @@ -244,7 +248,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires) * * Returns 1 if this timer was pending. */ -static int uv_rtc_unset_timer(int cpu) +static int uv_rtc_unset_timer(int cpu, int force) { int pnode = uv_cpu_to_pnode(cpu); int bid = uv_cpu_to_blade_id(cpu); @@ -256,14 +260,15 @@ static int uv_rtc_unset_timer(int cpu) spin_lock_irqsave(&head->lock, flags); - if (head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) + if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force) rc = 1; - *t = ULLONG_MAX; - - /* Was the hardware setup for this timer? */ - if (head->next_cpu == bcpu) - uv_rtc_find_next_timer(head, pnode); + if (rc) { + *t = ULLONG_MAX; + /* Was the hardware setup for this timer? */ + if (head->next_cpu == bcpu) + uv_rtc_find_next_timer(head, pnode); + } spin_unlock_irqrestore(&head->lock, flags); @@ -310,20 +315,20 @@ static void uv_rtc_timer_setup(enum clock_event_mode mode, break; case CLOCK_EVT_MODE_UNUSED: case CLOCK_EVT_MODE_SHUTDOWN: - uv_rtc_unset_timer(ced_cpu); + uv_rtc_unset_timer(ced_cpu, 1); break; } } static void uv_rtc_interrupt(void) { - struct clock_event_device *ced = &__get_cpu_var(cpu_ced); int cpu = smp_processor_id(); + struct clock_event_device *ced = &per_cpu(cpu_ced, cpu); if (!ced || !ced->event_handler) return; - if (uv_rtc_unset_timer(cpu) != 1) + if (uv_rtc_unset_timer(cpu, 0) != 1) return; ced->event_handler(ced); -- cgit v1.2.3 From 8c28de4d011f37b2893ecfcec9a985c0e9bd786f Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Wed, 14 Oct 2009 09:18:48 -0500 Subject: x86: UV RTC: Add clocksource only boot option Add clocksource only boot option for UV RTC. Signed-off-by: Dimitri Sivanich LKML-Reference: <20091014141848.GC11048@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/uv_time.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c index ec14889628e0..c6324ad7c0d9 100644 --- a/arch/x86/kernel/uv_time.c +++ b/arch/x86/kernel/uv_time.c @@ -75,6 +75,7 @@ struct uv_rtc_timer_head { static struct uv_rtc_timer_head **blade_info __read_mostly; static int uv_rtc_enable; +static int uv_rtc_evt_enable; /* * Hardware interface routines @@ -342,6 +343,14 @@ static int __init uv_enable_rtc(char *str) } __setup("uvrtc", uv_enable_rtc); +static int __init uv_enable_evt_rtc(char *str) +{ + uv_rtc_evt_enable = 1; + + return 1; +} +__setup("uvrtcevt", uv_enable_evt_rtc); + static __init void uv_rtc_register_clockevents(struct work_struct *dummy) { struct clock_event_device *ced = &__get_cpu_var(cpu_ced); @@ -358,16 +367,20 @@ static __init int uv_rtc_setup_clock(void) if (!uv_rtc_enable || !is_uv_system() || generic_interrupt_extension) return -ENODEV; - generic_interrupt_extension = uv_rtc_interrupt; - clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second, clocksource_uv.shift); rc = clocksource_register(&clocksource_uv); - if (rc) { - generic_interrupt_extension = NULL; + if (rc) + printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc); + else + printk(KERN_INFO "UV RTC clocksource registered freq %lu MHz\n", + sn_rtc_cycles_per_second/(unsigned long)1E6); + + if (rc || !uv_rtc_evt_enable) return rc; - } + + generic_interrupt_extension = uv_rtc_interrupt; /* Setup and register clockevents */ rc = uv_rtc_allocate_timers(); -- cgit v1.2.3 From d5991ff297ad2f7e2698eefcd8269df5ecec150f Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Wed, 14 Oct 2009 09:21:03 -0500 Subject: x86: UV RTC: Clean up error handling Cleanup error handling in uv_rtc_setup_clock. Signed-off-by: Dimitri Sivanich LKML-Reference: <20091014142103.GD11048@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/uv_time.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c index c6324ad7c0d9..255645084534 100644 --- a/arch/x86/kernel/uv_time.c +++ b/arch/x86/kernel/uv_time.c @@ -380,15 +380,12 @@ static __init int uv_rtc_setup_clock(void) if (rc || !uv_rtc_evt_enable) return rc; - generic_interrupt_extension = uv_rtc_interrupt; - /* Setup and register clockevents */ rc = uv_rtc_allocate_timers(); - if (rc) { - clocksource_unregister(&clocksource_uv); - generic_interrupt_extension = NULL; - return rc; - } + if (rc) + goto error; + + generic_interrupt_extension = uv_rtc_interrupt; clock_event_device_uv.mult = div_sc(sn_rtc_cycles_per_second, NSEC_PER_SEC, clock_event_device_uv.shift); @@ -401,11 +398,19 @@ static __init int uv_rtc_setup_clock(void) rc = schedule_on_each_cpu(uv_rtc_register_clockevents); if (rc) { - clocksource_unregister(&clocksource_uv); generic_interrupt_extension = NULL; uv_rtc_deallocate_timers(); + goto error; } + printk(KERN_INFO "UV RTC clockevents registered\n"); + + return 0; + +error: + clocksource_unregister(&clocksource_uv); + printk(KERN_INFO "UV RTC clockevents failed rc %d\n", rc); + return rc; } arch_initcall(uv_rtc_setup_clock); -- cgit v1.2.3 From 4a4de9c7d7111ce4caf422b856756125d8304f9d Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Wed, 14 Oct 2009 09:22:57 -0500 Subject: x86: UV RTC: Rename generic_interrupt to x86_platform_ipi Signed-off-by: Dimitri Sivanich LKML-Reference: <20091014142257.GE11048@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/entry_arch.h | 2 +- arch/x86/include/asm/hardirq.h | 2 +- arch/x86/include/asm/hw_irq.h | 4 ++-- arch/x86/include/asm/irq.h | 2 +- arch/x86/include/asm/irq_vectors.h | 2 +- arch/x86/kernel/entry_64.S | 4 ++-- arch/x86/kernel/irq.c | 20 ++++++++++---------- arch/x86/kernel/irqinit.c | 4 ++-- arch/x86/kernel/uv_time.c | 10 +++++----- 9 files changed, 25 insertions(+), 25 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index f5693c81a1db..8e8ec663a98f 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -34,7 +34,7 @@ BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7, smp_invalidate_interrupt) #endif -BUILD_INTERRUPT(generic_interrupt, GENERIC_INTERRUPT_VECTOR) +BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) /* * every pentium local APIC has two 'local interrupts', with a diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 82e3e8f01043..beaabd794a10 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -12,7 +12,7 @@ typedef struct { unsigned int apic_timer_irqs; /* arch dependent */ unsigned int irq_spurious_count; #endif - unsigned int generic_irqs; /* arch dependent */ + unsigned int x86_platform_ipis; /* arch dependent */ unsigned int apic_perf_irqs; unsigned int apic_pending_irqs; #ifdef CONFIG_SMP diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index ba180d93b08c..95207ca5c6f1 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -27,7 +27,7 @@ /* Interrupt handlers registered during init_IRQ */ extern void apic_timer_interrupt(void); -extern void generic_interrupt(void); +extern void x86_platform_ipi(void); extern void error_interrupt(void); extern void perf_pending_interrupt(void); @@ -101,7 +101,7 @@ extern void eisa_set_level_irq(unsigned int irq); /* SMP */ extern void smp_apic_timer_interrupt(struct pt_regs *); extern void smp_spurious_interrupt(struct pt_regs *); -extern void smp_generic_interrupt(struct pt_regs *); +extern void smp_x86_platform_ipi(struct pt_regs *); extern void smp_error_interrupt(struct pt_regs *); #ifdef CONFIG_X86_IO_APIC extern asmlinkage void smp_irq_move_cleanup_interrupt(void); diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index ddda6cbed6f4..fcbc6d144501 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -36,7 +36,7 @@ static inline int irq_canonicalize(int irq) extern void fixup_irqs(void); #endif -extern void (*generic_interrupt_extension)(void); +extern void (*x86_platform_ipi_callback)(void); extern void native_init_IRQ(void); extern bool handle_irq(unsigned irq, struct pt_regs *regs); diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 5b21f0ec3df2..6a635bd39867 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -106,7 +106,7 @@ /* * Generic system vector for platform specific use */ -#define GENERIC_INTERRUPT_VECTOR 0xed +#define X86_PLATFORM_IPI_VECTOR 0xed /* * Performance monitoring pending work vector: diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b5c061f8f358..6714432ef381 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -969,8 +969,8 @@ apicinterrupt UV_BAU_MESSAGE \ #endif apicinterrupt LOCAL_TIMER_VECTOR \ apic_timer_interrupt smp_apic_timer_interrupt -apicinterrupt GENERIC_INTERRUPT_VECTOR \ - generic_interrupt smp_generic_interrupt +apicinterrupt X86_PLATFORM_IPI_VECTOR \ + x86_platform_ipi smp_x86_platform_ipi #ifdef CONFIG_SMP apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \ diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 391206199515..9375dce39f5f 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -18,7 +18,7 @@ atomic_t irq_err_count; /* Function pointer for generic interrupt vector handling */ -void (*generic_interrupt_extension)(void) = NULL; +void (*x86_platform_ipi_callback)(void) = NULL; /* * 'what should we do if we get a hw irq event on an illegal vector'. @@ -72,10 +72,10 @@ static int show_other_interrupts(struct seq_file *p, int prec) seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); seq_printf(p, " Performance pending work\n"); #endif - if (generic_interrupt_extension) { + if (x86_platform_ipi_callback) { seq_printf(p, "%*s: ", prec, "PLT"); for_each_online_cpu(j) - seq_printf(p, "%10u ", irq_stats(j)->generic_irqs); + seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis); seq_printf(p, " Platform interrupts\n"); } #ifdef CONFIG_SMP @@ -187,8 +187,8 @@ u64 arch_irq_stat_cpu(unsigned int cpu) sum += irq_stats(cpu)->apic_perf_irqs; sum += irq_stats(cpu)->apic_pending_irqs; #endif - if (generic_interrupt_extension) - sum += irq_stats(cpu)->generic_irqs; + if (x86_platform_ipi_callback) + sum += irq_stats(cpu)->x86_platform_ipis; #ifdef CONFIG_SMP sum += irq_stats(cpu)->irq_resched_count; sum += irq_stats(cpu)->irq_call_count; @@ -252,9 +252,9 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) } /* - * Handler for GENERIC_INTERRUPT_VECTOR. + * Handler for X86_PLATFORM_IPI_VECTOR. */ -void smp_generic_interrupt(struct pt_regs *regs) +void smp_x86_platform_ipi(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); @@ -264,10 +264,10 @@ void smp_generic_interrupt(struct pt_regs *regs) irq_enter(); - inc_irq_stat(generic_irqs); + inc_irq_stat(x86_platform_ipis); - if (generic_interrupt_extension) - generic_interrupt_extension(); + if (x86_platform_ipi_callback) + x86_platform_ipi_callback(); run_local_timers(); irq_exit(); diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 40f30773fb29..d5932226614f 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -200,8 +200,8 @@ static void __init apic_intr_init(void) /* self generated IPI for local APIC timer */ alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); - /* generic IPI for platform specific use */ - alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt); + /* IPI for X86 platform specific use */ + alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi); /* IPI vectors for APIC spurious and error interrupts */ alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c index 255645084534..3da7b1d8bfd3 100644 --- a/arch/x86/kernel/uv_time.c +++ b/arch/x86/kernel/uv_time.c @@ -91,7 +91,7 @@ static void uv_rtc_send_IPI(int cpu) pnode = uv_apicid_to_pnode(apicid); val = (1UL << UVH_IPI_INT_SEND_SHFT) | (apicid << UVH_IPI_INT_APIC_ID_SHFT) | - (GENERIC_INTERRUPT_VECTOR << UVH_IPI_INT_VECTOR_SHFT); + (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT); uv_write_global_mmr64(pnode, UVH_IPI_INT, val); } @@ -116,7 +116,7 @@ static int uv_setup_intr(int cpu, u64 expires) uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS, UVH_EVENT_OCCURRED0_RTC1_MASK); - val = (GENERIC_INTERRUPT_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | + val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); /* Set configuration */ @@ -364,7 +364,7 @@ static __init int uv_rtc_setup_clock(void) { int rc; - if (!uv_rtc_enable || !is_uv_system() || generic_interrupt_extension) + if (!uv_rtc_enable || !is_uv_system() || x86_platform_ipi_callback) return -ENODEV; clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second, @@ -385,7 +385,7 @@ static __init int uv_rtc_setup_clock(void) if (rc) goto error; - generic_interrupt_extension = uv_rtc_interrupt; + x86_platform_ipi_callback = uv_rtc_interrupt; clock_event_device_uv.mult = div_sc(sn_rtc_cycles_per_second, NSEC_PER_SEC, clock_event_device_uv.shift); @@ -398,7 +398,7 @@ static __init int uv_rtc_setup_clock(void) rc = schedule_on_each_cpu(uv_rtc_register_clockevents); if (rc) { - generic_interrupt_extension = NULL; + x86_platform_ipi_callback = NULL; uv_rtc_deallocate_timers(); goto error; } -- cgit v1.2.3 From e9a63a4e559fbdc522072281d05e6b13c1022f4b Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 14 Oct 2009 14:16:38 -0700 Subject: x86: linker script syntax nits The linker scripts grew some use of weirdly wrong linker script syntax. It happens to work, but it's not what the syntax is documented to be. Clean it up to use the official syntax. Signed-off-by: Roland McGrath CC: Ian Lance Taylor --- arch/x86/kernel/acpi/realmode/wakeup.lds.S | 4 ++-- arch/x86/kernel/vmlinux.lds.S | 17 ++++++++--------- 2 files changed, 10 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S index 7da00b799cda..0e50e1e5c573 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.lds.S @@ -56,6 +56,6 @@ SECTIONS /DISCARD/ : { *(.note*) } - - . = ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!"); } + +ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!"); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 92929fb3f9fa..8d6001ad8d8d 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -305,8 +305,8 @@ SECTIONS #ifdef CONFIG_X86_32 -. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), - "kernel image bigger than KERNEL_IMAGE_SIZE"); +ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE"); #else /* * Per-cpu symbols which need to be offset from __per_cpu_load @@ -319,12 +319,12 @@ INIT_PER_CPU(irq_stack_union); /* * Build-time check on the image size: */ -. = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), - "kernel image bigger than KERNEL_IMAGE_SIZE"); +ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE"); #ifdef CONFIG_SMP -. = ASSERT((per_cpu__irq_stack_union == 0), - "irq_stack_union is not at start of per-cpu area"); +ASSERT((per_cpu__irq_stack_union == 0), + "irq_stack_union is not at start of per-cpu area"); #endif #endif /* CONFIG_X86_32 */ @@ -332,7 +332,6 @@ INIT_PER_CPU(irq_stack_union); #ifdef CONFIG_KEXEC #include -. = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, - "kexec control code size is too big"); +ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, + "kexec control code size is too big"); #endif - -- cgit v1.2.3 From db8590f5043f3436a65b24155a3a7af2604df876 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 15 Oct 2009 08:08:12 +0200 Subject: Revert "x86: linker script syntax nits" This reverts commit e9a63a4e559fbdc522072281d05e6b13c1022f4b. This breaks older binutils, where sink-less asserts are broken. See this commit for further details: d2ba8b2: x86: Fix assert syntax in vmlinux.lds.S Acked-by: "H. Peter Anvin" Acked-by: Sam Ravnborg Cc: Linus Torvalds LKML-Reference: <4AD6523D.5030909@zytor.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/realmode/wakeup.lds.S | 4 ++-- arch/x86/kernel/vmlinux.lds.S | 17 +++++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S index 0e50e1e5c573..7da00b799cda 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.lds.S @@ -56,6 +56,6 @@ SECTIONS /DISCARD/ : { *(.note*) } -} -ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!"); + . = ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!"); +} diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 8d6001ad8d8d..92929fb3f9fa 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -305,8 +305,8 @@ SECTIONS #ifdef CONFIG_X86_32 -ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), - "kernel image bigger than KERNEL_IMAGE_SIZE"); +. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE"); #else /* * Per-cpu symbols which need to be offset from __per_cpu_load @@ -319,12 +319,12 @@ INIT_PER_CPU(irq_stack_union); /* * Build-time check on the image size: */ -ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), - "kernel image bigger than KERNEL_IMAGE_SIZE"); +. = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE"); #ifdef CONFIG_SMP -ASSERT((per_cpu__irq_stack_union == 0), - "irq_stack_union is not at start of per-cpu area"); +. = ASSERT((per_cpu__irq_stack_union == 0), + "irq_stack_union is not at start of per-cpu area"); #endif #endif /* CONFIG_X86_32 */ @@ -332,6 +332,7 @@ ASSERT((per_cpu__irq_stack_union == 0), #ifdef CONFIG_KEXEC #include -ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, - "kexec control code size is too big"); +. = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, + "kexec control code size is too big"); #endif + -- cgit v1.2.3 From f88f2b4fdb1e098433ad2b005b6f7353f7268ce1 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 15 Oct 2009 19:04:16 +0400 Subject: x86: apic: Allow noop operations to be called almost at any time As only apic noop is used we allow to use almost any operation caller wants (and which of them noop driver supports of course). Initially it was reported by Ingo Molnar that apic noop issue a warning for pkg id (which is actually false positive and should be eliminated). So we save checking (and warning issue) for read/write operations while allow any other ops to be freely used. Also: - fix noop_cpu_to_logical_apicid, it should be 0. - rename noop_default_phys_pkg_id to noop_phys_pkg_id (we use default_ prefix for more general routines in apic subsystem). Reported-by: Ingo Molnar Signed-off-by: Cyrill Gorcunov Cc: Yinghai Lu Cc: Maciej W. Rozycki LKML-Reference: <20091015150416.GC5331@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 1 + arch/x86/kernel/apic/apic_noop.c | 105 +++++++++++++++++++++------------------ 2 files changed, 59 insertions(+), 47 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 61a5628810da..dce93d4b0eaf 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -246,6 +246,7 @@ static int modern_apic(void) */ void apic_disable(void) { + pr_info("APIC: switched to apic NOOP\n"); apic = &apic_noop; } diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 0b93ec2fde0a..9ab6ffb313ac 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -6,7 +6,7 @@ * * Though in case if apic is disabled (for some reason) we try * to not uglify the caller's code and allow to call (some) apic routines - * like self-ipi, etc... and issue a warning if an operation is not allowed + * like self-ipi, etc... */ #include @@ -30,76 +30,88 @@ #include #include -/* - * some operations should never be reached with - * noop apic if it's not turned off, this mostly - * means the caller forgot to disable apic (or - * check the apic presence) before doing a call - */ -static void warn_apic_enabled(void) +static void noop_init_apic_ldr(void) { } +static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector) { } +static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { } +static void noop_send_IPI_allbutself(int vector) { } +static void noop_send_IPI_all(int vector) { } +static void noop_send_IPI_self(int vector) { } +static void noop_apic_wait_icr_idle(void) { } +static void noop_apic_icr_write(u32 low, u32 id) { } + +static int noop_wakeup_secondary_cpu(int apicid, unsigned long start_eip) { - WARN_ONCE((cpu_has_apic || !disable_apic), - "APIC: Called for NOOP operation with apic enabled\n"); + return -1; } -/* - * To check operations but do not bloat source code - */ -#define NOOP_FUNC(func) func { warn_apic_enabled(); } -#define NOOP_FUNC_RET(func, ret) func { warn_apic_enabled(); return ret; } - -NOOP_FUNC(static void noop_init_apic_ldr(void)) -NOOP_FUNC(static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector)) -NOOP_FUNC(static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)) -NOOP_FUNC(static void noop_send_IPI_allbutself(int vector)) -NOOP_FUNC(static void noop_send_IPI_all(int vector)) -NOOP_FUNC(static void noop_send_IPI_self(int vector)) -NOOP_FUNC_RET(static int noop_wakeup_secondary_cpu(int apicid, unsigned long start_eip), -1) -NOOP_FUNC(static void noop_apic_write(u32 reg, u32 v)) -NOOP_FUNC(void noop_apic_wait_icr_idle(void)) -NOOP_FUNC_RET(static u32 noop_safe_apic_wait_icr_idle(void), 0) -NOOP_FUNC_RET(static u64 noop_apic_icr_read(void), 0) -NOOP_FUNC(static void noop_apic_icr_write(u32 low, u32 id)) -NOOP_FUNC_RET(static physid_mask_t noop_ioapic_phys_id_map(physid_mask_t phys_map), phys_map) -NOOP_FUNC_RET(static int noop_cpu_to_logical_apicid(int cpu), 1) -NOOP_FUNC_RET(static int noop_default_phys_pkg_id(int cpuid_apic, int index_msb), 0) -NOOP_FUNC_RET(static unsigned int noop_get_apic_id(unsigned long x), 0) +static u32 noop_safe_apic_wait_icr_idle(void) +{ + return 0; +} + +static u64 noop_apic_icr_read(void) +{ + return 0; +} + +static physid_mask_t noop_ioapic_phys_id_map(physid_mask_t phys_map) +{ + return phys_map; +} + +static int noop_cpu_to_logical_apicid(int cpu) +{ + return 0; +} + +static int noop_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return 0; +} + +static unsigned int noop_get_apic_id(unsigned long x) +{ + return 0; +} static int noop_probe(void) { - /* should not ever be enabled this way */ + /* + * NOOP apic should not ever be + * enabled via probe routine + */ return 0; } static int noop_apic_id_registered(void) { - warn_apic_enabled(); - return physid_isset(read_apic_id(), phys_cpu_present_map); + /* + * if we would be really "pedantic" + * we should pass read_apic_id() here + * but since NOOP suppose APIC ID = 0 + * lets save a few cycles + */ + return physid_isset(0, phys_cpu_present_map); } static const struct cpumask *noop_target_cpus(void) { - warn_apic_enabled(); - /* only BSP here */ return cpumask_of(0); } static unsigned long noop_check_apicid_used(physid_mask_t bitmap, int apicid) { - warn_apic_enabled(); return physid_isset(apicid, bitmap); } static unsigned long noop_check_apicid_present(int bit) { - warn_apic_enabled(); return physid_isset(bit, phys_cpu_present_map); } static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask) { - warn_apic_enabled(); if (cpu != 0) pr_warning("APIC: Vector allocated for non-BSP cpu\n"); cpumask_clear(retmask); @@ -108,22 +120,21 @@ static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask) int noop_apicid_to_node(int logical_apicid) { - warn_apic_enabled(); - /* we're always on node 0 */ return 0; } static u32 noop_apic_read(u32 reg) { - /* - * noop-read is always safe until we have - * non-disabled unit - */ WARN_ON_ONCE((cpu_has_apic && !disable_apic)); return 0; } +static void noop_apic_write(u32 reg, u32 v) +{ + WARN_ON_ONCE((cpu_has_apic || !disable_apic)); +} + struct apic apic_noop = { .name = "noop", .probe = noop_probe, @@ -157,7 +168,7 @@ struct apic apic_noop = { .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, - .phys_pkg_id = noop_default_phys_pkg_id, + .phys_pkg_id = noop_phys_pkg_id, .mps_oem_check = NULL, -- cgit v1.2.3 From a5912f6b3e20c137172460e6d4dd180866c00963 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 16 Oct 2009 07:18:46 +0200 Subject: x86: Document linker script ASSERT() quirk Older binutils breaks if ASSERT() is used without a sink for the output. For example 2.14.90.0.6 is known to be broken, the link fails with: LD .tmp_vmlinux1 ld:arch/x86/kernel/vmlinux.lds:678: parse error Document this quirk in all three files that use it. See: http://marc.info/?l=linux-kbuild&m=124930110427870&w=2 See[2]: d2ba8b2 ("x86: Fix assert syntax in vmlinux.lds.S") Cc: Linus Torvalds Cc: Roland McGrath Cc: "H. Peter Anvin" Cc: Sam Ravnborg LKML-Reference: <4AD6523D.5030909@zytor.com> Signed-off-by: Ingo Molnar --- arch/x86/boot/setup.ld | 3 +++ arch/x86/kernel/acpi/realmode/wakeup.lds.S | 3 +++ arch/x86/kernel/vmlinux.lds.S | 3 +++ 3 files changed, 9 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld index 0f6ec455a2b1..03c0683636b6 100644 --- a/arch/x86/boot/setup.ld +++ b/arch/x86/boot/setup.ld @@ -53,6 +53,9 @@ SECTIONS /DISCARD/ : { *(.note*) } + /* + * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility: + */ . = ASSERT(_end <= 0x8000, "Setup too big!"); . = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!"); /* Necessary for the very-old-loader check to work... */ diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S index 7da00b799cda..060fff8f5c5b 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.lds.S @@ -57,5 +57,8 @@ SECTIONS *(.note*) } + /* + * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility: + */ . = ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!"); } diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 92929fb3f9fa..3c68fe2d46cf 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -305,6 +305,9 @@ SECTIONS #ifdef CONFIG_X86_32 +/* + * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility: + */ . = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), "kernel image bigger than KERNEL_IMAGE_SIZE"); #else -- cgit v1.2.3 From 036ed8ba61b72c19dc5759446d4fe0844aa88255 Mon Sep 17 00:00:00 2001 From: Robin Holt Date: Thu, 15 Oct 2009 17:40:00 -0500 Subject: x86, UV: Fix information in __uv_hub_info structure A few parts of the uv_hub_info structure are initialized incorrectly. - n_val is being loaded with m_val. - gpa_mask is initialized with a bytes instead of an unsigned long. - Handle the case where none of the alias registers are used. Lastly I converted the bau over to using the uv_hub_info->m_val which is the correct value. Without this patch, booting a large configuration hits a problem where the upper bits of the gnode affect the pnode and the bau will not operate. Signed-off-by: Robin Holt Acked-by: Jack Steiner Cc: Cliff Whickman Cc: stable@kernel.org LKML-Reference: <20091015224946.396355000@alcatraz.americas.sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 8 ++++---- arch/x86/kernel/tlb_uv.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index f5f5886a6b53..326c25477d3d 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -352,14 +352,14 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) { alias.v = uv_read_local_mmr(redir_addrs[i].alias); - if (alias.s.base == 0) { + if (alias.s.enable && alias.s.base == 0) { *size = (1UL << alias.s.m_alias); redirect.v = uv_read_local_mmr(redir_addrs[i].redirect); *base = (unsigned long)redirect.s.dest_base << DEST_SHIFT; return; } } - BUG(); + *base = *size = 0; } enum map_type {map_wb, map_uc}; @@ -619,12 +619,12 @@ void __init uv_system_init(void) uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size; uv_cpu_hub_info(cpu)->m_val = m_val; - uv_cpu_hub_info(cpu)->n_val = m_val; + uv_cpu_hub_info(cpu)->n_val = n_val; uv_cpu_hub_info(cpu)->numa_blade_id = blade; uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; uv_cpu_hub_info(cpu)->pnode = pnode; uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask; - uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; + uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1; uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 503c1f2e8835..f99fb6acfe34 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -843,8 +843,8 @@ static int __init uv_bau_init(void) GFP_KERNEL, cpu_to_node(cur_cpu)); uv_bau_retry_limit = 1; - uv_nshift = uv_hub_info->n_val; - uv_mmask = (1UL << uv_hub_info->n_val) - 1; + uv_nshift = uv_hub_info->m_val; + uv_mmask = (1UL << uv_hub_info->m_val) - 1; nblades = uv_num_possible_blades(); uv_bau_table_bases = (struct bau_control **) -- cgit v1.2.3 From 93ae5012a79b11e7fc855b52c7ce1e16fe1540b0 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 15 Oct 2009 14:21:14 -0700 Subject: x86: Don't print number of MCE banks for every CPU The MCE initialization code explicitly says it doesn't handle asymmetric configurations where different CPUs support different numbers of MCE banks, and it prints a big warning in that case. Therefore, printing the "mce: CPU supports MCE banks" message into the kernel log for every CPU is pure redundancy that clutters the log significantly for systems with lots of CPUs. Signed-off-by: Roland Dreier LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index b1598a9436d0..721a77ca8115 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1214,7 +1214,8 @@ static int __cpuinit mce_cap_init(void) rdmsrl(MSR_IA32_MCG_CAP, cap); b = cap & MCG_BANKCNT_MASK; - printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b); + if (!banks) + printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b); if (b > MAX_NR_BANKS) { printk(KERN_WARNING -- cgit v1.2.3 From 5e09954a9acc3b435ffe318b95afd3c02fae069f Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 16 Oct 2009 12:31:32 +0200 Subject: x86, mce: Fix up MCE naming nomenclature Prefix global/setup routines with "mcheck_" thus differentiating from the internal facilities prefixed with "mce_". Also, prefix the per cpu calls with mcheck_cpu and rename them to reflect the MCE setup hierarchy of calls better. There should be no functionality change resulting from this patch. Signed-off-by: Borislav Petkov Cc: Andi Kleen LKML-Reference: <1255689093-26921-1-git-send-email-borislav.petkov@amd.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 4 ++-- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/cpu/mcheck/mce.c | 52 ++++++++++++++++++++-------------------- 3 files changed, 29 insertions(+), 29 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 227a72df6441..161485da6838 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -120,9 +120,9 @@ extern int mce_disabled; extern int mce_p5_enabled; #ifdef CONFIG_X86_MCE -void mcheck_init(struct cpuinfo_x86 *c); +void mcheck_cpu_init(struct cpuinfo_x86 *c); #else -static inline void mcheck_init(struct cpuinfo_x86 *c) {} +static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} #endif #ifdef CONFIG_X86_ANCIENT_MCE diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cc25c2b4a567..4df69a38be57 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -839,7 +839,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_MCE /* Init Machine Check Exception if available. */ - mcheck_init(c); + mcheck_cpu_init(c); #endif select_idle_routine(c); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 0fb9dc50697e..68d968e69b13 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1136,7 +1136,7 @@ static int check_interval = 5 * 60; /* 5 minutes */ static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ static DEFINE_PER_CPU(struct timer_list, mce_timer); -static void mcheck_timer(unsigned long data) +static void mce_start_timer(unsigned long data) { struct timer_list *t = &per_cpu(mce_timer, data); int *n; @@ -1220,7 +1220,7 @@ static int mce_banks_init(void) /* * Initialize Machine Checks for a CPU. */ -static int __cpuinit mce_cap_init(void) +static int __cpuinit __mcheck_cpu_cap_init(void) { unsigned b; u64 cap; @@ -1258,7 +1258,7 @@ static int __cpuinit mce_cap_init(void) return 0; } -static void mce_init(void) +static void __mcheck_cpu_init_generic(void) { mce_banks_t all_banks; u64 cap; @@ -1287,7 +1287,7 @@ static void mce_init(void) } /* Add per CPU specific workarounds here */ -static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) +static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) { if (c->x86_vendor == X86_VENDOR_UNKNOWN) { pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); @@ -1355,7 +1355,7 @@ static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) return 0; } -static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) +static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) { if (c->x86 != 5) return; @@ -1369,7 +1369,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) } } -static void mce_cpu_features(struct cpuinfo_x86 *c) +static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) { switch (c->x86_vendor) { case X86_VENDOR_INTEL: @@ -1383,7 +1383,7 @@ static void mce_cpu_features(struct cpuinfo_x86 *c) } } -static void mce_init_timer(void) +static void __mcheck_cpu_init_timer(void) { struct timer_list *t = &__get_cpu_var(mce_timer); int *n = &__get_cpu_var(mce_next_interval); @@ -1394,7 +1394,7 @@ static void mce_init_timer(void) *n = check_interval * HZ; if (!*n) return; - setup_timer(t, mcheck_timer, smp_processor_id()); + setup_timer(t, mce_start_timer, smp_processor_id()); t->expires = round_jiffies(jiffies + *n); add_timer_on(t, smp_processor_id()); } @@ -1414,26 +1414,26 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) = * Called for each booted CPU to set up machine checks. * Must be called with preempt off: */ -void __cpuinit mcheck_init(struct cpuinfo_x86 *c) +void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) { if (mce_disabled) return; - mce_ancient_init(c); + __mcheck_cpu_ancient_init(c); if (!mce_available(c)) return; - if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) { + if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) { mce_disabled = 1; return; } machine_check_vector = do_machine_check; - mce_init(); - mce_cpu_features(c); - mce_init_timer(); + __mcheck_cpu_init_generic(); + __mcheck_cpu_init_vendor(c); + __mcheck_cpu_init_timer(); INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); if (raw_smp_processor_id() == 0) @@ -1665,7 +1665,7 @@ __setup("mce", mcheck_enable); * Disable machine checks on suspend and shutdown. We can't really handle * them later. */ -static int mce_disable(void) +static int mce_disable_error_reporting(void) { int i; @@ -1680,12 +1680,12 @@ static int mce_disable(void) static int mce_suspend(struct sys_device *dev, pm_message_t state) { - return mce_disable(); + return mce_disable_error_reporting(); } static int mce_shutdown(struct sys_device *dev) { - return mce_disable(); + return mce_disable_error_reporting(); } /* @@ -1695,8 +1695,8 @@ static int mce_shutdown(struct sys_device *dev) */ static int mce_resume(struct sys_device *dev) { - mce_init(); - mce_cpu_features(¤t_cpu_data); + __mcheck_cpu_init_generic(); + __mcheck_cpu_init_vendor(¤t_cpu_data); return 0; } @@ -1706,8 +1706,8 @@ static void mce_cpu_restart(void *data) del_timer_sync(&__get_cpu_var(mce_timer)); if (!mce_available(¤t_cpu_data)) return; - mce_init(); - mce_init_timer(); + __mcheck_cpu_init_generic(); + __mcheck_cpu_init_timer(); } /* Reinit MCEs after user configuration changes */ @@ -1733,7 +1733,7 @@ static void mce_enable_ce(void *all) cmci_reenable(); cmci_recheck(); if (all) - mce_init_timer(); + __mcheck_cpu_init_timer(); } static struct sysdev_class mce_sysclass = { @@ -2042,7 +2042,7 @@ static __init void mce_init_banks(void) } } -static __init int mce_init_device(void) +static __init int mcheck_init_device(void) { int err; int i = 0; @@ -2070,7 +2070,7 @@ static __init int mce_init_device(void) return err; } -device_initcall(mce_init_device); +device_initcall(mcheck_init_device); /* * Old style boot options parsing. Only for compatibility. @@ -2118,7 +2118,7 @@ static int fake_panic_set(void *data, u64 val) DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get, fake_panic_set, "%llu\n"); -static int __init mce_debugfs_init(void) +static int __init mcheck_debugfs_init(void) { struct dentry *dmce, *ffake_panic; @@ -2132,5 +2132,5 @@ static int __init mce_debugfs_init(void) return 0; } -late_initcall(mce_debugfs_init); +late_initcall(mcheck_debugfs_init); #endif -- cgit v1.2.3 From b33a6363649f0ff83ec81597ea7fe7e688f973cb Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 16 Oct 2009 12:31:33 +0200 Subject: x86, mce: Add a global MCE init helper Add an early initcall (pre SMP) which sets up global MCE functionality. Signed-off-by: Borislav Petkov Cc: Andi Kleen LKML-Reference: <1255689093-26921-2-git-send-email-borislav.petkov@amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 68d968e69b13..80801705edd7 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1436,8 +1436,6 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) __mcheck_cpu_init_timer(); INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); - if (raw_smp_processor_id() == 0) - atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb); } /* @@ -1657,6 +1655,14 @@ static int __init mcheck_enable(char *str) } __setup("mce", mcheck_enable); +static int __init mcheck_init(void) +{ + atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb); + + return 0; +} +early_initcall(mcheck_init); + /* * Sysfs support */ -- cgit v1.2.3 From 1d21e6e3ffad2939f9d8179817c6f9bc3b811b68 Mon Sep 17 00:00:00 2001 From: Robin Holt Date: Fri, 16 Oct 2009 06:29:20 -0500 Subject: x86, UV: Fix and clean up bau code to use uv_gpa_to_pnode() Create an inline function to extract the pnode from a global physical address and then convert the broadcast assist unit to use the newly created uv_gpa_to_pnode function. The open-coded code was wrong as well - it might explain a few of our unexplained bau hangs. Signed-off-by: Robin Holt Acked-by: Cliff Whickman Cc: linux-mm@kvack.org Cc: Jack Steiner LKML-Reference: <20091016112920.GZ8903@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_hub.h | 16 +++++++++++++++- arch/x86/kernel/tlb_uv.c | 7 ++----- 2 files changed, 17 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 04eb6c958b9d..94908a08020a 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -114,7 +114,7 @@ /* * The largest possible NASID of a C or M brick (+ 2) */ -#define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_NODES * 2) +#define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_BLADES * 2) struct uv_scir_s { struct timer_list timer; @@ -230,6 +230,20 @@ static inline unsigned long uv_gpa(void *v) return uv_soc_phys_ram_to_gpa(__pa(v)); } +/* gnode -> pnode */ +static inline unsigned long uv_gpa_to_gnode(unsigned long gpa) +{ + return gpa >> uv_hub_info->m_val; +} + +/* gpa -> pnode */ +static inline int uv_gpa_to_pnode(unsigned long gpa) +{ + unsigned long n_mask = (1UL << uv_hub_info->n_val) - 1; + + return uv_gpa_to_gnode(gpa) & n_mask; +} + /* pnode, offset --> socket virtual */ static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset) { diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index f99fb6acfe34..1740c85e24bb 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -23,8 +23,6 @@ static struct bau_control **uv_bau_table_bases __read_mostly; static int uv_bau_retry_limit __read_mostly; -/* position of pnode (which is nasid>>1): */ -static int uv_nshift __read_mostly; /* base pnode in this partition */ static int uv_partition_base_pnode __read_mostly; @@ -723,7 +721,7 @@ uv_activation_descriptor_init(int node, int pnode) BUG_ON(!adp); pa = uv_gpa(adp); /* need the real nasid*/ - n = pa >> uv_nshift; + n = uv_gpa_to_pnode(pa); m = pa & uv_mmask; uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, @@ -778,7 +776,7 @@ uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp) * need the pnode of where the memory was really allocated */ pa = uv_gpa(pqp); - pn = pa >> uv_nshift; + pn = uv_gpa_to_pnode(pa); uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | @@ -843,7 +841,6 @@ static int __init uv_bau_init(void) GFP_KERNEL, cpu_to_node(cur_cpu)); uv_bau_retry_limit = 1; - uv_nshift = uv_hub_info->m_val; uv_mmask = (1UL << uv_hub_info->m_val) - 1; nblades = uv_num_possible_blades(); -- cgit v1.2.3 From ace1546487a0fe4634e3251067f8a32cb2cdc099 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 8 Oct 2009 10:55:03 -0300 Subject: KVM: use proper hrtimer function to retrieve expiration time hrtimer->base can be temporarily NULL due to racing hrtimer_start. See switch_hrtimer_base/lock_hrtimer_base. Use hrtimer_get_remaining which is robust against it. CC: stable@kernel.org Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/i8254.c | 2 +- arch/x86/kvm/lapic.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 82ad523b4901..144e7f60b5e2 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -116,7 +116,7 @@ static s64 __kpit_elapsed(struct kvm *kvm) * itself with the initial count and continues counting * from there. */ - remaining = hrtimer_expires_remaining(&ps->pit_timer.timer); + remaining = hrtimer_get_remaining(&ps->pit_timer.timer); elapsed = ps->pit_timer.period - ktime_to_ns(remaining); elapsed = mod_64(elapsed, ps->pit_timer.period); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 7024224f0fc8..23c217692ea9 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -521,7 +521,7 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic) if (apic_get_reg(apic, APIC_TMICT) == 0) return 0; - remaining = hrtimer_expires_remaining(&apic->lapic_timer.timer); + remaining = hrtimer_get_remaining(&apic->lapic_timer.timer); if (ktime_to_ns(remaining) < 0) remaining = ktime_set(0, 0); -- cgit v1.2.3 From 8a8365c560b8b631e0a2d1ac032fbca66a9645bc Mon Sep 17 00:00:00 2001 From: Frederik Deweerdt Date: Fri, 9 Oct 2009 11:42:56 +0000 Subject: KVM: MMU: fix pointer cast MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On a 32 bits compile, commit 3da0dd433dc399a8c0124d0614d82a09b6a49bce introduced the following warnings: arch/x86/kvm/mmu.c: In function ‘kvm_set_pte_rmapp’: arch/x86/kvm/mmu.c:770: warning: cast to pointer from integer of different size arch/x86/kvm/mmu.c: In function ‘kvm_set_spte_hva’: arch/x86/kvm/mmu.c:849: warning: cast from pointer to integer of different size The following patch uses 'unsigned long' instead of u64 to match the pointer size on both arches. Signed-off-by: Frederik Deweerdt Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 685a4ffac8e6..818b92ad82cf 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -748,7 +748,8 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) return write_protected; } -static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data) +static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, + unsigned long data) { u64 *spte; int need_tlb_flush = 0; @@ -763,7 +764,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data) return need_tlb_flush; } -static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data) +static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, + unsigned long data) { int need_flush = 0; u64 *spte, new_spte; @@ -799,9 +801,10 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data) return 0; } -static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, u64 data, +static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, + unsigned long data, int (*handler)(struct kvm *kvm, unsigned long *rmapp, - u64 data)) + unsigned long data)) { int i, j; int retval = 0; @@ -846,10 +849,11 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) { - kvm_handle_hva(kvm, hva, (u64)&pte, kvm_set_pte_rmapp); + kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp); } -static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data) +static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, + unsigned long data) { u64 *spte; int young = 0; -- cgit v1.2.3 From 8c95bc3e206cff7a55edd2fc5f0e2b305d57903f Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 16 Oct 2009 20:07:36 -0400 Subject: x86: Add MMX/SSE opcode groups to opcode map Add missing MMX/SSE opcode groups to x86 opcode map. Signed-off-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <20091017000736.16556.29061.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/lib/x86-opcode-map.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 78a0daf12e15..e7285d8379e3 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -777,12 +777,22 @@ GrpTable: Grp11 EndTable GrpTable: Grp12 +2: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B) +4: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B) +6: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B) EndTable GrpTable: Grp13 +2: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B) +4: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B) +6: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B) EndTable GrpTable: Grp14 +2: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B) +3: psrldq Udq,Ib (66),(11B) +6: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B) +7: pslldq Udq,Ib (66),(11B) EndTable GrpTable: Grp15 -- cgit v1.2.3 From d1baf5a5a6088e2991b7dbbd370ff200bd6615ce Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 16 Oct 2009 20:07:44 -0400 Subject: x86: Add AMD prefetch and 3DNow! opcodes to opcode map Add AMD prefetch and 3DNow! opcode including FEMMS. Since 3DNow! uses the last immediate byte as an opcode extension byte, x86 insn just treats the extenstion byte as an immediate byte instead of a part of opcode (insn_get_opcode() decodes first "0x0f 0x0f" bytes.) Users who are interested in analyzing 3DNow! opcode still can decode it by analyzing the immediate byte. Signed-off-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <20091017000744.16556.27881.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/lib/x86-opcode-map.txt | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index e7285d8379e3..894497f77808 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -306,9 +306,10 @@ Referrer: 2-byte escape 0a: 0b: UD2 (1B) 0c: -0d: NOP Ev -0e: -0f: +0d: NOP Ev | GrpP +0e: FEMMS +# 3DNow! uses the last imm byte as opcode extension. +0f: 3DNow! Pq,Qq,Ib # 0x0f 0x10-0x1f 10: movups Vps,Wps | movss Vss,Wss (F3) | movupd Vpd,Wpd (66) | movsd Vsd,Wsd (F2) 11: movups Wps,Vps | movss Wss,Vss (F3) | movupd Wpd,Vpd (66) | movsd Wsd,Vsd (F2) @@ -813,6 +814,12 @@ GrpTable: Grp16 3: prefetch T2 EndTable +# AMD's Prefetch Group +GrpTable: GrpP +0: PREFETCH +1: PREFETCHW +EndTable + GrpTable: GrpPDLK 0: MONTMUL 1: XSHA1 -- cgit v1.2.3 From 0e1227d356e9b2fe0500d6cc7084f752040a1e0e Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 19 Oct 2009 11:53:06 +0900 Subject: crypto: ghash - Add PCLMULQDQ accelerated implementation PCLMULQDQ is used to accelerate the most time-consuming part of GHASH, carry-less multiplication. More information about PCLMULQDQ can be found at: http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/ Because PCLMULQDQ changes XMM state, its usage must be enclosed with kernel_fpu_begin/end, which can be used only in process context, the acceleration is implemented as crypto_ahash. That is, request in soft IRQ context will be defered to the cryptd kernel thread. Signed-off-by: Huang Ying Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 3 + arch/x86/crypto/ghash-clmulni-intel_asm.S | 157 ++++++++++++++ arch/x86/crypto/ghash-clmulni-intel_glue.c | 333 +++++++++++++++++++++++++++++ arch/x86/include/asm/cpufeature.h | 1 + crypto/Kconfig | 8 + crypto/cryptd.c | 7 + include/crypto/cryptd.h | 1 + 7 files changed, 510 insertions(+) create mode 100644 arch/x86/crypto/ghash-clmulni-intel_asm.S create mode 100644 arch/x86/crypto/ghash-clmulni-intel_glue.c (limited to 'arch/x86') diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index cfb0010fa940..1a58ad89fdf7 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o +obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o @@ -24,3 +25,5 @@ twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o + +ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S new file mode 100644 index 000000000000..b9e787a511da --- /dev/null +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -0,0 +1,157 @@ +/* + * Accelerated GHASH implementation with Intel PCLMULQDQ-NI + * instructions. This file contains accelerated part of ghash + * implementation. More information about PCLMULQDQ can be found at: + * + * http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/ + * + * Copyright (c) 2009 Intel Corp. + * Author: Huang Ying + * Vinodh Gopal + * Erdinc Ozturk + * Deniz Karakoyunlu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include + +.align 16 +.Lbswap_mask: + .octa 0x000102030405060708090a0b0c0d0e0f +.Lpoly: + .octa 0xc2000000000000000000000000000001 +.Ltwo_one: + .octa 0x00000001000000000000000000000001 + +#define DATA %xmm0 +#define SHASH %xmm1 +#define T1 %xmm2 +#define T2 %xmm3 +#define T3 %xmm4 +#define BSWAP %xmm5 +#define IN1 %xmm6 + +.text + +/* + * __clmul_gf128mul_ble: internal ABI + * input: + * DATA: operand1 + * SHASH: operand2, hash_key << 1 mod poly + * output: + * DATA: operand1 * operand2 mod poly + * changed: + * T1 + * T2 + * T3 + */ +__clmul_gf128mul_ble: + movaps DATA, T1 + pshufd $0b01001110, DATA, T2 + pshufd $0b01001110, SHASH, T3 + pxor DATA, T2 + pxor SHASH, T3 + + # pclmulqdq $0x00, SHASH, DATA # DATA = a0 * b0 + .byte 0x66, 0x0f, 0x3a, 0x44, 0xc1, 0x00 + # pclmulqdq $0x11, SHASH, T1 # T1 = a1 * b1 + .byte 0x66, 0x0f, 0x3a, 0x44, 0xd1, 0x11 + # pclmulqdq $0x00, T3, T2 # T2 = (a1 + a0) * (b1 + b0) + .byte 0x66, 0x0f, 0x3a, 0x44, 0xdc, 0x00 + pxor DATA, T2 + pxor T1, T2 # T2 = a0 * b1 + a1 * b0 + + movaps T2, T3 + pslldq $8, T3 + psrldq $8, T2 + pxor T3, DATA + pxor T2, T1 # is result of + # carry-less multiplication + + # first phase of the reduction + movaps DATA, T3 + psllq $1, T3 + pxor DATA, T3 + psllq $5, T3 + pxor DATA, T3 + psllq $57, T3 + movaps T3, T2 + pslldq $8, T2 + psrldq $8, T3 + pxor T2, DATA + pxor T3, T1 + + # second phase of the reduction + movaps DATA, T2 + psrlq $5, T2 + pxor DATA, T2 + psrlq $1, T2 + pxor DATA, T2 + psrlq $1, T2 + pxor T2, T1 + pxor T1, DATA + ret + +/* void clmul_ghash_mul(char *dst, const be128 *shash) */ +ENTRY(clmul_ghash_mul) + movups (%rdi), DATA + movups (%rsi), SHASH + movaps .Lbswap_mask, BSWAP + pshufb BSWAP, DATA + call __clmul_gf128mul_ble + pshufb BSWAP, DATA + movups DATA, (%rdi) + ret + +/* + * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, + * const be128 *shash); + */ +ENTRY(clmul_ghash_update) + cmp $16, %rdx + jb .Lupdate_just_ret # check length + movaps .Lbswap_mask, BSWAP + movups (%rdi), DATA + movups (%rcx), SHASH + pshufb BSWAP, DATA +.align 4 +.Lupdate_loop: + movups (%rsi), IN1 + pshufb BSWAP, IN1 + pxor IN1, DATA + call __clmul_gf128mul_ble + sub $16, %rdx + add $16, %rsi + cmp $16, %rdx + jge .Lupdate_loop + pshufb BSWAP, DATA + movups DATA, (%rdi) +.Lupdate_just_ret: + ret + +/* + * void clmul_ghash_setkey(be128 *shash, const u8 *key); + * + * Calculate hash_key << 1 mod poly + */ +ENTRY(clmul_ghash_setkey) + movaps .Lbswap_mask, BSWAP + movups (%rsi), %xmm0 + pshufb BSWAP, %xmm0 + movaps %xmm0, %xmm1 + psllq $1, %xmm0 + psrlq $63, %xmm1 + movaps %xmm1, %xmm2 + pslldq $8, %xmm1 + psrldq $8, %xmm2 + por %xmm1, %xmm0 + # reduction + pshufd $0b00100100, %xmm2, %xmm1 + pcmpeqd .Ltwo_one, %xmm1 + pand .Lpoly, %xmm1 + pxor %xmm1, %xmm0 + movups %xmm0, (%rdi) + ret diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c new file mode 100644 index 000000000000..65d409644d72 --- /dev/null +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -0,0 +1,333 @@ +/* + * Accelerated GHASH implementation with Intel PCLMULQDQ-NI + * instructions. This file contains glue code. + * + * Copyright (c) 2009 Intel Corp. + * Author: Huang Ying + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define GHASH_BLOCK_SIZE 16 +#define GHASH_DIGEST_SIZE 16 + +void clmul_ghash_mul(char *dst, const be128 *shash); + +void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, + const be128 *shash); + +void clmul_ghash_setkey(be128 *shash, const u8 *key); + +struct ghash_async_ctx { + struct cryptd_ahash *cryptd_tfm; +}; + +struct ghash_ctx { + be128 shash; +}; + +struct ghash_desc_ctx { + u8 buffer[GHASH_BLOCK_SIZE]; + u32 bytes; +}; + +static int ghash_init(struct shash_desc *desc) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + + memset(dctx, 0, sizeof(*dctx)); + + return 0; +} + +static int ghash_setkey(struct crypto_shash *tfm, + const u8 *key, unsigned int keylen) +{ + struct ghash_ctx *ctx = crypto_shash_ctx(tfm); + + if (keylen != GHASH_BLOCK_SIZE) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + clmul_ghash_setkey(&ctx->shash, key); + + return 0; +} + +static int ghash_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + u8 *dst = dctx->buffer; + + kernel_fpu_begin(); + if (dctx->bytes) { + int n = min(srclen, dctx->bytes); + u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes); + + dctx->bytes -= n; + srclen -= n; + + while (n--) + *pos++ ^= *src++; + + if (!dctx->bytes) + clmul_ghash_mul(dst, &ctx->shash); + } + + clmul_ghash_update(dst, src, srclen, &ctx->shash); + kernel_fpu_end(); + + if (srclen & 0xf) { + src += srclen - (srclen & 0xf); + srclen &= 0xf; + dctx->bytes = GHASH_BLOCK_SIZE - srclen; + while (srclen--) + *dst++ ^= *src++; + } + + return 0; +} + +static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx) +{ + u8 *dst = dctx->buffer; + + if (dctx->bytes) { + u8 *tmp = dst + (GHASH_BLOCK_SIZE - dctx->bytes); + + while (dctx->bytes--) + *tmp++ ^= 0; + + kernel_fpu_begin(); + clmul_ghash_mul(dst, &ctx->shash); + kernel_fpu_end(); + } + + dctx->bytes = 0; +} + +static int ghash_final(struct shash_desc *desc, u8 *dst) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + u8 *buf = dctx->buffer; + + ghash_flush(ctx, dctx); + memcpy(dst, buf, GHASH_BLOCK_SIZE); + + return 0; +} + +static struct shash_alg ghash_alg = { + .digestsize = GHASH_DIGEST_SIZE, + .init = ghash_init, + .update = ghash_update, + .final = ghash_final, + .setkey = ghash_setkey, + .descsize = sizeof(struct ghash_desc_ctx), + .base = { + .cra_name = "__ghash", + .cra_driver_name = "__ghash-pclmulqdqni", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = GHASH_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ghash_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ghash_alg.base.cra_list), + }, +}; + +static int ghash_async_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + + if (irq_fpu_usable()) { + memcpy(cryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + return crypto_ahash_init(cryptd_req); + } else { + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); + + desc->tfm = child; + desc->flags = req->base.flags; + return crypto_shash_init(desc); + } +} + +static int ghash_async_update(struct ahash_request *req) +{ + struct ahash_request *cryptd_req = ahash_request_ctx(req); + + if (irq_fpu_usable()) { + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + + memcpy(cryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + return crypto_ahash_update(cryptd_req); + } else { + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + return shash_ahash_update(req, desc); + } +} + +static int ghash_async_final(struct ahash_request *req) +{ + struct ahash_request *cryptd_req = ahash_request_ctx(req); + + if (irq_fpu_usable()) { + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + + memcpy(cryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + return crypto_ahash_final(cryptd_req); + } else { + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + return crypto_shash_final(desc, req->result); + } +} + +static int ghash_async_digest(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + + if (irq_fpu_usable()) { + memcpy(cryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + return crypto_ahash_digest(cryptd_req); + } else { + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); + + desc->tfm = child; + desc->flags = req->base.flags; + return shash_ahash_digest(req, desc); + } +} + +static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct crypto_ahash *child = &ctx->cryptd_tfm->base; + int err; + + crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm) + & CRYPTO_TFM_REQ_MASK); + err = crypto_ahash_setkey(child, key, keylen); + crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child) + & CRYPTO_TFM_RES_MASK); + + return 0; +} + +static int ghash_async_init_tfm(struct crypto_tfm *tfm) +{ + struct cryptd_ahash *cryptd_tfm; + struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); + + cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ctx->cryptd_tfm = cryptd_tfm; + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct ahash_request) + + crypto_ahash_reqsize(&cryptd_tfm->base)); + + return 0; +} + +static void ghash_async_exit_tfm(struct crypto_tfm *tfm) +{ + struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); + + cryptd_free_ahash(ctx->cryptd_tfm); +} + +static struct ahash_alg ghash_async_alg = { + .init = ghash_async_init, + .update = ghash_async_update, + .final = ghash_async_final, + .setkey = ghash_async_setkey, + .digest = ghash_async_digest, + .halg = { + .digestsize = GHASH_DIGEST_SIZE, + .base = { + .cra_name = "ghash", + .cra_driver_name = "ghash-clmulni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, + .cra_blocksize = GHASH_BLOCK_SIZE, + .cra_type = &crypto_ahash_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ghash_async_alg.halg.base.cra_list), + .cra_init = ghash_async_init_tfm, + .cra_exit = ghash_async_exit_tfm, + }, + }, +}; + +static int __init ghash_pclmulqdqni_mod_init(void) +{ + int err; + + if (!cpu_has_pclmulqdq) { + printk(KERN_INFO "Intel PCLMULQDQ-NI instructions are not" + " detected.\n"); + return -ENODEV; + } + + err = crypto_register_shash(&ghash_alg); + if (err) + goto err_out; + err = crypto_register_ahash(&ghash_async_alg); + if (err) + goto err_shash; + + return 0; + +err_shash: + crypto_unregister_shash(&ghash_alg); +err_out: + return err; +} + +static void __exit ghash_pclmulqdqni_mod_exit(void) +{ + crypto_unregister_ahash(&ghash_async_alg); + crypto_unregister_shash(&ghash_alg); +} + +module_init(ghash_pclmulqdqni_mod_init); +module_exit(ghash_pclmulqdqni_mod_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("GHASH Message Digest Algorithm, " + "acclerated by PCLMULQDQ-NI"); +MODULE_ALIAS("ghash"); diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 9cfc88b97742..613700f27a4a 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -248,6 +248,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) +#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) # define cpu_has_invlpg 1 diff --git a/crypto/Kconfig b/crypto/Kconfig index 26b5dd0cb564..fd6871102b60 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -440,6 +440,14 @@ config CRYPTO_WP512 See also: +config CRYPTO_GHASH_CLMUL_NI_INTEL + tristate "GHASH digest algorithm (CLMUL-NI accelerated)" + select CRYPTO_SHASH + select CRYPTO_CRYPTD + help + GHASH is message digest algorithm for GCM (Galois/Counter Mode). + The implementation is accelerated by CLMUL-NI of Intel. + comment "Ciphers" config CRYPTO_AES diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 35335825a4ef..f8ae0d94a647 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -711,6 +711,13 @@ struct crypto_shash *cryptd_ahash_child(struct cryptd_ahash *tfm) } EXPORT_SYMBOL_GPL(cryptd_ahash_child); +struct shash_desc *cryptd_shash_desc(struct ahash_request *req) +{ + struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req); + return &rctx->desc; +} +EXPORT_SYMBOL_GPL(cryptd_shash_desc); + void cryptd_free_ahash(struct cryptd_ahash *tfm) { crypto_free_ahash(&tfm->base); diff --git a/include/crypto/cryptd.h b/include/crypto/cryptd.h index 2f65a6e8ea4d..1c96b255017c 100644 --- a/include/crypto/cryptd.h +++ b/include/crypto/cryptd.h @@ -39,6 +39,7 @@ static inline struct cryptd_ahash *__cryptd_ahash_cast( struct cryptd_ahash *cryptd_alloc_ahash(const char *alg_name, u32 type, u32 mask); struct crypto_shash *cryptd_ahash_child(struct cryptd_ahash *tfm); +struct shash_desc *cryptd_shash_desc(struct ahash_request *req); void cryptd_free_ahash(struct cryptd_ahash *tfm); #endif -- cgit v1.2.3 From 13b79b971564ddd0f14e706592472adc8199e912 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Tue, 20 Oct 2009 16:20:47 +0900 Subject: crypto: aesni-intel - Fix irq_fpu_usable usage When renaming kernel_fpu_using to irq_fpu_usable, the semantics of the function is changed too, from mesuring whether kernel is using FPU, that is, the FPU is NOT available, to measuring whether FPU is usable, that is, the FPU is available. But the usage of irq_fpu_usable in aesni-intel_glue.c is not changed accordingly. This patch fixes this. Signed-off-by: Huang Ying Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 585edebe12cf..49c552c060e9 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -82,7 +82,7 @@ static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx, return -EINVAL; } - if (irq_fpu_usable()) + if (!irq_fpu_usable()) err = crypto_aes_expand_key(ctx, in_key, key_len); else { kernel_fpu_begin(); @@ -103,7 +103,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); - if (irq_fpu_usable()) + if (!irq_fpu_usable()) crypto_aes_encrypt_x86(ctx, dst, src); else { kernel_fpu_begin(); @@ -116,7 +116,7 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); - if (irq_fpu_usable()) + if (!irq_fpu_usable()) crypto_aes_decrypt_x86(ctx, dst, src); else { kernel_fpu_begin(); @@ -342,7 +342,7 @@ static int ablk_encrypt(struct ablkcipher_request *req) struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); - if (irq_fpu_usable()) { + if (!irq_fpu_usable()) { struct ablkcipher_request *cryptd_req = ablkcipher_request_ctx(req); memcpy(cryptd_req, req, sizeof(*req)); @@ -363,7 +363,7 @@ static int ablk_decrypt(struct ablkcipher_request *req) struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); - if (irq_fpu_usable()) { + if (!irq_fpu_usable()) { struct ablkcipher_request *cryptd_req = ablkcipher_request_ctx(req); memcpy(cryptd_req, req, sizeof(*req)); -- cgit v1.2.3 From 06ed6ba5ecb771cc3a967838a4bb1d9cbd8786b9 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 20 Oct 2009 12:55:24 -0400 Subject: x86: Fix group attribute decoding bug Fix a typo in inat_get_group_attribute() which should refer inat_group_tables, not inat_escape_tables. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Jim Keniston Cc: Frederic Weisbecker LKML-Reference: <20091020165524.4145.97333.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/lib/inat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c index 054656a01dfd..3fb5998b823e 100644 --- a/arch/x86/lib/inat.c +++ b/arch/x86/lib/inat.c @@ -68,7 +68,7 @@ insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, if (!table) return inat_group_common_attribute(grp_attr); if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) { - table = inat_escape_tables[n][m]; + table = inat_group_tables[n][m]; if (!table) return inat_group_common_attribute(grp_attr); } -- cgit v1.2.3 From 9983d60d74db9e544c6cb6f65351849fe8e9c1de Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 20 Oct 2009 12:55:31 -0400 Subject: x86: Add AES opcodes to opcode map Add Intel AES opcodes to x86 opcode map. These opcodes are used in arch/x86/crypt/aesni-intel_asm.S. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Jim Keniston Cc: Frederic Weisbecker LKML-Reference: <20091020165531.4145.21872.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/lib/x86-opcode-map.txt | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 894497f77808..701c4678687b 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -567,7 +567,7 @@ fe: paddd Pq,Qq | paddd Vdq,Wdq (66) ff: EndTable -Table: 3-byte opcode 1 +Table: 3-byte opcode 1 (0x0f 0x38) Referrer: 3-byte escape 1 # 0x0f 0x38 0x00-0x0f 00: pshufb Pq,Qq | pshufb Vdq,Wdq (66) @@ -642,11 +642,16 @@ Referrer: 3-byte escape 1 41: phminposuw Vdq,Wdq (66) 80: INVEPT Gd/q,Mdq (66) 81: INVPID Gd/q,Mdq (66) +db: aesimc Vdq,Wdq (66) +dc: aesenc Vdq,Wdq (66) +dd: aesenclast Vdq,Wdq (66) +de: aesdec Vdq,Wdq (66) +df: aesdeclast Vdq,Wdq (66) f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2) f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2) EndTable -Table: 3-byte opcode 2 +Table: 3-byte opcode 2 (0x0f 0x3a) Referrer: 3-byte escape 2 # 0x0f 0x3a 0x00-0xff 08: roundps Vdq,Wdq,Ib (66) @@ -671,6 +676,7 @@ Referrer: 3-byte escape 2 61: pcmpestri Vdq,Wdq,Ib (66) 62: pcmpistrm Vdq,Wdq,Ib (66) 63: pcmpistri Vdq,Wdq,Ib (66) +df: aeskeygenassist Vdq,Wdq,Ib (66) EndTable GrpTable: Grp1 -- cgit v1.2.3 From 02dd0a0613e0d84c7dd8315e3fe6204d005b7c79 Mon Sep 17 00:00:00 2001 From: Robin Holt Date: Tue, 20 Oct 2009 14:36:15 -0500 Subject: x86, UV: Set DELIVERY_MODE=4 for vector=NMI_VECTOR in uv_hub_send_ipi() When sending a NMI_VECTOR IPI using the UV_HUB_IPI_INT register, we need to ensure the delivery mode field of that register has NMI delivery selected. This makes those IPIs true NMIs, instead of flat IPIs. It matters to reboot sequences and KGDB, both of which use NMI IPIs. Signed-off-by: Robin Holt Acked-by: Jack Steiner Cc: Martin Hicks Cc: LKML-Reference: <20091020193620.877322000@alcatraz.americas.sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_hub.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 94908a08020a..d1414af98559 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -19,6 +19,8 @@ #include #include #include +#include +#include /* @@ -435,9 +437,14 @@ static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value) static inline void uv_hub_send_ipi(int pnode, int apicid, int vector) { unsigned long val; + unsigned long dmode = dest_Fixed; + + if (vector == NMI_VECTOR) + dmode = dest_NMI; val = (1UL << UVH_IPI_INT_SEND_SHFT) | ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) | + (dmode << UVH_IPI_INT_DELIVERY_MODE_SHFT) | (vector << UVH_IPI_INT_VECTOR_SHFT); uv_write_global_mmr64(pnode, UVH_IPI_INT, val); } -- cgit v1.2.3 From 9bf4e7fba8006d19846fec877b6da0616b2772de Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 21 Oct 2009 14:39:51 +0200 Subject: x86, instruction decoder: Fix test_get_len build rules Add the kernel source include file as well to the include files search path, to fix this build bug: In file included from arch/x86/tools/test_get_len.c:28: arch/x86/lib/insn.c:21:26: error: linux/string.h: No such file or directory Cc: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Jim Keniston Cc: Frederic Weisbecker LKML-Reference: <20091020165531.4145.21872.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/tools/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile index 1bd006c81564..5e295d95dc25 100644 --- a/arch/x86/tools/Makefile +++ b/arch/x86/tools/Makefile @@ -8,8 +8,8 @@ posttest: $(obj)/test_get_len vmlinux hostprogs-y := test_get_len # -I needed for generated C source and C source which in the kernel tree. -HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ +HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ -# Dependancies are also needed. +# Dependencies are also needed. $(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c -- cgit v1.2.3 From 14a3f40aafacde1dfd6912327ae14df4baf10304 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 23 Oct 2009 07:31:01 -0700 Subject: x86: Remove STACKPROTECTOR_ALL STACKPROTECTOR_ALL has a really high overhead (runtime and stack footprint) and is not really worth it protection wise (the normal STACKPROTECTOR is in effect for all functions with buffers already), so lets just remove the option entirely. Reported-by: Dave Jones Reported-by: Chuck Ebbert Signed-off-by: Arjan van de Ven Cc: Eric Sandeen LKML-Reference: <20091023073101.3dce4ebb@infradead.org> Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 4 ---- arch/x86/Makefile | 1 - 2 files changed, 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 07e01149e3bf..72ace9515a07 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1443,12 +1443,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. -config CC_STACKPROTECTOR_ALL - bool - config CC_STACKPROTECTOR bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" - select CC_STACKPROTECTOR_ALL ---help--- This option turns on the -fstack-protector GCC feature. This feature puts, at the beginning of functions, a canary value on diff --git a/arch/x86/Makefile b/arch/x86/Makefile index a012ee8ef803..d2d24c9ee64d 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -76,7 +76,6 @@ ifdef CONFIG_CC_STACKPROTECTOR cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(biarch)),y) stackp-y := -fstack-protector - stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += -fstack-protector-all KBUILD_CFLAGS += $(stackp-y) else $(warning stack protector enabled but no compiler support) -- cgit v1.2.3 From ae1b22f6e46c03cede7cea234d0bf2253b4261cf Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 26 Oct 2009 14:26:04 +1030 Subject: x86: Side-step lguest problem by only building cmpxchg8b_emu for pre-Pentium Commit 79e1dd05d1a22 "x86: Provide an alternative() based cmpxchg64()" broke lguest, even on systems which have cmpxchg8b support. The emulation code gets used until alternatives get run, but it contains native instructions, not their paravirt alternatives. The simplest fix is to turn this code off except for 386 and 486 builds. Reported-by: Johannes Stezenbach Signed-off-by: Rusty Russell Acked-by: H. Peter Anvin Cc: lguest@ozlabs.org Cc: Arjan van de Ven Cc: Jeremy Fitzhardinge Cc: Linus Torvalds LKML-Reference: <200910261426.05769.rusty@rustcorp.com.au> Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.cpu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index f2824fb8c79c..2649840d888f 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -400,7 +400,7 @@ config X86_TSC config X86_CMPXCHG64 def_bool y - depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM + depends on !M386 && !M486 # this should be set for all -march=.. options where the compiler # generates cmov. -- cgit v1.2.3 From 72ed7de74e8f0fad0d8e567ae1f987b740accb3f Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 26 Oct 2009 11:11:43 +0100 Subject: x86: crash_dump: Fix non-pae kdump kernel memory accesses Non-PAE 32-bit dump kernels may wrap an address around 4G and poke unwanted space. ptes there are 32-bit long, and since pfn << PAGE_SIZE may exceed this limit, high pfn bits are cropped and wrong address mapped by kmap_atomic_pfn in copy_oldmem_page. Don't allow this behavior in non-PAE kdump kernels by checking pfns passed into copy_oldmem_page. In the case of failure, userspace process gets EFAULT. [v2] - fix comments - move ifdefs inside the function Signed-off-by: Jiri Slaby Cc: Vivek Goyal Cc: Eric W. Biederman Cc: Simon Horman Cc: Paul Mundt LKML-Reference: <1256551903-30567-1-git-send-email-jirislaby@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/crash_dump_32.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c index f7cdb3b457aa..cd97ce18c29d 100644 --- a/arch/x86/kernel/crash_dump_32.c +++ b/arch/x86/kernel/crash_dump_32.c @@ -16,6 +16,22 @@ static void *kdump_buf_page; /* Stores the physical address of elf header of crash image. */ unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; +static inline bool is_crashed_pfn_valid(unsigned long pfn) +{ +#ifndef CONFIG_X86_PAE + /* + * non-PAE kdump kernel executed from a PAE one will crop high pte + * bits and poke unwanted space counting again from address 0, we + * don't want that. pte must fit into unsigned long. In fact the + * test checks high 12 bits for being zero (pfn will be shifted left + * by PAGE_SHIFT). + */ + return pte_pfn(pfn_pte(pfn, __pgprot(0))) == pfn; +#else + return true; +#endif +} + /** * copy_oldmem_page - copy one page from "oldmem" * @pfn: page frame number to be copied @@ -41,6 +57,9 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, if (!csize) return 0; + if (!is_crashed_pfn_valid(pfn)) + return -EFAULT; + vaddr = kmap_atomic_pfn(pfn, KM_PTE0); if (!userbuf) { -- cgit v1.2.3 From 81766741fe1eee3884219e8daaf03f466f2ed52f Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 26 Oct 2009 15:20:29 +0000 Subject: x86-64: Fix register leak in 32-bit syscall audting Restoring %ebp after the call to audit_syscall_exit() is not only unnecessary (because the register didn't get clobbered), but in the sysenter case wasn't even doing the right thing: It loaded %ebp from a location below the top of stack (RBP < ARGOFFSET), i.e. arbitrary kernel data got passed back to user mode in the register. Signed-off-by: Jan Beulich Acked-by: Roland McGrath Cc: LKML-Reference: <4AE5CC4D020000780001BD13@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32entry.S | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 1733f9f65e82..581b0568fe19 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -204,7 +204,7 @@ sysexit_from_sys_call: movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */ .endm - .macro auditsys_exit exit,ebpsave=RBP + .macro auditsys_exit exit testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) jnz ia32_ret_from_sys_call TRACE_IRQS_ON @@ -217,7 +217,6 @@ sysexit_from_sys_call: call audit_syscall_exit GET_THREAD_INFO(%r10) movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */ - movl \ebpsave-ARGOFFSET(%rsp),%ebp /* reload user register value */ movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi cli TRACE_IRQS_OFF @@ -351,7 +350,7 @@ cstar_auditsys: jmp cstar_dispatch sysretl_audit: - auditsys_exit sysretl_from_sys_call, RCX /* user %ebp in RCX slot */ + auditsys_exit sysretl_from_sys_call #endif cstar_tracesys: -- cgit v1.2.3 From 772be899bc022ef2b911c3611b487d417e3269c3 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Tue, 20 Oct 2009 12:54:02 +0800 Subject: x86: Make EFI RTC function depend on 32bit again The EFI RTC functions are only available on 32 bit. commit 7bd867df (x86: Move get/set_wallclock to x86_platform_ops) removed the 32bit dependency which leads to boot crashes on 64bit EFI systems. Add the dependency back. Solves: http://bugzilla.kernel.org/show_bug.cgi?id=14466 Tested-by: Matthew Garrett Signed-off-by: Feng Tang LKML-Reference: <20091020125402.028d66d5@feng-desktop> Signed-off-by: Thomas Gleixner --- arch/x86/kernel/efi.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index ad5bd988fb79..cdcfb122f256 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -454,8 +454,10 @@ void __init efi_init(void) if (add_efi_memmap) do_add_efi_memmap(); +#ifdef CONFIG_X86_32 x86_platform.get_wallclock = efi_get_time; x86_platform.set_wallclock = efi_set_rtc_mmss; +#endif /* Setup for EFI runtime service */ reboot_type = BOOT_EFI; -- cgit v1.2.3 From 6f9b41006af1bc489030f84ee247abc0df1edccd Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Tue, 27 Oct 2009 11:01:38 +0100 Subject: x86, apic: Clear APIC Timer Initial Count Register on shutdown Commit a98f8fd24fb24fcb9a359553e64dd6aac5cf4279 (x86: apic reset counter on shutdown) set the counter to max to avoid spurious interrupts when the timer is re-enabled. (In theory) you'll still get a spurious interrupt if spending more than 344 seconds with this interrupt disabled and then unmasking it. The right thing to do is to clear the register. This disables the interrupt from happening (at least it does on AMD hardware). Signed-off-by: Andreas Herrmann LKML-Reference: <20091027100138.GB30802@alberich.amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index dce93d4b0eaf..4c689f45b238 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -444,7 +444,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, v = apic_read(APIC_LVTT); v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); apic_write(APIC_LVTT, v); - apic_write(APIC_TMICT, 0xffffffff); + apic_write(APIC_TMICT, 0); break; case CLOCK_EVT_MODE_RESUME: /* Nothing to do here */ -- cgit v1.2.3 From 973df35ed9ff7806403e793a2ad7e9bd4c2fd2a9 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 27 Oct 2009 16:54:19 -0700 Subject: xen: set up mmu_ops before trying to set any ptes xen_setup_stackprotector() ends up trying to set page protections, so we need to have vm_mmu_ops set up before trying to do so. Failing to do so causes an early boot crash. [ Impact: Fix early crash under Xen. ] Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/enlighten.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 3439616d69f1..23a4d80fb39e 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1075,6 +1075,8 @@ asmlinkage void __init xen_start_kernel(void) * Set up some pagetable state before starting to set any ptes. */ + xen_init_mmu_ops(); + /* Prevent unwanted bits from being set in PTEs. */ __supported_pte_mask &= ~_PAGE_GLOBAL; if (!xen_initial_domain()) @@ -1099,7 +1101,6 @@ asmlinkage void __init xen_start_kernel(void) */ xen_setup_stackprotector(); - xen_init_mmu_ops(); xen_init_irq_ops(); xen_init_cpuid_mask(); -- cgit v1.2.3 From ca0207114f1708b563f510b7781a360ec5b98359 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 28 Oct 2009 18:02:26 +0100 Subject: x86/amd-iommu: Un__init function required on shutdown The function iommu_feature_disable is required on system shutdown to disable the IOMMU but it is marked as __init. This may result in a panic if the memory is reused. This patch fixes this bug. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 498c8c79aaa5..1e423b2add15 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -240,7 +240,7 @@ static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); } -static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) +static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) { u32 ctrl; -- cgit v1.2.3 From 7f387d3f2421781610588faa2f49ae5f1737b137 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 27 Oct 2009 16:42:04 -0400 Subject: x86: Fix SSE opcode map bug Fix superscripts position because some superscripts of SSE opcode are not put in correct position. Signed-off-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju LKML-Reference: <20091027204204.30545.97296.stgit@harusame> Signed-off-by: Ingo Molnar --- arch/x86/lib/x86-opcode-map.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 701c4678687b..efef3cada84e 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -401,9 +401,9 @@ Referrer: 2-byte escape 62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66) 63: packsswb Pq,Qq | packsswb Vdq,Wdq (66) 64: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66) -65: pcmpgtw Pq,Qq | pcmpgtw(66) Vdq,Wdq +65: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66) 66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66) -67: packuswb Pq,Qq | packuswb(66) Vdq,Wdq +67: packuswb Pq,Qq | packuswb Vdq,Wdq (66) 68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66) 69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66) 6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66) @@ -425,8 +425,8 @@ Referrer: 2-byte escape 79: VMWRITE Gd/q,Ed/q 7a: 7b: -7c: haddps(F2) Vps,Wps | haddpd(66) Vpd,Wpd -7d: hsubps(F2) Vps,Wps | hsubpd(66) Vpd,Wpd +7c: haddps Vps,Wps (F2) | haddpd Vpd,Wpd (66) +7d: hsubps Vps,Wps (F2) | hsubpd Vpd,Wpd (66) 7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66) | movq Vq,Wq (F3) 7f: movq Qq,Pq | movdqa Wdq,Vdq (66) | movdqu Wdq,Vdq (F3) # 0x0f 0x80-0x8f @@ -574,7 +574,7 @@ Referrer: 3-byte escape 1 01: phaddw Pq,Qq | phaddw Vdq,Wdq (66) 02: phaddd Pq,Qq | phaddd Vdq,Wdq (66) 03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66) -04: pmaddubsw Pq,Qq | pmaddubsw (66)Vdq,Wdq +04: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66) 05: phsubw Pq,Qq | phsubw Vdq,Wdq (66) 06: phsubd Pq,Qq | phsubd Vdq,Wdq (66) 07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66) -- cgit v1.2.3 From 04d46c1b13b02e1e5c24eb270a01cf3f94ee4d04 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 27 Oct 2009 16:42:11 -0400 Subject: x86: Merge INAT_REXPFX into INAT_PFX_* Merge INAT_REXPFX into INAT_PFX_* macro and rename it to INAT_PFX_REX. Signed-off-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju LKML-Reference: <20091027204211.30545.58090.stgit@harusame> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/inat.h | 36 ++++++++++++++++++++---------------- arch/x86/lib/insn.c | 2 +- arch/x86/tools/gen-insn-attr-x86.awk | 6 +++--- 3 files changed, 24 insertions(+), 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h index 2866fddd1848..c2487d2aca25 100644 --- a/arch/x86/include/asm/inat.h +++ b/arch/x86/include/asm/inat.h @@ -30,10 +30,11 @@ #define INAT_OPCODE_TABLE_SIZE 256 #define INAT_GROUP_TABLE_SIZE 8 -/* Legacy instruction prefixes */ +/* Legacy last prefixes */ #define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ #define INAT_PFX_REPNE 2 /* 0xF2 */ /* LPFX2 */ #define INAT_PFX_REPE 3 /* 0xF3 */ /* LPFX3 */ +/* Other Legacy prefixes */ #define INAT_PFX_LOCK 4 /* 0xF0 */ #define INAT_PFX_CS 5 /* 0x2E */ #define INAT_PFX_DS 6 /* 0x3E */ @@ -42,8 +43,11 @@ #define INAT_PFX_GS 9 /* 0x65 */ #define INAT_PFX_SS 10 /* 0x36 */ #define INAT_PFX_ADDRSZ 11 /* 0x67 */ +/* x86-64 REX prefix */ +#define INAT_PFX_REX 12 /* 0x4X */ -#define INAT_LPREFIX_MAX 3 +#define INAT_LSTPFX_MAX 3 +#define INAT_LGCPFX_MAX 11 /* Immediate size */ #define INAT_IMM_BYTE 1 @@ -75,12 +79,11 @@ #define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS) /* Flags */ #define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS) -#define INAT_REXPFX (1 << INAT_FLAG_OFFS) -#define INAT_MODRM (1 << (INAT_FLAG_OFFS + 1)) -#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 2)) -#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 3)) -#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 4)) -#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 5)) +#define INAT_MODRM (1 << (INAT_FLAG_OFFS)) +#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1)) +#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2)) +#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3)) +#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) /* Attribute making macros for attribute tables */ #define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) #define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) @@ -97,9 +100,10 @@ extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_attr_t esc_attr); /* Attribute checking functions */ -static inline int inat_is_prefix(insn_attr_t attr) +static inline int inat_is_legacy_prefix(insn_attr_t attr) { - return attr & INAT_PFX_MASK; + attr &= INAT_PFX_MASK; + return attr && attr <= INAT_LGCPFX_MAX; } static inline int inat_is_address_size_prefix(insn_attr_t attr) @@ -112,9 +116,14 @@ static inline int inat_is_operand_size_prefix(insn_attr_t attr) return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ; } +static inline int inat_is_rex_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_REX; +} + static inline int inat_last_prefix_id(insn_attr_t attr) { - if ((attr & INAT_PFX_MASK) > INAT_LPREFIX_MAX) + if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX) return 0; else return attr & INAT_PFX_MASK; @@ -155,11 +164,6 @@ static inline int inat_immediate_size(insn_attr_t attr) return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS; } -static inline int inat_is_rex_prefix(insn_attr_t attr) -{ - return attr & INAT_REXPFX; -} - static inline int inat_has_modrm(insn_attr_t attr) { return attr & INAT_MODRM; diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index dfd56a30053f..9f483179a8a6 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -69,7 +69,7 @@ void insn_get_prefixes(struct insn *insn) lb = 0; b = peek_next(insn_byte_t, insn); attr = inat_get_opcode_attribute(b); - while (inat_is_prefix(attr)) { + while (inat_is_legacy_prefix(attr)) { /* Skip if same prefix */ for (i = 0; i < nb; i++) if (prefixes->bytes[i] == b) diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index 19ba096b7dd1..7d5492951e22 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -278,7 +278,7 @@ function convert_operands(opnd, i,imm,mod) # check REX prefix if (match(opcode, rex_expr)) - flags = add_flags(flags, "INAT_REXPFX") + flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)") # check coprocessor escape : TODO if (match(opcode, fpu_expr)) @@ -316,7 +316,7 @@ END { # print escape opcode map's array print "/* Escape opcode map array */" print "const insn_attr_t const *inat_escape_tables[INAT_ESC_MAX + 1]" \ - "[INAT_LPREFIX_MAX + 1] = {" + "[INAT_LSTPFX_MAX + 1] = {" for (i = 0; i < geid; i++) for (j = 0; j < max_lprefix; j++) if (etable[i,j]) @@ -325,7 +325,7 @@ END { # print group opcode map's array print "/* Group opcode map array */" print "const insn_attr_t const *inat_group_tables[INAT_GRP_MAX + 1]"\ - "[INAT_LPREFIX_MAX + 1] = {" + "[INAT_LSTPFX_MAX + 1] = {" for (i = 0; i < ggid; i++) for (j = 0; j < max_lprefix; j++) if (gtable[i,j]) -- cgit v1.2.3 From 82cb57028c864822c5a260f806d051e2ce28c86a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 27 Oct 2009 16:42:19 -0400 Subject: x86: Add pclmulq to x86 opcode map Add pclmulq opcode to x86 opcode map. Signed-off-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju LKML-Reference: <20091027204219.30545.82039.stgit@harusame> Signed-off-by: Ingo Molnar --- arch/x86/lib/x86-opcode-map.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index efef3cada84e..1f41246e6e3c 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -672,6 +672,7 @@ Referrer: 3-byte escape 2 40: dpps Vdq,Wdq,Ib (66) 41: dppd Vdq,Wdq,Ib (66) 42: mpsadbw Vdq,Wdq,Ib (66) +44: pclmulq Vdq,Wdq,Ib (66) 60: pcmpestrm Vdq,Wdq,Ib (66) 61: pcmpestri Vdq,Wdq,Ib (66) 62: pcmpistrm Vdq,Wdq,Ib (66) -- cgit v1.2.3 From e0e492e99b372c6990a5daca9e4683c341f1330e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 27 Oct 2009 16:42:27 -0400 Subject: x86: AVX instruction set decoder support Add Intel AVX(Advanced Vector Extensions) instruction set support to x86 instruction decoder. This adds insn.vex_prefix field for storing VEX prefixes, and introduces some original tags for expressing opcodes attributes. Signed-off-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju LKML-Reference: <20091027204226.30545.23451.stgit@harusame> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/inat.h | 32 ++- arch/x86/include/asm/insn.h | 43 +++- arch/x86/lib/inat.c | 12 + arch/x86/lib/insn.c | 52 +++++ arch/x86/lib/x86-opcode-map.txt | 431 ++++++++++++++++++----------------- arch/x86/tools/gen-insn-attr-x86.awk | 94 ++++++-- 6 files changed, 431 insertions(+), 233 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h index c2487d2aca25..205b063e3e32 100644 --- a/arch/x86/include/asm/inat.h +++ b/arch/x86/include/asm/inat.h @@ -32,8 +32,8 @@ /* Legacy last prefixes */ #define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ -#define INAT_PFX_REPNE 2 /* 0xF2 */ /* LPFX2 */ -#define INAT_PFX_REPE 3 /* 0xF3 */ /* LPFX3 */ +#define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */ +#define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */ /* Other Legacy prefixes */ #define INAT_PFX_LOCK 4 /* 0xF0 */ #define INAT_PFX_CS 5 /* 0x2E */ @@ -45,6 +45,9 @@ #define INAT_PFX_ADDRSZ 11 /* 0x67 */ /* x86-64 REX prefix */ #define INAT_PFX_REX 12 /* 0x4X */ +/* AVX VEX prefixes */ +#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ +#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ #define INAT_LSTPFX_MAX 3 #define INAT_LGCPFX_MAX 11 @@ -84,6 +87,8 @@ #define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2)) #define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3)) #define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) +#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) +#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) /* Attribute making macros for attribute tables */ #define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) #define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) @@ -98,6 +103,9 @@ extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, insn_attr_t esc_attr); +extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, + insn_byte_t vex_m, + insn_byte_t vex_pp); /* Attribute checking functions */ static inline int inat_is_legacy_prefix(insn_attr_t attr) @@ -129,6 +137,17 @@ static inline int inat_last_prefix_id(insn_attr_t attr) return attr & INAT_PFX_MASK; } +static inline int inat_is_vex_prefix(insn_attr_t attr) +{ + attr &= INAT_PFX_MASK; + return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3; +} + +static inline int inat_is_vex3_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3; +} + static inline int inat_is_escape(insn_attr_t attr) { return attr & INAT_ESC_MASK; @@ -189,4 +208,13 @@ static inline int inat_has_variant(insn_attr_t attr) return attr & INAT_VARIANT; } +static inline int inat_accept_vex(insn_attr_t attr) +{ + return attr & INAT_VEXOK; +} + +static inline int inat_must_vex(insn_attr_t attr) +{ + return attr & INAT_VEXONLY; +} #endif diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 12b4e3751d3f..96c2e0ad04ca 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -39,6 +39,7 @@ struct insn { * prefixes.bytes[3]: last prefix */ struct insn_field rex_prefix; /* REX prefix */ + struct insn_field vex_prefix; /* VEX prefix */ struct insn_field opcode; /* * opcode.bytes[0]: opcode1 * opcode.bytes[1]: opcode2 @@ -80,6 +81,19 @@ struct insn { #define X86_REX_X(rex) ((rex) & 2) #define X86_REX_B(rex) ((rex) & 1) +/* VEX bit flags */ +#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */ +#define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */ +#define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */ +#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ +#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ +/* VEX bit fields */ +#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ +#define X86_VEX2_M 1 /* VEX2.M always 1 */ +#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ +#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ +#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ + /* The last prefix is needed for two-byte and three-byte opcodes */ static inline insn_byte_t insn_last_prefix(struct insn *insn) { @@ -114,15 +128,42 @@ static inline void kernel_insn_init(struct insn *insn, const void *kaddr) #endif } +static inline int insn_is_avx(struct insn *insn) +{ + if (!insn->prefixes.got) + insn_get_prefixes(insn); + return (insn->vex_prefix.value != 0); +} + +static inline insn_byte_t insn_vex_m_bits(struct insn *insn) +{ + if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ + return X86_VEX2_M; + else + return X86_VEX3_M(insn->vex_prefix.bytes[1]); +} + +static inline insn_byte_t insn_vex_p_bits(struct insn *insn) +{ + if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ + return X86_VEX_P(insn->vex_prefix.bytes[1]); + else + return X86_VEX_P(insn->vex_prefix.bytes[2]); +} + /* Offset of each field from kaddr */ static inline int insn_offset_rex_prefix(struct insn *insn) { return insn->prefixes.nbytes; } -static inline int insn_offset_opcode(struct insn *insn) +static inline int insn_offset_vex_prefix(struct insn *insn) { return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes; } +static inline int insn_offset_opcode(struct insn *insn) +{ + return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes; +} static inline int insn_offset_modrm(struct insn *insn) { return insn_offset_opcode(insn) + insn->opcode.nbytes; diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c index 3fb5998b823e..46fc4ee09fc4 100644 --- a/arch/x86/lib/inat.c +++ b/arch/x86/lib/inat.c @@ -76,3 +76,15 @@ insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, inat_group_common_attribute(grp_attr); } +insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, + insn_byte_t vex_p) +{ + const insn_attr_t *table; + if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) + return 0; + table = inat_avx_tables[vex_m][vex_p]; + if (!table) + return 0; + return table[opcode]; +} + diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 9f483179a8a6..9f33b984d0ef 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -28,6 +28,9 @@ #define peek_next(t, insn) \ ({t r; r = *(t*)insn->next_byte; r; }) +#define peek_nbyte_next(t, insn, n) \ + ({t r; r = *(t*)((insn)->next_byte + n); r; }) + /** * insn_init() - initialize struct insn * @insn: &struct insn to be initialized @@ -107,6 +110,7 @@ found: insn->prefixes.bytes[3] = lb; } + /* Decode REX prefix */ if (insn->x86_64) { b = peek_next(insn_byte_t, insn); attr = inat_get_opcode_attribute(b); @@ -120,6 +124,39 @@ found: } } insn->rex_prefix.got = 1; + + /* Decode VEX prefix */ + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + if (inat_is_vex_prefix(attr)) { + insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1); + if (!insn->x86_64) { + /* + * In 32-bits mode, if the [7:6] bits (mod bits of + * ModRM) on the second byte are not 11b, it is + * LDS or LES. + */ + if (X86_MODRM_MOD(b2) != 3) + goto vex_end; + } + insn->vex_prefix.bytes[0] = b; + insn->vex_prefix.bytes[1] = b2; + if (inat_is_vex3_prefix(attr)) { + b2 = peek_nbyte_next(insn_byte_t, insn, 2); + insn->vex_prefix.bytes[2] = b2; + insn->vex_prefix.nbytes = 3; + insn->next_byte += 3; + if (insn->x86_64 && X86_VEX_W(b2)) + /* VEX.W overrides opnd_size */ + insn->opnd_bytes = 8; + } else { + insn->vex_prefix.nbytes = 2; + insn->next_byte += 2; + } + } +vex_end: + insn->vex_prefix.got = 1; + prefixes->got = 1; return; } @@ -147,6 +184,18 @@ void insn_get_opcode(struct insn *insn) op = get_next(insn_byte_t, insn); opcode->bytes[0] = op; opcode->nbytes = 1; + + /* Check if there is VEX prefix or not */ + if (insn_is_avx(insn)) { + insn_byte_t m, p; + m = insn_vex_m_bits(insn); + p = insn_vex_p_bits(insn); + insn->attr = inat_get_avx_attribute(op, m, p); + if (!inat_accept_vex(insn->attr)) + insn->attr = 0; /* This instruction is bad */ + goto end; /* VEX has only 1 byte for opcode */ + } + insn->attr = inat_get_opcode_attribute(op); while (inat_is_escape(insn->attr)) { /* Get escaped opcode */ @@ -155,6 +204,9 @@ void insn_get_opcode(struct insn *insn) pfx = insn_last_prefix(insn); insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); } + if (inat_must_vex(insn->attr)) + insn->attr = 0; /* This instruction is bad */ +end: opcode->got = 1; } diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 1f41246e6e3c..9887bfeeb2db 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -3,6 +3,7 @@ # # Table: table-name # Referrer: escaped-name +# AVXcode: avx-code # opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] # (or) # opcode: escape # escaped-name @@ -13,9 +14,16 @@ # reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] # EndTable # +# AVX Superscripts +# (VEX): this opcode can accept VEX prefix. +# (oVEX): this opcode requires VEX prefix. +# (o128): this opcode only supports 128bit VEX. +# (o256): this opcode only supports 256bit VEX. +# Table: one byte opcode Referrer: +AVXcode: # 0x00 - 0x0f 00: ADD Eb,Gb 01: ADD Ev,Gv @@ -225,8 +233,8 @@ c0: Grp2 Eb,Ib (1A) c1: Grp2 Ev,Ib (1A) c2: RETN Iw (f64) c3: RETN -c4: LES Gz,Mp (i64) -c5: LDS Gz,Mp (i64) +c4: LES Gz,Mp (i64) | 3bytes-VEX (Prefix) +c5: LDS Gz,Mp (i64) | 2bytes-VEX (Prefix) c6: Grp11 Eb,Ib (1A) c7: Grp11 Ev,Iz (1A) c8: ENTER Iw,Ib @@ -290,8 +298,9 @@ fe: Grp4 (1A) ff: Grp5 (1A) EndTable -Table: 2-byte opcode # First Byte is 0x0f +Table: 2-byte opcode (0x0f) Referrer: 2-byte escape +AVXcode: 1 # 0x0f 0x00-0x0f 00: Grp6 (1A) 01: Grp7 (1A) @@ -311,14 +320,14 @@ Referrer: 2-byte escape # 3DNow! uses the last imm byte as opcode extension. 0f: 3DNow! Pq,Qq,Ib # 0x0f 0x10-0x1f -10: movups Vps,Wps | movss Vss,Wss (F3) | movupd Vpd,Wpd (66) | movsd Vsd,Wsd (F2) -11: movups Wps,Vps | movss Wss,Vss (F3) | movupd Wpd,Vpd (66) | movsd Wsd,Vsd (F2) -12: movlps Vq,Mq | movlpd Vq,Mq (66) | movhlps Vq,Uq | movddup Vq,Wq (F2) | movsldup Vq,Wq (F3) -13: mpvlps Mq,Vq | movlpd Mq,Vq (66) -14: unpcklps Vps,Wq | unpcklpd Vpd,Wq (66) -15: unpckhps Vps,Wq | unpckhpd Vpd,Wq (66) -16: movhps Vq,Mq | movhpd Vq,Mq (66) | movlsps Vq,Uq | movshdup Vq,Wq (F3) -17: movhps Mq,Vq | movhpd Mq,Vq (66) +10: movups Vps,Wps (VEX) | movss Vss,Wss (F3),(VEX),(o128) | movupd Vpd,Wpd (66),(VEX) | movsd Vsd,Wsd (F2),(VEX),(o128) +11: movups Wps,Vps (VEX) | movss Wss,Vss (F3),(VEX),(o128) | movupd Wpd,Vpd (66),(VEX) | movsd Wsd,Vsd (F2),(VEX),(o128) +12: movlps Vq,Mq (VEX),(o128) | movlpd Vq,Mq (66),(VEX),(o128) | movhlps Vq,Uq (VEX),(o128) | movddup Vq,Wq (F2),(VEX) | movsldup Vq,Wq (F3),(VEX) +13: mpvlps Mq,Vq (VEX),(o128) | movlpd Mq,Vq (66),(VEX),(o128) +14: unpcklps Vps,Wq (VEX) | unpcklpd Vpd,Wq (66),(VEX) +15: unpckhps Vps,Wq (VEX) | unpckhpd Vpd,Wq (66),(VEX) +16: movhps Vq,Mq (VEX),(o128) | movhpd Vq,Mq (66),(VEX),(o128) | movlsps Vq,Uq (VEX),(o128) | movshdup Vq,Wq (F3),(VEX) +17: movhps Mq,Vq (VEX),(o128) | movhpd Mq,Vq (66),(VEX),(o128) 18: Grp16 (1A) 19: 1a: @@ -336,14 +345,14 @@ Referrer: 2-byte escape 25: 26: 27: -28: movaps Vps,Wps | movapd Vpd,Wpd (66) -29: movaps Wps,Vps | movapd Wpd,Vpd (66) -2a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2) -2b: movntps Mps,Vps | movntpd Mpd,Vpd (66) -2c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2) -2d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2) -2e: ucomiss Vss,Wss | ucomisd Vsd,Wsd (66) -2f: comiss Vss,Wss | comisd Vsd,Wsd (66) +28: movaps Vps,Wps (VEX) | movapd Vpd,Wpd (66),(VEX) +29: movaps Wps,Vps (VEX) | movapd Wpd,Vpd (66),(VEX) +2a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3),(VEX),(o128) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2),(VEX),(o128) +2b: movntps Mps,Vps (VEX) | movntpd Mpd,Vpd (66),(VEX) +2c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3),(VEX),(o128) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2),(VEX),(o128) +2d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3),(VEX),(o128) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2),(VEX),(o128) +2e: ucomiss Vss,Wss (VEX),(o128) | ucomisd Vsd,Wsd (66),(VEX),(o128) +2f: comiss Vss,Wss (VEX),(o128) | comisd Vsd,Wsd (66),(VEX),(o128) # 0x0f 0x30-0x3f 30: WRMSR 31: RDTSC @@ -379,56 +388,56 @@ Referrer: 2-byte escape 4e: CMOVLE/NG Gv,Ev 4f: CMOVNLE/G Gv,Ev # 0x0f 0x50-0x5f -50: movmskps Gd/q,Ups | movmskpd Gd/q,Upd (66) -51: sqrtps Vps,Wps | sqrtss Vss,Wss (F3) | sqrtpd Vpd,Wpd (66) | sqrtsd Vsd,Wsd (F2) -52: rsqrtps Vps,Wps | rsqrtss Vss,Wss (F3) -53: rcpps Vps,Wps | rcpss Vss,Wss (F3) -54: andps Vps,Wps | andpd Vpd,Wpd (66) -55: andnps Vps,Wps | andnpd Vpd,Wpd (66) -56: orps Vps,Wps | orpd Vpd,Wpd (66) -57: xorps Vps,Wps | xorpd Vpd,Wpd (66) -58: addps Vps,Wps | addss Vss,Wss (F3) | addpd Vpd,Wpd (66) | addsd Vsd,Wsd (F2) -59: mulps Vps,Wps | mulss Vss,Wss (F3) | mulpd Vpd,Wpd (66) | mulsd Vsd,Wsd (F2) -5a: cvtps2pd Vpd,Wps | cvtss2sd Vsd,Wss (F3) | cvtpd2ps Vps,Wpd (66) | cvtsd2ss Vsd,Wsd (F2) -5b: cvtdq2ps Vps,Wdq | cvtps2dq Vdq,Wps (66) | cvttps2dq Vdq,Wps (F3) -5c: subps Vps,Wps | subss Vss,Wss (F3) | subpd Vpd,Wpd (66) | subsd Vsd,Wsd (F2) -5d: minps Vps,Wps | minss Vss,Wss (F3) | minpd Vpd,Wpd (66) | minsd Vsd,Wsd (F2) -5e: divps Vps,Wps | divss Vss,Wss (F3) | divpd Vpd,Wpd (66) | divsd Vsd,Wsd (F2) -5f: maxps Vps,Wps | maxss Vss,Wss (F3) | maxpd Vpd,Wpd (66) | maxsd Vsd,Wsd (F2) +50: movmskps Gd/q,Ups (VEX) | movmskpd Gd/q,Upd (66),(VEX) +51: sqrtps Vps,Wps (VEX) | sqrtss Vss,Wss (F3),(VEX),(o128) | sqrtpd Vpd,Wpd (66),(VEX) | sqrtsd Vsd,Wsd (F2),(VEX),(o128) +52: rsqrtps Vps,Wps (VEX) | rsqrtss Vss,Wss (F3),(VEX),(o128) +53: rcpps Vps,Wps (VEX) | rcpss Vss,Wss (F3),(VEX),(o128) +54: andps Vps,Wps (VEX) | andpd Vpd,Wpd (66),(VEX) +55: andnps Vps,Wps (VEX) | andnpd Vpd,Wpd (66),(VEX) +56: orps Vps,Wps (VEX) | orpd Vpd,Wpd (66),(VEX) +57: xorps Vps,Wps (VEX) | xorpd Vpd,Wpd (66),(VEX) +58: addps Vps,Wps (VEX) | addss Vss,Wss (F3),(VEX),(o128) | addpd Vpd,Wpd (66),(VEX) | addsd Vsd,Wsd (F2),(VEX),(o128) +59: mulps Vps,Wps (VEX) | mulss Vss,Wss (F3),(VEX),(o128) | mulpd Vpd,Wpd (66),(VEX) | mulsd Vsd,Wsd (F2),(VEX),(o128) +5a: cvtps2pd Vpd,Wps (VEX) | cvtss2sd Vsd,Wss (F3),(VEX),(o128) | cvtpd2ps Vps,Wpd (66),(VEX) | cvtsd2ss Vsd,Wsd (F2),(VEX),(o128) +5b: cvtdq2ps Vps,Wdq (VEX) | cvtps2dq Vdq,Wps (66),(VEX) | cvttps2dq Vdq,Wps (F3),(VEX) +5c: subps Vps,Wps (VEX) | subss Vss,Wss (F3),(VEX),(o128) | subpd Vpd,Wpd (66),(VEX) | subsd Vsd,Wsd (F2),(VEX),(o128) +5d: minps Vps,Wps (VEX) | minss Vss,Wss (F3),(VEX),(o128) | minpd Vpd,Wpd (66),(VEX) | minsd Vsd,Wsd (F2),(VEX),(o128) +5e: divps Vps,Wps (VEX) | divss Vss,Wss (F3),(VEX),(o128) | divpd Vpd,Wpd (66),(VEX) | divsd Vsd,Wsd (F2),(VEX),(o128) +5f: maxps Vps,Wps (VEX) | maxss Vss,Wss (F3),(VEX),(o128) | maxpd Vpd,Wpd (66),(VEX) | maxsd Vsd,Wsd (F2),(VEX),(o128) # 0x0f 0x60-0x6f -60: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66) -61: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66) -62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66) -63: packsswb Pq,Qq | packsswb Vdq,Wdq (66) -64: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66) -65: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66) -66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66) -67: packuswb Pq,Qq | packuswb Vdq,Wdq (66) -68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66) -69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66) -6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66) -6b: packssdw Pq,Qd | packssdw Vdq,Wdq (66) -6c: punpcklqdq Vdq,Wdq (66) -6d: punpckhqdq Vdq,Wdq (66) -6e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66) -6f: movq Pq,Qq | movdqa Vdq,Wdq (66) | movdqu Vdq,Wdq (F3) +60: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66),(VEX),(o128) +61: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66),(VEX),(o128) +62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66),(VEX),(o128) +63: packsswb Pq,Qq | packsswb Vdq,Wdq (66),(VEX),(o128) +64: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66),(VEX),(o128) +65: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66),(VEX),(o128) +66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66),(VEX),(o128) +67: packuswb Pq,Qq | packuswb Vdq,Wdq (66),(VEX),(o128) +68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66),(VEX),(o128) +69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66),(VEX),(o128) +6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66),(VEX),(o128) +6b: packssdw Pq,Qd | packssdw Vdq,Wdq (66),(VEX),(o128) +6c: punpcklqdq Vdq,Wdq (66),(VEX),(o128) +6d: punpckhqdq Vdq,Wdq (66),(VEX),(o128) +6e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66),(VEX),(o128) +6f: movq Pq,Qq | movdqa Vdq,Wdq (66),(VEX) | movdqu Vdq,Wdq (F3),(VEX) # 0x0f 0x70-0x7f -70: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66) | pshufhw Vdq,Wdq,Ib (F3) | pshuflw VdqWdq,Ib (F2) +70: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66),(VEX),(o128) | pshufhw Vdq,Wdq,Ib (F3),(VEX),(o128) | pshuflw VdqWdq,Ib (F2),(VEX),(o128) 71: Grp12 (1A) 72: Grp13 (1A) 73: Grp14 (1A) -74: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66) -75: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66) -76: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66) -77: emms +74: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66),(VEX),(o128) +75: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66),(VEX),(o128) +76: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66),(VEX),(o128) +77: emms/vzeroupper/vzeroall (VEX) 78: VMREAD Ed/q,Gd/q 79: VMWRITE Gd/q,Ed/q 7a: 7b: -7c: haddps Vps,Wps (F2) | haddpd Vpd,Wpd (66) -7d: hsubps Vps,Wps (F2) | hsubpd Vpd,Wpd (66) -7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66) | movq Vq,Wq (F3) -7f: movq Qq,Pq | movdqa Wdq,Vdq (66) | movdqu Wdq,Vdq (F3) +7c: haddps Vps,Wps (F2),(VEX) | haddpd Vpd,Wpd (66),(VEX) +7d: hsubps Vps,Wps (F2),(VEX) | hsubpd Vpd,Wpd (66),(VEX) +7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66),(VEX),(o128) | movq Vq,Wq (F3),(VEX),(o128) +7f: movq Qq,Pq | movdqa Wdq,Vdq (66),(VEX) | movdqu Wdq,Vdq (F3),(VEX) # 0x0f 0x80-0x8f 80: JO Jz (f64) 81: JNO Jz (f64) @@ -500,11 +509,11 @@ bf: MOVSX Gv,Ew # 0x0f 0xc0-0xcf c0: XADD Eb,Gb c1: XADD Ev,Gv -c2: cmpps Vps,Wps,Ib | cmpss Vss,Wss,Ib (F3) | cmppd Vpd,Wpd,Ib (66) | cmpsd Vsd,Wsd,Ib (F2) +c2: cmpps Vps,Wps,Ib (VEX) | cmpss Vss,Wss,Ib (F3),(VEX),(o128) | cmppd Vpd,Wpd,Ib (66),(VEX) | cmpsd Vsd,Wsd,Ib (F2),(VEX) c3: movnti Md/q,Gd/q -c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66) -c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66) -c6: shufps Vps,Wps,Ib | shufpd Vpd,Wpd,Ib (66) +c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66),(VEX),(o128) +c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66),(VEX),(o128) +c6: shufps Vps,Wps,Ib (VEX) | shufpd Vpd,Wpd,Ib (66),(VEX) c7: Grp9 (1A) c8: BSWAP RAX/EAX/R8/R8D c9: BSWAP RCX/ECX/R9/R9D @@ -515,77 +524,78 @@ cd: BSWAP RBP/EBP/R13/R13D ce: BSWAP RSI/ESI/R14/R14D cf: BSWAP RDI/EDI/R15/R15D # 0x0f 0xd0-0xdf -d0: addsubps Vps,Wps (F2) | addsubpd Vpd,Wpd (66) -d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66) -d2: psrld Pq,Qq | psrld Vdq,Wdq (66) -d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66) -d4: paddq Pq,Qq | paddq Vdq,Wdq (66) -d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66) -d6: movq Wq,Vq (66) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) -d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66) -d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66) -d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66) -da: pminub Pq,Qq | pminub Vdq,Wdq (66) -db: pand Pq,Qq | pand Vdq,Wdq (66) -dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66) -dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66) -de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66) -df: pandn Pq,Qq | pandn Vdq,Wdq (66) +d0: addsubps Vps,Wps (F2),(VEX) | addsubpd Vpd,Wpd (66),(VEX) +d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66),(VEX),(o128) +d2: psrld Pq,Qq | psrld Vdq,Wdq (66),(VEX),(o128) +d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66),(VEX),(o128) +d4: paddq Pq,Qq | paddq Vdq,Wdq (66),(VEX),(o128) +d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66),(VEX),(o128) +d6: movq Wq,Vq (66),(VEX),(o128) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) +d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66),(VEX),(o128) +d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66),(VEX),(o128) +d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66),(VEX),(o128) +da: pminub Pq,Qq | pminub Vdq,Wdq (66),(VEX),(o128) +db: pand Pq,Qq | pand Vdq,Wdq (66),(VEX),(o128) +dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66),(VEX),(o128) +dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66),(VEX),(o128) +de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66),(VEX),(o128) +df: pandn Pq,Qq | pandn Vdq,Wdq (66),(VEX),(o128) # 0x0f 0xe0-0xef -e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66) -e1: psraw Pq,Qq | psraw Vdq,Wdq (66) -e2: psrad Pq,Qq | psrad Vdq,Wdq (66) -e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66) -e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66) -e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66) -e6: cvtpd2dq Vdq,Wpd (F2) | cvttpd2dq Vdq,Wpd (66) | cvtdq2pd Vpd,Wdq (F3) -e7: movntq Mq,Pq | movntdq Mdq,Vdq (66) -e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66) -e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66) -ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66) -eb: por Pq,Qq | por Vdq,Wdq (66) -ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66) -ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66) -ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66) -ef: pxor Pq,Qq | pxor Vdq,Wdq (66) +e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66),(VEX),(o128) +e1: psraw Pq,Qq | psraw Vdq,Wdq (66),(VEX),(o128) +e2: psrad Pq,Qq | psrad Vdq,Wdq (66),(VEX),(o128) +e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66),(VEX),(o128) +e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66),(VEX),(o128) +e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66),(VEX),(o128) +e6: cvtpd2dq Vdq,Wpd (F2),(VEX) | cvttpd2dq Vdq,Wpd (66),(VEX) | cvtdq2pd Vpd,Wdq (F3),(VEX) +e7: movntq Mq,Pq | movntdq Mdq,Vdq (66),(VEX) +e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66),(VEX),(o128) +e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66),(VEX),(o128) +ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66),(VEX),(o128) +eb: por Pq,Qq | por Vdq,Wdq (66),(VEX),(o128) +ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66),(VEX),(o128) +ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66),(VEX),(o128) +ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66),(VEX),(o128) +ef: pxor Pq,Qq | pxor Vdq,Wdq (66),(VEX),(o128) # 0x0f 0xf0-0xff -f0: lddqu Vdq,Mdq (F2) -f1: psllw Pq,Qq | psllw Vdq,Wdq (66) -f2: pslld Pq,Qq | pslld Vdq,Wdq (66) -f3: psllq Pq,Qq | psllq Vdq,Wdq (66) -f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66) -f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66) -f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66) -f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66) -f8: psubb Pq,Qq | psubb Vdq,Wdq (66) -f9: psubw Pq,Qq | psubw Vdq,Wdq (66) -fa: psubd Pq,Qq | psubd Vdq,Wdq (66) -fb: psubq Pq,Qq | psubq Vdq,Wdq (66) -fc: paddb Pq,Qq | paddb Vdq,Wdq (66) -fd: paddw Pq,Qq | paddw Vdq,Wdq (66) -fe: paddd Pq,Qq | paddd Vdq,Wdq (66) +f0: lddqu Vdq,Mdq (F2),(VEX) +f1: psllw Pq,Qq | psllw Vdq,Wdq (66),(VEX),(o128) +f2: pslld Pq,Qq | pslld Vdq,Wdq (66),(VEX),(o128) +f3: psllq Pq,Qq | psllq Vdq,Wdq (66),(VEX),(o128) +f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66),(VEX),(o128) +f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66),(VEX),(o128) +f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66),(VEX),(o128) +f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66),(VEX),(o128) +f8: psubb Pq,Qq | psubb Vdq,Wdq (66),(VEX),(o128) +f9: psubw Pq,Qq | psubw Vdq,Wdq (66),(VEX),(o128) +fa: psubd Pq,Qq | psubd Vdq,Wdq (66),(VEX),(o128) +fb: psubq Pq,Qq | psubq Vdq,Wdq (66),(VEX),(o128) +fc: paddb Pq,Qq | paddb Vdq,Wdq (66),(VEX),(o128) +fd: paddw Pq,Qq | paddw Vdq,Wdq (66),(VEX),(o128) +fe: paddd Pq,Qq | paddd Vdq,Wdq (66),(VEX),(o128) ff: EndTable Table: 3-byte opcode 1 (0x0f 0x38) Referrer: 3-byte escape 1 +AVXcode: 2 # 0x0f 0x38 0x00-0x0f -00: pshufb Pq,Qq | pshufb Vdq,Wdq (66) -01: phaddw Pq,Qq | phaddw Vdq,Wdq (66) -02: phaddd Pq,Qq | phaddd Vdq,Wdq (66) -03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66) -04: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66) -05: phsubw Pq,Qq | phsubw Vdq,Wdq (66) -06: phsubd Pq,Qq | phsubd Vdq,Wdq (66) -07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66) -08: psignb Pq,Qq | psignb Vdq,Wdq (66) -09: psignw Pq,Qq | psignw Vdq,Wdq (66) -0a: psignd Pq,Qq | psignd Vdq,Wdq (66) -0b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66) -0c: -0d: -0e: -0f: +00: pshufb Pq,Qq | pshufb Vdq,Wdq (66),(VEX),(o128) +01: phaddw Pq,Qq | phaddw Vdq,Wdq (66),(VEX),(o128) +02: phaddd Pq,Qq | phaddd Vdq,Wdq (66),(VEX),(o128) +03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66),(VEX),(o128) +04: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66),(VEX),(o128) +05: phsubw Pq,Qq | phsubw Vdq,Wdq (66),(VEX),(o128) +06: phsubd Pq,Qq | phsubd Vdq,Wdq (66),(VEX),(o128) +07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66),(VEX),(o128) +08: psignb Pq,Qq | psignb Vdq,Wdq (66),(VEX),(o128) +09: psignw Pq,Qq | psignw Vdq,Wdq (66),(VEX),(o128) +0a: psignd Pq,Qq | psignd Vdq,Wdq (66),(VEX),(o128) +0b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66),(VEX),(o128) +0c: Vpermilps /r (66),(oVEX) +0d: Vpermilpd /r (66),(oVEX) +0e: vtestps /r (66),(oVEX) +0f: vtestpd /r (66),(oVEX) # 0x0f 0x38 0x10-0x1f 10: pblendvb Vdq,Wdq (66) 11: @@ -594,90 +604,99 @@ Referrer: 3-byte escape 1 14: blendvps Vdq,Wdq (66) 15: blendvpd Vdq,Wdq (66) 16: -17: ptest Vdq,Wdq (66) -18: -19: -1a: +17: ptest Vdq,Wdq (66),(VEX) +18: vbroadcastss /r (66),(oVEX) +19: vbroadcastsd /r (66),(oVEX),(o256) +1a: vbroadcastf128 /r (66),(oVEX),(o256) 1b: -1c: pabsb Pq,Qq | pabsb Vdq,Wdq (66) -1d: pabsw Pq,Qq | pabsw Vdq,Wdq (66) -1e: pabsd Pq,Qq | pabsd Vdq,Wdq (66) +1c: pabsb Pq,Qq | pabsb Vdq,Wdq (66),(VEX),(o128) +1d: pabsw Pq,Qq | pabsw Vdq,Wdq (66),(VEX),(o128) +1e: pabsd Pq,Qq | pabsd Vdq,Wdq (66),(VEX),(o128) 1f: # 0x0f 0x38 0x20-0x2f -20: pmovsxbw Vdq,Udq/Mq (66) -21: pmovsxbd Vdq,Udq/Md (66) -22: pmovsxbq Vdq,Udq/Mw (66) -23: pmovsxwd Vdq,Udq/Mq (66) -24: pmovsxwq Vdq,Udq/Md (66) -25: pmovsxdq Vdq,Udq/Mq (66) +20: pmovsxbw Vdq,Udq/Mq (66),(VEX),(o128) +21: pmovsxbd Vdq,Udq/Md (66),(VEX),(o128) +22: pmovsxbq Vdq,Udq/Mw (66),(VEX),(o128) +23: pmovsxwd Vdq,Udq/Mq (66),(VEX),(o128) +24: pmovsxwq Vdq,Udq/Md (66),(VEX),(o128) +25: pmovsxdq Vdq,Udq/Mq (66),(VEX),(o128) 26: 27: -28: pmuldq Vdq,Wdq (66) -29: pcmpeqq Vdq,Wdq (66) -2a: movntdqa Vdq,Mdq (66) -2b: packusdw Vdq,Wdq (66) -2c: -2d: -2e: -2f: +28: pmuldq Vdq,Wdq (66),(VEX),(o128) +29: pcmpeqq Vdq,Wdq (66),(VEX),(o128) +2a: movntdqa Vdq,Mdq (66),(VEX),(o128) +2b: packusdw Vdq,Wdq (66),(VEX),(o128) +2c: vmaskmovps(ld) /r (66),(oVEX) +2d: vmaskmovpd(ld) /r (66),(oVEX) +2e: vmaskmovps(st) /r (66),(oVEX) +2f: vmaskmovpd(st) /r (66),(oVEX) # 0x0f 0x38 0x30-0x3f -30: pmovzxbw Vdq,Udq/Mq (66) -31: pmovzxbd Vdq,Udq/Md (66) -32: pmovzxbq Vdq,Udq/Mw (66) -33: pmovzxwd Vdq,Udq/Mq (66) -34: pmovzxwq Vdq,Udq/Md (66) -35: pmovzxdq Vdq,Udq/Mq (66) +30: pmovzxbw Vdq,Udq/Mq (66),(VEX),(o128) +31: pmovzxbd Vdq,Udq/Md (66),(VEX),(o128) +32: pmovzxbq Vdq,Udq/Mw (66),(VEX),(o128) +33: pmovzxwd Vdq,Udq/Mq (66),(VEX),(o128) +34: pmovzxwq Vdq,Udq/Md (66),(VEX),(o128) +35: pmovzxdq Vdq,Udq/Mq (66),(VEX),(o128) 36: -37: pcmpgtq Vdq,Wdq (66) -38: pminsb Vdq,Wdq (66) -39: pminsd Vdq,Wdq (66) -3a: pminuw Vdq,Wdq (66) -3b: pminud Vdq,Wdq (66) -3c: pmaxsb Vdq,Wdq (66) -3d: pmaxsd Vdq,Wdq (66) -3e: pmaxuw Vdq,Wdq (66) -3f: pmaxud Vdq,Wdq (66) +37: pcmpgtq Vdq,Wdq (66),(VEX),(o128) +38: pminsb Vdq,Wdq (66),(VEX),(o128) +39: pminsd Vdq,Wdq (66),(VEX),(o128) +3a: pminuw Vdq,Wdq (66),(VEX),(o128) +3b: pminud Vdq,Wdq (66),(VEX),(o128) +3c: pmaxsb Vdq,Wdq (66),(VEX),(o128) +3d: pmaxsd Vdq,Wdq (66),(VEX),(o128) +3e: pmaxuw Vdq,Wdq (66),(VEX),(o128) +3f: pmaxud Vdq,Wdq (66),(VEX),(o128) # 0x0f 0x38 0x4f-0xff -40: pmulld Vdq,Wdq (66) -41: phminposuw Vdq,Wdq (66) +40: pmulld Vdq,Wdq (66),(VEX),(o128) +41: phminposuw Vdq,Wdq (66),(VEX),(o128) 80: INVEPT Gd/q,Mdq (66) 81: INVPID Gd/q,Mdq (66) -db: aesimc Vdq,Wdq (66) -dc: aesenc Vdq,Wdq (66) -dd: aesenclast Vdq,Wdq (66) -de: aesdec Vdq,Wdq (66) -df: aesdeclast Vdq,Wdq (66) +db: aesimc Vdq,Wdq (66),(VEX),(o128) +dc: aesenc Vdq,Wdq (66),(VEX),(o128) +dd: aesenclast Vdq,Wdq (66),(VEX),(o128) +de: aesdec Vdq,Wdq (66),(VEX),(o128) +df: aesdeclast Vdq,Wdq (66),(VEX),(o128) f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2) f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2) EndTable Table: 3-byte opcode 2 (0x0f 0x3a) Referrer: 3-byte escape 2 +AVXcode: 3 # 0x0f 0x3a 0x00-0xff -08: roundps Vdq,Wdq,Ib (66) -09: roundpd Vdq,Wdq,Ib (66) -0a: roundss Vss,Wss,Ib (66) -0b: roundsd Vsd,Wsd,Ib (66) -0c: blendps Vdq,Wdq,Ib (66) -0d: blendpd Vdq,Wdq,Ib (66) -0e: pblendw Vdq,Wdq,Ib (66) -0f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66) -14: pextrb Rd/Mb,Vdq,Ib (66) -15: pextrw Rd/Mw,Vdq,Ib (66) -16: pextrd/pextrq Ed/q,Vdq,Ib (66) -17: extractps Ed,Vdq,Ib (66) -20: pinsrb Vdq,Rd/q/Mb,Ib (66) -21: insertps Vdq,Udq/Md,Ib (66) -22: pinsrd/pinsrq Vdq,Ed/q,Ib (66) -40: dpps Vdq,Wdq,Ib (66) -41: dppd Vdq,Wdq,Ib (66) -42: mpsadbw Vdq,Wdq,Ib (66) -44: pclmulq Vdq,Wdq,Ib (66) -60: pcmpestrm Vdq,Wdq,Ib (66) -61: pcmpestri Vdq,Wdq,Ib (66) -62: pcmpistrm Vdq,Wdq,Ib (66) -63: pcmpistri Vdq,Wdq,Ib (66) -df: aeskeygenassist Vdq,Wdq,Ib (66) +04: vpermilps /r,Ib (66),(oVEX) +05: vpermilpd /r,Ib (66),(oVEX) +06: vperm2f128 /r,Ib (66),(oVEX),(o256) +08: roundps Vdq,Wdq,Ib (66),(VEX) +09: roundpd Vdq,Wdq,Ib (66),(VEX) +0a: roundss Vss,Wss,Ib (66),(VEX),(o128) +0b: roundsd Vsd,Wsd,Ib (66),(VEX),(o128) +0c: blendps Vdq,Wdq,Ib (66),(VEX) +0d: blendpd Vdq,Wdq,Ib (66),(VEX) +0e: pblendw Vdq,Wdq,Ib (66),(VEX),(o128) +0f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66),(VEX),(o128) +14: pextrb Rd/Mb,Vdq,Ib (66),(VEX),(o128) +15: pextrw Rd/Mw,Vdq,Ib (66),(VEX),(o128) +16: pextrd/pextrq Ed/q,Vdq,Ib (66),(VEX),(o128) +17: extractps Ed,Vdq,Ib (66),(VEX),(o128) +18: vinsertf128 /r,Ib (66),(oVEX),(o256) +19: vextractf128 /r,Ib (66),(oVEX),(o256) +20: pinsrb Vdq,Rd/q/Mb,Ib (66),(VEX),(o128) +21: insertps Vdq,Udq/Md,Ib (66),(VEX),(o128) +22: pinsrd/pinsrq Vdq,Ed/q,Ib (66),(VEX),(o128) +40: dpps Vdq,Wdq,Ib (66),(VEX) +41: dppd Vdq,Wdq,Ib (66),(VEX),(o128) +42: mpsadbw Vdq,Wdq,Ib (66),(VEX),(o128) +44: pclmulq Vdq,Wdq,Ib (66),(VEX),(o128) +4a: vblendvps /r,Ib (66),(oVEX) +4b: vblendvpd /r,Ib (66),(oVEX) +4c: vpblendvb /r,Ib (66),(oVEX),(o128) +60: pcmpestrm Vdq,Wdq,Ib (66),(VEX),(o128) +61: pcmpestri Vdq,Wdq,Ib (66),(VEX),(o128) +62: pcmpistrm Vdq,Wdq,Ib (66),(VEX),(o128) +63: pcmpistri Vdq,Wdq,Ib (66),(VEX),(o128) +df: aeskeygenassist Vdq,Wdq,Ib (66),(VEX),(o128) EndTable GrpTable: Grp1 @@ -785,29 +804,29 @@ GrpTable: Grp11 EndTable GrpTable: Grp12 -2: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B) -4: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B) -6: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B) +2: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B),(VEX),(o128) +4: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B),(VEX),(o128) +6: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B),(VEX),(o128) EndTable GrpTable: Grp13 -2: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B) -4: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B) -6: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B) +2: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B),(VEX),(o128) +4: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B),(VEX),(o128) +6: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B),(VEX),(o128) EndTable GrpTable: Grp14 -2: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B) -3: psrldq Udq,Ib (66),(11B) -6: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B) -7: pslldq Udq,Ib (66),(11B) +2: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B),(VEX),(o128) +3: psrldq Udq,Ib (66),(11B),(VEX),(o128) +6: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B),(VEX),(o128) +7: pslldq Udq,Ib (66),(11B),(VEX),(o128) EndTable GrpTable: Grp15 0: fxsave 1: fxstor -2: ldmxcsr -3: stmxcsr +2: ldmxcsr (VEX) +3: stmxcsr (VEX) 4: XSAVE 5: XRSTOR | lfence (11B) 6: mfence (11B) diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index 7d5492951e22..e34e92a28eb6 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -13,6 +13,18 @@ function check_awk_implement() { return "" } +# Clear working vars +function clear_vars() { + delete table + delete lptable2 + delete lptable1 + delete lptable3 + eid = -1 # escape id + gid = -1 # group id + aid = -1 # AVX id + tname = "" +} + BEGIN { # Implementation error checking awkchecked = check_awk_implement() @@ -24,11 +36,15 @@ BEGIN { # Setup generating tables print "/* x86 opcode map generated from x86-opcode-map.txt */" - print "/* Do not change this code. */" + print "/* Do not change this code. */\n" ggid = 1 geid = 1 + gaid = 0 + delete etable + delete gtable + delete atable - opnd_expr = "^[[:alpha:]]" + opnd_expr = "^[[:alpha:]/]" ext_expr = "^\\(" sep_expr = "^\\|$" group_expr = "^Grp[[:alnum:]]+" @@ -46,19 +62,19 @@ BEGIN { imm_flag["Ob"] = "INAT_MOFFSET" imm_flag["Ov"] = "INAT_MOFFSET" - modrm_expr = "^([CDEGMNPQRSUVW][[:lower:]]+|NTA|T[012])" + modrm_expr = "^([CDEGMNPQRSUVW/][[:lower:]]+|NTA|T[012])" force64_expr = "\\([df]64\\)" rex_expr = "^REX(\\.[XRWB]+)*" fpu_expr = "^ESC" # TODO lprefix1_expr = "\\(66\\)" - delete lptable1 - lprefix2_expr = "\\(F2\\)" - delete lptable2 - lprefix3_expr = "\\(F3\\)" - delete lptable3 + lprefix2_expr = "\\(F3\\)" + lprefix3_expr = "\\(F2\\)" max_lprefix = 4 + vexok_expr = "\\(VEX\\)" + vexonly_expr = "\\(oVEX\\)" + prefix_expr = "\\(Prefix\\)" prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" prefix_num["REPNE"] = "INAT_PFX_REPNE" @@ -71,12 +87,10 @@ BEGIN { prefix_num["SEG=GS"] = "INAT_PFX_GS" prefix_num["SEG=SS"] = "INAT_PFX_SS" prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" + prefix_num["2bytes-VEX"] = "INAT_PFX_VEX2" + prefix_num["3bytes-VEX"] = "INAT_PFX_VEX3" - delete table - delete etable - delete gtable - eid = -1 - gid = -1 + clear_vars() } function semantic_error(msg) { @@ -97,14 +111,12 @@ function array_size(arr, i,c) { /^Table:/ { print "/* " $0 " */" + if (tname != "") + semantic_error("Hit Table: before EndTable:."); } /^Referrer:/ { - if (NF == 1) { - # primary opcode table - tname = "inat_primary_table" - eid = -1 - } else { + if (NF != 1) { # escape opcode table ref = "" for (i = 2; i <= NF; i++) @@ -114,6 +126,19 @@ function array_size(arr, i,c) { } } +/^AVXcode:/ { + if (NF != 1) { + # AVX/escape opcode table + aid = $2 + if (gaid <= aid) + gaid = aid + 1 + if (tname == "") # AVX only opcode table + tname = sprintf("inat_avx_table_%d", $2) + } + if (aid == -1 && eid == -1) # primary opcode table + tname = "inat_primary_table" +} + /^GrpTable:/ { print "/* " $0 " */" if (!($2 in group)) @@ -162,30 +187,33 @@ function print_table(tbl,name,fmt,n) print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]", "0x%02x", 256) etable[eid,0] = tname + if (aid >= 0) + atable[aid,0] = tname } if (array_size(lptable1) != 0) { print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]", "0x%02x", 256) etable[eid,1] = tname "_1" + if (aid >= 0) + atable[aid,1] = tname "_1" } if (array_size(lptable2) != 0) { print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]", "0x%02x", 256) etable[eid,2] = tname "_2" + if (aid >= 0) + atable[aid,2] = tname "_2" } if (array_size(lptable3) != 0) { print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]", "0x%02x", 256) etable[eid,3] = tname "_3" + if (aid >= 0) + atable[aid,3] = tname "_3" } } print "" - delete table - delete lptable1 - delete lptable2 - delete lptable3 - gid = -1 - eid = -1 + clear_vars() } function add_flags(old,new) { @@ -284,6 +312,14 @@ function convert_operands(opnd, i,imm,mod) if (match(opcode, fpu_expr)) flags = add_flags(flags, "INAT_MODRM") + # check VEX only code + if (match(ext, vexonly_expr)) + flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") + + # check VEX only code + if (match(ext, vexok_expr)) + flags = add_flags(flags, "INAT_VEXOK") + # check prefixes if (match(ext, prefix_expr)) { if (!prefix_num[opcode]) @@ -330,5 +366,15 @@ END { for (j = 0; j < max_lprefix; j++) if (gtable[i,j]) print " ["i"]["j"] = "gtable[i,j]"," + print "};\n" + # print AVX opcode map's array + print "/* AVX opcode map array */" + print "const insn_attr_t const *inat_avx_tables[X86_VEX_M_MAX + 1]"\ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < gaid; i++) + for (j = 0; j < max_lprefix; j++) + if (atable[i,j]) + print " ["i"]["j"] = "atable[i,j]"," print "};" } + -- cgit v1.2.3 From 3f7e454af1dd8b9cea410d9380d3f71477e94f2b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 27 Oct 2009 16:42:35 -0400 Subject: x86: Add Intel FMA instructions to x86 opcode map Add Intel FMA(FUSED-MULTIPLY-ADD) instructions to x86 opcode map for x86 instruction decoder. Signed-off-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju LKML-Reference: <20091027204235.30545.33997.stgit@harusame> Signed-off-by: Ingo Molnar --- arch/x86/lib/x86-opcode-map.txt | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 9887bfeeb2db..a793da5e560e 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -647,11 +647,43 @@ AVXcode: 2 3d: pmaxsd Vdq,Wdq (66),(VEX),(o128) 3e: pmaxuw Vdq,Wdq (66),(VEX),(o128) 3f: pmaxud Vdq,Wdq (66),(VEX),(o128) -# 0x0f 0x38 0x4f-0xff +# 0x0f 0x38 0x40-0x8f 40: pmulld Vdq,Wdq (66),(VEX),(o128) 41: phminposuw Vdq,Wdq (66),(VEX),(o128) 80: INVEPT Gd/q,Mdq (66) 81: INVPID Gd/q,Mdq (66) +# 0x0f 0x38 0x90-0xbf (FMA) +96: vfmaddsub132pd/ps /r (66),(VEX) +97: vfmsubadd132pd/ps /r (66),(VEX) +98: vfmadd132pd/ps /r (66),(VEX) +99: vfmadd132sd/ss /r (66),(VEX),(o128) +9a: vfmsub132pd/ps /r (66),(VEX) +9b: vfmsub132sd/ss /r (66),(VEX),(o128) +9c: vfnmadd132pd/ps /r (66),(VEX) +9d: vfnmadd132sd/ss /r (66),(VEX),(o128) +9e: vfnmsub132pd/ps /r (66),(VEX) +9f: vfnmsub132sd/ss /r (66),(VEX),(o128) +a6: vfmaddsub213pd/ps /r (66),(VEX) +a7: vfmsubadd213pd/ps /r (66),(VEX) +a8: vfmadd213pd/ps /r (66),(VEX) +a9: vfmadd213sd/ss /r (66),(VEX),(o128) +aa: vfmsub213pd/ps /r (66),(VEX) +ab: vfmsub213sd/ss /r (66),(VEX),(o128) +ac: vfnmadd213pd/ps /r (66),(VEX) +ad: vfnmadd213sd/ss /r (66),(VEX),(o128) +ae: vfnmsub213pd/ps /r (66),(VEX) +af: vfnmsub213sd/ss /r (66),(VEX),(o128) +b6: vfmaddsub231pd/ps /r (66),(VEX) +b7: vfmsubadd231pd/ps /r (66),(VEX) +b8: vfmadd231pd/ps /r (66),(VEX) +b9: vfmadd231sd/ss /r (66),(VEX),(o128) +ba: vfmsub231pd/ps /r (66),(VEX) +bb: vfmsub231sd/ss /r (66),(VEX),(o128) +bc: vfnmadd231pd/ps /r (66),(VEX) +bd: vfnmadd231sd/ss /r (66),(VEX),(o128) +be: vfnmsub231pd/ps /r (66),(VEX) +bf: vfnmsub231sd/ss /r (66),(VEX),(o128) +# 0x0f 0x38 0xc0-0xff db: aesimc Vdq,Wdq (66),(VEX),(o128) dc: aesenc Vdq,Wdq (66),(VEX),(o128) dd: aesenclast Vdq,Wdq (66),(VEX),(o128) -- cgit v1.2.3 From 0f5e4816dbf38ce9488e611ca2296925c1e90d5e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 29 Oct 2009 22:34:12 +0900 Subject: percpu: remove some sparse warnings Make the following changes to remove some sparse warnings. * Make DEFINE_PER_CPU_SECTION() declare __pcpu_unique_* before defining it. * Annotate pcpu_extend_area_map() that it is entered with pcpu_lock held, releases it and then reacquires it. * Make percpu related macros use unique nested variable names. * While at it, add pcpu prefix to __size_call[_return]() macros as to-be-implemented sparse annotations will add percpu specific stuff to these macros. Signed-off-by: Tejun Heo Reviewed-by: Christoph Lameter Cc: Rusty Russell --- arch/x86/include/asm/percpu.h | 26 +++++++++++------------ include/linux/percpu-defs.h | 1 + include/linux/percpu.h | 48 +++++++++++++++++++++---------------------- mm/percpu.c | 1 + 4 files changed, 39 insertions(+), 37 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 8b5ec19bdef4..0c44196b78ac 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -74,31 +74,31 @@ extern void __bad_percpu_size(void); #define percpu_to_op(op, var, val) \ do { \ - typedef typeof(var) T__; \ + typedef typeof(var) pto_T__; \ if (0) { \ - T__ tmp__; \ - tmp__ = (val); \ + pto_T__ pto_tmp__; \ + pto_tmp__ = (val); \ } \ switch (sizeof(var)) { \ case 1: \ asm(op "b %1,"__percpu_arg(0) \ : "+m" (var) \ - : "qi" ((T__)(val))); \ + : "qi" ((pto_T__)(val))); \ break; \ case 2: \ asm(op "w %1,"__percpu_arg(0) \ : "+m" (var) \ - : "ri" ((T__)(val))); \ + : "ri" ((pto_T__)(val))); \ break; \ case 4: \ asm(op "l %1,"__percpu_arg(0) \ : "+m" (var) \ - : "ri" ((T__)(val))); \ + : "ri" ((pto_T__)(val))); \ break; \ case 8: \ asm(op "q %1,"__percpu_arg(0) \ : "+m" (var) \ - : "re" ((T__)(val))); \ + : "re" ((pto_T__)(val))); \ break; \ default: __bad_percpu_size(); \ } \ @@ -106,31 +106,31 @@ do { \ #define percpu_from_op(op, var, constraint) \ ({ \ - typeof(var) ret__; \ + typeof(var) pfo_ret__; \ switch (sizeof(var)) { \ case 1: \ asm(op "b "__percpu_arg(1)",%0" \ - : "=q" (ret__) \ + : "=q" (pfo_ret__) \ : constraint); \ break; \ case 2: \ asm(op "w "__percpu_arg(1)",%0" \ - : "=r" (ret__) \ + : "=r" (pfo_ret__) \ : constraint); \ break; \ case 4: \ asm(op "l "__percpu_arg(1)",%0" \ - : "=r" (ret__) \ + : "=r" (pfo_ret__) \ : constraint); \ break; \ case 8: \ asm(op "q "__percpu_arg(1)",%0" \ - : "=r" (ret__) \ + : "=r" (pfo_ret__) \ : constraint); \ break; \ default: __bad_percpu_size(); \ } \ - ret__; \ + pfo_ret__; \ }) /* diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 9bd03193ecd4..5a5d6ce4bd55 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -60,6 +60,7 @@ #define DEFINE_PER_CPU_SECTION(type, name, sec) \ __PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \ + extern __PCPU_DUMMY_ATTRS char __pcpu_unique_##name; \ __PCPU_DUMMY_ATTRS char __pcpu_unique_##name; \ __PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES __weak \ __typeof__(type) per_cpu__##name diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 519d6876590f..522f421ec213 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -226,20 +226,20 @@ do { \ extern void __bad_size_call_parameter(void); -#define __size_call_return(stem, variable) \ -({ typeof(variable) ret__; \ +#define __pcpu_size_call_return(stem, variable) \ +({ typeof(variable) pscr_ret__; \ switch(sizeof(variable)) { \ - case 1: ret__ = stem##1(variable);break; \ - case 2: ret__ = stem##2(variable);break; \ - case 4: ret__ = stem##4(variable);break; \ - case 8: ret__ = stem##8(variable);break; \ + case 1: pscr_ret__ = stem##1(variable);break; \ + case 2: pscr_ret__ = stem##2(variable);break; \ + case 4: pscr_ret__ = stem##4(variable);break; \ + case 8: pscr_ret__ = stem##8(variable);break; \ default: \ __bad_size_call_parameter();break; \ } \ - ret__; \ + pscr_ret__; \ }) -#define __size_call(stem, variable, ...) \ +#define __pcpu_size_call(stem, variable, ...) \ do { \ switch(sizeof(variable)) { \ case 1: stem##1(variable, __VA_ARGS__);break; \ @@ -299,7 +299,7 @@ do { \ # ifndef this_cpu_read_8 # define this_cpu_read_8(pcp) _this_cpu_generic_read(pcp) # endif -# define this_cpu_read(pcp) __size_call_return(this_cpu_read_, (pcp)) +# define this_cpu_read(pcp) __pcpu_size_call_return(this_cpu_read_, (pcp)) #endif #define _this_cpu_generic_to_op(pcp, val, op) \ @@ -322,7 +322,7 @@ do { \ # ifndef this_cpu_write_8 # define this_cpu_write_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) # endif -# define this_cpu_write(pcp, val) __size_call(this_cpu_write_, (pcp), (val)) +# define this_cpu_write(pcp, val) __pcpu_size_call(this_cpu_write_, (pcp), (val)) #endif #ifndef this_cpu_add @@ -338,7 +338,7 @@ do { \ # ifndef this_cpu_add_8 # define this_cpu_add_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) # endif -# define this_cpu_add(pcp, val) __size_call(this_cpu_add_, (pcp), (val)) +# define this_cpu_add(pcp, val) __pcpu_size_call(this_cpu_add_, (pcp), (val)) #endif #ifndef this_cpu_sub @@ -366,7 +366,7 @@ do { \ # ifndef this_cpu_and_8 # define this_cpu_and_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) # endif -# define this_cpu_and(pcp, val) __size_call(this_cpu_and_, (pcp), (val)) +# define this_cpu_and(pcp, val) __pcpu_size_call(this_cpu_and_, (pcp), (val)) #endif #ifndef this_cpu_or @@ -382,7 +382,7 @@ do { \ # ifndef this_cpu_or_8 # define this_cpu_or_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) # endif -# define this_cpu_or(pcp, val) __size_call(this_cpu_or_, (pcp), (val)) +# define this_cpu_or(pcp, val) __pcpu_size_call(this_cpu_or_, (pcp), (val)) #endif #ifndef this_cpu_xor @@ -398,7 +398,7 @@ do { \ # ifndef this_cpu_xor_8 # define this_cpu_xor_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), ^=) # endif -# define this_cpu_xor(pcp, val) __size_call(this_cpu_or_, (pcp), (val)) +# define this_cpu_xor(pcp, val) __pcpu_size_call(this_cpu_or_, (pcp), (val)) #endif /* @@ -428,7 +428,7 @@ do { \ # ifndef __this_cpu_read_8 # define __this_cpu_read_8(pcp) (*__this_cpu_ptr(&(pcp))) # endif -# define __this_cpu_read(pcp) __size_call_return(__this_cpu_read_, (pcp)) +# define __this_cpu_read(pcp) __pcpu_size_call_return(__this_cpu_read_, (pcp)) #endif #define __this_cpu_generic_to_op(pcp, val, op) \ @@ -449,7 +449,7 @@ do { \ # ifndef __this_cpu_write_8 # define __this_cpu_write_8(pcp, val) __this_cpu_generic_to_op((pcp), (val), =) # endif -# define __this_cpu_write(pcp, val) __size_call(__this_cpu_write_, (pcp), (val)) +# define __this_cpu_write(pcp, val) __pcpu_size_call(__this_cpu_write_, (pcp), (val)) #endif #ifndef __this_cpu_add @@ -465,7 +465,7 @@ do { \ # ifndef __this_cpu_add_8 # define __this_cpu_add_8(pcp, val) __this_cpu_generic_to_op((pcp), (val), +=) # endif -# define __this_cpu_add(pcp, val) __size_call(__this_cpu_add_, (pcp), (val)) +# define __this_cpu_add(pcp, val) __pcpu_size_call(__this_cpu_add_, (pcp), (val)) #endif #ifndef __this_cpu_sub @@ -493,7 +493,7 @@ do { \ # ifndef __this_cpu_and_8 # define __this_cpu_and_8(pcp, val) __this_cpu_generic_to_op((pcp), (val), &=) # endif -# define __this_cpu_and(pcp, val) __size_call(__this_cpu_and_, (pcp), (val)) +# define __this_cpu_and(pcp, val) __pcpu_size_call(__this_cpu_and_, (pcp), (val)) #endif #ifndef __this_cpu_or @@ -509,7 +509,7 @@ do { \ # ifndef __this_cpu_or_8 # define __this_cpu_or_8(pcp, val) __this_cpu_generic_to_op((pcp), (val), |=) # endif -# define __this_cpu_or(pcp, val) __size_call(__this_cpu_or_, (pcp), (val)) +# define __this_cpu_or(pcp, val) __pcpu_size_call(__this_cpu_or_, (pcp), (val)) #endif #ifndef __this_cpu_xor @@ -525,7 +525,7 @@ do { \ # ifndef __this_cpu_xor_8 # define __this_cpu_xor_8(pcp, val) __this_cpu_generic_to_op((pcp), (val), ^=) # endif -# define __this_cpu_xor(pcp, val) __size_call(__this_cpu_xor_, (pcp), (val)) +# define __this_cpu_xor(pcp, val) __pcpu_size_call(__this_cpu_xor_, (pcp), (val)) #endif /* @@ -556,7 +556,7 @@ do { \ # ifndef irqsafe_cpu_add_8 # define irqsafe_cpu_add_8(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), +=) # endif -# define irqsafe_cpu_add(pcp, val) __size_call(irqsafe_cpu_add_, (pcp), (val)) +# define irqsafe_cpu_add(pcp, val) __pcpu_size_call(irqsafe_cpu_add_, (pcp), (val)) #endif #ifndef irqsafe_cpu_sub @@ -584,7 +584,7 @@ do { \ # ifndef irqsafe_cpu_and_8 # define irqsafe_cpu_and_8(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), &=) # endif -# define irqsafe_cpu_and(pcp, val) __size_call(irqsafe_cpu_and_, (val)) +# define irqsafe_cpu_and(pcp, val) __pcpu_size_call(irqsafe_cpu_and_, (val)) #endif #ifndef irqsafe_cpu_or @@ -600,7 +600,7 @@ do { \ # ifndef irqsafe_cpu_or_8 # define irqsafe_cpu_or_8(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), |=) # endif -# define irqsafe_cpu_or(pcp, val) __size_call(irqsafe_cpu_or_, (val)) +# define irqsafe_cpu_or(pcp, val) __pcpu_size_call(irqsafe_cpu_or_, (val)) #endif #ifndef irqsafe_cpu_xor @@ -616,7 +616,7 @@ do { \ # ifndef irqsafe_cpu_xor_8 # define irqsafe_cpu_xor_8(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), ^=) # endif -# define irqsafe_cpu_xor(pcp, val) __size_call(irqsafe_cpu_xor_, (val)) +# define irqsafe_cpu_xor(pcp, val) __pcpu_size_call(irqsafe_cpu_xor_, (val)) #endif #endif /* __LINUX_PERCPU_H */ diff --git a/mm/percpu.c b/mm/percpu.c index ec158bb5f86d..e2e80fc78601 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -365,6 +365,7 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) * 0 if noop, 1 if successfully extended, -errno on failure. */ static int pcpu_extend_area_map(struct pcpu_chunk *chunk) + __releases(lock) __acquires(lock) { int new_alloc; int *new; -- cgit v1.2.3 From f16250669d78a32bdfb27cec4d791e85141e11e2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 29 Oct 2009 22:34:13 +0900 Subject: percpu: make percpu symbols in cpufreq unique This patch updates percpu related symbols in cpufreq such that percpu symbols are unique and don't clash with local symbols. This serves two purposes of decreasing the possibility of global percpu symbol collision and allowing dropping per_cpu__ prefix from percpu symbols. * drivers/cpufreq/cpufreq.c: s/policy_cpu/cpufreq_policy_cpu/ * drivers/cpufreq/freq_table.c: s/show_table/cpufreq_show_table/ * arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c: s/drv_data/acfreq_data/ s/old_perf/acfreq_old_perf/ Partly based on Rusty Russell's "alloc_percpu: rename percpu vars which cause name clashes" patch. Signed-off-by: Tejun Heo Cc: Rusty Russell --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 28 ++++++++++++++-------------- drivers/cpufreq/cpufreq.c | 16 ++++++++-------- drivers/cpufreq/freq_table.c | 12 ++++++------ 3 files changed, 28 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 7d5c3b0ea8da..43eb3465dda7 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -68,9 +68,9 @@ struct acpi_cpufreq_data { unsigned int cpu_feature; }; -static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data); +static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data); -static DEFINE_PER_CPU(struct aperfmperf, old_perf); +static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf); /* acpi_perf_data is a pointer to percpu data. */ static struct acpi_processor_performance *acpi_perf_data; @@ -214,14 +214,14 @@ static u32 get_cur_val(const struct cpumask *mask) if (unlikely(cpumask_empty(mask))) return 0; - switch (per_cpu(drv_data, cpumask_first(mask))->cpu_feature) { + switch (per_cpu(acfreq_data, cpumask_first(mask))->cpu_feature) { case SYSTEM_INTEL_MSR_CAPABLE: cmd.type = SYSTEM_INTEL_MSR_CAPABLE; cmd.addr.msr.reg = MSR_IA32_PERF_STATUS; break; case SYSTEM_IO_CAPABLE: cmd.type = SYSTEM_IO_CAPABLE; - perf = per_cpu(drv_data, cpumask_first(mask))->acpi_data; + perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data; cmd.addr.io.port = perf->control_register.address; cmd.addr.io.bit_width = perf->control_register.bit_width; break; @@ -268,8 +268,8 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy, if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1)) return 0; - ratio = calc_aperfmperf_ratio(&per_cpu(old_perf, cpu), &perf); - per_cpu(old_perf, cpu) = perf; + ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf); + per_cpu(acfreq_old_perf, cpu) = perf; retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT; @@ -278,7 +278,7 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy, static unsigned int get_cur_freq_on_cpu(unsigned int cpu) { - struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu); + struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu); unsigned int freq; unsigned int cached_freq; @@ -322,7 +322,7 @@ static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq, static int acpi_cpufreq_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { - struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu); + struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); struct acpi_processor_performance *perf; struct cpufreq_freqs freqs; struct drv_cmd cmd; @@ -416,7 +416,7 @@ out: static int acpi_cpufreq_verify(struct cpufreq_policy *policy) { - struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu); + struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); dprintk("acpi_cpufreq_verify\n"); @@ -563,7 +563,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) return -ENOMEM; data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu); - per_cpu(drv_data, cpu) = data; + per_cpu(acfreq_data, cpu) = data; if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; @@ -714,20 +714,20 @@ err_unreg: acpi_processor_unregister_performance(perf, cpu); err_free: kfree(data); - per_cpu(drv_data, cpu) = NULL; + per_cpu(acfreq_data, cpu) = NULL; return result; } static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) { - struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu); + struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); dprintk("acpi_cpufreq_cpu_exit\n"); if (data) { cpufreq_frequency_table_put_attr(policy->cpu); - per_cpu(drv_data, policy->cpu) = NULL; + per_cpu(acfreq_data, policy->cpu) = NULL; acpi_processor_unregister_performance(data->acpi_data, policy->cpu); kfree(data); @@ -738,7 +738,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) static int acpi_cpufreq_resume(struct cpufreq_policy *policy) { - struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu); + struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); dprintk("acpi_cpufreq_resume\n"); diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 3938c7817095..af93a8175c5e 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -64,14 +64,14 @@ static DEFINE_SPINLOCK(cpufreq_driver_lock); * - Lock should not be held across * __cpufreq_governor(data, CPUFREQ_GOV_STOP); */ -static DEFINE_PER_CPU(int, policy_cpu); +static DEFINE_PER_CPU(int, cpufreq_policy_cpu); static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem); #define lock_policy_rwsem(mode, cpu) \ int lock_policy_rwsem_##mode \ (int cpu) \ { \ - int policy_cpu = per_cpu(policy_cpu, cpu); \ + int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu); \ BUG_ON(policy_cpu == -1); \ down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu)); \ if (unlikely(!cpu_online(cpu))) { \ @@ -90,7 +90,7 @@ EXPORT_SYMBOL_GPL(lock_policy_rwsem_write); void unlock_policy_rwsem_read(int cpu) { - int policy_cpu = per_cpu(policy_cpu, cpu); + int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu); BUG_ON(policy_cpu == -1); up_read(&per_cpu(cpu_policy_rwsem, policy_cpu)); } @@ -98,7 +98,7 @@ EXPORT_SYMBOL_GPL(unlock_policy_rwsem_read); void unlock_policy_rwsem_write(int cpu) { - int policy_cpu = per_cpu(policy_cpu, cpu); + int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu); BUG_ON(policy_cpu == -1); up_write(&per_cpu(cpu_policy_rwsem, policy_cpu)); } @@ -799,7 +799,7 @@ int cpufreq_add_dev_policy(unsigned int cpu, struct cpufreq_policy *policy, /* Set proper policy_cpu */ unlock_policy_rwsem_write(cpu); - per_cpu(policy_cpu, cpu) = managed_policy->cpu; + per_cpu(cpufreq_policy_cpu, cpu) = managed_policy->cpu; if (lock_policy_rwsem_write(cpu) < 0) { /* Should not go through policy unlock path */ @@ -906,7 +906,7 @@ int cpufreq_add_dev_interface(unsigned int cpu, struct cpufreq_policy *policy, if (!cpu_online(j)) continue; per_cpu(cpufreq_cpu_data, j) = policy; - per_cpu(policy_cpu, j) = policy->cpu; + per_cpu(cpufreq_policy_cpu, j) = policy->cpu; } spin_unlock_irqrestore(&cpufreq_driver_lock, flags); @@ -991,7 +991,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) cpumask_copy(policy->cpus, cpumask_of(cpu)); /* Initially set CPU itself as the policy_cpu */ - per_cpu(policy_cpu, cpu) = cpu; + per_cpu(cpufreq_policy_cpu, cpu) = cpu; ret = (lock_policy_rwsem_write(cpu) < 0); WARN_ON(ret); @@ -1946,7 +1946,7 @@ static int __init cpufreq_core_init(void) int cpu; for_each_possible_cpu(cpu) { - per_cpu(policy_cpu, cpu) = -1; + per_cpu(cpufreq_policy_cpu, cpu) = -1; init_rwsem(&per_cpu(cpu_policy_rwsem, cpu)); } diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index a9bd3a05a684..05432216e224 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -174,7 +174,7 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy, } EXPORT_SYMBOL_GPL(cpufreq_frequency_table_target); -static DEFINE_PER_CPU(struct cpufreq_frequency_table *, show_table); +static DEFINE_PER_CPU(struct cpufreq_frequency_table *, cpufreq_show_table); /** * show_available_freqs - show available frequencies for the specified CPU */ @@ -185,10 +185,10 @@ static ssize_t show_available_freqs(struct cpufreq_policy *policy, char *buf) ssize_t count = 0; struct cpufreq_frequency_table *table; - if (!per_cpu(show_table, cpu)) + if (!per_cpu(cpufreq_show_table, cpu)) return -ENODEV; - table = per_cpu(show_table, cpu); + table = per_cpu(cpufreq_show_table, cpu); for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) { if (table[i].frequency == CPUFREQ_ENTRY_INVALID) @@ -217,20 +217,20 @@ void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table, unsigned int cpu) { dprintk("setting show_table for cpu %u to %p\n", cpu, table); - per_cpu(show_table, cpu) = table; + per_cpu(cpufreq_show_table, cpu) = table; } EXPORT_SYMBOL_GPL(cpufreq_frequency_table_get_attr); void cpufreq_frequency_table_put_attr(unsigned int cpu) { dprintk("clearing show_table for cpu %u\n", cpu); - per_cpu(show_table, cpu) = NULL; + per_cpu(cpufreq_show_table, cpu) = NULL; } EXPORT_SYMBOL_GPL(cpufreq_frequency_table_put_attr); struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu) { - return per_cpu(show_table, cpu); + return per_cpu(cpufreq_show_table, cpu); } EXPORT_SYMBOL_GPL(cpufreq_frequency_get_table); -- cgit v1.2.3 From c6e22f9e3e99cc221fe01a0cacf94a9da8a59c31 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 29 Oct 2009 22:34:13 +0900 Subject: percpu: make percpu symbols in xen unique This patch updates percpu related symbols in xen such that percpu symbols are unique and don't clash with local symbols. This serves two purposes of decreasing the possibility of global percpu symbol collision and allowing dropping per_cpu__ prefix from percpu symbols. * arch/x86/xen/smp.c, arch/x86/xen/time.c, arch/ia64/xen/irq_xen.c: add xen_ prefix to percpu variables * arch/ia64/xen/time.c: add xen_ prefix to percpu variables, drop processed_ prefix and make them static Partly based on Rusty Russell's "alloc_percpu: rename percpu vars which cause name clashes" patch. Signed-off-by: Tejun Heo Cc: Rusty Russell Cc: Jeremy Fitzhardinge Cc: Chris Wright --- arch/ia64/xen/irq_xen.c | 131 +++++++++++++++++++++++++----------------------- arch/ia64/xen/time.c | 22 ++++---- arch/x86/xen/smp.c | 41 +++++++-------- arch/x86/xen/time.c | 24 ++++----- 4 files changed, 111 insertions(+), 107 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/xen/irq_xen.c b/arch/ia64/xen/irq_xen.c index f042e192d2fe..a3fb7cf9ae1d 100644 --- a/arch/ia64/xen/irq_xen.c +++ b/arch/ia64/xen/irq_xen.c @@ -63,19 +63,19 @@ xen_free_irq_vector(int vector) } -static DEFINE_PER_CPU(int, timer_irq) = -1; -static DEFINE_PER_CPU(int, ipi_irq) = -1; -static DEFINE_PER_CPU(int, resched_irq) = -1; -static DEFINE_PER_CPU(int, cmc_irq) = -1; -static DEFINE_PER_CPU(int, cmcp_irq) = -1; -static DEFINE_PER_CPU(int, cpep_irq) = -1; +static DEFINE_PER_CPU(int, xen_timer_irq) = -1; +static DEFINE_PER_CPU(int, xen_ipi_irq) = -1; +static DEFINE_PER_CPU(int, xen_resched_irq) = -1; +static DEFINE_PER_CPU(int, xen_cmc_irq) = -1; +static DEFINE_PER_CPU(int, xen_cmcp_irq) = -1; +static DEFINE_PER_CPU(int, xen_cpep_irq) = -1; #define NAME_SIZE 15 -static DEFINE_PER_CPU(char[NAME_SIZE], timer_name); -static DEFINE_PER_CPU(char[NAME_SIZE], ipi_name); -static DEFINE_PER_CPU(char[NAME_SIZE], resched_name); -static DEFINE_PER_CPU(char[NAME_SIZE], cmc_name); -static DEFINE_PER_CPU(char[NAME_SIZE], cmcp_name); -static DEFINE_PER_CPU(char[NAME_SIZE], cpep_name); +static DEFINE_PER_CPU(char[NAME_SIZE], xen_timer_name); +static DEFINE_PER_CPU(char[NAME_SIZE], xen_ipi_name); +static DEFINE_PER_CPU(char[NAME_SIZE], xen_resched_name); +static DEFINE_PER_CPU(char[NAME_SIZE], xen_cmc_name); +static DEFINE_PER_CPU(char[NAME_SIZE], xen_cmcp_name); +static DEFINE_PER_CPU(char[NAME_SIZE], xen_cpep_name); #undef NAME_SIZE struct saved_irq { @@ -144,64 +144,64 @@ __xen_register_percpu_irq(unsigned int cpu, unsigned int vec, if (xen_slab_ready) { switch (vec) { case IA64_TIMER_VECTOR: - snprintf(per_cpu(timer_name, cpu), - sizeof(per_cpu(timer_name, cpu)), + snprintf(per_cpu(xen_timer_name, cpu), + sizeof(per_cpu(xen_timer_name, cpu)), "%s%d", action->name, cpu); irq = bind_virq_to_irqhandler(VIRQ_ITC, cpu, action->handler, action->flags, - per_cpu(timer_name, cpu), action->dev_id); - per_cpu(timer_irq, cpu) = irq; + per_cpu(xen_timer_name, cpu), action->dev_id); + per_cpu(xen_timer_irq, cpu) = irq; break; case IA64_IPI_RESCHEDULE: - snprintf(per_cpu(resched_name, cpu), - sizeof(per_cpu(resched_name, cpu)), + snprintf(per_cpu(xen_resched_name, cpu), + sizeof(per_cpu(xen_resched_name, cpu)), "%s%d", action->name, cpu); irq = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR, cpu, action->handler, action->flags, - per_cpu(resched_name, cpu), action->dev_id); - per_cpu(resched_irq, cpu) = irq; + per_cpu(xen_resched_name, cpu), action->dev_id); + per_cpu(xen_resched_irq, cpu) = irq; break; case IA64_IPI_VECTOR: - snprintf(per_cpu(ipi_name, cpu), - sizeof(per_cpu(ipi_name, cpu)), + snprintf(per_cpu(xen_ipi_name, cpu), + sizeof(per_cpu(xen_ipi_name, cpu)), "%s%d", action->name, cpu); irq = bind_ipi_to_irqhandler(XEN_IPI_VECTOR, cpu, action->handler, action->flags, - per_cpu(ipi_name, cpu), action->dev_id); - per_cpu(ipi_irq, cpu) = irq; + per_cpu(xen_ipi_name, cpu), action->dev_id); + per_cpu(xen_ipi_irq, cpu) = irq; break; case IA64_CMC_VECTOR: - snprintf(per_cpu(cmc_name, cpu), - sizeof(per_cpu(cmc_name, cpu)), + snprintf(per_cpu(xen_cmc_name, cpu), + sizeof(per_cpu(xen_cmc_name, cpu)), "%s%d", action->name, cpu); irq = bind_virq_to_irqhandler(VIRQ_MCA_CMC, cpu, - action->handler, - action->flags, - per_cpu(cmc_name, cpu), - action->dev_id); - per_cpu(cmc_irq, cpu) = irq; + action->handler, + action->flags, + per_cpu(xen_cmc_name, cpu), + action->dev_id); + per_cpu(xen_cmc_irq, cpu) = irq; break; case IA64_CMCP_VECTOR: - snprintf(per_cpu(cmcp_name, cpu), - sizeof(per_cpu(cmcp_name, cpu)), + snprintf(per_cpu(xen_cmcp_name, cpu), + sizeof(per_cpu(xen_cmcp_name, cpu)), "%s%d", action->name, cpu); irq = bind_ipi_to_irqhandler(XEN_CMCP_VECTOR, cpu, - action->handler, - action->flags, - per_cpu(cmcp_name, cpu), - action->dev_id); - per_cpu(cmcp_irq, cpu) = irq; + action->handler, + action->flags, + per_cpu(xen_cmcp_name, cpu), + action->dev_id); + per_cpu(xen_cmcp_irq, cpu) = irq; break; case IA64_CPEP_VECTOR: - snprintf(per_cpu(cpep_name, cpu), - sizeof(per_cpu(cpep_name, cpu)), + snprintf(per_cpu(xen_cpep_name, cpu), + sizeof(per_cpu(xen_cpep_name, cpu)), "%s%d", action->name, cpu); irq = bind_ipi_to_irqhandler(XEN_CPEP_VECTOR, cpu, - action->handler, - action->flags, - per_cpu(cpep_name, cpu), - action->dev_id); - per_cpu(cpep_irq, cpu) = irq; + action->handler, + action->flags, + per_cpu(xen_cpep_name, cpu), + action->dev_id); + per_cpu(xen_cpep_irq, cpu) = irq; break; case IA64_CPE_VECTOR: case IA64_MCA_RENDEZ_VECTOR: @@ -275,30 +275,33 @@ unbind_evtchn_callback(struct notifier_block *nfb, if (action == CPU_DEAD) { /* Unregister evtchn. */ - if (per_cpu(cpep_irq, cpu) >= 0) { - unbind_from_irqhandler(per_cpu(cpep_irq, cpu), NULL); - per_cpu(cpep_irq, cpu) = -1; + if (per_cpu(xen_cpep_irq, cpu) >= 0) { + unbind_from_irqhandler(per_cpu(xen_cpep_irq, cpu), + NULL); + per_cpu(xen_cpep_irq, cpu) = -1; } - if (per_cpu(cmcp_irq, cpu) >= 0) { - unbind_from_irqhandler(per_cpu(cmcp_irq, cpu), NULL); - per_cpu(cmcp_irq, cpu) = -1; + if (per_cpu(xen_cmcp_irq, cpu) >= 0) { + unbind_from_irqhandler(per_cpu(xen_cmcp_irq, cpu), + NULL); + per_cpu(xen_cmcp_irq, cpu) = -1; } - if (per_cpu(cmc_irq, cpu) >= 0) { - unbind_from_irqhandler(per_cpu(cmc_irq, cpu), NULL); - per_cpu(cmc_irq, cpu) = -1; + if (per_cpu(xen_cmc_irq, cpu) >= 0) { + unbind_from_irqhandler(per_cpu(xen_cmc_irq, cpu), NULL); + per_cpu(xen_cmc_irq, cpu) = -1; } - if (per_cpu(ipi_irq, cpu) >= 0) { - unbind_from_irqhandler(per_cpu(ipi_irq, cpu), NULL); - per_cpu(ipi_irq, cpu) = -1; + if (per_cpu(xen_ipi_irq, cpu) >= 0) { + unbind_from_irqhandler(per_cpu(xen_ipi_irq, cpu), NULL); + per_cpu(xen_ipi_irq, cpu) = -1; } - if (per_cpu(resched_irq, cpu) >= 0) { - unbind_from_irqhandler(per_cpu(resched_irq, cpu), - NULL); - per_cpu(resched_irq, cpu) = -1; + if (per_cpu(xen_resched_irq, cpu) >= 0) { + unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), + NULL); + per_cpu(xen_resched_irq, cpu) = -1; } - if (per_cpu(timer_irq, cpu) >= 0) { - unbind_from_irqhandler(per_cpu(timer_irq, cpu), NULL); - per_cpu(timer_irq, cpu) = -1; + if (per_cpu(xen_timer_irq, cpu) >= 0) { + unbind_from_irqhandler(per_cpu(xen_timer_irq, cpu), + NULL); + per_cpu(xen_timer_irq, cpu) = -1; } } return NOTIFY_OK; diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c index dbeadb9c8e20..c1c544513e8d 100644 --- a/arch/ia64/xen/time.c +++ b/arch/ia64/xen/time.c @@ -34,15 +34,15 @@ #include "../kernel/fsyscall_gtod_data.h" -DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); -DEFINE_PER_CPU(unsigned long, processed_stolen_time); -DEFINE_PER_CPU(unsigned long, processed_blocked_time); +static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate); +static DEFINE_PER_CPU(unsigned long, xen_stolen_time); +static DEFINE_PER_CPU(unsigned long, xen_blocked_time); /* taken from i386/kernel/time-xen.c */ static void xen_init_missing_ticks_accounting(int cpu) { struct vcpu_register_runstate_memory_area area; - struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu); + struct vcpu_runstate_info *runstate = &per_cpu(xen_runstate, cpu); int rc; memset(runstate, 0, sizeof(*runstate)); @@ -52,8 +52,8 @@ static void xen_init_missing_ticks_accounting(int cpu) &area); WARN_ON(rc && rc != -ENOSYS); - per_cpu(processed_blocked_time, cpu) = runstate->time[RUNSTATE_blocked]; - per_cpu(processed_stolen_time, cpu) = runstate->time[RUNSTATE_runnable] + per_cpu(xen_blocked_time, cpu) = runstate->time[RUNSTATE_blocked]; + per_cpu(xen_stolen_time, cpu) = runstate->time[RUNSTATE_runnable] + runstate->time[RUNSTATE_offline]; } @@ -68,7 +68,7 @@ static void get_runstate_snapshot(struct vcpu_runstate_info *res) BUG_ON(preemptible()); - state = &__get_cpu_var(runstate); + state = &__get_cpu_var(xen_runstate); /* * The runstate info is always updated by the hypervisor on @@ -103,12 +103,12 @@ consider_steal_time(unsigned long new_itm) * This function just checks and reject this effect. */ if (!time_after_eq(runstate.time[RUNSTATE_blocked], - per_cpu(processed_blocked_time, cpu))) + per_cpu(xen_blocked_time, cpu))) blocked = 0; if (!time_after_eq(runstate.time[RUNSTATE_runnable] + runstate.time[RUNSTATE_offline], - per_cpu(processed_stolen_time, cpu))) + per_cpu(xen_stolen_time, cpu))) stolen = 0; if (!time_after(delta_itm + new_itm, ia64_get_itc())) @@ -147,8 +147,8 @@ consider_steal_time(unsigned long new_itm) } else { local_cpu_data->itm_next = delta_itm + new_itm; } - per_cpu(processed_stolen_time, cpu) += NS_PER_TICK * stolen; - per_cpu(processed_blocked_time, cpu) += NS_PER_TICK * blocked; + per_cpu(xen_stolen_time, cpu) += NS_PER_TICK * stolen; + per_cpu(xen_blocked_time, cpu) += NS_PER_TICK * blocked; } return delta_itm; } diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index fe03eeed7b48..1167d9830f5f 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -35,10 +35,10 @@ cpumask_var_t xen_cpu_initialized_map; -static DEFINE_PER_CPU(int, resched_irq); -static DEFINE_PER_CPU(int, callfunc_irq); -static DEFINE_PER_CPU(int, callfuncsingle_irq); -static DEFINE_PER_CPU(int, debug_irq) = -1; +static DEFINE_PER_CPU(int, xen_resched_irq); +static DEFINE_PER_CPU(int, xen_callfunc_irq); +static DEFINE_PER_CPU(int, xen_callfuncsingle_irq); +static DEFINE_PER_CPU(int, xen_debug_irq) = -1; static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id); static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); @@ -103,7 +103,7 @@ static int xen_smp_intr_init(unsigned int cpu) NULL); if (rc < 0) goto fail; - per_cpu(resched_irq, cpu) = rc; + per_cpu(xen_resched_irq, cpu) = rc; callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu); rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR, @@ -114,7 +114,7 @@ static int xen_smp_intr_init(unsigned int cpu) NULL); if (rc < 0) goto fail; - per_cpu(callfunc_irq, cpu) = rc; + per_cpu(xen_callfunc_irq, cpu) = rc; debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu); rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt, @@ -122,7 +122,7 @@ static int xen_smp_intr_init(unsigned int cpu) debug_name, NULL); if (rc < 0) goto fail; - per_cpu(debug_irq, cpu) = rc; + per_cpu(xen_debug_irq, cpu) = rc; callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu); rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR, @@ -133,19 +133,20 @@ static int xen_smp_intr_init(unsigned int cpu) NULL); if (rc < 0) goto fail; - per_cpu(callfuncsingle_irq, cpu) = rc; + per_cpu(xen_callfuncsingle_irq, cpu) = rc; return 0; fail: - if (per_cpu(resched_irq, cpu) >= 0) - unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL); - if (per_cpu(callfunc_irq, cpu) >= 0) - unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL); - if (per_cpu(debug_irq, cpu) >= 0) - unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL); - if (per_cpu(callfuncsingle_irq, cpu) >= 0) - unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL); + if (per_cpu(xen_resched_irq, cpu) >= 0) + unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL); + if (per_cpu(xen_callfunc_irq, cpu) >= 0) + unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL); + if (per_cpu(xen_debug_irq, cpu) >= 0) + unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL); + if (per_cpu(xen_callfuncsingle_irq, cpu) >= 0) + unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), + NULL); return rc; } @@ -348,10 +349,10 @@ static void xen_cpu_die(unsigned int cpu) current->state = TASK_UNINTERRUPTIBLE; schedule_timeout(HZ/10); } - unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL); - unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL); - unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL); - unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL); + unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL); + unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL); + unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL); + unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL); xen_uninit_lock_cpu(cpu); xen_teardown_timer(cpu); diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 0a5aa44299a5..26e37b787ad3 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -31,14 +31,14 @@ #define NS_PER_TICK (1000000000LL / HZ) /* runstate info updated by Xen */ -static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); +static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate); /* snapshots of runstate info */ -static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate_snapshot); +static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot); /* unused ns of stolen and blocked time */ -static DEFINE_PER_CPU(u64, residual_stolen); -static DEFINE_PER_CPU(u64, residual_blocked); +static DEFINE_PER_CPU(u64, xen_residual_stolen); +static DEFINE_PER_CPU(u64, xen_residual_blocked); /* return an consistent snapshot of 64-bit time/counter value */ static u64 get64(const u64 *p) @@ -79,7 +79,7 @@ static void get_runstate_snapshot(struct vcpu_runstate_info *res) BUG_ON(preemptible()); - state = &__get_cpu_var(runstate); + state = &__get_cpu_var(xen_runstate); /* * The runstate info is always updated by the hypervisor on @@ -97,14 +97,14 @@ static void get_runstate_snapshot(struct vcpu_runstate_info *res) /* return true when a vcpu could run but has no real cpu to run on */ bool xen_vcpu_stolen(int vcpu) { - return per_cpu(runstate, vcpu).state == RUNSTATE_runnable; + return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable; } static void setup_runstate_info(int cpu) { struct vcpu_register_runstate_memory_area area; - area.addr.v = &per_cpu(runstate, cpu); + area.addr.v = &per_cpu(xen_runstate, cpu); if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu, &area)) @@ -122,7 +122,7 @@ static void do_stolen_accounting(void) WARN_ON(state.state != RUNSTATE_running); - snap = &__get_cpu_var(runstate_snapshot); + snap = &__get_cpu_var(xen_runstate_snapshot); /* work out how much time the VCPU has not been runn*ing* */ blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked]; @@ -133,24 +133,24 @@ static void do_stolen_accounting(void) /* Add the appropriate number of ticks of stolen time, including any left-overs from last time. */ - stolen = runnable + offline + __get_cpu_var(residual_stolen); + stolen = runnable + offline + __get_cpu_var(xen_residual_stolen); if (stolen < 0) stolen = 0; ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); - __get_cpu_var(residual_stolen) = stolen; + __get_cpu_var(xen_residual_stolen) = stolen; account_steal_ticks(ticks); /* Add the appropriate number of ticks of blocked time, including any left-overs from last time. */ - blocked += __get_cpu_var(residual_blocked); + blocked += __get_cpu_var(xen_residual_blocked); if (blocked < 0) blocked = 0; ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked); - __get_cpu_var(residual_blocked) = blocked; + __get_cpu_var(xen_residual_blocked) = blocked; account_idle_ticks(ticks); } -- cgit v1.2.3 From 0fe1e009541e925adc1748a605d8b66188e4b2ab Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 29 Oct 2009 22:34:14 +0900 Subject: percpu: make percpu symbols in x86 unique This patch updates percpu related symbols in x86 such that percpu symbols are unique and don't clash with local symbols. This serves two purposes of decreasing the possibility of global percpu symbol collision and allowing dropping per_cpu__ prefix from percpu symbols. * arch/x86/kernel/cpu/common.c: rename local variable to avoid collision * arch/x86/kvm/svm.c: s/svm_data/sd/ for local variables to avoid collision * arch/x86/kernel/cpu/cpu_debug.c: s/cpu_arr/cpud_arr/ s/priv_arr/cpud_priv_arr/ s/cpu_priv_count/cpud_priv_count/ * arch/x86/kernel/cpu/intel_cacheinfo.c: s/cpuid4_info/ici_cpuid4_info/ s/cache_kobject/ici_cache_kobject/ s/index_kobject/ici_index_kobject/ * arch/x86/kernel/ds.c: s/cpu_context/cpu_ds_context/ Partly based on Rusty Russell's "alloc_percpu: rename percpu vars which cause name clashes" patch. Signed-off-by: Tejun Heo Acked-by: (kvm) Avi Kivity Cc: Rusty Russell Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Cc: Marcelo Tosatti Cc: x86@kernel.org --- arch/x86/kernel/cpu/common.c | 8 ++--- arch/x86/kernel/cpu/cpu_debug.c | 30 ++++++++--------- arch/x86/kernel/cpu/intel_cacheinfo.c | 54 +++++++++++++++--------------- arch/x86/kernel/ds.c | 4 +-- arch/x86/kvm/svm.c | 63 +++++++++++++++++------------------ 5 files changed, 79 insertions(+), 80 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cc25c2b4a567..3192f22f2fdd 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1093,7 +1093,7 @@ static void clear_all_debug_regs(void) void __cpuinit cpu_init(void) { - struct orig_ist *orig_ist; + struct orig_ist *oist; struct task_struct *me; struct tss_struct *t; unsigned long v; @@ -1102,7 +1102,7 @@ void __cpuinit cpu_init(void) cpu = stack_smp_processor_id(); t = &per_cpu(init_tss, cpu); - orig_ist = &per_cpu(orig_ist, cpu); + oist = &per_cpu(orig_ist, cpu); #ifdef CONFIG_NUMA if (cpu != 0 && percpu_read(node_number) == 0 && @@ -1143,12 +1143,12 @@ void __cpuinit cpu_init(void) /* * set up and load the per-CPU TSS */ - if (!orig_ist->ist[0]) { + if (!oist->ist[0]) { char *estacks = per_cpu(exception_stacks, cpu); for (v = 0; v < N_EXCEPTION_STACKS; v++) { estacks += exception_stack_sizes[v]; - orig_ist->ist[v] = t->x86_tss.ist[v] = + oist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; } } diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c index dca325c03999..b368cd862997 100644 --- a/arch/x86/kernel/cpu/cpu_debug.c +++ b/arch/x86/kernel/cpu/cpu_debug.c @@ -30,9 +30,9 @@ #include #include -static DEFINE_PER_CPU(struct cpu_cpuX_base [CPU_REG_ALL_BIT], cpu_arr); -static DEFINE_PER_CPU(struct cpu_private * [MAX_CPU_FILES], priv_arr); -static DEFINE_PER_CPU(int, cpu_priv_count); +static DEFINE_PER_CPU(struct cpu_cpuX_base [CPU_REG_ALL_BIT], cpud_arr); +static DEFINE_PER_CPU(struct cpu_private * [MAX_CPU_FILES], cpud_priv_arr); +static DEFINE_PER_CPU(int, cpud_priv_count); static DEFINE_MUTEX(cpu_debug_lock); @@ -531,7 +531,7 @@ static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg, /* Already intialized */ if (file == CPU_INDEX_BIT) - if (per_cpu(cpu_arr[type].init, cpu)) + if (per_cpu(cpud_arr[type].init, cpu)) return 0; priv = kzalloc(sizeof(*priv), GFP_KERNEL); @@ -543,8 +543,8 @@ static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg, priv->reg = reg; priv->file = file; mutex_lock(&cpu_debug_lock); - per_cpu(priv_arr[type], cpu) = priv; - per_cpu(cpu_priv_count, cpu)++; + per_cpu(cpud_priv_arr[type], cpu) = priv; + per_cpu(cpud_priv_count, cpu)++; mutex_unlock(&cpu_debug_lock); if (file) @@ -552,10 +552,10 @@ static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg, dentry, (void *)priv, &cpu_fops); else { debugfs_create_file(cpu_base[type].name, S_IRUGO, - per_cpu(cpu_arr[type].dentry, cpu), + per_cpu(cpud_arr[type].dentry, cpu), (void *)priv, &cpu_fops); mutex_lock(&cpu_debug_lock); - per_cpu(cpu_arr[type].init, cpu) = 1; + per_cpu(cpud_arr[type].init, cpu) = 1; mutex_unlock(&cpu_debug_lock); } @@ -615,7 +615,7 @@ static int cpu_init_allreg(unsigned cpu, struct dentry *dentry) if (!is_typeflag_valid(cpu, cpu_base[type].flag)) continue; cpu_dentry = debugfs_create_dir(cpu_base[type].name, dentry); - per_cpu(cpu_arr[type].dentry, cpu) = cpu_dentry; + per_cpu(cpud_arr[type].dentry, cpu) = cpu_dentry; if (type < CPU_TSS_BIT) err = cpu_init_msr(cpu, type, cpu_dentry); @@ -647,11 +647,11 @@ static int cpu_init_cpu(void) err = cpu_init_allreg(cpu, cpu_dentry); pr_info("cpu%d(%d) debug files %d\n", - cpu, nr_cpu_ids, per_cpu(cpu_priv_count, cpu)); - if (per_cpu(cpu_priv_count, cpu) > MAX_CPU_FILES) { + cpu, nr_cpu_ids, per_cpu(cpud_priv_count, cpu)); + if (per_cpu(cpud_priv_count, cpu) > MAX_CPU_FILES) { pr_err("Register files count %d exceeds limit %d\n", - per_cpu(cpu_priv_count, cpu), MAX_CPU_FILES); - per_cpu(cpu_priv_count, cpu) = MAX_CPU_FILES; + per_cpu(cpud_priv_count, cpu), MAX_CPU_FILES); + per_cpu(cpud_priv_count, cpu) = MAX_CPU_FILES; err = -ENFILE; } if (err) @@ -676,8 +676,8 @@ static void __exit cpu_debug_exit(void) debugfs_remove_recursive(cpu_debugfs_dir); for (cpu = 0; cpu < nr_cpu_ids; cpu++) - for (i = 0; i < per_cpu(cpu_priv_count, cpu); i++) - kfree(per_cpu(priv_arr[i], cpu)); + for (i = 0; i < per_cpu(cpud_priv_count, cpu); i++) + kfree(per_cpu(cpud_priv_arr[i], cpu)); } module_init(cpu_debug_init); diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 804c40e2bc3e..f5ccb4fa5a5d 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -512,8 +512,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) #ifdef CONFIG_SYSFS /* pointer to _cpuid4_info array (for each cache leaf) */ -static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info); -#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) +static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info); +#define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y])) #ifdef CONFIG_SMP static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) @@ -526,7 +526,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) { struct cpuinfo_x86 *d; for_each_online_cpu(i) { - if (!per_cpu(cpuid4_info, i)) + if (!per_cpu(ici_cpuid4_info, i)) continue; d = &cpu_data(i); this_leaf = CPUID4_INFO_IDX(i, index); @@ -548,7 +548,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) c->apicid >> index_msb) { cpumask_set_cpu(i, to_cpumask(this_leaf->shared_cpu_map)); - if (i != cpu && per_cpu(cpuid4_info, i)) { + if (i != cpu && per_cpu(ici_cpuid4_info, i)) { sibling_leaf = CPUID4_INFO_IDX(i, index); cpumask_set_cpu(cpu, to_cpumask( @@ -587,8 +587,8 @@ static void __cpuinit free_cache_attributes(unsigned int cpu) for (i = 0; i < num_cache_leaves; i++) cache_remove_shared_cpu_map(cpu, i); - kfree(per_cpu(cpuid4_info, cpu)); - per_cpu(cpuid4_info, cpu) = NULL; + kfree(per_cpu(ici_cpuid4_info, cpu)); + per_cpu(ici_cpuid4_info, cpu) = NULL; } static int @@ -627,15 +627,15 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) if (num_cache_leaves == 0) return -ENOENT; - per_cpu(cpuid4_info, cpu) = kzalloc( + per_cpu(ici_cpuid4_info, cpu) = kzalloc( sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); - if (per_cpu(cpuid4_info, cpu) == NULL) + if (per_cpu(ici_cpuid4_info, cpu) == NULL) return -ENOMEM; smp_call_function_single(cpu, get_cpu_leaves, &retval, true); if (retval) { - kfree(per_cpu(cpuid4_info, cpu)); - per_cpu(cpuid4_info, cpu) = NULL; + kfree(per_cpu(ici_cpuid4_info, cpu)); + per_cpu(ici_cpuid4_info, cpu) = NULL; } return retval; @@ -647,7 +647,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */ /* pointer to kobject for cpuX/cache */ -static DEFINE_PER_CPU(struct kobject *, cache_kobject); +static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject); struct _index_kobject { struct kobject kobj; @@ -656,8 +656,8 @@ struct _index_kobject { }; /* pointer to array of kobjects for cpuX/cache/indexY */ -static DEFINE_PER_CPU(struct _index_kobject *, index_kobject); -#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y])) +static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject); +#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(ici_index_kobject, x))[y])) #define show_one_plus(file_name, object, val) \ static ssize_t show_##file_name \ @@ -876,10 +876,10 @@ static struct kobj_type ktype_percpu_entry = { static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu) { - kfree(per_cpu(cache_kobject, cpu)); - kfree(per_cpu(index_kobject, cpu)); - per_cpu(cache_kobject, cpu) = NULL; - per_cpu(index_kobject, cpu) = NULL; + kfree(per_cpu(ici_cache_kobject, cpu)); + kfree(per_cpu(ici_index_kobject, cpu)); + per_cpu(ici_cache_kobject, cpu) = NULL; + per_cpu(ici_index_kobject, cpu) = NULL; free_cache_attributes(cpu); } @@ -895,14 +895,14 @@ static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu) return err; /* Allocate all required memory */ - per_cpu(cache_kobject, cpu) = + per_cpu(ici_cache_kobject, cpu) = kzalloc(sizeof(struct kobject), GFP_KERNEL); - if (unlikely(per_cpu(cache_kobject, cpu) == NULL)) + if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL)) goto err_out; - per_cpu(index_kobject, cpu) = kzalloc( + per_cpu(ici_index_kobject, cpu) = kzalloc( sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL); - if (unlikely(per_cpu(index_kobject, cpu) == NULL)) + if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL)) goto err_out; return 0; @@ -926,7 +926,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) if (unlikely(retval < 0)) return retval; - retval = kobject_init_and_add(per_cpu(cache_kobject, cpu), + retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu), &ktype_percpu_entry, &sys_dev->kobj, "%s", "cache"); if (retval < 0) { @@ -940,12 +940,12 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) this_object->index = i; retval = kobject_init_and_add(&(this_object->kobj), &ktype_cache, - per_cpu(cache_kobject, cpu), + per_cpu(ici_cache_kobject, cpu), "index%1lu", i); if (unlikely(retval)) { for (j = 0; j < i; j++) kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj)); - kobject_put(per_cpu(cache_kobject, cpu)); + kobject_put(per_cpu(ici_cache_kobject, cpu)); cpuid4_cache_sysfs_exit(cpu); return retval; } @@ -953,7 +953,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) } cpumask_set_cpu(cpu, to_cpumask(cache_dev_map)); - kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); + kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD); return 0; } @@ -962,7 +962,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) unsigned int cpu = sys_dev->id; unsigned long i; - if (per_cpu(cpuid4_info, cpu) == NULL) + if (per_cpu(ici_cpuid4_info, cpu) == NULL) return; if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map))) return; @@ -970,7 +970,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) for (i = 0; i < num_cache_leaves; i++) kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj)); - kobject_put(per_cpu(cache_kobject, cpu)); + kobject_put(per_cpu(ici_cache_kobject, cpu)); cpuid4_cache_sysfs_exit(cpu); } diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index ef42a038f1a6..1c47390dd0e5 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -265,13 +265,13 @@ struct ds_context { int cpu; }; -static DEFINE_PER_CPU(struct ds_context *, cpu_context); +static DEFINE_PER_CPU(struct ds_context *, cpu_ds_context); static struct ds_context *ds_get_context(struct task_struct *task, int cpu) { struct ds_context **p_context = - (task ? &task->thread.ds_ctx : &per_cpu(cpu_context, cpu)); + (task ? &task->thread.ds_ctx : &per_cpu(cpu_ds_context, cpu)); struct ds_context *context = NULL; struct ds_context *new_context = NULL; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 944cc9c04b3c..6c79a14a3b6f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -319,7 +319,7 @@ static void svm_hardware_disable(void *garbage) static void svm_hardware_enable(void *garbage) { - struct svm_cpu_data *svm_data; + struct svm_cpu_data *sd; uint64_t efer; struct descriptor_table gdt_descr; struct desc_struct *gdt; @@ -329,62 +329,61 @@ static void svm_hardware_enable(void *garbage) printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me); return; } - svm_data = per_cpu(svm_data, me); + sd = per_cpu(svm_data, me); - if (!svm_data) { + if (!sd) { printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n", me); return; } - svm_data->asid_generation = 1; - svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; - svm_data->next_asid = svm_data->max_asid + 1; + sd->asid_generation = 1; + sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; + sd->next_asid = sd->max_asid + 1; kvm_get_gdt(&gdt_descr); gdt = (struct desc_struct *)gdt_descr.base; - svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); + sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); rdmsrl(MSR_EFER, efer); wrmsrl(MSR_EFER, efer | EFER_SVME); wrmsrl(MSR_VM_HSAVE_PA, - page_to_pfn(svm_data->save_area) << PAGE_SHIFT); + page_to_pfn(sd->save_area) << PAGE_SHIFT); } static void svm_cpu_uninit(int cpu) { - struct svm_cpu_data *svm_data - = per_cpu(svm_data, raw_smp_processor_id()); + struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id()); - if (!svm_data) + if (!sd) return; per_cpu(svm_data, raw_smp_processor_id()) = NULL; - __free_page(svm_data->save_area); - kfree(svm_data); + __free_page(sd->save_area); + kfree(sd); } static int svm_cpu_init(int cpu) { - struct svm_cpu_data *svm_data; + struct svm_cpu_data *sd; int r; - svm_data = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL); - if (!svm_data) + sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL); + if (!sd) return -ENOMEM; - svm_data->cpu = cpu; - svm_data->save_area = alloc_page(GFP_KERNEL); + sd->cpu = cpu; + sd->save_area = alloc_page(GFP_KERNEL); r = -ENOMEM; - if (!svm_data->save_area) + if (!sd->save_area) goto err_1; - per_cpu(svm_data, cpu) = svm_data; + per_cpu(svm_data, cpu) = sd; return 0; err_1: - kfree(svm_data); + kfree(sd); return r; } @@ -1094,16 +1093,16 @@ static void save_host_msrs(struct kvm_vcpu *vcpu) #endif } -static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data) +static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) { - if (svm_data->next_asid > svm_data->max_asid) { - ++svm_data->asid_generation; - svm_data->next_asid = 1; + if (sd->next_asid > sd->max_asid) { + ++sd->asid_generation; + sd->next_asid = 1; svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; } - svm->asid_generation = svm_data->asid_generation; - svm->vmcb->control.asid = svm_data->next_asid++; + svm->asid_generation = sd->asid_generation; + svm->vmcb->control.asid = sd->next_asid++; } static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr) @@ -2377,8 +2376,8 @@ static void reload_tss(struct kvm_vcpu *vcpu) { int cpu = raw_smp_processor_id(); - struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); - svm_data->tss_desc->type = 9; /* available 32/64-bit TSS */ + struct svm_cpu_data *sd = per_cpu(svm_data, cpu); + sd->tss_desc->type = 9; /* available 32/64-bit TSS */ load_TR_desc(); } @@ -2386,12 +2385,12 @@ static void pre_svm_run(struct vcpu_svm *svm) { int cpu = raw_smp_processor_id(); - struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); + struct svm_cpu_data *sd = per_cpu(svm_data, cpu); svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; /* FIXME: handle wraparound of asid_generation */ - if (svm->asid_generation != svm_data->asid_generation) - new_asid(svm, svm_data); + if (svm->asid_generation != sd->asid_generation) + new_asid(svm, sd); } static void svm_inject_nmi(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 2d06ef7f42ed8c9969c9aa84e95df5d5c6378327 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 1 Nov 2009 12:49:44 -0500 Subject: crypto: ghash-intel - Hard-code pshufb Old gases don't have a clue what pshufb stands for so we have to hard-code it for now. Reported-by: Andrew Morton Signed-off-by: Herbert Xu --- arch/x86/crypto/ghash-clmulni-intel_asm.S | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S index b9e787a511da..71768d543dbb 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_asm.S +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -100,9 +100,11 @@ ENTRY(clmul_ghash_mul) movups (%rdi), DATA movups (%rsi), SHASH movaps .Lbswap_mask, BSWAP - pshufb BSWAP, DATA + # pshufb BSWAP, DATA + .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 call __clmul_gf128mul_ble - pshufb BSWAP, DATA + # pshufb BSWAP, DATA + .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 movups DATA, (%rdi) ret @@ -116,18 +118,21 @@ ENTRY(clmul_ghash_update) movaps .Lbswap_mask, BSWAP movups (%rdi), DATA movups (%rcx), SHASH - pshufb BSWAP, DATA + # pshufb BSWAP, DATA + .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 .align 4 .Lupdate_loop: movups (%rsi), IN1 - pshufb BSWAP, IN1 + # pshufb BSWAP, IN1 + .byte 0x66, 0x0f, 0x38, 0x00, 0xf5 pxor IN1, DATA call __clmul_gf128mul_ble sub $16, %rdx add $16, %rsi cmp $16, %rdx jge .Lupdate_loop - pshufb BSWAP, DATA + # pshufb BSWAP, DATA + .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 movups DATA, (%rdi) .Lupdate_just_ret: ret @@ -140,7 +145,8 @@ ENTRY(clmul_ghash_update) ENTRY(clmul_ghash_setkey) movaps .Lbswap_mask, BSWAP movups (%rsi), %xmm0 - pshufb BSWAP, %xmm0 + # pshufb BSWAP, %xmm0 + .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 movaps %xmm0, %xmm1 psllq $1, %xmm0 psrlq $63, %xmm1 -- cgit v1.2.3 From 16121d70fdf9eeb05ead46b241a293156323dbbe Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sun, 1 Nov 2009 19:27:05 -0500 Subject: x86: Fix printk message typo in mtrr cleanup code Trivial typo. Signed-off-by: Dave Jones LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/cleanup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 315738c74aad..73c86db5acbe 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -846,7 +846,7 @@ int __init mtrr_cleanup(unsigned address_bits) sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); range_sums = sum_ranges(range, nr_range); - printk(KERN_INFO "total RAM coverred: %ldM\n", + printk(KERN_INFO "total RAM covered: %ldM\n", range_sums >> (20 - PAGE_SHIFT)); if (mtrr_chunk_size && mtrr_gran_size) { -- cgit v1.2.3 From 05154752cf3767c544b65b5e340793d40b3f1229 Mon Sep 17 00:00:00 2001 From: Gottfried Haider Date: Mon, 2 Nov 2009 11:51:11 +0100 Subject: x86: Add reboot quirk for 3 series Mac mini MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reboot does not work out of the box on my "Early 2009" Mac mini (3,1). Detect this machine via DMI as we do for recent MacBooks. Signed-off-by: Gottfried Haider Cc: Ozan ÇaÄŸlayan Cc: Paul Mackerras Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index a1a3cdda06e1..f93078746e00 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -436,6 +436,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"), }, }, + { /* Handle problems with rebooting on Apple Macmini3,1 */ + .callback = set_pci_reboot, + .ident = "Apple Macmini3,1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Macmini3,1"), + }, + }, { } }; -- cgit v1.2.3 From 7a7732bc0f7c46f217dbec723f25366b6285cc42 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 26 Oct 2009 14:24:31 -0800 Subject: x86: Unify fixup_irqs() for 32-bit and 64-bit kernels There is no reason to have different fixup_irqs() for 32-bit and 64-bit kernels. Unify by using the superior 64-bit version for both the kernels. Signed-off-by: Suresh Siddha Signed-off-by: Gary Hade Cc: Eric W. Biederman LKML-Reference: <20091026230001.562512739@sbs-t61.sc.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/irq_32.c | 45 ------------------------------------ arch/x86/kernel/irq_64.c | 58 ----------------------------------------------- 3 files changed, 59 insertions(+), 103 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 391206199515..3ea66556e5e1 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -276,3 +276,62 @@ void smp_generic_interrupt(struct pt_regs *regs) } EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); + +#ifdef CONFIG_HOTPLUG_CPU +/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ +void fixup_irqs(void) +{ + unsigned int irq; + static int warned; + struct irq_desc *desc; + + for_each_irq_desc(irq, desc) { + int break_affinity = 0; + int set_affinity = 1; + const struct cpumask *affinity; + + if (!desc) + continue; + if (irq == 2) + continue; + + /* interrupt's are disabled at this point */ + spin_lock(&desc->lock); + + affinity = desc->affinity; + if (!irq_has_action(irq) || + cpumask_equal(affinity, cpu_online_mask)) { + spin_unlock(&desc->lock); + continue; + } + + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + break_affinity = 1; + affinity = cpu_all_mask; + } + + if (desc->chip->mask) + desc->chip->mask(irq); + + if (desc->chip->set_affinity) + desc->chip->set_affinity(irq, affinity); + else if (!(warned++)) + set_affinity = 0; + + if (desc->chip->unmask) + desc->chip->unmask(irq); + + spin_unlock(&desc->lock); + + if (break_affinity && set_affinity) + printk("Broke affinity for irq %i\n", irq); + else if (!set_affinity) + printk("Cannot set affinity for irq %i\n", irq); + } + + /* That doesn't seem sufficient. Give it 1ms. */ + local_irq_enable(); + mdelay(1); + local_irq_disable(); +} +#endif diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 7d35d0fe2329..10709f29d166 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -211,48 +211,3 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) return true; } - -#ifdef CONFIG_HOTPLUG_CPU - -/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ -void fixup_irqs(void) -{ - unsigned int irq; - struct irq_desc *desc; - - for_each_irq_desc(irq, desc) { - const struct cpumask *affinity; - - if (!desc) - continue; - if (irq == 2) - continue; - - affinity = desc->affinity; - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { - printk("Breaking affinity for irq %i\n", irq); - affinity = cpu_all_mask; - } - if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, affinity); - else if (desc->action) - printk_once("Cannot set affinity for irq %i\n", irq); - } - -#if 0 - barrier(); - /* Ingo Molnar says: "after the IO-APIC masks have been redirected - [note the nop - the interrupt-enable boundary on x86 is two - instructions from sti] - to flush out pending hardirqs and - IPIs. After this point nothing is supposed to reach this CPU." */ - __asm__ __volatile__("sti; nop; cli"); - barrier(); -#else - /* That doesn't seem sufficient. Give it 1ms. */ - local_irq_enable(); - mdelay(1); - local_irq_disable(); -#endif -} -#endif - diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 977d8b43a0dd..acf8fbf8fbda 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -62,64 +62,6 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) return true; } -#ifdef CONFIG_HOTPLUG_CPU -/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ -void fixup_irqs(void) -{ - unsigned int irq; - static int warned; - struct irq_desc *desc; - - for_each_irq_desc(irq, desc) { - int break_affinity = 0; - int set_affinity = 1; - const struct cpumask *affinity; - - if (!desc) - continue; - if (irq == 2) - continue; - - /* interrupt's are disabled at this point */ - spin_lock(&desc->lock); - - affinity = desc->affinity; - if (!irq_has_action(irq) || - cpumask_equal(affinity, cpu_online_mask)) { - spin_unlock(&desc->lock); - continue; - } - - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { - break_affinity = 1; - affinity = cpu_all_mask; - } - - if (desc->chip->mask) - desc->chip->mask(irq); - - if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, affinity); - else if (!(warned++)) - set_affinity = 0; - - if (desc->chip->unmask) - desc->chip->unmask(irq); - - spin_unlock(&desc->lock); - - if (break_affinity && set_affinity) - printk("Broke affinity for irq %i\n", irq); - else if (!set_affinity) - printk("Cannot set affinity for irq %i\n", irq); - } - - /* That doesn't seem sufficient. Give it 1ms. */ - local_irq_enable(); - mdelay(1); - local_irq_disable(); -} -#endif extern void call_softirq(void); -- cgit v1.2.3 From 84e21493a3b28c9fefe99fe827fc0c0c101a813d Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 26 Oct 2009 14:24:32 -0800 Subject: x86, intr-remap: Avoid irq_chip mask/unmask in fixup_irqs() for intr-remapping In the presence of interrupt-remapping, irqs will be migrated in the process context and we don't do (and there is no need to) irq_chip mask/unmask while migrating the interrupt. Similarly fix the fixup_irqs() that get called during cpu offline and avoid calling irq_chip mask/unmask for irqs that are ok to be migrated in the process context. While we didn't observe any race condition with the existing code, this change takes complete advantage of interrupt-remapping in the newer generation platforms and avoids any potential HW lockup's (that often worry Eric :) Signed-off-by: Suresh Siddha Acked-by: Eric W. Biederman Cc: garyhade@us.ibm.com LKML-Reference: <20091026230001.661423939@sbs-t61.sc.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 3ea66556e5e1..342bcbca19b4 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -310,7 +310,7 @@ void fixup_irqs(void) affinity = cpu_all_mask; } - if (desc->chip->mask) + if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->mask) desc->chip->mask(irq); if (desc->chip->set_affinity) @@ -318,7 +318,7 @@ void fixup_irqs(void) else if (!(warned++)) set_affinity = 0; - if (desc->chip->unmask) + if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask) desc->chip->unmask(irq); spin_unlock(&desc->lock); -- cgit v1.2.3 From 23359a88e7eca3c4f402562b102f23014db3c2aa Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 26 Oct 2009 14:24:33 -0800 Subject: x86: Remove move_cleanup_count from irq_cfg move_cleanup_count for each irq in irq_cfg is keeping track of the total number of cpus that need to free the corresponding vectors associated with the irq which has now been migrated to new destination. As long as this move_cleanup_count is non-zero (i.e., as long as we have n't freed the vector allocations on the old destinations) we were preventing the irq's further migration. This cleanup count is unnecessary and it is enough to not allow the irq migration till we send the cleanup vector to the previous irq destination, for which we already have irq_cfg's move_in_progress. All we need to make sure is that we free the vector at the old desintation but we don't need to wait till that gets freed. Signed-off-by: Suresh Siddha Acked-by: Gary Hade Cc: Eric W. Biederman LKML-Reference: <20091026230001.752968906@sbs-t61.sc.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hw_irq.h | 1 - arch/x86/kernel/apic/io_apic.c | 9 +-------- 2 files changed, 1 insertion(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 1984ce9a13d2..6e124269fd4b 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -94,7 +94,6 @@ struct irq_cfg { struct irq_pin_list *irq_2_pin; cpumask_var_t domain; cpumask_var_t old_domain; - unsigned move_cleanup_count; u8 vector; u8 move_in_progress : 1; }; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ce16b65cfdcc..e9e5b02c3af2 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1161,7 +1161,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) int cpu, err; cpumask_var_t tmp_mask; - if ((cfg->move_in_progress) || cfg->move_cleanup_count) + if (cfg->move_in_progress) return -EBUSY; if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) @@ -2234,14 +2234,10 @@ void send_cleanup_vector(struct irq_cfg *cfg) if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { unsigned int i; - cfg->move_cleanup_count = 0; - for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) - cfg->move_cleanup_count++; for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); } else { cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); - cfg->move_cleanup_count = cpumask_weight(cleanup_mask); apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); free_cpumask_var(cleanup_mask); } @@ -2430,8 +2426,6 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) cfg = irq_cfg(irq); spin_lock(&desc->lock); - if (!cfg->move_cleanup_count) - goto unlock; if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) goto unlock; @@ -2449,7 +2443,6 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) goto unlock; } __get_cpu_var(vector_irq)[vector] = -1; - cfg->move_cleanup_count--; unlock: spin_unlock(&desc->lock); } -- cgit v1.2.3 From a5e74b841930bec78a4684ab9f208b2ddfe7c736 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 26 Oct 2009 14:24:34 -0800 Subject: x86: Force irq complete move during cpu offline When a cpu goes offline, fixup_irqs() try to move irq's currently destined to the offline cpu to a new cpu. But this attempt will fail if the irq is recently moved to this cpu and the irq still hasn't arrived at this cpu (for non intr-remapping platforms this is when we free the vector allocation at the previous destination) that is about to go offline. This will endup with the interrupt subsystem still pointing the irq to the offline cpu, causing that irq to not work any more. Fix this by forcing the irq to complete its move (its been a long time we moved the irq to this cpu which we are offlining now) and then move this irq to a new cpu before this cpu goes offline. Signed-off-by: Suresh Siddha Acked-by: Gary Hade Cc: Eric W. Biederman LKML-Reference: <20091026230001.848830905@sbs-t61.sc.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq.h | 1 + arch/x86/kernel/apic/io_apic.c | 18 +++++++++++++++--- arch/x86/kernel/irq.c | 7 +++++++ 3 files changed, 23 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index ddda6cbed6f4..ffd700ff5dcb 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -34,6 +34,7 @@ static inline int irq_canonicalize(int irq) #ifdef CONFIG_HOTPLUG_CPU #include extern void fixup_irqs(void); +extern void irq_force_complete_move(int); #endif extern void (*generic_interrupt_extension)(void); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index e9e5b02c3af2..4e886efd9a15 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2450,21 +2450,33 @@ unlock: irq_exit(); } -static void irq_complete_move(struct irq_desc **descp) +static void __irq_complete_move(struct irq_desc **descp, unsigned vector) { struct irq_desc *desc = *descp; struct irq_cfg *cfg = desc->chip_data; - unsigned vector, me; + unsigned me; if (likely(!cfg->move_in_progress)) return; - vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) send_cleanup_vector(cfg); } + +static void irq_complete_move(struct irq_desc **descp) +{ + __irq_complete_move(descp, ~get_irq_regs()->orig_ax); +} + +void irq_force_complete_move(int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + struct irq_cfg *cfg = desc->chip_data; + + __irq_complete_move(&desc, cfg->vector); +} #else static inline void irq_complete_move(struct irq_desc **descp) {} #endif diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 342bcbca19b4..b10a5e1da06c 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -305,6 +305,13 @@ void fixup_irqs(void) continue; } + /* + * Complete the irq move. This cpu is going down and for + * non intr-remapping case, we can't wait till this interrupt + * arrives at this cpu before completing the irq move. + */ + irq_force_complete_move(irq); + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { break_affinity = 1; affinity = cpu_all_mask; -- cgit v1.2.3 From b3ec0a37a7907813bb4fb85a2d94102c152470b7 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 26 Oct 2009 14:24:35 -0800 Subject: x86: Use EOI register in io-apic on intel platforms IO-APIC's in intel chipsets support EOI register starting from IO-APIC version 2. Use that when ever we need to clear the IO-APIC RTE's RemoteIRR bit explicitly. Signed-off-by: Suresh Siddha Acked-by: Gary Hade Cc: Eric W. Biederman LKML-Reference: <20091026230001.947855317@sbs-t61.sc.intel.com> [ Marked use_eio_reg as __read_mostly, fixed small details ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 81 ++++++++++++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 27 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 4e886efd9a15..31e9db3c12ad 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2492,6 +2492,51 @@ static void ack_apic_edge(unsigned int irq) atomic_t irq_mis_count; +static int use_eoi_reg __read_mostly; + +static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) +{ + struct irq_pin_list *entry; + + for_each_irq_pin(entry, cfg->irq_2_pin) { + if (irq_remapped(irq)) + io_apic_eoi(entry->apic, entry->pin); + else + io_apic_eoi(entry->apic, cfg->vector); + } +} + +static void eoi_ioapic_irq(struct irq_desc *desc) +{ + struct irq_cfg *cfg; + unsigned long flags; + unsigned int irq; + + irq = desc->irq; + cfg = desc->chip_data; + + spin_lock_irqsave(&ioapic_lock, flags); + __eoi_ioapic_irq(irq, cfg); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +static int ioapic_supports_eoi(void) +{ + struct pci_dev *root; + + root = pci_get_bus_and_slot(0, PCI_DEVFN(0, 0)); + if (root && root->vendor == PCI_VENDOR_ID_INTEL && + mp_ioapics[0].apicver >= 0x2) { + use_eoi_reg = 1; + printk(KERN_INFO "IO-APIC supports EOI register\n"); + } else + printk(KERN_INFO "IO-APIC doesn't support EOI\n"); + + return 0; +} + +fs_initcall(ioapic_supports_eoi); + static void ack_apic_level(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); @@ -2575,37 +2620,19 @@ static void ack_apic_level(unsigned int irq) /* Tail end of version 0x11 I/O APIC bug workaround */ if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); - spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(cfg); - __unmask_and_level_IO_APIC_irq(cfg); - spin_unlock(&ioapic_lock); + + if (use_eoi_reg) + eoi_ioapic_irq(desc); + else { + spin_lock(&ioapic_lock); + __mask_and_edge_IO_APIC_irq(cfg); + __unmask_and_level_IO_APIC_irq(cfg); + spin_unlock(&ioapic_lock); + } } } #ifdef CONFIG_INTR_REMAP -static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) -{ - struct irq_pin_list *entry; - - for_each_irq_pin(entry, cfg->irq_2_pin) - io_apic_eoi(entry->apic, entry->pin); -} - -static void -eoi_ioapic_irq(struct irq_desc *desc) -{ - struct irq_cfg *cfg; - unsigned long flags; - unsigned int irq; - - irq = desc->irq; - cfg = desc->chip_data; - - spin_lock_irqsave(&ioapic_lock, flags); - __eoi_ioapic_irq(irq, cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - static void ir_ack_apic_edge(unsigned int irq) { ack_APIC_irq(); -- cgit v1.2.3 From 5231a68614b94f60e8f6c56bc6e3d75955b9e75e Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 26 Oct 2009 14:24:36 -0800 Subject: x86: Remove local_irq_enable()/local_irq_disable() in fixup_irqs() To ensure that we handle all the pending interrupts (destined for this cpu that is going down) in the interrupt subsystem before the cpu goes offline, fixup_irqs() does: local_irq_enable(); mdelay(1); local_irq_disable(); Enabling interrupts is not a good thing as this cpu is already offline. So this patch replaces that logic with, mdelay(1); check APIC_IRR bits Retrigger the irq at the new destination if any interrupt has arrived via IPI. For IO-APIC level triggered interrupts, this retrigger IPI will appear as an edge interrupt. ack_apic_level() will detect this condition and IO-APIC RTE's remoteIRR is cleared using directed EOI(using IO-APIC EOI register) on Intel platforms and for others it uses the existing mask+edge logic followed by unmask+level. We can also remove mdelay() and then send spuriuous interrupts to new cpu targets for all the irqs that were handled previously by this cpu that is going offline. While it works, I have seen spurious interrupt messages (nothing wrong but still annoying messages during cpu offline, which can be seen during suspend/resume etc) Signed-off-by: Suresh Siddha Acked-by: Gary Hade Cc: Eric W. Biederman LKML-Reference: <20091026230002.043281924@sbs-t61.sc.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index b10a5e1da06c..8a82728d47c1 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -281,7 +281,7 @@ EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ void fixup_irqs(void) { - unsigned int irq; + unsigned int irq, vector; static int warned; struct irq_desc *desc; @@ -336,9 +336,33 @@ void fixup_irqs(void) printk("Cannot set affinity for irq %i\n", irq); } - /* That doesn't seem sufficient. Give it 1ms. */ - local_irq_enable(); + /* + * We can remove mdelay() and then send spuriuous interrupts to + * new cpu targets for all the irqs that were handled previously by + * this cpu. While it works, I have seen spurious interrupt messages + * (nothing wrong but still...). + * + * So for now, retain mdelay(1) and check the IRR and then send those + * interrupts to new targets as this cpu is already offlined... + */ mdelay(1); - local_irq_disable(); + + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { + unsigned int irr; + + if (__get_cpu_var(vector_irq)[vector] < 0) + continue; + + irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); + if (irr & (1 << (vector % 32))) { + irq = __get_cpu_var(vector_irq)[vector]; + + desc = irq_to_desc(irq); + spin_lock(&desc->lock); + if (desc->chip->retrigger) + desc->chip->retrigger(irq); + spin_unlock(&desc->lock); + } + } } #endif -- cgit v1.2.3 From 6b9de613ae9c79b637e070136585dde029578065 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Mon, 2 Nov 2009 20:36:51 +0100 Subject: sched: Disable SD_PREFER_LOCAL at node level Yanmin Zhang reported that SD_PREFER_LOCAL induces an order of magnitude increase in select_task_rq_fair() overhead while running heavy wakeup benchmarks (tbench and vmark). Since SD_BALANCE_WAKE is off at node level, turn SD_PREFER_LOCAL off as well pending further investigation. Reported-by: Zhang, Yanmin Signed-off-by: Mike Galbraith Cc: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index d823c245f63b..40e37b10c6c0 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -143,7 +143,7 @@ extern unsigned long node_remap_size[]; | 1*SD_BALANCE_FORK \ | 0*SD_BALANCE_WAKE \ | 1*SD_WAKE_AFFINE \ - | 1*SD_PREFER_LOCAL \ + | 0*SD_PREFER_LOCAL \ | 0*SD_SHARE_CPUPOWER \ | 0*SD_POWERSAVINGS_BALANCE \ | 0*SD_SHARE_PKG_RESOURCES \ -- cgit v1.2.3 From 3b0d65969b549b796abc6f0230f6142fed365d49 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 3 Nov 2009 09:11:15 -0500 Subject: crypto: ghash-intel - Add PSHUFB macros Add PSHUFB macros instead of repeating byte sequences, suggested by Ingo. Signed-off-by: Herbert Xu Acked-by: Ingo Molnar --- arch/x86/crypto/ghash-clmulni-intel_asm.S | 11 ++++++----- arch/x86/include/asm/i387.h | 7 +++++++ 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S index 71768d543dbb..59584982fb75 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_asm.S +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -17,6 +17,7 @@ */ #include +#include .align 16 .Lbswap_mask: @@ -101,7 +102,7 @@ ENTRY(clmul_ghash_mul) movups (%rsi), SHASH movaps .Lbswap_mask, BSWAP # pshufb BSWAP, DATA - .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 + PSHUFB_XMM5_XMM0 call __clmul_gf128mul_ble # pshufb BSWAP, DATA .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 @@ -119,12 +120,12 @@ ENTRY(clmul_ghash_update) movups (%rdi), DATA movups (%rcx), SHASH # pshufb BSWAP, DATA - .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 + PSHUFB_XMM5_XMM0 .align 4 .Lupdate_loop: movups (%rsi), IN1 # pshufb BSWAP, IN1 - .byte 0x66, 0x0f, 0x38, 0x00, 0xf5 + PSHUFB_XMM5_XMM6 pxor IN1, DATA call __clmul_gf128mul_ble sub $16, %rdx @@ -132,7 +133,7 @@ ENTRY(clmul_ghash_update) cmp $16, %rdx jge .Lupdate_loop # pshufb BSWAP, DATA - .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 + PSHUFB_XMM5_XMM0 movups DATA, (%rdi) .Lupdate_just_ret: ret @@ -146,7 +147,7 @@ ENTRY(clmul_ghash_setkey) movaps .Lbswap_mask, BSWAP movups (%rsi), %xmm0 # pshufb BSWAP, %xmm0 - .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 + PSHUFB_XMM5_XMM0 movaps %xmm0, %xmm1 psllq $1, %xmm0 psrlq $63, %xmm1 diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 0b20bbb758f2..ebfb8a9e11f7 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -10,6 +10,8 @@ #ifndef _ASM_X86_I387_H #define _ASM_X86_I387_H +#ifndef __ASSEMBLY__ + #include #include #include @@ -411,4 +413,9 @@ static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) } } +#endif /* __ASSEMBLY__ */ + +#define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 +#define PSHUFB_XMM5_XMM6 .byte 0x66, 0x0f, 0x38, 0x00, 0xf5 + #endif /* _ASM_X86_I387_H */ -- cgit v1.2.3 From a489ca355efaf9efa4990b0f8f30ab650a206273 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 2 Nov 2009 16:59:15 -0800 Subject: x86: Make sure we also print a Code: line for show_regs() show_regs() is called as a mini BUG() equivalent in some places, specifically for the "scheduling while atomic" case. Unfortunately right now it does not print a Code: line unlike a real bug/oops. This patch changes the x86 implementation of show_regs() so that it calls the same function as oopses do to print the registers as well as the Code: line. Signed-off-by: Arjan van de Ven LKML-Reference: <20091102165915.4a980fc0@infradead.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_32.c | 2 +- arch/x86/kernel/process_64.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4cf79567cdab..e658331edb6d 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -187,7 +187,7 @@ void __show_regs(struct pt_regs *regs, int all) void show_regs(struct pt_regs *regs) { - __show_regs(regs, 1); + show_registers(regs); show_trace(NULL, regs, ®s->sp, regs->bp); } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ad535b683170..2386999bfcd2 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -226,8 +226,7 @@ void __show_regs(struct pt_regs *regs, int all) void show_regs(struct pt_regs *regs) { - printk(KERN_INFO "CPU %d:", smp_processor_id()); - __show_regs(regs, 1); + show_registers(regs); show_trace(NULL, regs, (void *)(regs + 1), regs->bp); } -- cgit v1.2.3 From 01dd95827726534230d8f03f7e6faafe24e49260 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Tue, 3 Nov 2009 10:55:20 -0500 Subject: crypto: ghash-intel - Fix irq_fpu_usable usage When renaming kernel_fpu_using to irq_fpu_usable, the semantics of the function is changed too, from mesuring whether kernel is using FPU, that is, the FPU is NOT available, to measuring whether FPU is usable, that is, the FPU is available. But the usage of irq_fpu_usable in ghash-clmulni-intel_glue.c is not changed accordingly. This patch fixes this. Signed-off-by: Huang Ying Signed-off-by: Herbert Xu --- arch/x86/crypto/ghash-clmulni-intel_glue.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 65d409644d72..cbcc8d8ea93a 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -159,7 +159,7 @@ static int ghash_async_init(struct ahash_request *req) struct ahash_request *cryptd_req = ahash_request_ctx(req); struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - if (irq_fpu_usable()) { + if (!irq_fpu_usable()) { memcpy(cryptd_req, req, sizeof(*req)); ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); return crypto_ahash_init(cryptd_req); @@ -177,7 +177,7 @@ static int ghash_async_update(struct ahash_request *req) { struct ahash_request *cryptd_req = ahash_request_ctx(req); - if (irq_fpu_usable()) { + if (!irq_fpu_usable()) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; @@ -195,7 +195,7 @@ static int ghash_async_final(struct ahash_request *req) { struct ahash_request *cryptd_req = ahash_request_ctx(req); - if (irq_fpu_usable()) { + if (!irq_fpu_usable()) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; @@ -216,7 +216,7 @@ static int ghash_async_digest(struct ahash_request *req) struct ahash_request *cryptd_req = ahash_request_ctx(req); struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - if (irq_fpu_usable()) { + if (!irq_fpu_usable()) { memcpy(cryptd_req, req, sizeof(*req)); ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); return crypto_ahash_digest(cryptd_req); -- cgit v1.2.3 From 41a48d14f6991020c9bb6b93e289ca5b411ed09a Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 5 Oct 2009 19:23:06 +0900 Subject: x86/hw-breakpoints: Actually flush thread breakpoints in flush_thread(). flush_thread() tries to do a TIF_DEBUG check before calling in to flush_thread_hw_breakpoint() (which subsequently clears the thread flag), but for some reason, the x86 code is manually clearing TIF_DEBUG immediately before the test, so this path will never be taken. This kills off the erroneous clear_tsk_thread_flag() and lets flush_thread_hw_breakpoint() actually get invoked. Presumably folks were getting lucky with testing and the free_thread_info() -> free_thread_xstate() path was taking care of the flush there. Signed-off-by: Paul Mundt Acked-by: "K.Prasad" Cc: Ingo Molnar Cc: Alan Stern LKML-Reference: <20091005102306.GA7889@linux-sh.org> Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/process.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 2275ce5776de..cf8ee0016307 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -107,8 +107,6 @@ void flush_thread(void) } #endif - clear_tsk_thread_flag(tsk, TIF_DEBUG); - if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) flush_thread_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); -- cgit v1.2.3 From 82d6469916c6fcfa345636a49004c9d1753905d1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 22 Oct 2009 16:41:15 -0700 Subject: xen: mask extended topology info in cpuid A Xen guest never needs to know about extended topology, and knowing would just confuse it. This patch just zeros ebx in leaf 0xb which indicates no topology info, preventing a crash under Xen on cpus which support this leaf. Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel --- arch/x86/xen/enlighten.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 23a4d80fb39e..dfbf70e65860 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -178,6 +178,7 @@ static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; static void xen_cpuid(unsigned int *ax, unsigned int *bx, unsigned int *cx, unsigned int *dx) { + unsigned maskebx = ~0; unsigned maskecx = ~0; unsigned maskedx = ~0; @@ -185,9 +186,16 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, * Mask out inconvenient features, to try and disable as many * unsupported kernel subsystems as possible. */ - if (*ax == 1) { + switch (*ax) { + case 1: maskecx = cpuid_leaf1_ecx_mask; maskedx = cpuid_leaf1_edx_mask; + break; + + case 0xb: + /* Suppress extended topology stuff */ + maskebx = 0; + break; } asm(XEN_EMULATE_PREFIX "cpuid" @@ -197,6 +205,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, "=d" (*dx) : "0" (*ax), "2" (*cx)); + *bx &= maskebx; *cx &= maskecx; *dx &= maskedx; } -- cgit v1.2.3 From 97829de5a3b88899c5f3ac8802d11868bf4180ba Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 3 Nov 2009 14:02:05 -0500 Subject: x86, 64-bit: Fix bstep_iret jump This jump should be unconditional. Signed-off-by: Brian Gerst LKML-Reference: <1257274925-15713-1-git-send-email-brgerst@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index af0f4b226dbe..1579a6c59cfd 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1501,7 +1501,7 @@ error_kernelspace: bstep_iret: /* Fix truncated RIP */ movq %rcx,RIP+8(%rsp) - je error_swapgs + jmp error_swapgs END(error_entry) -- cgit v1.2.3 From 09879b99d44d701c603935ef2549004405d7f8f9 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 4 Nov 2009 12:58:15 +0900 Subject: x86: Gitignore: arch/x86/lib/inat-tables.c Ignore generated file arch/x86/lib/inat-tables.c. Signed-off-by: Hiroshi Shimamoto Acked-by: Masami Hiramatsu LKML-Reference: <4AF0FBD7.7000501@ct.jp.nec.com> Signed-off-by: Ingo Molnar --- arch/x86/lib/.gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 arch/x86/lib/.gitignore (limited to 'arch/x86') diff --git a/arch/x86/lib/.gitignore b/arch/x86/lib/.gitignore new file mode 100644 index 000000000000..8df89f0a3fe6 --- /dev/null +++ b/arch/x86/lib/.gitignore @@ -0,0 +1 @@ +inat-tables.c -- cgit v1.2.3 From ce7c42710e2dd133f10b7fc9ed9c73bdd2435f7a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 3 Nov 2009 14:53:52 +1030 Subject: cpumask: Avoid cpumask_t in arch/x86/kernel/apic/nmi.c Ingo wants the certainty of a static cpumask (rather than a cpumask_var_t), but cpumask_t will some day be undefined to avoid on-stack declarations. This is what DECLARE_BITMAP/to_cpumask() is for. Signed-off-by: Rusty Russell LKML-Reference: <200911031453.52394.rusty@rustcorp.com.au> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/nmi.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index 7ff61d6a188a..6389432a9dbf 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -39,7 +39,8 @@ int unknown_nmi_panic; int nmi_watchdog_enabled; -static cpumask_t backtrace_mask __read_mostly; +/* For reliability, we're prepared to waste bits here. */ +static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; /* nmi_active: * >0: the lapic NMI watchdog is active, but can be disabled @@ -414,7 +415,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) } /* We can be called before check_nmi_watchdog, hence NULL check. */ - if (cpumask_test_cpu(cpu, &backtrace_mask)) { + if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { static DEFINE_SPINLOCK(lock); /* Serialise the printks */ spin_lock(&lock); @@ -422,7 +423,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) show_regs(regs); dump_stack(); spin_unlock(&lock); - cpumask_clear_cpu(cpu, &backtrace_mask); + cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); rc = 1; } @@ -558,14 +559,14 @@ void arch_trigger_all_cpu_backtrace(void) { int i; - cpumask_copy(&backtrace_mask, cpu_online_mask); + cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); printk(KERN_INFO "sending NMI to all CPUs:\n"); apic->send_IPI_all(NMI_VECTOR); /* Wait for up to 10 seconds for all CPUs to do the backtrace */ for (i = 0; i < 10 * 1000; i++) { - if (cpumask_empty(&backtrace_mask)) + if (cpumask_empty(to_cpumask(backtrace_mask))) break; mdelay(1); } -- cgit v1.2.3 From 6ac5c5310ca9d7dd3d7e677c2715b1f06a348330 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 3 Nov 2009 14:58:30 +1030 Subject: cpumask: Use modern cpumask style in arch/x86/kernel/cpu/mcheck/mce-inject.c Note that there's no freeing the cpu var, since this module has no unload function. Signed-off-by: Rusty Russell Cc: Andi Kleen Cc: Huang Ying LKML-Reference: <200911031458.30987.rusty@rustcorp.com.au> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce-inject.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 472763d92098..73734baa50f2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -74,7 +74,7 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs) m->finished = 0; } -static cpumask_t mce_inject_cpumask; +static cpumask_var_t mce_inject_cpumask; static int mce_raise_notify(struct notifier_block *self, unsigned long val, void *data) @@ -82,9 +82,9 @@ static int mce_raise_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int cpu = smp_processor_id(); struct mce *m = &__get_cpu_var(injectm); - if (val != DIE_NMI_IPI || !cpu_isset(cpu, mce_inject_cpumask)) + if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) return NOTIFY_DONE; - cpu_clear(cpu, mce_inject_cpumask); + cpumask_clear_cpu(cpu, mce_inject_cpumask); if (m->inject_flags & MCJ_EXCEPTION) raise_exception(m, args->regs); else if (m->status) @@ -148,22 +148,22 @@ static void raise_mce(struct mce *m) unsigned long start; int cpu; get_online_cpus(); - mce_inject_cpumask = cpu_online_map; - cpu_clear(get_cpu(), mce_inject_cpumask); + cpumask_copy(mce_inject_cpumask, cpu_online_mask); + cpumask_clear_cpu(get_cpu(), mce_inject_cpumask); for_each_online_cpu(cpu) { struct mce *mcpu = &per_cpu(injectm, cpu); if (!mcpu->finished || MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM) - cpu_clear(cpu, mce_inject_cpumask); + cpumask_clear_cpu(cpu, mce_inject_cpumask); } - if (!cpus_empty(mce_inject_cpumask)) - apic->send_IPI_mask(&mce_inject_cpumask, NMI_VECTOR); + if (!cpumask_empty(mce_inject_cpumask)) + apic->send_IPI_mask(mce_inject_cpumask, NMI_VECTOR); start = jiffies; - while (!cpus_empty(mce_inject_cpumask)) { + while (!cpumask_empty(mce_inject_cpumask)) { if (!time_before(jiffies, start + 2*HZ)) { printk(KERN_ERR "Timeout waiting for mce inject NMI %lx\n", - *cpus_addr(mce_inject_cpumask)); + *cpumask_bits(mce_inject_cpumask)); break; } cpu_relax(); @@ -210,6 +210,8 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf, static int inject_init(void) { + if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL)) + return -ENOMEM; printk(KERN_INFO "Machine check injector initialized\n"); mce_chrdev_ops.write = mce_write; register_die_notifier(&mce_raise_nb); -- cgit v1.2.3 From d7d3756c5b1277fafd132ce7a2211b388c3b5bd2 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 3 Nov 2009 14:58:38 +1030 Subject: cpumask: Use modern cpumask style in Xen Signed-off-by: Rusty Russell Cc: Jeremy Fitzhardinge LKML-Reference: <200911031458.38406.rusty@rustcorp.com.au> Signed-off-by: Ingo Molnar --- arch/x86/xen/smp.c | 2 +- drivers/xen/cpu_hotplug.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index fe03eeed7b48..738da0cb0d8b 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -73,7 +73,7 @@ static __cpuinit void cpu_bringup(void) xen_setup_cpu_clockevents(); - cpu_set(cpu, cpu_online_map); + set_cpu_online(cpu, true); percpu_write(cpu_state, CPU_ONLINE); wmb(); diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c index bdfd584ad853..0f765a920189 100644 --- a/drivers/xen/cpu_hotplug.c +++ b/drivers/xen/cpu_hotplug.c @@ -86,7 +86,7 @@ static int setup_cpu_watcher(struct notifier_block *notifier, for_each_possible_cpu(cpu) { if (vcpu_online(cpu) == 0) { (void)cpu_down(cpu); - cpu_clear(cpu, cpu_present_map); + set_cpu_present(cpu, false); } } -- cgit v1.2.3 From 89240ba059ca468ae7a8346edf7f95082458c2fc Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Tue, 3 Nov 2009 10:22:40 +0100 Subject: x86, fs: Fix x86 procfs stack information for threads on 64-bit This patch fixes two issues in the procfs stack information on x86-64 linux. The 32 bit loader compat_do_execve did not store stack start. (this was figured out by Alexey Dobriyan). The stack information on a x64_64 kernel always shows 0 kbyte stack usage, because of a missing implementation of the KSTK_ESP macro which always returned -1. The new implementation now returns the right value. Signed-off-by: Stefani Seibold Cc: Americo Wang Cc: Alexey Dobriyan Cc: Al Viro Cc: Andrew Morton LKML-Reference: <1257240160.4889.24.camel@wall-e> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/process_64.c | 5 +++++ fs/compat.c | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c3429e8b2424..c9786480f0fe 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -1000,7 +1000,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); #define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8)) #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) -#define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */ +extern unsigned long KSTK_ESP(struct task_struct *task); #endif /* CONFIG_X86_64 */ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ad535b683170..eb62cbcaa490 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -664,3 +664,8 @@ long sys_arch_prctl(int code, unsigned long addr) return do_arch_prctl(current, code, addr); } +unsigned long KSTK_ESP(struct task_struct *task) +{ + return (test_tsk_thread_flag(task, TIF_IA32)) ? + (task_pt_regs(task)->sp) : ((task)->thread.usersp); +} diff --git a/fs/compat.c b/fs/compat.c index d576b552e8e2..6c19040ffeef 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1532,6 +1532,8 @@ int compat_do_execve(char * filename, if (retval < 0) goto out; + current->stack_start = current->mm->start_stack; + /* execve succeeded */ current->fs->in_exec = 0; current->in_execve = 0; -- cgit v1.2.3 From a9e38c3e01ad242fe2a625354cf065c34b01e3aa Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 23 Oct 2009 09:37:00 +0200 Subject: KVM: x86: Catch potential overrun in MCE setup We only allocate memory for 32 MCE banks (KVM_MAX_MCE_BANKS) but we allow user space to fill up to 255 on setup (mcg_cap & 0xff), corrupting kernel memory. Catch these overflows. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9b9695322f56..8a93fa894ba6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1692,7 +1692,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, unsigned bank_num = mcg_cap & 0xff, bank; r = -EINVAL; - if (!bank_num) + if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) goto out; if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000)) goto out; -- cgit v1.2.3 From abb3911965c1bd8eea305f64d4840a314259d96d Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 25 Oct 2009 17:42:02 +0200 Subject: KVM: get_tss_base_addr() should return a gpa_t If TSS we are switching to resides in high memory task switch will fail since address will be truncated. Windows2k3 does this sometimes when running with more then 4G Cc: stable@kernel.org Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8a93fa894ba6..ae07d261527c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4051,7 +4051,7 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu); } -static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, +static gpa_t get_tss_base_addr(struct kvm_vcpu *vcpu, struct desc_struct *seg_desc) { u32 base_addr = get_desc_base(seg_desc); -- cgit v1.2.3 From 99935a7a59eaca0292c1a5880e10bae03f4a5e3d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 4 Oct 2009 21:54:24 -0700 Subject: x86/PCI: read root resources from IOH on Intel For intel systems with multi IOH, we should read peer root resources directly from PCI config space, and don't trust _CRS. Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- arch/x86/pci/Makefile | 1 + arch/x86/pci/amd_bus.c | 45 ++++++++++-------------- arch/x86/pci/bus_numa.h | 26 ++++++++++++++ arch/x86/pci/intel_bus.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 135 insertions(+), 27 deletions(-) create mode 100644 arch/x86/pci/bus_numa.h create mode 100644 arch/x86/pci/intel_bus.c (limited to 'arch/x86') diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index d49202e740ea..56d917b556c6 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -15,3 +15,4 @@ obj-$(CONFIG_X86_NUMAQ) += numaq_32.o obj-y += common.o early.o obj-y += amd_bus.o +obj-$(CONFIG_X86_64) += intel_bus.o diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 572ee9782f2a..995f36096a42 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -10,6 +10,8 @@ #include #endif +#include "bus_numa.h" + /* * This discovers the pcibus <-> node mapping on AMD K8. * also get peer root bus resource for io,mmio @@ -17,25 +19,9 @@ #ifdef CONFIG_X86_64 -/* - * sub bus (transparent) will use entres from 3 to store extra from root, - * so need to make sure have enought slot there, increase PCI_BUS_NUM_RESOURCES? - */ -#define RES_NUM 16 -struct pci_root_info { - char name[12]; - unsigned int res_num; - struct resource res[RES_NUM]; - int bus_min; - int bus_max; - int node; - int link; -}; - -/* 4 at this time, it may become to 32 */ -#define PCI_ROOT_NR 4 -static int pci_root_num; -static struct pci_root_info pci_root_info[PCI_ROOT_NR]; +int pci_root_num; +struct pci_root_info pci_root_info[PCI_ROOT_NR]; +static int found_all_numa_early; void x86_pci_root_bus_res_quirks(struct pci_bus *b) { @@ -48,8 +34,11 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b) b->resource[1] != &iomem_resource) return; - /* if only one root bus, don't need to anything */ - if (pci_root_num < 2) + if (!pci_root_num) + return; + + /* for amd, if only one root bus, don't need to do anything */ + if (pci_root_num < 2 && found_all_numa_early) return; for (i = 0; i < pci_root_num; i++) { @@ -130,12 +119,15 @@ static void __init update_range(struct res_range *range, size_t start, } } -static void __init update_res(struct pci_root_info *info, size_t start, +void __init update_res(struct pci_root_info *info, size_t start, size_t end, unsigned long flags, int merge) { int i; struct resource *res; + if (start > end) + return; + if (!merge) goto addit; @@ -230,7 +222,6 @@ static int __init early_fill_mp_bus_info(void) int j; unsigned bus; unsigned slot; - int found; int node; int link; int def_node; @@ -247,7 +238,7 @@ static int __init early_fill_mp_bus_info(void) if (!early_pci_allowed()) return -1; - found = 0; + found_all_numa_early = 0; for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { u32 id; u16 device; @@ -261,12 +252,12 @@ static int __init early_fill_mp_bus_info(void) device = (id>>16) & 0xffff; if (pci_probes[i].vendor == vendor && pci_probes[i].device == device) { - found = 1; + found_all_numa_early = 1; break; } } - if (!found) + if (!found_all_numa_early) return 0; pci_root_num = 0; @@ -488,7 +479,7 @@ static int __init early_fill_mp_bus_info(void) info = &pci_root_info[i]; res_num = info->res_num; busnum = info->bus_min; - printk(KERN_DEBUG "bus: [%02x,%02x] on node %x link %x\n", + printk(KERN_DEBUG "bus: [%02x, %02x] on node %x link %x\n", info->bus_min, info->bus_max, info->node, info->link); for (j = 0; j < res_num; j++) { res = &info->res[j]; diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h new file mode 100644 index 000000000000..4ff126a3e887 --- /dev/null +++ b/arch/x86/pci/bus_numa.h @@ -0,0 +1,26 @@ +#ifdef CONFIG_X86_64 + +/* + * sub bus (transparent) will use entres from 3 to store extra from + * root, so need to make sure we have enought slot there, Should we + * increase PCI_BUS_NUM_RESOURCES? + */ +#define RES_NUM 16 +struct pci_root_info { + char name[12]; + unsigned int res_num; + struct resource res[RES_NUM]; + int bus_min; + int bus_max; + int node; + int link; +}; + +/* 4 at this time, it may become to 32 */ +#define PCI_ROOT_NR 4 +extern int pci_root_num; +extern struct pci_root_info pci_root_info[PCI_ROOT_NR]; + +extern void update_res(struct pci_root_info *info, size_t start, + size_t end, unsigned long flags, int merge); +#endif diff --git a/arch/x86/pci/intel_bus.c b/arch/x86/pci/intel_bus.c new file mode 100644 index 000000000000..b7a55dc55d13 --- /dev/null +++ b/arch/x86/pci/intel_bus.c @@ -0,0 +1,90 @@ +/* + * to read io range from IOH pci conf, need to do it after mmconfig is there + */ + +#include +#include +#include +#include +#include + +#include "bus_numa.h" + +static inline void print_ioh_resources(struct pci_root_info *info) +{ + int res_num; + int busnum; + int i; + + printk(KERN_DEBUG "IOH bus: [%02x, %02x]\n", + info->bus_min, info->bus_max); + res_num = info->res_num; + busnum = info->bus_min; + for (i = 0; i < res_num; i++) { + struct resource *res; + + res = &info->res[i]; + printk(KERN_DEBUG "IOH bus: %02x index %x %s: [%llx, %llx]\n", + busnum, i, + (res->flags & IORESOURCE_IO) ? "io port" : + "mmio", + res->start, res->end); + } +} + +#define IOH_LIO 0x108 +#define IOH_LMMIOL 0x10c +#define IOH_LMMIOH 0x110 +#define IOH_LMMIOH_BASEU 0x114 +#define IOH_LMMIOH_LIMITU 0x118 +#define IOH_LCFGBUS 0x11c + +static void __devinit pci_root_bus_res(struct pci_dev *dev) +{ + u16 word; + u32 dword; + struct pci_root_info *info; + u16 io_base, io_end; + u32 mmiol_base, mmiol_end; + u64 mmioh_base, mmioh_end; + int bus_base, bus_end; + + if (pci_root_num >= PCI_ROOT_NR) { + printk(KERN_DEBUG "intel_bus.c: PCI_ROOT_NR is too small\n"); + return; + } + + info = &pci_root_info[pci_root_num]; + pci_root_num++; + + pci_read_config_word(dev, IOH_LCFGBUS, &word); + bus_base = (word & 0xff); + bus_end = (word & 0xff00) >> 8; + sprintf(info->name, "PCI Bus #%02x", bus_base); + info->bus_min = bus_base; + info->bus_max = bus_end; + + pci_read_config_word(dev, IOH_LIO, &word); + io_base = (word & 0xf0) << (12 - 4); + io_end = (word & 0xf000) | 0xfff; + update_res(info, io_base, io_end, IORESOURCE_IO, 0); + + pci_read_config_dword(dev, IOH_LMMIOL, &dword); + mmiol_base = (dword & 0xff00) << (24 - 8); + mmiol_end = (dword & 0xff000000) | 0xffffff; + update_res(info, mmiol_base, mmiol_end, IORESOURCE_MEM, 0); + + pci_read_config_dword(dev, IOH_LMMIOH, &dword); + mmioh_base = ((u64)(dword & 0xfc00)) << (26 - 10); + mmioh_end = ((u64)(dword & 0xfc000000) | 0x3ffffff); + pci_read_config_dword(dev, IOH_LMMIOH_BASEU, &dword); + mmioh_base |= ((u64)(dword & 0x7ffff)) << 32; + pci_read_config_dword(dev, IOH_LMMIOH_LIMITU, &dword); + mmioh_end |= ((u64)(dword & 0x7ffff)) << 32; + update_res(info, mmioh_base, mmioh_end, IORESOURCE_MEM, 0); + + print_ioh_resources(info); +} + +/* intel IOH */ +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x342e, pci_root_bus_res); -- cgit v1.2.3 From ac1aa47b131416a6ff37eb1005a0a1d2541aad6c Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Mon, 26 Oct 2009 13:20:44 -0700 Subject: PCI: determine CLS more intelligently Till now, CLS has been determined either by arch code or as L1_CACHE_BYTES. Only x86 and ia64 set CLS explicitly and x86 doesn't always get it right. On most configurations, the chance is that firmware configures the correct value during boot. This patch makes pci_init() determine CLS by looking at what firmware has configured. It scans all devices and if all non-zero values agree, the value is used. If none is configured or there is a disagreement, pci_dfl_cache_line_size is used. arch can set the dfl value (via PCI_CACHE_LINE_BYTES or pci_dfl_cache_line_size) or override the actual one. ia64, x86 and sparc64 updated to set the default cls instead of the actual one. While at it, declare pci_cache_line_size and pci_dfl_cache_line_size in pci.h and drop private declarations from arch code. Signed-off-by: Tejun Heo Acked-by: David Miller Acked-by: Greg KH Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Tony Luck Signed-off-by: Jesse Barnes --- arch/ia64/pci/pci.c | 9 +++------ arch/x86/pci/common.c | 8 +++----- drivers/pci/pci.c | 21 +++++++++++++-------- drivers/pci/quirks.c | 28 ++++++++++++++++++++++++++++ include/linux/pci.h | 2 ++ 5 files changed, 49 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index c0fca2c1c858..d60e7195b7dd 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -720,9 +720,6 @@ int ia64_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size) return ret; } -/* It's defined in drivers/pci/pci.c */ -extern u8 pci_cache_line_size; - /** * set_pci_cacheline_size - determine cacheline size for PCI devices * @@ -731,7 +728,7 @@ extern u8 pci_cache_line_size; * * Code mostly taken from arch/ia64/kernel/palinfo.c:cache_info(). */ -static void __init set_pci_cacheline_size(void) +static void __init set_pci_dfl_cacheline_size(void) { unsigned long levels, unique_caches; long status; @@ -751,7 +748,7 @@ static void __init set_pci_cacheline_size(void) "(status=%ld)\n", __func__, status); return; } - pci_cache_line_size = (1 << cci.pcci_line_size) / 4; + pci_dfl_cache_line_size = (1 << cci.pcci_line_size) / 4; } u64 ia64_dma_get_required_mask(struct device *dev) @@ -782,7 +779,7 @@ EXPORT_SYMBOL_GPL(dma_get_required_mask); static int __init pcibios_init(void) { - set_pci_cacheline_size(); + set_pci_dfl_cacheline_size(); return 0; } diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 1331fcf26143..fbeec31316cf 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -410,8 +410,6 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum) return bus; } -extern u8 pci_cache_line_size; - int __init pcibios_init(void) { struct cpuinfo_x86 *c = &boot_cpu_data; @@ -426,11 +424,11 @@ int __init pcibios_init(void) * and P4. It's also good for 386/486s (which actually have 16) * as quite a few PCI devices do not support smaller values. */ - pci_cache_line_size = 32 >> 2; + pci_dfl_cache_line_size = 32 >> 2; if (c->x86 >= 6 && c->x86_vendor == X86_VENDOR_AMD) - pci_cache_line_size = 64 >> 2; /* K7 & K8 */ + pci_dfl_cache_line_size = 64 >> 2; /* K7 & K8 */ else if (c->x86 > 6 && c->x86_vendor == X86_VENDOR_INTEL) - pci_cache_line_size = 128 >> 2; /* P4 */ + pci_dfl_cache_line_size = 128 >> 2; /* P4 */ pcibios_resource_survey(); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 4e4c295a049f..1f9a7a03847b 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -47,6 +47,19 @@ unsigned long pci_cardbus_mem_size = DEFAULT_CARDBUS_MEM_SIZE; unsigned long pci_hotplug_io_size = DEFAULT_HOTPLUG_IO_SIZE; unsigned long pci_hotplug_mem_size = DEFAULT_HOTPLUG_MEM_SIZE; +#ifndef PCI_CACHE_LINE_BYTES +#define PCI_CACHE_LINE_BYTES L1_CACHE_BYTES +#endif + +/* + * The default CLS is used if arch didn't set CLS explicitly and not + * all pci devices agree on the same value. Arch can override either + * the dfl or actual value as it sees fit. Don't forget this is + * measured in 32-bit words, not bytes. + */ +u8 pci_dfl_cache_line_size __initdata = PCI_CACHE_LINE_BYTES >> 2; +u8 pci_cache_line_size; + /** * pci_bus_max_busnr - returns maximum PCI bus number of given bus' children * @bus: pointer to PCI bus structure to search @@ -1883,14 +1896,6 @@ void pci_clear_mwi(struct pci_dev *dev) #else -#ifndef PCI_CACHE_LINE_BYTES -#define PCI_CACHE_LINE_BYTES L1_CACHE_BYTES -#endif - -/* This can be overridden by arch code. */ -/* Don't forget this is measured in 32-bit words, not bytes */ -u8 pci_cache_line_size = PCI_CACHE_LINE_BYTES / 4; - /** * pci_set_cacheline_size - ensure the CACHE_LINE_SIZE register is programmed * @dev: the PCI device for which MWI is to be enabled diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 245d2cdb4765..1812ae7698de 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2595,9 +2595,37 @@ void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev) static int __init pci_apply_final_quirks(void) { struct pci_dev *dev = NULL; + u8 cls = 0; + u8 tmp; + + if (pci_cache_line_size) + printk(KERN_DEBUG "PCI: CLS %u bytes\n", + pci_cache_line_size << 2); while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { pci_fixup_device(pci_fixup_final, dev); + /* + * If arch hasn't set it explicitly yet, use the CLS + * value shared by all PCI devices. If there's a + * mismatch, fall back to the default value. + */ + if (!pci_cache_line_size) { + pci_read_config_byte(dev, PCI_CACHE_LINE_SIZE, &tmp); + if (!cls) + cls = tmp; + if (!tmp || cls == tmp) + continue; + + printk(KERN_DEBUG "PCI: CLS mismatch (%u != %u), " + "using %u bytes\n", cls << 2, tmp << 2, + pci_dfl_cache_line_size << 2); + pci_cache_line_size = pci_dfl_cache_line_size; + } + } + if (!pci_cache_line_size) { + printk(KERN_DEBUG "PCI: CLS %u bytes, default %u\n", + cls << 2, pci_dfl_cache_line_size << 2); + pci_cache_line_size = cls; } return 0; diff --git a/include/linux/pci.h b/include/linux/pci.h index f5c7cd343e56..b849861d78e6 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1246,6 +1246,8 @@ extern int pci_pci_problems; extern unsigned long pci_cardbus_io_size; extern unsigned long pci_cardbus_mem_size; +extern u8 pci_dfl_cache_line_size; +extern u8 pci_cache_line_size; extern unsigned long pci_hotplug_io_size; extern unsigned long pci_hotplug_mem_size; -- cgit v1.2.3 From 76b1a87b217927f905f4b01c586452b2a1d33913 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 14 Oct 2009 16:31:39 -0400 Subject: x86/PCI: Use generic cacheline sizing instead of per-vendor tests. Instead of the PCI code needing to have code to determine the cacheline size of each processor, use the data the cpu identification code should have already determined during early boot. (The vendor checks are also incomplete, and don't take into account modern CPUs) I've been carrying a variant of this code in Fedora for a while, that prints debug information. There are a number of cases where we are currently setting the PCI cacheline size to 32 bytes, when the CPU cacheline size is 64 bytes. With this patch, we set them both the same. Signed-off-by: Dave Jones Signed-off-by: Jesse Barnes --- arch/x86/pci/common.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index fbeec31316cf..d2552c68e94d 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -420,15 +420,19 @@ int __init pcibios_init(void) } /* - * Assume PCI cacheline size of 32 bytes for all x86s except K7/K8 - * and P4. It's also good for 386/486s (which actually have 16) + * Set PCI cacheline size to that of the CPU if the CPU has reported it. + * (For older CPUs that don't support cpuid, we se it to 32 bytes + * It's also good for 386/486s (which actually have 16) * as quite a few PCI devices do not support smaller values. */ - pci_dfl_cache_line_size = 32 >> 2; - if (c->x86 >= 6 && c->x86_vendor == X86_VENDOR_AMD) - pci_dfl_cache_line_size = 64 >> 2; /* K7 & K8 */ - else if (c->x86 > 6 && c->x86_vendor == X86_VENDOR_INTEL) - pci_dfl_cache_line_size = 128 >> 2; /* P4 */ + if (c->x86_clflush_size > 0) { + pci_dfl_cache_line_size = c->x86_clflush_size >> 2; + printk(KERN_DEBUG "PCI: pci_cache_line_size set to %d bytes\n", + pci_dfl_cache_line_size << 2); + } else { + pci_dfl_cache_line_size = 32 >> 2; + printk(KERN_DEBUG "PCI: Unknown cacheline size. Setting to 32 bytes\n"); + } pcibios_resource_survey(); -- cgit v1.2.3 From 42887b29ced263ec3b8bd26ef157a324789b89d9 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 6 Oct 2009 15:33:49 -0600 Subject: x86/PCI: print resources consistently with %pRt This uses %pRt to print additional resource information (type, size, prefetchability, etc.) consistently. Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/acpi.c | 14 +++++++++++--- arch/x86/pci/i386.c | 12 +++++------- 2 files changed, 16 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 1014eb4bfc37..9b3daf976732 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -7,6 +7,7 @@ #include struct pci_root_info { + struct acpi_device *bridge; char *name; unsigned int res_num; struct resource *res; @@ -107,12 +108,18 @@ setup_resource(struct acpi_resource *acpi_res, void *data) res->child = NULL; if (insert_resource(root, res)) { - printk(KERN_ERR "PCI: Failed to allocate 0x%lx-0x%lx " - "from %s for %s\n", (unsigned long) res->start, - (unsigned long) res->end, root->name, info->name); + dev_err(&info->bridge->dev, "can't allocate %pRt\n", res); } else { info->bus->resource[info->res_num] = res; info->res_num++; + if (addr.translation_offset) + dev_info(&info->bridge->dev, "host bridge window: %pRt " + "(PCI address [%#llx-%#llx])\n", + res, res->start - addr.translation_offset, + res->end - addr.translation_offset); + else + dev_info(&info->bridge->dev, + "host bridge window: %pRt\n", res); } return AE_OK; } @@ -124,6 +131,7 @@ get_current_resources(struct acpi_device *device, int busnum, struct pci_root_info info; size_t size; + info.bridge = device; info.bus = bus; info.res_num = 0; acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_resource, diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index b22d13b0c71d..a70a85de5e84 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -129,7 +129,7 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) continue; if (!r->start || pci_claim_resource(dev, idx) < 0) { - dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx); + dev_info(&dev->dev, "BAR %d: can't allocate %pRt\n", idx, r); /* * Something is wrong with the region. * Invalidate the resource to prevent @@ -164,12 +164,10 @@ static void __init pcibios_allocate_resources(int pass) else disabled = !(command & PCI_COMMAND_MEMORY); if (pass == disabled) { - dev_dbg(&dev->dev, "resource %#08llx-%#08llx (f=%lx, d=%d, p=%d)\n", - (unsigned long long) r->start, - (unsigned long long) r->end, - r->flags, disabled, pass); + dev_dbg(&dev->dev, "%pRf (d=%d, p=%d)\n", r, + disabled, pass); if (pci_claim_resource(dev, idx) < 0) { - dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx); + dev_info(&dev->dev, "BAR %d: can't allocate %pRt\n", idx, r); /* We'll assign a new address later */ r->end -= r->start; r->start = 0; @@ -182,7 +180,7 @@ static void __init pcibios_allocate_resources(int pass) /* Turn the ROM off, leave the resource region, * but keep it unregistered. */ u32 reg; - dev_dbg(&dev->dev, "disabling ROM\n"); + dev_dbg(&dev->dev, "disabling ROM %pRt\n", r); r->flags &= ~IORESOURCE_ROM_ENABLE; pci_read_config_dword(dev, dev->rom_base_reg, ®); -- cgit v1.2.3 From 2992e545ea006992ec9dc91c4fa996ce1e15f921 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 26 Oct 2009 13:21:32 -0800 Subject: x86/PCI/PAT: return EINVAL for pci mmap WC request for !pat_enabled Thomas Schlichter reported: > X.org uses libpciaccess which tries to mmap with write combining enabled via > /sys/bus/pci/devices/*/resource0_wc. Currently, when PAT is not enabled, the > kernel does fall back to uncached mmap. Then libpciaccess thinks it succeeded > mapping with write combining enabled and does not set up suited MTRR entries. > ;-( Instead of silently mapping pci mmap region as UC minus in the case of !pat_enabled and wc request, we can return error. Eric Anholt mentioned that caller (like X) typically follows up with UC minus pci mmap request and if there is a free mtrr slot, caller will manage adding WC mtrr. Jesse Barnes says: > Older versions of libpciaccess will behave better if we do it that way > (iirc it only allocates an MTRR if the resource_wc file doesn't exist or > fails to get mapped). Reported-by: Thomas Schlichter Signed-off-by: Thomas Schlichter Signed-off-by: Suresh Siddha Acked-by: Eric Anholt Acked-by: Jesse Barnes Signed-off-by: Jesse Barnes --- arch/x86/pci/i386.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index a70a85de5e84..52e656f17781 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -280,6 +280,15 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, return -EINVAL; prot = pgprot_val(vma->vm_page_prot); + + /* + * Return error if pat is not enabled and write_combine is requested. + * Caller can followup with UC MINUS request and add a WC mtrr if there + * is a free mtrr slot. + */ + if (!pat_enabled && write_combine) + return -EINVAL; + if (pat_enabled && write_combine) prot |= _PAGE_CACHE_WC; else if (pat_enabled || boot_cpu_data.x86 > 3) -- cgit v1.2.3 From 9a08f7d3506019e3833cd4394ca0d7da0ae3689f Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 23 Oct 2009 15:20:33 -0600 Subject: x86/PCI: allow MMCONFIG above 4GB The current whitelist requires a kernel change for every machine that has MMCONFIG regions above 4GB, even if BIOS provides a correct MCFG table. This patch expands the whitelist to include machines with a rev 1 or newer MCFG table and a DMI_BIOS_DATE of 2010 or later. That way, we only need kernel changes for new machines that provide incorrect MCFG tables. Signed-off-by: Bjorn Helgaas CC: Matthew Wilcox CC: John Keller CC: Yinghai Lu CC: Kenji Kaneshige CC: Andi Kleen Acked-by: Ingo Molnar Signed-off-by: Jesse Barnes --- arch/x86/pci/mmconfig-shared.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 602c172d3bd5..02642773c29d 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -527,18 +528,31 @@ reject: static int __initdata known_bridge; -static int acpi_mcfg_64bit_base_addr __initdata = FALSE; - /* The physical address of the MMCONFIG aperture. Set from ACPI tables. */ struct acpi_mcfg_allocation *pci_mmcfg_config; int pci_mmcfg_config_num; -static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg) +static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg, + struct acpi_mcfg_allocation *cfg) { + int year; + + if (cfg->address < 0xFFFFFFFF) + return 0; + if (!strcmp(mcfg->header.oem_id, "SGI")) - acpi_mcfg_64bit_base_addr = TRUE; + return 0; - return 0; + if (mcfg->header.revision >= 1) { + if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) && + year >= 2010) + return 0; + } + + printk(KERN_ERR PREFIX "MCFG region for %04x:%02x-%02x at %#llx " + "is above 4GB, ignored\n", cfg->pci_segment, + cfg->start_bus_number, cfg->end_bus_number, cfg->address); + return -EINVAL; } static int __init pci_parse_mcfg(struct acpi_table_header *header) @@ -574,13 +588,8 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) memcpy(pci_mmcfg_config, &mcfg[1], config_size); - acpi_mcfg_oem_check(mcfg); - for (i = 0; i < pci_mmcfg_config_num; ++i) { - if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) && - !acpi_mcfg_64bit_base_addr) { - printk(KERN_ERR PREFIX - "MMCONFIG not in low 4GB of memory\n"); + if (acpi_mcfg_check_entry(mcfg, &pci_mmcfg_config[i])) { kfree(pci_mmcfg_config); pci_mmcfg_config_num = 0; return -ENODEV; -- cgit v1.2.3 From 1ccbf5344c3daef046d2323190cc6807c44f1917 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 6 Oct 2009 15:11:14 -0700 Subject: xen: move Xen-testing predicates to common header Move xen_domain and related tests out of asm-x86 to xen/xen.h so they can be included whenever they are necessary. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Jesse Barnes --- arch/x86/include/asm/xen/hypervisor.h | 27 --------------------------- arch/x86/xen/enlighten.c | 1 + drivers/block/xen-blkfront.c | 1 + drivers/char/hvc_xen.c | 2 ++ drivers/input/xen-kbdfront.c | 3 +++ drivers/net/xen-netfront.c | 1 + drivers/video/xen-fbfront.c | 3 +++ drivers/xen/balloon.c | 2 ++ drivers/xen/cpu_hotplug.c | 1 + drivers/xen/evtchn.c | 2 ++ drivers/xen/grant-table.c | 1 + drivers/xen/sys-hypervisor.c | 1 + drivers/xen/xenbus/xenbus_probe.c | 2 ++ drivers/xen/xenfs/super.c | 2 ++ include/xen/xen.h | 32 ++++++++++++++++++++++++++++++++ 15 files changed, 54 insertions(+), 27 deletions(-) create mode 100644 include/xen/xen.h (limited to 'arch/x86') diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index d5b7e90c0edf..396ff4cc8ed4 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -37,31 +37,4 @@ extern struct shared_info *HYPERVISOR_shared_info; extern struct start_info *xen_start_info; -enum xen_domain_type { - XEN_NATIVE, /* running on bare hardware */ - XEN_PV_DOMAIN, /* running in a PV domain */ - XEN_HVM_DOMAIN, /* running in a Xen hvm domain */ -}; - -#ifdef CONFIG_XEN -extern enum xen_domain_type xen_domain_type; -#else -#define xen_domain_type XEN_NATIVE -#endif - -#define xen_domain() (xen_domain_type != XEN_NATIVE) -#define xen_pv_domain() (xen_domain() && \ - xen_domain_type == XEN_PV_DOMAIN) -#define xen_hvm_domain() (xen_domain() && \ - xen_domain_type == XEN_HVM_DOMAIN) - -#ifdef CONFIG_XEN_DOM0 -#include - -#define xen_initial_domain() (xen_pv_domain() && \ - xen_start_info->flags & SIF_INITDOMAIN) -#else /* !CONFIG_XEN_DOM0 */ -#define xen_initial_domain() (0) -#endif /* CONFIG_XEN_DOM0 */ - #endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 23a4d80fb39e..5bccd706232c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -28,6 +28,7 @@ #include #include +#include #include #include #include diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index b8578bb3f4c9..05a31e55d278 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -42,6 +42,7 @@ #include #include +#include #include #include #include diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c index eba999f8598d..93d33816c9d9 100644 --- a/drivers/char/hvc_xen.c +++ b/drivers/char/hvc_xen.c @@ -25,6 +25,8 @@ #include #include + +#include #include #include #include diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c index b115726dc088..c721c0a23eb8 100644 --- a/drivers/input/xen-kbdfront.c +++ b/drivers/input/xen-kbdfront.c @@ -21,7 +21,10 @@ #include #include #include + #include + +#include #include #include #include diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index baa051d5bfbe..a869b45d3d37 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -42,6 +42,7 @@ #include #include +#include #include #include #include diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c index 54cd91610174..966b226c858c 100644 --- a/drivers/video/xen-fbfront.c +++ b/drivers/video/xen-fbfront.c @@ -25,7 +25,10 @@ #include #include #include + #include + +#include #include #include #include diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index d31505b6f7a4..826dda414166 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -52,6 +52,8 @@ #include #include + +#include #include #include #include diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c index bdfd584ad853..6625ffe1a6fd 100644 --- a/drivers/xen/cpu_hotplug.c +++ b/drivers/xen/cpu_hotplug.c @@ -1,5 +1,6 @@ #include +#include #include #include diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 79bedba44fee..f70a4f4698c5 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -48,6 +48,8 @@ #include #include #include + +#include #include #include #include diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 7d8f531fb8e8..4c6c0bd636a8 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -37,6 +37,7 @@ #include #include +#include #include #include #include diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c index 88a60e03ccf0..ae5cb05a1a1c 100644 --- a/drivers/xen/sys-hypervisor.c +++ b/drivers/xen/sys-hypervisor.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index d42e25d5968d..08638adec9fc 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -49,6 +49,8 @@ #include #include #include + +#include #include #include #include diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c index 6559e0c752ce..8924d93136f1 100644 --- a/drivers/xen/xenfs/super.c +++ b/drivers/xen/xenfs/super.c @@ -13,6 +13,8 @@ #include #include +#include + #include "xenfs.h" #include diff --git a/include/xen/xen.h b/include/xen/xen.h new file mode 100644 index 000000000000..a16402418d31 --- /dev/null +++ b/include/xen/xen.h @@ -0,0 +1,32 @@ +#ifndef _XEN_XEN_H +#define _XEN_XEN_H + +enum xen_domain_type { + XEN_NATIVE, /* running on bare hardware */ + XEN_PV_DOMAIN, /* running in a PV domain */ + XEN_HVM_DOMAIN, /* running in a Xen hvm domain */ +}; + +#ifdef CONFIG_XEN +extern enum xen_domain_type xen_domain_type; +#else +#define xen_domain_type XEN_NATIVE +#endif + +#define xen_domain() (xen_domain_type != XEN_NATIVE) +#define xen_pv_domain() (xen_domain() && \ + xen_domain_type == XEN_PV_DOMAIN) +#define xen_hvm_domain() (xen_domain() && \ + xen_domain_type == XEN_HVM_DOMAIN) + +#ifdef CONFIG_XEN_DOM0 +#include +#include + +#define xen_initial_domain() (xen_pv_domain() && \ + xen_start_info->flags & SIF_INITDOMAIN) +#else /* !CONFIG_XEN_DOM0 */ +#define xen_initial_domain() (0) +#endif /* CONFIG_XEN_DOM0 */ + +#endif /* _XEN_XEN_H */ -- cgit v1.2.3 From af5a8ee05404112f38fb2904747c688bdc31a746 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 14 Oct 2009 10:27:42 -0600 Subject: x86/PCI: use -DDEBUG when CONFIG_PCI_DEBUG set We use dev_dbg() in arch/x86/pci, but there's no easy way to turn it on. Add -DDEBUG when CONFIG_PCI_DEBUG=y, just like we do in drivers/pci. Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/Makefile | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index 56d917b556c6..d8a0a6279a4d 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -16,3 +16,7 @@ obj-$(CONFIG_X86_NUMAQ) += numaq_32.o obj-y += common.o early.o obj-y += amd_bus.o obj-$(CONFIG_X86_64) += intel_bus.o + +ifeq ($(CONFIG_PCI_DEBUG),y) +EXTRA_CFLAGS += -DDEBUG +endif -- cgit v1.2.3 From c7dabef8a2c59e6a3de9d66fc35fb6a43ef7172d Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 27 Oct 2009 13:26:47 -0600 Subject: vsprintf: use %pR, %pr instead of %pRt, %pRf Jesse accidentally applied v1 [1] of the patchset instead of v2 [2]. This is the diff between v1 and v2. The changes in this patch are: - tidied vsprintf stack buffer to shrink and compute size more accurately - use %pR for decoding and %pr for "raw" (with type and flags) instead of adding %pRt and %pRf [1] http://lkml.org/lkml/2009/10/6/491 [2] http://lkml.org/lkml/2009/10/13/441 Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/ia64/pci/pci.c | 11 ++++--- arch/x86/pci/acpi.c | 7 +++-- arch/x86/pci/i386.c | 11 +++---- drivers/pci/pci.c | 2 +- drivers/pci/probe.c | 17 ++++++----- drivers/pci/quirks.c | 2 +- drivers/pci/setup-bus.c | 79 ++++++++++++++++++++++--------------------------- drivers/pci/setup-res.c | 20 +++++++------ drivers/pnp/quirks.c | 5 ++-- drivers/pnp/resource.c | 8 ++--- drivers/pnp/support.c | 2 +- drivers/pnp/system.c | 2 +- lib/vsprintf.c | 55 ++++++++++++++++++---------------- 13 files changed, 114 insertions(+), 107 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 06413b827e97..df639db779f9 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -298,18 +298,19 @@ static __devinit acpi_status add_window(struct acpi_resource *res, void *data) window->offset = offset; if (insert_resource(root, &window->resource)) { - dev_err(&info->bridge->dev, "can't allocate %pRt\n", + dev_err(&info->bridge->dev, + "can't allocate host bridge window %pR\n", &window->resource); } else { if (offset) - dev_info(&info->bridge->dev, "host bridge window: %pRt " + dev_info(&info->bridge->dev, "host bridge window %pR " "(PCI address [%#llx-%#llx])\n", &window->resource, window->resource.start - offset, window->resource.end - offset); else dev_info(&info->bridge->dev, - "host bridge window: %pRt\n", + "host bridge window %pR\n", &window->resource); } @@ -330,7 +331,9 @@ pcibios_setup_root_windows(struct pci_bus *bus, struct pci_controller *ctrl) (res->end - res->start < 16)) continue; if (j >= PCI_BUS_NUM_RESOURCES) { - dev_warn(&bus->dev, "ignoring %pRf (no space)\n", res); + dev_warn(&bus->dev, + "ignoring host bridge window %pR (no space)\n", + res); continue; } bus->resource[j++] = res; diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 9b3daf976732..6bf8091d2fd5 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -108,18 +108,19 @@ setup_resource(struct acpi_resource *acpi_res, void *data) res->child = NULL; if (insert_resource(root, res)) { - dev_err(&info->bridge->dev, "can't allocate %pRt\n", res); + dev_err(&info->bridge->dev, + "can't allocate host bridge window %pR\n", res); } else { info->bus->resource[info->res_num] = res; info->res_num++; if (addr.translation_offset) - dev_info(&info->bridge->dev, "host bridge window: %pRt " + dev_info(&info->bridge->dev, "host bridge window %pR " "(PCI address [%#llx-%#llx])\n", res, res->start - addr.translation_offset, res->end - addr.translation_offset); else dev_info(&info->bridge->dev, - "host bridge window: %pRt\n", res); + "host bridge window %pR\n", res); } return AE_OK; } diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 52e656f17781..d49d17de7b3f 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -129,7 +129,7 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) continue; if (!r->start || pci_claim_resource(dev, idx) < 0) { - dev_info(&dev->dev, "BAR %d: can't allocate %pRt\n", idx, r); + dev_info(&dev->dev, "BAR %d: can't allocate %pR\n", idx, r); /* * Something is wrong with the region. * Invalidate the resource to prevent @@ -164,10 +164,11 @@ static void __init pcibios_allocate_resources(int pass) else disabled = !(command & PCI_COMMAND_MEMORY); if (pass == disabled) { - dev_dbg(&dev->dev, "%pRf (d=%d, p=%d)\n", r, - disabled, pass); + dev_dbg(&dev->dev, + "BAR %d: claiming %pr (d=%d, p=%d)\n", + idx, r, disabled, pass); if (pci_claim_resource(dev, idx) < 0) { - dev_info(&dev->dev, "BAR %d: can't allocate %pRt\n", idx, r); + dev_info(&dev->dev, "BAR %d: can't claim %pR\n", idx, r); /* We'll assign a new address later */ r->end -= r->start; r->start = 0; @@ -180,7 +181,7 @@ static void __init pcibios_allocate_resources(int pass) /* Turn the ROM off, leave the resource region, * but keep it unregistered. */ u32 reg; - dev_dbg(&dev->dev, "disabling ROM %pRt\n", r); + dev_dbg(&dev->dev, "disabling ROM %pR\n", r); r->flags &= ~IORESOURCE_ROM_ENABLE; pci_read_config_dword(dev, dev->rom_base_reg, ®); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 557218222826..f0da1676d2be 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1713,7 +1713,7 @@ static int __pci_request_region(struct pci_dev *pdev, int bar, const char *res_n return 0; err_out: - dev_warn(&pdev->dev, "BAR %d: can't reserve %pRt\n", bar, + dev_warn(&pdev->dev, "BAR %d: can't reserve %pR\n", bar, &pdev->resource[bar]); return -EBUSY; } diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 4842b09b7f3c..4c4aca53ae09 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -225,12 +225,13 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, if (!sz64) goto fail; - res->flags |= IORESOURCE_MEM_64; - if ((sizeof(resource_size_t) < 8) && (sz64 > 0x100000000ULL)) { dev_err(&dev->dev, "can't handle 64-bit BAR\n"); goto fail; - } else if ((sizeof(resource_size_t) < 8) && l) { + } + + res->flags |= IORESOURCE_MEM_64; + if ((sizeof(resource_size_t) < 8) && l) { /* Address above 32-bit boundary; disable the BAR */ pci_write_config_dword(dev, pos, 0); pci_write_config_dword(dev, pos + 4, 0); @@ -239,7 +240,7 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, } else { res->start = l64; res->end = l64 + sz64; - dev_printk(KERN_DEBUG, &dev->dev, "reg %x: %pRt\n", + dev_printk(KERN_DEBUG, &dev->dev, "reg %x: %pR\n", pos, res); } } else { @@ -251,7 +252,7 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, res->start = l; res->end = l + sz; - dev_printk(KERN_DEBUG, &dev->dev, "reg %x: %pRt\n", pos, res); + dev_printk(KERN_DEBUG, &dev->dev, "reg %x: %pR\n", pos, res); } out: @@ -319,7 +320,7 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child) res->start = base; if (!res->end) res->end = limit + 0xfff; - dev_printk(KERN_DEBUG, &dev->dev, "bridge window: %pRt\n", res); + dev_printk(KERN_DEBUG, &dev->dev, " bridge window %pR\n", res); } res = child->resource[1]; @@ -331,7 +332,7 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child) res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM; res->start = base; res->end = limit + 0xfffff; - dev_printk(KERN_DEBUG, &dev->dev, "bridge window: %pRt\n", res); + dev_printk(KERN_DEBUG, &dev->dev, " bridge window %pR\n", res); } res = child->resource[2]; @@ -370,7 +371,7 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child) res->flags |= IORESOURCE_MEM_64; res->start = base; res->end = limit + 0xfffff; - dev_printk(KERN_DEBUG, &dev->dev, "bridge window: %pRt\n", res); + dev_printk(KERN_DEBUG, &dev->dev, " bridge window %pR\n", res); } } diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index c0c4537d66da..7cfa7c38d318 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -357,7 +357,7 @@ static void __devinit quirk_io_region(struct pci_dev *dev, unsigned region, pcibios_bus_to_resource(dev, res, &bus_region); pci_claim_resource(dev, nr); - dev_info(&dev->dev, "quirk: region %04x-%04x claimed by %s\n", region, region + size - 1, name); + dev_info(&dev->dev, "quirk: %pR claimed by %s\n", res, name); } } diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index ceb75333862b..ed6916bac675 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -71,53 +71,50 @@ static void pbus_assign_resources_sorted(const struct pci_bus *bus) void pci_setup_cardbus(struct pci_bus *bus) { struct pci_dev *bridge = bus->self; + struct resource *res; struct pci_bus_region region; dev_info(&bridge->dev, "CardBus bridge, secondary bus %04x:%02x\n", pci_domain_nr(bus), bus->number); - pcibios_resource_to_bus(bridge, ®ion, bus->resource[0]); - if (bus->resource[0]->flags & IORESOURCE_IO) { + res = bus->resource[0]; + pcibios_resource_to_bus(bridge, ®ion, res); + if (res->flags & IORESOURCE_IO) { /* * The IO resource is allocated a range twice as large as it * would normally need. This allows us to set both IO regs. */ - dev_info(&bridge->dev, " IO window: %#08lx-%#08lx\n", - (unsigned long)region.start, - (unsigned long)region.end); + dev_info(&bridge->dev, " bridge window %pR\n", res); pci_write_config_dword(bridge, PCI_CB_IO_BASE_0, region.start); pci_write_config_dword(bridge, PCI_CB_IO_LIMIT_0, region.end); } - pcibios_resource_to_bus(bridge, ®ion, bus->resource[1]); - if (bus->resource[1]->flags & IORESOURCE_IO) { - dev_info(&bridge->dev, " IO window: %#08lx-%#08lx\n", - (unsigned long)region.start, - (unsigned long)region.end); + res = bus->resource[1]; + pcibios_resource_to_bus(bridge, ®ion, res); + if (res->flags & IORESOURCE_IO) { + dev_info(&bridge->dev, " bridge window %pR\n", res); pci_write_config_dword(bridge, PCI_CB_IO_BASE_1, region.start); pci_write_config_dword(bridge, PCI_CB_IO_LIMIT_1, region.end); } - pcibios_resource_to_bus(bridge, ®ion, bus->resource[2]); - if (bus->resource[2]->flags & IORESOURCE_MEM) { - dev_info(&bridge->dev, " PREFETCH window: %#08lx-%#08lx\n", - (unsigned long)region.start, - (unsigned long)region.end); + res = bus->resource[2]; + pcibios_resource_to_bus(bridge, ®ion, res); + if (res->flags & IORESOURCE_MEM) { + dev_info(&bridge->dev, " bridge window %pR\n", res); pci_write_config_dword(bridge, PCI_CB_MEMORY_BASE_0, region.start); pci_write_config_dword(bridge, PCI_CB_MEMORY_LIMIT_0, region.end); } - pcibios_resource_to_bus(bridge, ®ion, bus->resource[3]); - if (bus->resource[3]->flags & IORESOURCE_MEM) { - dev_info(&bridge->dev, " MEM window: %#08lx-%#08lx\n", - (unsigned long)region.start, - (unsigned long)region.end); + res = bus->resource[3]; + pcibios_resource_to_bus(bridge, ®ion, res); + if (res->flags & IORESOURCE_MEM) { + dev_info(&bridge->dev, " bridge window %pR\n", res); pci_write_config_dword(bridge, PCI_CB_MEMORY_BASE_1, region.start); pci_write_config_dword(bridge, PCI_CB_MEMORY_LIMIT_1, @@ -140,6 +137,7 @@ EXPORT_SYMBOL(pci_setup_cardbus); static void pci_setup_bridge(struct pci_bus *bus) { struct pci_dev *bridge = bus->self; + struct resource *res; struct pci_bus_region region; u32 l, bu, lu, io_upper16; int pref_mem64; @@ -151,23 +149,22 @@ static void pci_setup_bridge(struct pci_bus *bus) pci_domain_nr(bus), bus->number); /* Set up the top and bottom of the PCI I/O segment for this bus. */ - pcibios_resource_to_bus(bridge, ®ion, bus->resource[0]); - if (bus->resource[0]->flags & IORESOURCE_IO) { + res = bus->resource[0]; + pcibios_resource_to_bus(bridge, ®ion, res); + if (res->flags & IORESOURCE_IO) { pci_read_config_dword(bridge, PCI_IO_BASE, &l); l &= 0xffff0000; l |= (region.start >> 8) & 0x00f0; l |= region.end & 0xf000; /* Set up upper 16 bits of I/O base/limit. */ io_upper16 = (region.end & 0xffff0000) | (region.start >> 16); - dev_info(&bridge->dev, " IO window: %#04lx-%#04lx\n", - (unsigned long)region.start, - (unsigned long)region.end); + dev_info(&bridge->dev, " bridge window %pR\n", res); } else { /* Clear upper 16 bits of I/O base/limit. */ io_upper16 = 0; l = 0x00f0; - dev_info(&bridge->dev, " IO window: disabled\n"); + dev_info(&bridge->dev, " bridge window [io disabled]\n"); } /* Temporarily disable the I/O range before updating PCI_IO_BASE. */ pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, 0x0000ffff); @@ -178,17 +175,16 @@ static void pci_setup_bridge(struct pci_bus *bus) /* Set up the top and bottom of the PCI Memory segment for this bus. */ - pcibios_resource_to_bus(bridge, ®ion, bus->resource[1]); - if (bus->resource[1]->flags & IORESOURCE_MEM) { + res = bus->resource[1]; + pcibios_resource_to_bus(bridge, ®ion, res); + if (res->flags & IORESOURCE_MEM) { l = (region.start >> 16) & 0xfff0; l |= region.end & 0xfff00000; - dev_info(&bridge->dev, " MEM window: %#08lx-%#08lx\n", - (unsigned long)region.start, - (unsigned long)region.end); + dev_info(&bridge->dev, " bridge window %pR\n", res); } else { l = 0x0000fff0; - dev_info(&bridge->dev, " MEM window: disabled\n"); + dev_info(&bridge->dev, " bridge window [mem disabled]\n"); } pci_write_config_dword(bridge, PCI_MEMORY_BASE, l); @@ -200,24 +196,21 @@ static void pci_setup_bridge(struct pci_bus *bus) /* Set up PREF base/limit. */ pref_mem64 = 0; bu = lu = 0; - pcibios_resource_to_bus(bridge, ®ion, bus->resource[2]); - if (bus->resource[2]->flags & IORESOURCE_PREFETCH) { - int width = 8; + res = bus->resource[2]; + pcibios_resource_to_bus(bridge, ®ion, res); + if (res->flags & IORESOURCE_PREFETCH) { l = (region.start >> 16) & 0xfff0; l |= region.end & 0xfff00000; - if (bus->resource[2]->flags & IORESOURCE_MEM_64) { + if (res->flags & IORESOURCE_MEM_64) { pref_mem64 = 1; bu = upper_32_bits(region.start); lu = upper_32_bits(region.end); - width = 16; } - dev_info(&bridge->dev, " PREFETCH window: %#0*llx-%#0*llx\n", - width, (unsigned long long)region.start, - width, (unsigned long long)region.end); + dev_info(&bridge->dev, " bridge window %pR\n", res); } else { l = 0x0000fff0; - dev_info(&bridge->dev, " PREFETCH window: disabled\n"); + dev_info(&bridge->dev, " bridge window [mem pref disabled]\n"); } pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, l); @@ -391,7 +384,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, order = __ffs(align) - 20; if (order > 11) { dev_warn(&dev->dev, "BAR %d: bad alignment %llx: " - "%pRt\n", i, (unsigned long long)align, r); + "%pR\n", i, (unsigned long long)align, r); r->flags = 0; continue; } @@ -582,7 +575,7 @@ static void pci_bus_dump_res(struct pci_bus *bus) if (!res || !res->end) continue; - dev_printk(KERN_DEBUG, &bus->dev, "resource %d %pRt\n", i, res); + dev_printk(KERN_DEBUG, &bus->dev, "resource %d %pR\n", i, res); } } diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 5e78f2096ce8..357ca5c54607 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -51,9 +51,11 @@ void pci_update_resource(struct pci_dev *dev, int resno) pcibios_resource_to_bus(dev, ®ion, res); - dev_dbg(&dev->dev, "BAR %d: got %pRf (bus addr [%#llx-%#llx])\n", - resno, res, (unsigned long long)region.start, - (unsigned long long)region.end); + dev_dbg(&dev->dev, "BAR %d: got res %pR bus [%#llx-%#llx] " + "flags %#lx\n", resno, res, + (unsigned long long)region.start, + (unsigned long long)region.end, + (unsigned long)res->flags); new = region.start | (res->flags & PCI_REGION_FLAG_MASK); if (res->flags & IORESOURCE_IO) @@ -89,8 +91,8 @@ void pci_update_resource(struct pci_dev *dev, int resno) } } res->flags &= ~IORESOURCE_UNSET; - dev_dbg(&dev->dev, "BAR %d: moved to bus addr [%#llx-%#llx]\n", - resno, (unsigned long long)region.start, + dev_dbg(&dev->dev, "BAR %d: moved to %pR (bus addr [%#llx-%#llx])\n", + resno, res, (unsigned long long)region.start, (unsigned long long)region.end); } @@ -108,7 +110,7 @@ int pci_claim_resource(struct pci_dev *dev, int resource) if (err) { const char *dtype = resource < PCI_BRIDGE_RESOURCES ? "device" : "bridge"; - dev_err(&dev->dev, "BAR %d: %s %s %pRt\n", + dev_err(&dev->dev, "BAR %d: %s of %s %pR\n", resource, root ? "address space collision on" : "no parent found for", @@ -179,7 +181,7 @@ int pci_assign_resource(struct pci_dev *dev, int resno) align = pci_resource_alignment(dev, res); if (!align) { - dev_info(&dev->dev, "BAR %d: can't allocate %pRf " + dev_info(&dev->dev, "BAR %d: can't allocate %pR " "(bogus alignment)\n", resno, res); return -EINVAL; } @@ -196,7 +198,7 @@ int pci_assign_resource(struct pci_dev *dev, int resno) } if (ret) - dev_info(&dev->dev, "BAR %d: can't allocate %pRt\n", + dev_info(&dev->dev, "BAR %d: can't allocate %pR\n", resno, res); return ret; @@ -222,7 +224,7 @@ void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head) r_align = pci_resource_alignment(dev, r); if (!r_align) { - dev_warn(&dev->dev, "BAR %d: bogus alignment %pRf\n", + dev_warn(&dev->dev, "BAR %d: %pR has bogus alignment\n", i, r); continue; } diff --git a/drivers/pnp/quirks.c b/drivers/pnp/quirks.c index 3a2031b25c3c..dfbd5a6cc58b 100644 --- a/drivers/pnp/quirks.c +++ b/drivers/pnp/quirks.c @@ -285,8 +285,9 @@ static void quirk_system_pci_resources(struct pnp_dev *dev) * the PCI region, and that might prevent a PCI * driver from requesting its resources. */ - dev_warn(&dev->dev, "resource %pRt overlaps %s " - "BAR %d %pRt, disabling\n", res, + dev_warn(&dev->dev, + "disabling %pR because it overlaps " + "%s BAR %d %pR\n", res, pci_name(pdev), i, &pdev->resource[i]); res->flags |= IORESOURCE_DISABLED; } diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c index 18557d7bb0b9..64d0596bafb5 100644 --- a/drivers/pnp/resource.c +++ b/drivers/pnp/resource.c @@ -517,7 +517,7 @@ struct pnp_resource *pnp_add_irq_resource(struct pnp_dev *dev, int irq, res->start = irq; res->end = irq; - pnp_dbg(&dev->dev, " add %pRf\n", res); + pnp_dbg(&dev->dev, " add %pr\n", res); return pnp_res; } @@ -538,7 +538,7 @@ struct pnp_resource *pnp_add_dma_resource(struct pnp_dev *dev, int dma, res->start = dma; res->end = dma; - pnp_dbg(&dev->dev, " add %pRf\n", res); + pnp_dbg(&dev->dev, " add %pr\n", res); return pnp_res; } @@ -562,7 +562,7 @@ struct pnp_resource *pnp_add_io_resource(struct pnp_dev *dev, res->start = start; res->end = end; - pnp_dbg(&dev->dev, " add %pRf\n", res); + pnp_dbg(&dev->dev, " add %pr\n", res); return pnp_res; } @@ -586,7 +586,7 @@ struct pnp_resource *pnp_add_mem_resource(struct pnp_dev *dev, res->start = start; res->end = end; - pnp_dbg(&dev->dev, " add %pRf\n", res); + pnp_dbg(&dev->dev, " add %pr\n", res); return pnp_res; } diff --git a/drivers/pnp/support.c b/drivers/pnp/support.c index 1f8a33b0abac..9585c1c1cc36 100644 --- a/drivers/pnp/support.c +++ b/drivers/pnp/support.c @@ -82,7 +82,7 @@ void dbg_pnp_show_resources(struct pnp_dev *dev, char *desc) else { pnp_dbg(&dev->dev, "%s: current resources:\n", desc); list_for_each_entry(pnp_res, &dev->resources, list) - pnp_dbg(&dev->dev, "%pRf\n", &pnp_res->res); + pnp_dbg(&dev->dev, "%pr\n", &pnp_res->res); } } diff --git a/drivers/pnp/system.c b/drivers/pnp/system.c index 242d3a872011..49c1720df59a 100644 --- a/drivers/pnp/system.c +++ b/drivers/pnp/system.c @@ -48,7 +48,7 @@ static void reserve_range(struct pnp_dev *dev, struct resource *r, int port) * example do reserve stuff they know about too, so we may well * have double reservations. */ - dev_info(&dev->dev, "%pRt %s reserved\n", r, + dev_info(&dev->dev, "%pR %s reserved\n", r, res ? "has been" : "could not be"); } diff --git a/lib/vsprintf.c b/lib/vsprintf.c index a6e195163eb3..6438cd5599ee 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -624,13 +624,19 @@ static char *resource_string(char *buf, char *end, struct resource *res, .precision = -1, .flags = SPECIAL | SMALL, }; - /* - * room for three actual numbers (decimal or hex), plus - * "[mem 0x-0x 64bit pref disabled flags 0x]\0" - */ - char sym[3*3*sizeof(resource_size_t) + 41]; + + /* 32-bit res (sizeof==4): 10 chars in dec, 10 in hex ("0x" + 8) + * 64-bit res (sizeof==8): 20 chars in dec, 18 in hex ("0x" + 16) */ +#define RSRC_BUF_SIZE ((2 * sizeof(resource_size_t)) + 4) +#define FLAG_BUF_SIZE (2 * sizeof(res->flags)) +#define DECODED_BUF_SIZE sizeof("[mem - 64bit pref disabled]") +#define RAW_BUF_SIZE sizeof("[mem - flags 0x]") + char sym[max(2*RSRC_BUF_SIZE + DECODED_BUF_SIZE, + 2*RSRC_BUF_SIZE + FLAG_BUF_SIZE + RAW_BUF_SIZE)]; + char *p = sym, *pend = sym + sizeof(sym); int size = -1, addr = 0; + int decode = (fmt[0] == 'R') ? 1 : 0; if (res->flags & IORESOURCE_IO) { size = IO_RSRC_PRINTK_SIZE; @@ -641,15 +647,17 @@ static char *resource_string(char *buf, char *end, struct resource *res, } *p++ = '['; - if (fmt[1] == 't' || fmt[1] == 'f') { - if (res->flags & IORESOURCE_IO) - p = string(p, pend, "io ", str_spec); - else if (res->flags & IORESOURCE_MEM) - p = string(p, pend, "mem ", str_spec); - else if (res->flags & IORESOURCE_IRQ) - p = string(p, pend, "irq ", str_spec); - else if (res->flags & IORESOURCE_DMA) - p = string(p, pend, "dma ", str_spec); + if (res->flags & IORESOURCE_IO) + p = string(p, pend, "io ", str_spec); + else if (res->flags & IORESOURCE_MEM) + p = string(p, pend, "mem ", str_spec); + else if (res->flags & IORESOURCE_IRQ) + p = string(p, pend, "irq ", str_spec); + else if (res->flags & IORESOURCE_DMA) + p = string(p, pend, "dma ", str_spec); + else { + p = string(p, pend, "??? ", str_spec); + decode = 0; } hex_spec.field_width = size; p = number(p, pend, res->start, addr ? hex_spec : dec_spec); @@ -657,21 +665,19 @@ static char *resource_string(char *buf, char *end, struct resource *res, *p++ = '-'; p = number(p, pend, res->end, addr ? hex_spec : dec_spec); } - if (fmt[1] == 't' || fmt[1] == 'f') { + if (decode) { if (res->flags & IORESOURCE_MEM_64) p = string(p, pend, " 64bit", str_spec); if (res->flags & IORESOURCE_PREFETCH) p = string(p, pend, " pref", str_spec); if (res->flags & IORESOURCE_DISABLED) p = string(p, pend, " disabled", str_spec); - if (fmt[1] == 'f') { - p = string(p, pend, " flags ", str_spec); - p = number(p, pend, res->flags & ~IORESOURCE_TYPE_BITS, - flag_spec); - } + } else { + p = string(p, pend, " flags ", str_spec); + p = number(p, pend, res->flags, flag_spec); } *p++ = ']'; - *p = 0; + *p = '\0'; return string(buf, end, sym, spec); } @@ -847,10 +853,8 @@ static char *ip4_addr_string(char *buf, char *end, const u8 *addr, * - 'f' For simple symbolic function names without offset * - 'S' For symbolic direct pointers with offset * - 's' For symbolic direct pointers without offset - * - 'R' For a struct resource pointer, print: - * R address range only ([0x0-0x1f]) - * Rt type and range ([mem 0x0-0x1f 64bit pref]) - * Rf type, range, and flags ([mem 0x0-0x1f 64bit pref flags 0x1]) + * - 'R' For decoded struct resource, e.g., [mem 0x0-0x1f 64bit pref] + * - 'r' For raw struct resource, e.g., [mem 0x0-0x1f flags 0x201] * - 'M' For a 6-byte MAC address, it prints the address in the * usual colon-separated hex notation * - 'm' For a 6-byte MAC address, it prints the hex address without colons @@ -881,6 +885,7 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, case 'S': return symbol_string(buf, end, ptr, spec, *fmt); case 'R': + case 'r': return resource_string(buf, end, ptr, spec, fmt); case 'M': /* Colon separated: 00:01:02:03:04:05 */ case 'm': /* Contiguous: 000102030405 */ -- cgit v1.2.3 From 2a6bed8301f8b019717504575a3f9c6cce1fe271 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 4 Nov 2009 10:32:47 -0700 Subject: x86/PCI: print domain:bus in conventional format Use the dev_printk-like "%04x:%02x" format for printing PCI bus numbers. Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/acpi.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 6bf8091d2fd5..68b89dc7d761 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -172,8 +172,9 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do #endif if (domain && !pci_domains_supported) { - printk(KERN_WARNING "PCI: Multiple domains not supported " - "(dom %d, bus %d)\n", domain, busnum); + printk(KERN_WARNING "pci_bus %04x:%02x: " + "ignored (multiple domains not supported)\n", + domain, busnum); return NULL; } @@ -197,7 +198,8 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do */ sd = kzalloc(sizeof(*sd), GFP_KERNEL); if (!sd) { - printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum); + printk(KERN_WARNING "pci_bus %04x:%02x: " + "ignored (out of memory)\n", domain, busnum); return NULL; } -- cgit v1.2.3 From 865df576e8fc70daf297b53e61a4fbefc719d065 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 4 Nov 2009 10:32:57 -0700 Subject: PCI: improve discovery/configuration messages This makes PCI resource management messages more consistent and adds a few new messages to aid debugging. Whenever we assign resources to a device, update a BAR, or change a bridge aperture, it's worth noting it. Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/i386.c | 9 +++++--- drivers/pci/pci.c | 2 +- drivers/pci/probe.c | 13 +++++++---- drivers/pci/setup-bus.c | 21 ++++++++++++----- drivers/pci/setup-res.c | 60 ++++++++++++++++++++++++++----------------------- 5 files changed, 63 insertions(+), 42 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index d49d17de7b3f..b73c09f45210 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -129,7 +129,9 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) continue; if (!r->start || pci_claim_resource(dev, idx) < 0) { - dev_info(&dev->dev, "BAR %d: can't allocate %pR\n", idx, r); + dev_info(&dev->dev, + "can't reserve window %pR\n", + r); /* * Something is wrong with the region. * Invalidate the resource to prevent @@ -165,10 +167,11 @@ static void __init pcibios_allocate_resources(int pass) disabled = !(command & PCI_COMMAND_MEMORY); if (pass == disabled) { dev_dbg(&dev->dev, - "BAR %d: claiming %pr (d=%d, p=%d)\n", + "BAR %d: reserving %pr (d=%d, p=%d)\n", idx, r, disabled, pass); if (pci_claim_resource(dev, idx) < 0) { - dev_info(&dev->dev, "BAR %d: can't claim %pR\n", idx, r); + dev_info(&dev->dev, + "can't reserve %pR\n", r); /* We'll assign a new address later */ r->end -= r->start; r->start = 0; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 930eadf4670f..f88de099ef43 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2575,7 +2575,7 @@ int pci_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type) return reg; } - dev_err(&dev->dev, "BAR: invalid resource #%d\n", resno); + dev_err(&dev->dev, "BAR %d: invalid resource\n", resno); return 0; } diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index a7fdc4344cef..623086f9ba84 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -226,7 +226,8 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, goto fail; if ((sizeof(resource_size_t) < 8) && (sz64 > 0x100000000ULL)) { - dev_err(&dev->dev, "can't handle 64-bit BAR\n"); + dev_err(&dev->dev, "reg %x: can't handle 64-bit BAR\n", + pos); goto fail; } @@ -294,8 +295,11 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child) if (pci_is_root_bus(child)) /* It's a host bus, nothing to read */ return; + dev_info(&dev->dev, "PCI bridge to [bus %02x-%02x]%s\n", + child->secondary, child->subordinate, + dev->transparent ? " (subtractive decode)": ""); + if (dev->transparent) { - dev_info(&dev->dev, "transparent bridge\n"); for(i = 3; i < PCI_BUS_NUM_RESOURCES; i++) child->resource[i] = child->parent->resource[i - 3]; } @@ -645,13 +649,14 @@ int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, (child->number > bus->subordinate) || (child->number < bus->number) || (child->subordinate < bus->number)) { - pr_debug("PCI: Bus #%02x (-#%02x) is %s " - "hidden behind%s bridge #%02x (-#%02x)\n", + dev_info(&child->dev, "[bus %02x-%02x] %s " + "hidden behind%s bridge %s [bus %02x-%02x]\n", child->number, child->subordinate, (bus->number > child->subordinate && bus->subordinate < child->number) ? "wholly" : "partially", bus->self->transparent ? " transparent" : "", + dev_name(&bus->dev), bus->number, bus->subordinate); } bus = bus->parent; diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index ed6916bac675..502d1704c533 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -74,8 +74,8 @@ void pci_setup_cardbus(struct pci_bus *bus) struct resource *res; struct pci_bus_region region; - dev_info(&bridge->dev, "CardBus bridge, secondary bus %04x:%02x\n", - pci_domain_nr(bus), bus->number); + dev_info(&bridge->dev, "CardBus bridge to [bus %02x-%02x]\n", + bus->secondary, bus->subordinate); res = bus->resource[0]; pcibios_resource_to_bus(bridge, ®ion, res); @@ -145,8 +145,8 @@ static void pci_setup_bridge(struct pci_bus *bus) if (pci_is_enabled(bridge)) return; - dev_info(&bridge->dev, "PCI bridge, secondary bus %04x:%02x\n", - pci_domain_nr(bus), bus->number); + dev_info(&bridge->dev, "PCI bridge to [bus %02x-%02x]\n", + bus->secondary, bus->subordinate); /* Set up the top and bottom of the PCI I/O segment for this bus. */ res = bus->resource[0]; @@ -338,6 +338,10 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size) #endif size = ALIGN(size + size1, 4096); if (!size) { + if (b_res->start || b_res->end) + dev_info(&bus->self->dev, "disabling bridge window " + "%pR to [bus %02x-%02x] (unused)\n", b_res, + bus->secondary, bus->subordinate); b_res->flags = 0; return; } @@ -383,8 +387,9 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, align = pci_resource_alignment(dev, r); order = __ffs(align) - 20; if (order > 11) { - dev_warn(&dev->dev, "BAR %d: bad alignment %llx: " - "%pR\n", i, (unsigned long long)align, r); + dev_warn(&dev->dev, "disabling BAR %d: %pR " + "(bad alignment %#llx)\n", i, r, + (unsigned long long) align); r->flags = 0; continue; } @@ -418,6 +423,10 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, } size = ALIGN(size, min_align); if (!size) { + if (b_res->start || b_res->end) + dev_info(&bus->self->dev, "disabling bridge window " + "%pR to [bus %02x-%02x] (unused)\n", b_res, + bus->secondary, bus->subordinate); b_res->flags = 0; return 1; } diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 357ca5c54607..7d678bb15ffb 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -51,12 +51,6 @@ void pci_update_resource(struct pci_dev *dev, int resno) pcibios_resource_to_bus(dev, ®ion, res); - dev_dbg(&dev->dev, "BAR %d: got res %pR bus [%#llx-%#llx] " - "flags %#lx\n", resno, res, - (unsigned long long)region.start, - (unsigned long long)region.end, - (unsigned long)res->flags); - new = region.start | (res->flags & PCI_REGION_FLAG_MASK); if (res->flags & IORESOURCE_IO) mask = (u32)PCI_BASE_ADDRESS_IO_MASK; @@ -91,9 +85,9 @@ void pci_update_resource(struct pci_dev *dev, int resno) } } res->flags &= ~IORESOURCE_UNSET; - dev_dbg(&dev->dev, "BAR %d: moved to %pR (bus addr [%#llx-%#llx])\n", - resno, res, (unsigned long long)region.start, - (unsigned long long)region.end); + dev_info(&dev->dev, "BAR %d: set to %pR (PCI address [%#llx-%#llx]\n", + resno, res, (unsigned long long)region.start, + (unsigned long long)region.end); } int pci_claim_resource(struct pci_dev *dev, int resource) @@ -103,20 +97,17 @@ int pci_claim_resource(struct pci_dev *dev, int resource) int err; root = pci_find_parent_resource(dev, res); - - err = -EINVAL; - if (root != NULL) - err = request_resource(root, res); - - if (err) { - const char *dtype = resource < PCI_BRIDGE_RESOURCES ? "device" : "bridge"; - dev_err(&dev->dev, "BAR %d: %s of %s %pR\n", - resource, - root ? "address space collision on" : - "no parent found for", - dtype, res); + if (!root) { + dev_err(&dev->dev, "no compatible bridge window for %pR\n", + res); + return -EINVAL; } + err = request_resource(root, res); + if (err) + dev_err(&dev->dev, + "address space collision: %pR already in use\n", res); + return err; } EXPORT_SYMBOL(pci_claim_resource); @@ -124,7 +115,7 @@ EXPORT_SYMBOL(pci_claim_resource); #ifdef CONFIG_PCI_QUIRKS void pci_disable_bridge_window(struct pci_dev *dev) { - dev_dbg(&dev->dev, "Disabling bridge window.\n"); + dev_info(&dev->dev, "disabling bridge mem windows\n"); /* MMIO Base/Limit */ pci_write_config_dword(dev, PCI_MEMORY_BASE, 0x0000fff0); @@ -165,6 +156,7 @@ static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev, if (!ret) { res->flags &= ~IORESOURCE_STARTALIGN; + dev_info(&dev->dev, "BAR %d: assigned %pR\n", resno, res); if (resno < PCI_BRIDGE_RESOURCES) pci_update_resource(dev, resno); } @@ -178,10 +170,11 @@ int pci_assign_resource(struct pci_dev *dev, int resno) resource_size_t align; struct pci_bus *bus; int ret; + char *type; align = pci_resource_alignment(dev, res); if (!align) { - dev_info(&dev->dev, "BAR %d: can't allocate %pR " + dev_info(&dev->dev, "BAR %d: can't assign %pR " "(bogus alignment)\n", resno, res); return -EINVAL; } @@ -197,9 +190,20 @@ int pci_assign_resource(struct pci_dev *dev, int resno) break; } - if (ret) - dev_info(&dev->dev, "BAR %d: can't allocate %pR\n", - resno, res); + if (ret) { + if (res->flags & IORESOURCE_MEM) + if (res->flags & IORESOURCE_PREFETCH) + type = "mem pref"; + else + type = "mem"; + else if (res->flags & IORESOURCE_IO) + type = "io"; + else + type = "unknown"; + dev_info(&dev->dev, + "BAR %d: can't assign %s (size %#llx)\n", + resno, type, (unsigned long long) resource_size(res)); + } return ret; } @@ -272,8 +276,8 @@ int pci_enable_resources(struct pci_dev *dev, int mask) continue; if (!r->parent) { - dev_err(&dev->dev, "device not available because of " - "BAR %d %pR collisions\n", i, r); + dev_err(&dev->dev, "device not available " + "(can't reserve %pR)\n", r); return -EINVAL; } -- cgit v1.2.3 From f1db6fde09e201218f488d7205a7cd7bc448d496 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 4 Nov 2009 10:39:13 -0700 Subject: x86/PCI: for debuggability, show host bridge windows even when ignoring _CRS We have occasional problems with PCI resource allocation, and sometimes they could be avoided by paying attention to what ACPI tells us about the host bridges. This patch doesn't change the behavior, but it prints window information that should make debugging easier. Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/acpi.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 68b89dc7d761..54db5a04b5e1 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -92,11 +92,12 @@ setup_resource(struct acpi_resource *acpi_res, void *data) start = addr.minimum + addr.translation_offset; end = start + addr.address_length - 1; if (info->res_num >= max_root_bus_resources) { - printk(KERN_WARNING "PCI: Failed to allocate 0x%lx-0x%lx " - "from %s for %s due to _CRS returning more than " - "%d resource descriptors\n", (unsigned long) start, - (unsigned long) end, root->name, info->name, - max_root_bus_resources); + if (pci_probe & PCI_USE__CRS) + printk(KERN_WARNING "PCI: Failed to allocate " + "0x%lx-0x%lx from %s for %s due to _CRS " + "returning more than %d resource descriptors\n", + (unsigned long) start, (unsigned long) end, + root->name, info->name, max_root_bus_resources); return AE_OK; } @@ -107,6 +108,12 @@ setup_resource(struct acpi_resource *acpi_res, void *data) res->end = end; res->child = NULL; + if (!(pci_probe & PCI_USE__CRS)) { + dev_printk(KERN_DEBUG, &info->bridge->dev, + "host bridge window %pR (ignored)\n", res); + return AE_OK; + } + if (insert_resource(root, res)) { dev_err(&info->bridge->dev, "can't allocate host bridge window %pR\n", res); @@ -132,6 +139,11 @@ get_current_resources(struct acpi_device *device, int busnum, struct pci_root_info info; size_t size; + if (!(pci_probe & PCI_USE__CRS)) + dev_info(&device->dev, + "ignoring host bridge windows from ACPI; " + "boot with \"pci=use_crs\" to use them\n"); + info.bridge = device; info.bus = bus; info.res_num = 0; @@ -220,9 +232,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do } else { bus = pci_create_bus(NULL, busnum, &pci_root_ops, sd); if (bus) { - if (pci_probe & PCI_USE__CRS) - get_current_resources(device, busnum, domain, - bus); + get_current_resources(device, busnum, domain, bus); bus->subordinate = pci_scan_child_bus(bus); } } -- cgit v1.2.3 From 03db42adfeeabe856dbb6894dd3aaff55838330a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 4 Nov 2009 10:39:18 -0700 Subject: x86/PCI: fix bogus host bridge window start/end alignment from _CRS PCI device BARs are guaranteed to start and end on at least a four-byte (I/O) or a sixteen-byte (MMIO) boundary because they're aligned on their size and the low BAR bits are reserved. PCI-to-PCI bridge apertures have even larger alignment restrictions. However, some BIOSes (e.g., HP DL360 BIOS P31) report host bridge windows like "[io 0x0000-0x2cfe]". This is wrong because it excludes the last port at 0x2cff: it's impossible for a downstream device to claim 0x2cfe without also claiming 0x2cff. In fact, this BIOS configures a device behind the bridge to "[io 0x2c00-0x2cff]", so we know the window actually does include 0x2cff. This patch rounds the start and end of apertures to the appropriate boundary. I experimentally determined that Windows contains a similar workaround; details here: http://bugzilla.kernel.org/show_bug.cgi?id=14337 Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/acpi.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 54db5a04b5e1..8ddf4f4c7253 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -59,6 +59,30 @@ bus_has_transparent_bridge(struct pci_bus *bus) return false; } +static void +align_resource(struct acpi_device *bridge, struct resource *res) +{ + int align = (res->flags & IORESOURCE_MEM) ? 16 : 4; + + /* + * Host bridge windows are not BARs, but the decoders on the PCI side + * that claim this address space have starting alignment and length + * constraints, so fix any obvious BIOS goofs. + */ + if (res->start & (align - 1)) { + dev_printk(KERN_DEBUG, &bridge->dev, + "host bridge window %pR invalid; " + "aligning start to %d-byte boundary\n", res, align); + res->start &= ~(align - 1); + } + if ((res->end + 1) & (align - 1)) { + dev_printk(KERN_DEBUG, &bridge->dev, + "host bridge window %pR invalid; " + "aligning end to %d-byte boundary\n", res, align); + res->end = roundup(res->end, align) - 1; + } +} + static acpi_status setup_resource(struct acpi_resource *acpi_res, void *data) { @@ -107,6 +131,7 @@ setup_resource(struct acpi_resource *acpi_res, void *data) res->start = start; res->end = end; res->child = NULL; + align_resource(info->bridge, res); if (!(pci_probe & PCI_USE__CRS)) { dev_printk(KERN_DEBUG, &info->bridge->dev, -- cgit v1.2.3 From 2c75910f1aa042be1dd769378d2611bf551721ac Mon Sep 17 00:00:00 2001 From: Chris Lalancette Date: Thu, 5 Nov 2009 11:47:08 +0100 Subject: x86: Make sure get_user_desc() doesn't sign extend. The current implementation of get_user_desc() sign extends the return value because of integer promotion rules. For the most part, this doesn't matter, because the top bit of base2 is usually 0. If, however, that bit is 1, then the entire value will be 0xffff... which is probably not what the caller intended. This patch casts the entire thing to unsigned before returning, which generates almost the same assembly as the current code but replaces the final "cltq" (sign extend) with a "mov %eax %eax" (zero-extend). This fixes booting certain guests under KVM. Signed-off-by: Chris Lalancette Signed-off-by: Linus Torvalds --- arch/x86/include/asm/desc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index e8de2f6f5ca5..617bd56b3070 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -288,7 +288,7 @@ static inline void load_LDT(mm_context_t *pc) static inline unsigned long get_desc_base(const struct desc_struct *desc) { - return desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24); + return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24)); } static inline void set_desc_base(struct desc_struct *desc, unsigned long base) -- cgit v1.2.3 From 2da3e160cb3d226d87b907fab26850d838ed8d7c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 5 Nov 2009 23:06:50 +0100 Subject: hw-breakpoint: Move asm-generic/hw_breakpoint.h to linux/hw_breakpoint.h We plan to make the breakpoints parameters generic among architectures. For that it's better to move the asm-generic header to a generic linux header. Signed-off-by: Frederic Weisbecker --- arch/x86/include/asm/hw_breakpoint.h | 2 +- include/asm-generic/hw_breakpoint.h | 139 ----------------------------------- include/linux/hw_breakpoint.h | 136 ++++++++++++++++++++++++++++++++++ 3 files changed, 137 insertions(+), 140 deletions(-) delete mode 100644 include/asm-generic/hw_breakpoint.h create mode 100644 include/linux/hw_breakpoint.h (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 1acb4d45de70..3cfca8e2b5f6 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -12,7 +12,7 @@ struct arch_hw_breakpoint { }; #include -#include +#include /* Available HW breakpoint length encodings */ #define HW_BREAKPOINT_LEN_1 0x40 diff --git a/include/asm-generic/hw_breakpoint.h b/include/asm-generic/hw_breakpoint.h deleted file mode 100644 index 9bf2d12eb74a..000000000000 --- a/include/asm-generic/hw_breakpoint.h +++ /dev/null @@ -1,139 +0,0 @@ -#ifndef _ASM_GENERIC_HW_BREAKPOINT_H -#define _ASM_GENERIC_HW_BREAKPOINT_H - -#ifndef __ARCH_HW_BREAKPOINT_H -#error "Please don't include this file directly" -#endif - -#ifdef __KERNEL__ -#include -#include -#include - -/** - * struct hw_breakpoint - unified kernel/user-space hardware breakpoint - * @triggered: callback invoked after target address access - * @info: arch-specific breakpoint info (address, length, and type) - * - * %hw_breakpoint structures are the kernel's way of representing - * hardware breakpoints. These are data breakpoints - * (also known as "watchpoints", triggered on data access), and the breakpoint's - * target address can be located in either kernel space or user space. - * - * The breakpoint's address, length, and type are highly - * architecture-specific. The values are encoded in the @info field; you - * specify them when registering the breakpoint. To examine the encoded - * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared - * below. - * - * The address is specified as a regular kernel pointer (for kernel-space - * breakponts) or as an %__user pointer (for user-space breakpoints). - * With register_user_hw_breakpoint(), the address must refer to a - * location in user space. The breakpoint will be active only while the - * requested task is running. Conversely with - * register_kernel_hw_breakpoint(), the address must refer to a location - * in kernel space, and the breakpoint will be active on all CPUs - * regardless of the current task. - * - * The length is the breakpoint's extent in bytes, which is subject to - * certain limitations. include/asm/hw_breakpoint.h contains macros - * defining the available lengths for a specific architecture. Note that - * the address's alignment must match the length. The breakpoint will - * catch accesses to any byte in the range from address to address + - * (length - 1). - * - * The breakpoint's type indicates the sort of access that will cause it - * to trigger. Possible values may include: - * - * %HW_BREAKPOINT_RW (triggered on read or write access), - * %HW_BREAKPOINT_WRITE (triggered on write access), and - * %HW_BREAKPOINT_READ (triggered on read access). - * - * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all - * possibilities are available on all architectures. Execute breakpoints - * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE. - * - * When a breakpoint gets hit, the @triggered callback is - * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the - * processor registers. - * Data breakpoints occur after the memory access has taken place. - * Breakpoints are disabled during execution @triggered, to avoid - * recursive traps and allow unhindered access to breakpointed memory. - * - * This sample code sets a breakpoint on pid_max and registers a callback - * function for writes to that variable. Note that it is not portable - * as written, because not all architectures support HW_BREAKPOINT_LEN_4. - * - * ---------------------------------------------------------------------- - * - * #include - * - * struct hw_breakpoint my_bp; - * - * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) - * { - * printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n"); - * dump_stack(); - * ............... - * } - * - * static struct hw_breakpoint my_bp; - * - * static int init_module(void) - * { - * ...................... - * my_bp.info.type = HW_BREAKPOINT_WRITE; - * my_bp.info.len = HW_BREAKPOINT_LEN_4; - * - * my_bp.installed = (void *)my_bp_installed; - * - * rc = register_kernel_hw_breakpoint(&my_bp); - * ...................... - * } - * - * static void cleanup_module(void) - * { - * ...................... - * unregister_kernel_hw_breakpoint(&my_bp); - * ...................... - * } - * - * ---------------------------------------------------------------------- - */ -struct hw_breakpoint { - void (*triggered)(struct hw_breakpoint *, struct pt_regs *); - struct arch_hw_breakpoint info; -}; - -/* - * len and type values are defined in include/asm/hw_breakpoint.h. - * Available values vary according to the architecture. On i386 the - * possibilities are: - * - * HW_BREAKPOINT_LEN_1 - * HW_BREAKPOINT_LEN_2 - * HW_BREAKPOINT_LEN_4 - * HW_BREAKPOINT_RW - * HW_BREAKPOINT_READ - * - * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the - * 1-, 2-, and 4-byte lengths may be unavailable. There also may be - * HW_BREAKPOINT_WRITE. You can use #ifdef to check at compile time. - */ - -extern int register_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp); -extern int modify_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp); -extern void unregister_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp); -/* - * Kernel breakpoints are not associated with any particular thread. - */ -extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp); -extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp); - -extern unsigned int hbp_kernel_pos; - -#endif /* __KERNEL__ */ -#endif /* _ASM_GENERIC_HW_BREAKPOINT_H */ diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h new file mode 100644 index 000000000000..61ccc8f17eac --- /dev/null +++ b/include/linux/hw_breakpoint.h @@ -0,0 +1,136 @@ +#ifndef _LINUX_HW_BREAKPOINT_H +#define _LINUX_HW_BREAKPOINT_H + + +#ifdef __KERNEL__ +#include +#include +#include + +/** + * struct hw_breakpoint - unified kernel/user-space hardware breakpoint + * @triggered: callback invoked after target address access + * @info: arch-specific breakpoint info (address, length, and type) + * + * %hw_breakpoint structures are the kernel's way of representing + * hardware breakpoints. These are data breakpoints + * (also known as "watchpoints", triggered on data access), and the breakpoint's + * target address can be located in either kernel space or user space. + * + * The breakpoint's address, length, and type are highly + * architecture-specific. The values are encoded in the @info field; you + * specify them when registering the breakpoint. To examine the encoded + * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared + * below. + * + * The address is specified as a regular kernel pointer (for kernel-space + * breakponts) or as an %__user pointer (for user-space breakpoints). + * With register_user_hw_breakpoint(), the address must refer to a + * location in user space. The breakpoint will be active only while the + * requested task is running. Conversely with + * register_kernel_hw_breakpoint(), the address must refer to a location + * in kernel space, and the breakpoint will be active on all CPUs + * regardless of the current task. + * + * The length is the breakpoint's extent in bytes, which is subject to + * certain limitations. include/asm/hw_breakpoint.h contains macros + * defining the available lengths for a specific architecture. Note that + * the address's alignment must match the length. The breakpoint will + * catch accesses to any byte in the range from address to address + + * (length - 1). + * + * The breakpoint's type indicates the sort of access that will cause it + * to trigger. Possible values may include: + * + * %HW_BREAKPOINT_RW (triggered on read or write access), + * %HW_BREAKPOINT_WRITE (triggered on write access), and + * %HW_BREAKPOINT_READ (triggered on read access). + * + * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all + * possibilities are available on all architectures. Execute breakpoints + * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE. + * + * When a breakpoint gets hit, the @triggered callback is + * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the + * processor registers. + * Data breakpoints occur after the memory access has taken place. + * Breakpoints are disabled during execution @triggered, to avoid + * recursive traps and allow unhindered access to breakpointed memory. + * + * This sample code sets a breakpoint on pid_max and registers a callback + * function for writes to that variable. Note that it is not portable + * as written, because not all architectures support HW_BREAKPOINT_LEN_4. + * + * ---------------------------------------------------------------------- + * + * #include + * + * struct hw_breakpoint my_bp; + * + * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) + * { + * printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n"); + * dump_stack(); + * ............... + * } + * + * static struct hw_breakpoint my_bp; + * + * static int init_module(void) + * { + * ...................... + * my_bp.info.type = HW_BREAKPOINT_WRITE; + * my_bp.info.len = HW_BREAKPOINT_LEN_4; + * + * my_bp.installed = (void *)my_bp_installed; + * + * rc = register_kernel_hw_breakpoint(&my_bp); + * ...................... + * } + * + * static void cleanup_module(void) + * { + * ...................... + * unregister_kernel_hw_breakpoint(&my_bp); + * ...................... + * } + * + * ---------------------------------------------------------------------- + */ +struct hw_breakpoint { + void (*triggered)(struct hw_breakpoint *, struct pt_regs *); + struct arch_hw_breakpoint info; +}; + +/* + * len and type values are defined in include/asm/hw_breakpoint.h. + * Available values vary according to the architecture. On i386 the + * possibilities are: + * + * HW_BREAKPOINT_LEN_1 + * HW_BREAKPOINT_LEN_2 + * HW_BREAKPOINT_LEN_4 + * HW_BREAKPOINT_RW + * HW_BREAKPOINT_READ + * + * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the + * 1-, 2-, and 4-byte lengths may be unavailable. There also may be + * HW_BREAKPOINT_WRITE. You can use #ifdef to check at compile time. + */ + +extern int register_user_hw_breakpoint(struct task_struct *tsk, + struct hw_breakpoint *bp); +extern int modify_user_hw_breakpoint(struct task_struct *tsk, + struct hw_breakpoint *bp); +extern void unregister_user_hw_breakpoint(struct task_struct *tsk, + struct hw_breakpoint *bp); +/* + * Kernel breakpoints are not associated with any particular thread. + */ +extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp); +extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp); + +extern unsigned int hbp_kernel_pos; + +#endif /* __KERNEL__ */ +#endif /* _LINUX_HW_BREAKPOINT_H */ -- cgit v1.2.3 From c3359fbce4b65d542d02c30aa5174c8e4838da2d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 3 Apr 2009 00:59:52 -0700 Subject: sysctl: x86 Use the compat_sys_sysctl Now that we have a generic 32bit compatibility implementation there is no need for x86 to implement it's own. Cc: Thomas Gleixner Cc: Ingo Molnar Acked-by: H. Peter Anvin Signed-off-by: Eric W. Biederman --- arch/x86/ia32/ia32entry.S | 2 +- arch/x86/ia32/sys_ia32.c | 56 ----------------------------------------- arch/x86/include/asm/sys_ia32.h | 5 ---- 3 files changed, 1 insertion(+), 62 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 581b0568fe19..5d2584839be4 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -653,7 +653,7 @@ ia32_sys_call_table: .quad compat_sys_writev .quad sys_getsid .quad sys_fdatasync - .quad sys32_sysctl /* sysctl */ + .quad compat_sys_sysctl /* sysctl */ .quad sys_mlock /* 150 */ .quad sys_munlock .quad sys_mlockall diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 9f5527198825..df82c0e48ded 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -434,62 +434,6 @@ asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig, return ret; } -#ifdef CONFIG_SYSCTL_SYSCALL -struct sysctl_ia32 { - unsigned int name; - int nlen; - unsigned int oldval; - unsigned int oldlenp; - unsigned int newval; - unsigned int newlen; - unsigned int __unused[4]; -}; - - -asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *args32) -{ - struct sysctl_ia32 a32; - mm_segment_t old_fs = get_fs(); - void __user *oldvalp, *newvalp; - size_t oldlen; - int __user *namep; - long ret; - - if (copy_from_user(&a32, args32, sizeof(a32))) - return -EFAULT; - - /* - * We need to pre-validate these because we have to disable - * address checking before calling do_sysctl() because of - * OLDLEN but we can't run the risk of the user specifying bad - * addresses here. Well, since we're dealing with 32 bit - * addresses, we KNOW that access_ok() will always succeed, so - * this is an expensive NOP, but so what... - */ - namep = compat_ptr(a32.name); - oldvalp = compat_ptr(a32.oldval); - newvalp = compat_ptr(a32.newval); - - if ((oldvalp && get_user(oldlen, (int __user *)compat_ptr(a32.oldlenp))) - || !access_ok(VERIFY_WRITE, namep, 0) - || !access_ok(VERIFY_WRITE, oldvalp, 0) - || !access_ok(VERIFY_WRITE, newvalp, 0)) - return -EFAULT; - - set_fs(KERNEL_DS); - lock_kernel(); - ret = do_sysctl(namep, a32.nlen, oldvalp, (size_t __user *)&oldlen, - newvalp, (size_t) a32.newlen); - unlock_kernel(); - set_fs(old_fs); - - if (oldvalp && put_user(oldlen, (int __user *)compat_ptr(a32.oldlenp))) - return -EFAULT; - - return ret; -} -#endif - /* warning: next two assume little endian */ asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count, u32 poslo, u32 poshi) diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 72a6dcd1299b..9af9decb38c3 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -51,11 +51,6 @@ asmlinkage long sys32_sched_rr_get_interval(compat_pid_t, asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t); asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *); -#ifdef CONFIG_SYSCTL_SYSCALL -struct sysctl_ia32; -asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *); -#endif - asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32); -- cgit v1.2.3 From ea7f1b6ee9dc96c5827b06ba21d7769d553efb7d Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 5 Nov 2009 11:17:11 -0600 Subject: x86/PCI: remove 64-bit division The roundup() caused a build error (undefined reference to `__udivdi3'). We're aligning to power-of-two boundaries, so it's simpler to just use ALIGN() anyway, which avoids the division. Signed-off-by: Bjorn Helgaas Acked-by: Randy Dunlap Signed-off-by: Jesse Barnes --- arch/x86/pci/acpi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 8ddf4f4c7253..959e548a7039 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -69,17 +69,17 @@ align_resource(struct acpi_device *bridge, struct resource *res) * that claim this address space have starting alignment and length * constraints, so fix any obvious BIOS goofs. */ - if (res->start & (align - 1)) { + if (!IS_ALIGNED(res->start, align)) { dev_printk(KERN_DEBUG, &bridge->dev, "host bridge window %pR invalid; " "aligning start to %d-byte boundary\n", res, align); res->start &= ~(align - 1); } - if ((res->end + 1) & (align - 1)) { + if (!IS_ALIGNED(res->end + 1, align)) { dev_printk(KERN_DEBUG, &bridge->dev, "host bridge window %pR invalid; " "aligning end to %d-byte boundary\n", res, align); - res->end = roundup(res->end, align) - 1; + res->end = ALIGN(res->end, align) - 1; } } -- cgit v1.2.3 From f1b291d4c47440cbfc1a478e88800e2742d60a80 Mon Sep 17 00:00:00 2001 From: Simon Kagstrom Date: Fri, 6 Nov 2009 15:44:04 +0100 Subject: x86: Add Phoenix/MSC BIOSes to lowmem corruption list We have a board with a Phoenix/MSC BIOS which also corrupts the low 64KB of RAM, so add an entry to the table. Signed-off-by: Simon Kagstrom LKML-Reference: <20091106154404.002648d9@marrow.netinsight.se> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/setup.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index e09f0e2c14b5..2a34f9c5be21 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -659,6 +659,13 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"), }, }, + { + .callback = dmi_low_memory_corruption, + .ident = "Phoenix/MSC BIOS", + .matches = { + DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"), + }, + }, { /* * AMI BIOS with low memory corruption was found on Intel DG45ID board. -- cgit v1.2.3 From c12a229bc5971534537a7d0e49e44f9f1f5d0336 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 5 Nov 2009 11:03:59 -0500 Subject: x86: Remove unused thread_return label from switch_to() Remove unused thread_return label from switch_to() macro on x86-64. Since this symbol cuts into schedule(), backtrace at the latter half of schedule() was always shown as thread_return(). Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE LKML-Reference: <20091105160359.5181.26225.stgit@harusame> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/system.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index f08f97374892..1a953e26401c 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -128,8 +128,6 @@ do { \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ "call __switch_to\n\t" \ - ".globl thread_return\n" \ - "thread_return:\n\t" \ "movq "__percpu_arg([current_task])",%%rsi\n\t" \ __switch_canary \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ -- cgit v1.2.3 From 0d0fbbddcc27c062815732b38c44b544e656c799 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Nov 2009 22:45:41 +1030 Subject: x86, msr, cpumask: Use struct cpumask rather than the deprecated cpumask_t This makes the declarations match the definitions, which already use 'struct cpumask'. Signed-off-by: Rusty Russell Acked-by: Borislav Petkov LKML-Reference: <200911052245.41803.rusty@rustcorp.com.au> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 9a00219b331a..5bef931f8b14 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -264,12 +264,12 @@ static inline int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) wrmsr(msr_no, l, h); return 0; } -static inline void rdmsr_on_cpus(const cpumask_t *m, u32 msr_no, +static inline void rdmsr_on_cpus(const struct cpumask *m, u32 msr_no, struct msr *msrs) { rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h)); } -static inline void wrmsr_on_cpus(const cpumask_t *m, u32 msr_no, +static inline void wrmsr_on_cpus(const struct cpumask *m, u32 msr_no, struct msr *msrs) { wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h); -- cgit v1.2.3 From de2a47cf2b3f59ef9664b277f4021b91af13598e Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Thu, 5 Nov 2009 10:43:51 +0800 Subject: x86: Fix error return sequence in __ioremap_caller() kernel missed to free memtype if get_vm_area_caller failed in __ioremap_caller. This patch introduces error path to fix this and cleans up the repetitive error return sequences that contributed to the creation of the bug. Signed-off-by: Xiaotian Feng Acked-by: Suresh Siddha Cc: Venkatesh Pallipadi Cc: H. Peter Anvin LKML-Reference: <1257389031-20429-1-git-send-email-dfeng@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 334e63ca7b2b..2feb9bdedaaf 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -170,8 +170,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, (unsigned long long)phys_addr, (unsigned long long)(phys_addr + size), prot_val, new_prot_val); - free_memtype(phys_addr, phys_addr + size); - return NULL; + goto err_free_memtype; } prot_val = new_prot_val; } @@ -197,26 +196,25 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, */ area = get_vm_area_caller(size, VM_IOREMAP, caller); if (!area) - return NULL; + goto err_free_memtype; area->phys_addr = phys_addr; vaddr = (unsigned long) area->addr; - if (kernel_map_sync_memtype(phys_addr, size, prot_val)) { - free_memtype(phys_addr, phys_addr + size); - free_vm_area(area); - return NULL; - } + if (kernel_map_sync_memtype(phys_addr, size, prot_val)) + goto err_free_area; - if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) { - free_memtype(phys_addr, phys_addr + size); - free_vm_area(area); - return NULL; - } + if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) + goto err_free_area; ret_addr = (void __iomem *) (vaddr + offset); mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr); return ret_addr; +err_free_area: + free_vm_area(area); +err_free_memtype: + free_memtype(phys_addr, phys_addr + size); + return NULL; } /** -- cgit v1.2.3 From 338bac527ed0e35b4cb50390972f15d3cbce92ca Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 27 Oct 2009 16:34:44 +0900 Subject: x86: Use x86_platform for iommu_shutdown This patch cleans up pci_iommu_shutdown() a bit to use x86_platform (similar to how IA64 initializes an IOMMU driver). This adds iommu_shutdown() to x86_platform to avoid calling every IOMMUs' shutdown functions in pci_iommu_shutdown() in order. The IOMMU shutdown functions are platform specific (we don't have multiple different IOMMU hardware) so the current way is pointless. An IOMMU driver sets x86_platform.iommu_shutdown to the shutdown function if necessary. Signed-off-by: FUJITA Tomonori Cc: joerg.roedel@amd.com LKML-Reference: <20091027163358F.fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/amd_iommu.h | 2 -- arch/x86/include/asm/gart.h | 4 ---- arch/x86/include/asm/iommu.h | 2 +- arch/x86/include/asm/x86_init.h | 1 + arch/x86/kernel/amd_iommu_init.c | 6 +----- arch/x86/kernel/crash.c | 5 ++--- arch/x86/kernel/pci-dma.c | 7 ------- arch/x86/kernel/pci-gart_64.c | 6 ++++-- arch/x86/kernel/reboot.c | 4 ++-- arch/x86/kernel/x86_init.c | 2 ++ 10 files changed, 13 insertions(+), 26 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index 4b180897e6b5..3604669f7b15 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -30,12 +30,10 @@ extern void amd_iommu_detect(void); extern irqreturn_t amd_iommu_int_handler(int irq, void *data); extern void amd_iommu_flush_all_domains(void); extern void amd_iommu_flush_all_devices(void); -extern void amd_iommu_shutdown(void); extern void amd_iommu_apply_erratum_63(u16 devid); #else static inline int amd_iommu_init(void) { return -ENODEV; } static inline void amd_iommu_detect(void) { } -static inline void amd_iommu_shutdown(void) { } #endif #endif /* _ASM_X86_AMD_IOMMU_H */ diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h index 6cfdafa409d8..4fdd5b3f87b1 100644 --- a/arch/x86/include/asm/gart.h +++ b/arch/x86/include/asm/gart.h @@ -36,7 +36,6 @@ extern int gart_iommu_aperture_disabled; extern void early_gart_iommu_check(void); extern void gart_iommu_init(void); -extern void gart_iommu_shutdown(void); extern void __init gart_parse_options(char *); extern void gart_iommu_hole_init(void); @@ -51,9 +50,6 @@ static inline void early_gart_iommu_check(void) static inline void gart_iommu_init(void) { } -static inline void gart_iommu_shutdown(void) -{ -} static inline void gart_parse_options(char *options) { } diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index fd6d21bbee6c..878b30715766 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -1,7 +1,7 @@ #ifndef _ASM_X86_IOMMU_H #define _ASM_X86_IOMMU_H -extern void pci_iommu_shutdown(void); +static inline void iommu_shutdown_noop(void) {} extern void no_iommu_init(void); extern struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 2c756fd4ab0e..66008ed80b7a 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -121,6 +121,7 @@ struct x86_platform_ops { unsigned long (*calibrate_tsc)(void); unsigned long (*get_wallclock)(void); int (*set_wallclock)(unsigned long nowtime); + void (*iommu_shutdown)(void); }; extern struct x86_init_ops x86_init; diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c20001e4f556..6acd43e9afd7 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1297,6 +1297,7 @@ int __init amd_iommu_init(void) else printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n"); + x86_platform.iommu_shutdown = disable_iommus; out: return ret; @@ -1323,11 +1324,6 @@ free: goto out; } -void amd_iommu_shutdown(void) -{ - disable_iommus(); -} - /**************************************************************************** * * Early detect code. This code runs at IOMMU detection time in the DMA diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 5e409dc298a4..a4849c10a77e 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -27,8 +27,7 @@ #include #include #include -#include - +#include #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) @@ -106,7 +105,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs) #endif #ifdef CONFIG_X86_64 - pci_iommu_shutdown(); + x86_platform.iommu_shutdown(); #endif crash_save_cpu(regs, safe_smp_processor_id()); diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index b2a71dca5642..ce2fb91bbed1 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -303,13 +303,6 @@ static int __init pci_iommu_init(void) no_iommu_init(); return 0; } - -void pci_iommu_shutdown(void) -{ - gart_iommu_shutdown(); - - amd_iommu_shutdown(); -} /* Must execute after PCI subsystem */ rootfs_initcall(pci_iommu_init); diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index a7f1b64f86e0..a9bcdf7c8801 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -39,6 +39,7 @@ #include #include #include +#include static unsigned long iommu_bus_base; /* GART remapping area (physical) */ static unsigned long iommu_size; /* size of remapping area bytes */ @@ -688,12 +689,12 @@ static struct dma_map_ops gart_dma_ops = { .free_coherent = gart_free_coherent, }; -void gart_iommu_shutdown(void) +static void gart_iommu_shutdown(void) { struct pci_dev *dev; int i; - if (no_agp && (dma_ops != &gart_dma_ops)) + if (no_agp) return; for (i = 0; i < num_k8_northbridges; i++) { @@ -838,6 +839,7 @@ void __init gart_iommu_init(void) flush_gart(); dma_ops = &gart_dma_ops; + x86_platform.iommu_shutdown = gart_iommu_shutdown; } void __init gart_parse_options(char *p) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f93078746e00..2b97fc5b124e 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -23,7 +23,7 @@ # include # include #else -# include +# include #endif /* @@ -622,7 +622,7 @@ void native_machine_shutdown(void) #endif #ifdef CONFIG_X86_64 - pci_iommu_shutdown(); + x86_platform.iommu_shutdown(); #endif } diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 4449a4a2c2ed..bc9b230ef402 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -14,6 +14,7 @@ #include #include #include +#include void __cpuinit x86_init_noop(void) { } void __init x86_init_uint_noop(unsigned int unused) { } @@ -72,4 +73,5 @@ struct x86_platform_ops x86_platform = { .calibrate_tsc = native_calibrate_tsc, .get_wallclock = mach_get_cmos_time, .set_wallclock = mach_set_rtc_mmss, + .iommu_shutdown = iommu_shutdown_noop, }; -- cgit v1.2.3 From 2ae8bb75db1f3de422eb5898f2a063c46c36dba8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 26 Oct 2009 15:41:46 +0100 Subject: x86: Fix iommu=nodac parameter handling iommu=nodac should forbid dac instead of enabling it. Fix it. Signed-off-by: Tejun Heo Acked-by: FUJITA Tomonori Cc: Matteo Frigo Cc: # .32.x and older LKML-Reference: <4AE5B52A.4050408@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index ce2fb91bbed1..839d49a669bc 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -216,7 +216,7 @@ static __init int iommu_setup(char *p) if (!strncmp(p, "allowdac", 8)) forbid_dac = 0; if (!strncmp(p, "nodac", 5)) - forbid_dac = -1; + forbid_dac = 1; if (!strncmp(p, "usedac", 6)) { forbid_dac = -1; return 1; -- cgit v1.2.3 From 24f1e32c60c45c89a997c73395b69c8af6f0a84e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 9 Sep 2009 19:22:48 +0200 Subject: hw-breakpoints: Rewrite the hw-breakpoints layer on top of perf events This patch rebase the implementation of the breakpoints API on top of perf events instances. Each breakpoints are now perf events that handle the register scheduling, thread/cpu attachment, etc.. The new layering is now made as follows: ptrace kgdb ftrace perf syscall \ | / / \ | / / / Core breakpoint API / / | / | / Breakpoints perf events | | Breakpoints PMU ---- Debug Register constraints handling (Part of core breakpoint API) | | Hardware debug registers Reasons of this rewrite: - Use the centralized/optimized pmu registers scheduling, implying an easier arch integration - More powerful register handling: perf attributes (pinned/flexible events, exclusive/non-exclusive, tunable period, etc...) Impact: - New perf ABI: the hardware breakpoints counters - Ptrace breakpoints setting remains tricky and still needs some per thread breakpoints references. Todo (in the order): - Support breakpoints perf counter events for perf tools (ie: implement perf_bpcounter_event()) - Support from perf tools Changes in v2: - Follow the perf "event " rename - The ptrace regression have been fixed (ptrace breakpoint perf events weren't released when a task ended) - Drop the struct hw_breakpoint and store generic fields in perf_event_attr. - Separate core and arch specific headers, drop asm-generic/hw_breakpoint.h and create linux/hw_breakpoint.h - Use new generic len/type for breakpoint - Handle off case: when breakpoints api is not supported by an arch Changes in v3: - Fix broken CONFIG_KVM, we need to propagate the breakpoint api changes to kvm when we exit the guest and restore the bp registers to the host. Changes in v4: - Drop the hw_breakpoint_restore() stub as it is only used by KVM - EXPORT_SYMBOL_GPL hw_breakpoint_restore() as KVM can be built as a module - Restore the breakpoints unconditionally on kvm guest exit: TIF_DEBUG_THREAD doesn't anymore cover every cases of running breakpoints and vcpu->arch.switch_db_regs might not always be set when the guest used debug registers. (Waiting for a reliable optimization) Changes in v5: - Split-up the asm-generic/hw-breakpoint.h moving to linux/hw_breakpoint.h into a separate patch - Optimize the breakpoints restoring while switching from kvm guest to host. We only want to restore the state if we have active breakpoints to the host, otherwise we don't care about messed-up address registers. - Add asm/hw_breakpoint.h to Kbuild - Fix bad breakpoint type in trace_selftest.c Changes in v6: - Fix wrong header inclusion in trace.h (triggered a build error with CONFIG_FTRACE_SELFTEST Signed-off-by: Frederic Weisbecker Cc: Prasad Cc: Alan Stern Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Ingo Molnar Cc: Jan Kiszka Cc: Jiri Slaby Cc: Li Zefan Cc: Avi Kivity Cc: Paul Mackerras Cc: Mike Galbraith Cc: Masami Hiramatsu Cc: Paul Mundt --- arch/Kconfig | 3 + arch/x86/include/asm/Kbuild | 1 + arch/x86/include/asm/debugreg.h | 11 +- arch/x86/include/asm/hw_breakpoint.h | 58 +++-- arch/x86/include/asm/processor.h | 12 +- arch/x86/kernel/hw_breakpoint.c | 391 +++++++++++++++++++++----------- arch/x86/kernel/process.c | 7 +- arch/x86/kernel/process_32.c | 26 +-- arch/x86/kernel/process_64.c | 26 +-- arch/x86/kernel/ptrace.c | 182 ++++++++++----- arch/x86/kernel/smpboot.c | 3 - arch/x86/kvm/x86.c | 18 +- arch/x86/power/cpu.c | 6 - include/linux/hw_breakpoint.h | 243 ++++++++++---------- include/linux/perf_event.h | 26 ++- kernel/exit.c | 5 + kernel/hw_breakpoint.c | 424 ++++++++++++++--------------------- kernel/perf_event.c | 53 ++++- kernel/trace/trace.h | 5 +- kernel/trace/trace_entries.h | 6 +- kernel/trace/trace_ksym.c | 126 +++++------ kernel/trace/trace_selftest.c | 3 +- 22 files changed, 885 insertions(+), 750 deletions(-) (limited to 'arch/x86') diff --git a/arch/Kconfig b/arch/Kconfig index acb664397945..eef3bbb97075 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -128,6 +128,9 @@ config HAVE_DEFAULT_NO_SPIN_MUTEXES config HAVE_HW_BREAKPOINT bool + depends on HAVE_PERF_EVENTS + select ANON_INODES + select PERF_EVENTS source "kernel/gcov/Kconfig" diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 4a8e80cdcfa5..9f828f87ca35 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -10,6 +10,7 @@ header-y += ptrace-abi.h header-y += sigcontext32.h header-y += ucontext.h header-y += processor-flags.h +header-y += hw_breakpoint.h unifdef-y += e820.h unifdef-y += ist.h diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 23439fbb1d0e..9a3333c91f9a 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -75,13 +75,8 @@ */ #ifdef __KERNEL__ -/* For process management */ -extern void flush_thread_hw_breakpoint(struct task_struct *tsk); -extern int copy_thread_hw_breakpoint(struct task_struct *tsk, - struct task_struct *child, unsigned long clone_flags); +DECLARE_PER_CPU(unsigned long, dr7); -/* For CPU management */ -extern void load_debug_registers(void); static inline void hw_breakpoint_disable(void) { /* Zero the control register for HW Breakpoint */ @@ -94,6 +89,10 @@ static inline void hw_breakpoint_disable(void) set_debugreg(0UL, 3); } +#ifdef CONFIG_KVM +extern void hw_breakpoint_restore(void); +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_X86_DEBUGREG_H */ diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 3cfca8e2b5f6..0675a7c4c20e 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -4,6 +4,11 @@ #ifdef __KERNEL__ #define __ARCH_HW_BREAKPOINT_H +/* + * The name should probably be something dealt in + * a higher level. While dealing with the user + * (display/resolving) + */ struct arch_hw_breakpoint { char *name; /* Contains name of the symbol to set bkpt */ unsigned long address; @@ -12,44 +17,57 @@ struct arch_hw_breakpoint { }; #include -#include +#include +#include /* Available HW breakpoint length encodings */ -#define HW_BREAKPOINT_LEN_1 0x40 -#define HW_BREAKPOINT_LEN_2 0x44 -#define HW_BREAKPOINT_LEN_4 0x4c -#define HW_BREAKPOINT_LEN_EXECUTE 0x40 +#define X86_BREAKPOINT_LEN_1 0x40 +#define X86_BREAKPOINT_LEN_2 0x44 +#define X86_BREAKPOINT_LEN_4 0x4c +#define X86_BREAKPOINT_LEN_EXECUTE 0x40 #ifdef CONFIG_X86_64 -#define HW_BREAKPOINT_LEN_8 0x48 +#define X86_BREAKPOINT_LEN_8 0x48 #endif /* Available HW breakpoint type encodings */ /* trigger on instruction execute */ -#define HW_BREAKPOINT_EXECUTE 0x80 +#define X86_BREAKPOINT_EXECUTE 0x80 /* trigger on memory write */ -#define HW_BREAKPOINT_WRITE 0x81 +#define X86_BREAKPOINT_WRITE 0x81 /* trigger on memory read or write */ -#define HW_BREAKPOINT_RW 0x83 +#define X86_BREAKPOINT_RW 0x83 /* Total number of available HW breakpoint registers */ #define HBP_NUM 4 -extern struct hw_breakpoint *hbp_kernel[HBP_NUM]; -DECLARE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]); -extern unsigned int hbp_user_refcount[HBP_NUM]; +struct perf_event; +struct pmu; -extern void arch_install_thread_hw_breakpoint(struct task_struct *tsk); -extern void arch_uninstall_thread_hw_breakpoint(void); extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len); -extern int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, - struct task_struct *tsk); -extern void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk); -extern void arch_flush_thread_hw_breakpoint(struct task_struct *tsk); -extern void arch_update_kernel_hw_breakpoint(void *); +extern int arch_validate_hwbkpt_settings(struct perf_event *bp, + struct task_struct *tsk); extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, - unsigned long val, void *data); + unsigned long val, void *data); + + +int arch_install_hw_breakpoint(struct perf_event *bp); +void arch_uninstall_hw_breakpoint(struct perf_event *bp); +void hw_breakpoint_pmu_read(struct perf_event *bp); +void hw_breakpoint_pmu_unthrottle(struct perf_event *bp); + +extern void +arch_fill_perf_breakpoint(struct perf_event *bp); + +unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type); +int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type); + +extern int arch_bp_generic_fields(int x86_len, int x86_type, + int *gen_len, int *gen_type); + +extern struct pmu perf_ops_bp; + #endif /* __KERNEL__ */ #endif /* _I386_HW_BREAKPOINT_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 61aafb71c7ef..820f3000f736 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -423,6 +423,8 @@ extern unsigned int xstate_size; extern void free_thread_xstate(struct task_struct *); extern struct kmem_cache *task_xstate_cachep; +struct perf_event; + struct thread_struct { /* Cached TLS descriptors: */ struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; @@ -444,12 +446,10 @@ struct thread_struct { unsigned long fs; #endif unsigned long gs; - /* Hardware debugging registers: */ - unsigned long debugreg[HBP_NUM]; - unsigned long debugreg6; - unsigned long debugreg7; - /* Hardware breakpoint info */ - struct hw_breakpoint *hbp[HBP_NUM]; + /* Save middle states of ptrace breakpoints */ + struct perf_event *ptrace_bps[HBP_NUM]; + /* Debug status used for traps, single steps, etc... */ + unsigned long debugreg6; /* Fault info: */ unsigned long cr2; unsigned long trap_no; diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 9316a9de4de3..e622620790bd 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -15,6 +15,7 @@ * * Copyright (C) 2007 Alan Stern * Copyright (C) 2009 IBM Corporation + * Copyright (C) 2009 Frederic Weisbecker */ /* @@ -22,6 +23,8 @@ * using the CPU's debug registers. */ +#include +#include #include #include #include @@ -38,26 +41,24 @@ #include #include -/* Unmasked kernel DR7 value */ -static unsigned long kdr7; +/* Per cpu debug control register value */ +DEFINE_PER_CPU(unsigned long, dr7); + +/* Per cpu debug address registers values */ +static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]); /* - * Masks for the bits corresponding to registers DR0 - DR3 in DR7 register. - * Used to clear and verify the status of bits corresponding to DR0 - DR3 + * Stores the breakpoints currently in use on each breakpoint address + * register for each cpus */ -static const unsigned long dr7_masks[HBP_NUM] = { - 0x000f0003, /* LEN0, R/W0, G0, L0 */ - 0x00f0000c, /* LEN1, R/W1, G1, L1 */ - 0x0f000030, /* LEN2, R/W2, G2, L2 */ - 0xf00000c0 /* LEN3, R/W3, G3, L3 */ -}; +static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]); /* * Encode the length, type, Exact, and Enable bits for a particular breakpoint * as stored in debug register 7. */ -static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) +unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) { unsigned long bp_info; @@ -68,64 +69,89 @@ static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) return bp_info; } -void arch_update_kernel_hw_breakpoint(void *unused) +/* + * Decode the length and type bits for a particular breakpoint as + * stored in debug register 7. Return the "enabled" status. + */ +int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type) { - struct hw_breakpoint *bp; - int i, cpu = get_cpu(); - unsigned long temp_kdr7 = 0; - - /* Don't allow debug exceptions while we update the registers */ - set_debugreg(0UL, 7); + int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); - for (i = hbp_kernel_pos; i < HBP_NUM; i++) { - per_cpu(this_hbp_kernel[i], cpu) = bp = hbp_kernel[i]; - if (bp) { - temp_kdr7 |= encode_dr7(i, bp->info.len, bp->info.type); - set_debugreg(bp->info.address, i); - } - } + *len = (bp_info & 0xc) | 0x40; + *type = (bp_info & 0x3) | 0x80; - /* No need to set DR6. Update the debug registers with kernel-space - * breakpoint values from kdr7 and user-space requests from the - * current process - */ - kdr7 = temp_kdr7; - set_debugreg(kdr7 | current->thread.debugreg7, 7); - put_cpu(); + return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; } /* - * Install the thread breakpoints in their debug registers. + * Install a perf counter breakpoint. + * + * We seek a free debug address register and use it for this + * breakpoint. Eventually we enable it in the debug control register. + * + * Atomic: we hold the counter->ctx->lock and we only handle variables + * and registers local to this cpu. */ -void arch_install_thread_hw_breakpoint(struct task_struct *tsk) +int arch_install_hw_breakpoint(struct perf_event *bp) { - struct thread_struct *thread = &(tsk->thread); - - switch (hbp_kernel_pos) { - case 4: - set_debugreg(thread->debugreg[3], 3); - case 3: - set_debugreg(thread->debugreg[2], 2); - case 2: - set_debugreg(thread->debugreg[1], 1); - case 1: - set_debugreg(thread->debugreg[0], 0); - default: - break; + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + unsigned long *dr7; + int i; + + for (i = 0; i < HBP_NUM; i++) { + struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); + + if (!*slot) { + *slot = bp; + break; + } } - /* No need to set DR6 */ - set_debugreg((kdr7 | thread->debugreg7), 7); + if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) + return -EBUSY; + + set_debugreg(info->address, i); + __get_cpu_var(cpu_debugreg[i]) = info->address; + + dr7 = &__get_cpu_var(dr7); + *dr7 |= encode_dr7(i, info->len, info->type); + + set_debugreg(*dr7, 7); + + return 0; } /* - * Install the debug register values for just the kernel, no thread. + * Uninstall the breakpoint contained in the given counter. + * + * First we search the debug address register it uses and then we disable + * it. + * + * Atomic: we hold the counter->ctx->lock and we only handle variables + * and registers local to this cpu. */ -void arch_uninstall_thread_hw_breakpoint(void) +void arch_uninstall_hw_breakpoint(struct perf_event *bp) { - /* Clear the user-space portion of debugreg7 by setting only kdr7 */ - set_debugreg(kdr7, 7); + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + unsigned long *dr7; + int i; + + for (i = 0; i < HBP_NUM; i++) { + struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); + + if (*slot == bp) { + *slot = NULL; + break; + } + } + + if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) + return; + dr7 = &__get_cpu_var(dr7); + *dr7 &= ~encode_dr7(i, info->len, info->type); + + set_debugreg(*dr7, 7); } static int get_hbp_len(u8 hbp_len) @@ -133,17 +159,17 @@ static int get_hbp_len(u8 hbp_len) unsigned int len_in_bytes = 0; switch (hbp_len) { - case HW_BREAKPOINT_LEN_1: + case X86_BREAKPOINT_LEN_1: len_in_bytes = 1; break; - case HW_BREAKPOINT_LEN_2: + case X86_BREAKPOINT_LEN_2: len_in_bytes = 2; break; - case HW_BREAKPOINT_LEN_4: + case X86_BREAKPOINT_LEN_4: len_in_bytes = 4; break; #ifdef CONFIG_X86_64 - case HW_BREAKPOINT_LEN_8: + case X86_BREAKPOINT_LEN_8: len_in_bytes = 8; break; #endif @@ -178,67 +204,146 @@ static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) /* * Store a breakpoint's encoded address, length, and type. */ -static int arch_store_info(struct hw_breakpoint *bp, struct task_struct *tsk) +static int arch_store_info(struct perf_event *bp) { - /* - * User-space requests will always have the address field populated - * Symbol names from user-space are rejected - */ - if (tsk && bp->info.name) - return -EINVAL; + struct arch_hw_breakpoint *info = counter_arch_bp(bp); /* * For kernel-addresses, either the address or symbol name can be * specified. */ - if (bp->info.name) - bp->info.address = (unsigned long) - kallsyms_lookup_name(bp->info.name); - if (bp->info.address) + if (info->name) + info->address = (unsigned long) + kallsyms_lookup_name(info->name); + if (info->address) return 0; + return -EINVAL; } -/* - * Validate the arch-specific HW Breakpoint register settings - */ -int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, - struct task_struct *tsk) +int arch_bp_generic_fields(int x86_len, int x86_type, + int *gen_len, int *gen_type) { - unsigned int align; - int ret = -EINVAL; + /* Len */ + switch (x86_len) { + case X86_BREAKPOINT_LEN_1: + *gen_len = HW_BREAKPOINT_LEN_1; + break; + case X86_BREAKPOINT_LEN_2: + *gen_len = HW_BREAKPOINT_LEN_2; + break; + case X86_BREAKPOINT_LEN_4: + *gen_len = HW_BREAKPOINT_LEN_4; + break; +#ifdef CONFIG_X86_64 + case X86_BREAKPOINT_LEN_8: + *gen_len = HW_BREAKPOINT_LEN_8; + break; +#endif + default: + return -EINVAL; + } - switch (bp->info.type) { - /* - * Ptrace-refactoring code - * For now, we'll allow instruction breakpoint only for user-space - * addresses - */ - case HW_BREAKPOINT_EXECUTE: - if ((!arch_check_va_in_userspace(bp->info.address, - bp->info.len)) && - bp->info.len != HW_BREAKPOINT_LEN_EXECUTE) - return ret; + /* Type */ + switch (x86_type) { + case X86_BREAKPOINT_EXECUTE: + *gen_type = HW_BREAKPOINT_X; break; - case HW_BREAKPOINT_WRITE: + case X86_BREAKPOINT_WRITE: + *gen_type = HW_BREAKPOINT_W; break; - case HW_BREAKPOINT_RW: + case X86_BREAKPOINT_RW: + *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; break; default: - return ret; + return -EINVAL; } - switch (bp->info.len) { + return 0; +} + + +static int arch_build_bp_info(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + + info->address = bp->attr.bp_addr; + + /* Len */ + switch (bp->attr.bp_len) { case HW_BREAKPOINT_LEN_1: - align = 0; + info->len = X86_BREAKPOINT_LEN_1; break; case HW_BREAKPOINT_LEN_2: - align = 1; + info->len = X86_BREAKPOINT_LEN_2; break; case HW_BREAKPOINT_LEN_4: - align = 3; + info->len = X86_BREAKPOINT_LEN_4; break; #ifdef CONFIG_X86_64 case HW_BREAKPOINT_LEN_8: + info->len = X86_BREAKPOINT_LEN_8; + break; +#endif + default: + return -EINVAL; + } + + /* Type */ + switch (bp->attr.bp_type) { + case HW_BREAKPOINT_W: + info->type = X86_BREAKPOINT_WRITE; + break; + case HW_BREAKPOINT_W | HW_BREAKPOINT_R: + info->type = X86_BREAKPOINT_RW; + break; + case HW_BREAKPOINT_X: + info->type = X86_BREAKPOINT_EXECUTE; + break; + default: + return -EINVAL; + } + + return 0; +} +/* + * Validate the arch-specific HW Breakpoint register settings + */ +int arch_validate_hwbkpt_settings(struct perf_event *bp, + struct task_struct *tsk) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + unsigned int align; + int ret; + + + ret = arch_build_bp_info(bp); + if (ret) + return ret; + + ret = -EINVAL; + + if (info->type == X86_BREAKPOINT_EXECUTE) + /* + * Ptrace-refactoring code + * For now, we'll allow instruction breakpoint only for user-space + * addresses + */ + if ((!arch_check_va_in_userspace(info->address, info->len)) && + info->len != X86_BREAKPOINT_EXECUTE) + return ret; + + switch (info->len) { + case X86_BREAKPOINT_LEN_1: + align = 0; + break; + case X86_BREAKPOINT_LEN_2: + align = 1; + break; + case X86_BREAKPOINT_LEN_4: + align = 3; + break; +#ifdef CONFIG_X86_64 + case X86_BREAKPOINT_LEN_8: align = 7; break; #endif @@ -246,8 +351,8 @@ int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, return ret; } - if (bp->triggered) - ret = arch_store_info(bp, tsk); + if (bp->callback) + ret = arch_store_info(bp); if (ret < 0) return ret; @@ -255,44 +360,47 @@ int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, * Check that the low-order bits of the address are appropriate * for the alignment implied by len. */ - if (bp->info.address & align) + if (info->address & align) return -EINVAL; /* Check that the virtual address is in the proper range */ if (tsk) { - if (!arch_check_va_in_userspace(bp->info.address, bp->info.len)) + if (!arch_check_va_in_userspace(info->address, info->len)) return -EFAULT; } else { - if (!arch_check_va_in_kernelspace(bp->info.address, - bp->info.len)) + if (!arch_check_va_in_kernelspace(info->address, info->len)) return -EFAULT; } + return 0; } -void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk) +/* + * Release the user breakpoints used by ptrace + */ +void flush_ptrace_hw_breakpoint(struct task_struct *tsk) { - struct thread_struct *thread = &(tsk->thread); - struct hw_breakpoint *bp = thread->hbp[pos]; - - thread->debugreg7 &= ~dr7_masks[pos]; - if (bp) { - thread->debugreg[pos] = bp->info.address; - thread->debugreg7 |= encode_dr7(pos, bp->info.len, - bp->info.type); - } else - thread->debugreg[pos] = 0; + int i; + struct thread_struct *t = &tsk->thread; + + for (i = 0; i < HBP_NUM; i++) { + unregister_hw_breakpoint(t->ptrace_bps[i]); + t->ptrace_bps[i] = NULL; + } } -void arch_flush_thread_hw_breakpoint(struct task_struct *tsk) +#ifdef CONFIG_KVM +void hw_breakpoint_restore(void) { - int i; - struct thread_struct *thread = &(tsk->thread); - - thread->debugreg7 = 0; - for (i = 0; i < HBP_NUM; i++) - thread->debugreg[i] = 0; + set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0); + set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1); + set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2); + set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3); + set_debugreg(current->thread.debugreg6, 6); + set_debugreg(__get_cpu_var(dr7), 7); } +EXPORT_SYMBOL_GPL(hw_breakpoint_restore); +#endif /* * Handle debug exception notifications. @@ -313,7 +421,7 @@ void arch_flush_thread_hw_breakpoint(struct task_struct *tsk) static int __kprobes hw_breakpoint_handler(struct die_args *args) { int i, cpu, rc = NOTIFY_STOP; - struct hw_breakpoint *bp; + struct perf_event *bp; unsigned long dr7, dr6; unsigned long *dr6_p; @@ -325,10 +433,6 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) if ((dr6 & DR_TRAP_BITS) == 0) return NOTIFY_DONE; - /* Lazy debug register switching */ - if (!test_tsk_thread_flag(current, TIF_DEBUG)) - arch_uninstall_thread_hw_breakpoint(); - get_debugreg(dr7, 7); /* Disable breakpoints during exception handling */ set_debugreg(0UL, 7); @@ -344,17 +448,18 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) for (i = 0; i < HBP_NUM; ++i) { if (likely(!(dr6 & (DR_TRAP0 << i)))) continue; + /* - * Find the corresponding hw_breakpoint structure and - * invoke its triggered callback. + * The counter may be concurrently released but that can only + * occur from a call_rcu() path. We can then safely fetch + * the breakpoint, use its callback, touch its counter + * while we are in an rcu_read_lock() path. */ - if (i >= hbp_kernel_pos) - bp = per_cpu(this_hbp_kernel[i], cpu); - else { - bp = current->thread.hbp[i]; - if (bp) - rc = NOTIFY_DONE; - } + rcu_read_lock(); + + bp = per_cpu(bp_per_reg[i], cpu); + if (bp) + rc = NOTIFY_DONE; /* * Reset the 'i'th TRAP bit in dr6 to denote completion of * exception handling @@ -362,19 +467,23 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) (*dr6_p) &= ~(DR_TRAP0 << i); /* * bp can be NULL due to lazy debug register switching - * or due to the delay between updates of hbp_kernel_pos - * and this_hbp_kernel. + * or due to concurrent perf counter removing. */ - if (!bp) - continue; + if (!bp) { + rcu_read_unlock(); + break; + } + + (bp->callback)(bp, args->regs); - (bp->triggered)(bp, args->regs); + rcu_read_unlock(); } if (dr6 & (~DR_TRAP_BITS)) rc = NOTIFY_DONE; set_debugreg(dr7, 7); put_cpu(); + return rc; } @@ -389,3 +498,13 @@ int __kprobes hw_breakpoint_exceptions_notify( return hw_breakpoint_handler(data); } + +void hw_breakpoint_pmu_read(struct perf_event *bp) +{ + /* TODO */ +} + +void hw_breakpoint_pmu_unthrottle(struct perf_event *bp) +{ + /* TODO */ +} diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index cf8ee0016307..744508e7cfdd 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -18,7 +19,6 @@ #include #include #include -#include unsigned long idle_halt; EXPORT_SYMBOL(idle_halt); @@ -47,8 +47,6 @@ void free_thread_xstate(struct task_struct *tsk) kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); tsk->thread.xstate = NULL; } - if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) - flush_thread_hw_breakpoint(tsk); WARN(tsk->thread.ds_ctx, "leaking DS context\n"); } @@ -107,8 +105,7 @@ void flush_thread(void) } #endif - if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) - flush_thread_hw_breakpoint(tsk); + flush_ptrace_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* * Forget coprocessor state.. diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 209e74801763..d5bd3132ee70 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -59,7 +59,6 @@ #include #include #include -#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -264,9 +263,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.io_bitmap_ptr = NULL; tsk = current; err = -ENOMEM; - if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) - if (copy_thread_hw_breakpoint(tsk, p, clone_flags)) - goto out; + + memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, @@ -287,13 +285,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, err = do_set_thread_area(p, -1, (struct user_desc __user *)childregs->si, 0); -out: if (err && p->thread.io_bitmap_ptr) { kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; } - if (err) - flush_thread_hw_breakpoint(p); clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); p->thread.ds_ctx = NULL; @@ -437,23 +432,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) lazy_load_gs(next->gs); percpu_write(current_task, next_p); - /* - * There's a problem with moving the arch_install_thread_hw_breakpoint() - * call before current is updated. Suppose a kernel breakpoint is - * triggered in between the two, the hw-breakpoint handler will see that - * the 'current' task does not have TIF_DEBUG flag set and will think it - * is leftover from an old task (lazy switching) and will erase it. Then - * until the next context switch, no user-breakpoints will be installed. - * - * The real problem is that it's impossible to update both current and - * physical debug registers at the same instant, so there will always be - * a window in which they disagree and a breakpoint might get triggered. - * Since we use lazy switching, we are forced to assume that a - * disagreement means that current is correct and the exception is due - * to lazy debug register switching. - */ - if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG))) - arch_install_thread_hw_breakpoint(next_p); return prev_p; } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 72edac026a78..5bafdec34441 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -53,7 +53,6 @@ #include #include #include -#include asmlinkage extern void ret_from_fork(void); @@ -244,8 +243,6 @@ void release_thread(struct task_struct *dead_task) BUG(); } } - if (unlikely(dead_task->thread.debugreg7)) - flush_thread_hw_breakpoint(dead_task); } static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr) @@ -309,9 +306,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, savesegment(ds, p->thread.ds); err = -ENOMEM; - if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG))) - if (copy_thread_hw_breakpoint(me, p, clone_flags)) - goto out; + memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); @@ -351,8 +346,6 @@ out: kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; } - if (err) - flush_thread_hw_breakpoint(p); return err; } @@ -508,23 +501,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ if (preload_fpu) __math_state_restore(); - /* - * There's a problem with moving the arch_install_thread_hw_breakpoint() - * call before current is updated. Suppose a kernel breakpoint is - * triggered in between the two, the hw-breakpoint handler will see that - * the 'current' task does not have TIF_DEBUG flag set and will think it - * is leftover from an old task (lazy switching) and will erase it. Then - * until the next context switch, no user-breakpoints will be installed. - * - * The real problem is that it's impossible to update both current and - * physical debug registers at the same instant, so there will always be - * a window in which they disagree and a breakpoint might get triggered. - * Since we use lazy switching, we are forced to assume that a - * disagreement means that current is correct and the exception is due - * to lazy debug register switching. - */ - if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG))) - arch_install_thread_hw_breakpoint(next_p); return prev_p; } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 267cb85b479c..e79610d95971 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include @@ -441,54 +443,59 @@ static int genregs_set(struct task_struct *target, return ret; } -/* - * Decode the length and type bits for a particular breakpoint as - * stored in debug register 7. Return the "enabled" status. - */ -static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, - unsigned *type) -{ - int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); - - *len = (bp_info & 0xc) | 0x40; - *type = (bp_info & 0x3) | 0x80; - return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; -} - -static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) +static void ptrace_triggered(struct perf_event *bp, void *data) { - struct thread_struct *thread = &(current->thread); int i; + struct thread_struct *thread = &(current->thread); /* * Store in the virtual DR6 register the fact that the breakpoint * was hit so the thread's debugger will see it. */ - for (i = 0; i < hbp_kernel_pos; i++) - /* - * We will check bp->info.address against the address stored in - * thread's hbp structure and not debugreg[i]. This is to ensure - * that the corresponding bit for 'i' in DR7 register is enabled - */ - if (bp->info.address == thread->hbp[i]->info.address) + for (i = 0; i < HBP_NUM; i++) { + if (thread->ptrace_bps[i] == bp) break; + } thread->debugreg6 |= (DR_TRAP0 << i); } +/* + * Walk through every ptrace breakpoints for this thread and + * build the dr7 value on top of their attributes. + * + */ +static unsigned long ptrace_get_dr7(struct perf_event *bp[]) +{ + int i; + int dr7 = 0; + struct arch_hw_breakpoint *info; + + for (i = 0; i < HBP_NUM; i++) { + if (bp[i] && !bp[i]->attr.disabled) { + info = counter_arch_bp(bp[i]); + dr7 |= encode_dr7(i, info->len, info->type); + } + } + + return dr7; +} + /* * Handle ptrace writes to debug register 7. */ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) { struct thread_struct *thread = &(tsk->thread); - unsigned long old_dr7 = thread->debugreg7; + unsigned long old_dr7; int i, orig_ret = 0, rc = 0; int enabled, second_pass = 0; unsigned len, type; - struct hw_breakpoint *bp; + int gen_len, gen_type; + struct perf_event *bp; data &= ~DR_CONTROL_RESERVED; + old_dr7 = ptrace_get_dr7(thread->ptrace_bps); restore: /* * Loop through all the hardware breakpoints, making the @@ -496,11 +503,12 @@ restore: */ for (i = 0; i < HBP_NUM; i++) { enabled = decode_dr7(data, i, &len, &type); - bp = thread->hbp[i]; + bp = thread->ptrace_bps[i]; if (!enabled) { if (bp) { - /* Don't unregister the breakpoints right-away, + /* + * Don't unregister the breakpoints right-away, * unless all register_user_hw_breakpoint() * requests have succeeded. This prevents * any window of opportunity for debug @@ -508,27 +516,45 @@ restore: */ if (!second_pass) continue; - unregister_user_hw_breakpoint(tsk, bp); - kfree(bp); + thread->ptrace_bps[i] = NULL; + unregister_hw_breakpoint(bp); } continue; } + + /* + * We shoud have at least an inactive breakpoint at this + * slot. It means the user is writing dr7 without having + * written the address register first + */ if (!bp) { - rc = -ENOMEM; - bp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); - if (bp) { - bp->info.address = thread->debugreg[i]; - bp->triggered = ptrace_triggered; - bp->info.len = len; - bp->info.type = type; - rc = register_user_hw_breakpoint(tsk, bp); - if (rc) - kfree(bp); - } - } else - rc = modify_user_hw_breakpoint(tsk, bp); + rc = -EINVAL; + break; + } + + rc = arch_bp_generic_fields(len, type, &gen_len, &gen_type); if (rc) break; + + /* + * This is a temporary thing as bp is unregistered/registered + * to simulate modification + */ + bp = modify_user_hw_breakpoint(bp, bp->attr.bp_addr, gen_len, + gen_type, bp->callback, + tsk, true); + thread->ptrace_bps[i] = NULL; + + if (!bp) { /* incorrect bp, or we have a bug in bp API */ + rc = -EINVAL; + break; + } + if (IS_ERR(bp)) { + rc = PTR_ERR(bp); + bp = NULL; + break; + } + thread->ptrace_bps[i] = bp; } /* * Make a second pass to free the remaining unused breakpoints @@ -553,15 +579,63 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) struct thread_struct *thread = &(tsk->thread); unsigned long val = 0; - if (n < HBP_NUM) - val = thread->debugreg[n]; - else if (n == 6) + if (n < HBP_NUM) { + struct perf_event *bp; + bp = thread->ptrace_bps[n]; + if (!bp) + return 0; + val = bp->hw.info.address; + } else if (n == 6) { val = thread->debugreg6; - else if (n == 7) - val = thread->debugreg7; + } else if (n == 7) { + val = ptrace_get_dr7(thread->ptrace_bps); + } return val; } +static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, + unsigned long addr) +{ + struct perf_event *bp; + struct thread_struct *t = &tsk->thread; + + if (!t->ptrace_bps[nr]) { + /* + * Put stub len and type to register (reserve) an inactive but + * correct bp + */ + bp = register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_1, + HW_BREAKPOINT_W, + ptrace_triggered, tsk, + false); + } else { + bp = t->ptrace_bps[nr]; + t->ptrace_bps[nr] = NULL; + bp = modify_user_hw_breakpoint(bp, addr, bp->attr.bp_len, + bp->attr.bp_type, + bp->callback, + tsk, + bp->attr.disabled); + } + + if (!bp) + return -EIO; + /* + * CHECKME: the previous code returned -EIO if the addr wasn't a + * valid task virtual addr. The new one will return -EINVAL in this + * case. + * -EINVAL may be what we want for in-kernel breakpoints users, but + * -EIO looks better for ptrace, since we refuse a register writing + * for the user. And anyway this is the previous behaviour. + */ + if (IS_ERR(bp)) + return PTR_ERR(bp); + + t->ptrace_bps[nr] = bp; + + return 0; +} + /* * Handle PTRACE_POKEUSR calls for the debug register area. */ @@ -575,19 +649,13 @@ int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) return -EIO; if (n == 6) { - tsk->thread.debugreg6 = val; + thread->debugreg6 = val; goto ret_path; } if (n < HBP_NUM) { - if (thread->hbp[n]) { - if (arch_check_va_in_userspace(val, - thread->hbp[n]->info.len) == 0) { - rc = -EIO; - goto ret_path; - } - thread->hbp[n]->info.address = val; - } - thread->debugreg[n] = val; + rc = ptrace_set_breakpoint_addr(tsk, n, val); + if (rc) + return rc; } /* All that's left is DR7 */ if (n == 7) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 213a7a3e4562..565ebc65920e 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -64,7 +64,6 @@ #include #include #include -#include #include #include @@ -328,7 +327,6 @@ notrace static void __cpuinit start_secondary(void *unused) x86_cpuinit.setup_percpu_clockev(); wmb(); - load_debug_registers(); cpu_idle(); } @@ -1269,7 +1267,6 @@ void cpu_disable_common(void) remove_cpu_from_maps(cpu); unlock_vector_lock(); fixup_irqs(); - hw_breakpoint_disable(); } int native_cpu_disable(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fc2974adf9b6..22dee7aa7813 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -42,6 +42,7 @@ #define CREATE_TRACE_POINTS #include "trace.h" +#include #include #include #include @@ -3643,14 +3644,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) trace_kvm_entry(vcpu->vcpu_id); kvm_x86_ops->run(vcpu, kvm_run); - if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) { - set_debugreg(current->thread.debugreg[0], 0); - set_debugreg(current->thread.debugreg[1], 1); - set_debugreg(current->thread.debugreg[2], 2); - set_debugreg(current->thread.debugreg[3], 3); - set_debugreg(current->thread.debugreg6, 6); - set_debugreg(current->thread.debugreg7, 7); - } + /* + * If the guest has used debug registers, at least dr7 + * will be disabled while returning to the host. + * If we don't have active breakpoints in the host, we don't + * care about the messed up debug address registers. But if + * we have some of them active, restore the old state. + */ + if (__get_cpu_var(dr7) & DR_GLOBAL_ENABLE_MASK) + hw_breakpoint_restore(); set_bit(KVM_REQ_KICK, &vcpu->requests); local_irq_enable(); diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index e09a44fc4664..0a979f3e5b8a 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -105,7 +105,6 @@ static void __save_processor_state(struct saved_context *ctxt) ctxt->cr4 = read_cr4(); ctxt->cr8 = read_cr8(); #endif - hw_breakpoint_disable(); } /* Needed by apm.c */ @@ -144,11 +143,6 @@ static void fix_processor_context(void) #endif load_TR_desc(); /* This does ltr */ load_LDT(¤t->active_mm->context); /* This does lldt */ - - /* - * Now maybe reload the debug registers - */ - load_debug_registers(); } /** diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 61ccc8f17eac..7eba9b92e5f3 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -1,136 +1,131 @@ #ifndef _LINUX_HW_BREAKPOINT_H #define _LINUX_HW_BREAKPOINT_H +#include -#ifdef __KERNEL__ -#include -#include -#include - -/** - * struct hw_breakpoint - unified kernel/user-space hardware breakpoint - * @triggered: callback invoked after target address access - * @info: arch-specific breakpoint info (address, length, and type) - * - * %hw_breakpoint structures are the kernel's way of representing - * hardware breakpoints. These are data breakpoints - * (also known as "watchpoints", triggered on data access), and the breakpoint's - * target address can be located in either kernel space or user space. - * - * The breakpoint's address, length, and type are highly - * architecture-specific. The values are encoded in the @info field; you - * specify them when registering the breakpoint. To examine the encoded - * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared - * below. - * - * The address is specified as a regular kernel pointer (for kernel-space - * breakponts) or as an %__user pointer (for user-space breakpoints). - * With register_user_hw_breakpoint(), the address must refer to a - * location in user space. The breakpoint will be active only while the - * requested task is running. Conversely with - * register_kernel_hw_breakpoint(), the address must refer to a location - * in kernel space, and the breakpoint will be active on all CPUs - * regardless of the current task. - * - * The length is the breakpoint's extent in bytes, which is subject to - * certain limitations. include/asm/hw_breakpoint.h contains macros - * defining the available lengths for a specific architecture. Note that - * the address's alignment must match the length. The breakpoint will - * catch accesses to any byte in the range from address to address + - * (length - 1). - * - * The breakpoint's type indicates the sort of access that will cause it - * to trigger. Possible values may include: - * - * %HW_BREAKPOINT_RW (triggered on read or write access), - * %HW_BREAKPOINT_WRITE (triggered on write access), and - * %HW_BREAKPOINT_READ (triggered on read access). - * - * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all - * possibilities are available on all architectures. Execute breakpoints - * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE. - * - * When a breakpoint gets hit, the @triggered callback is - * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the - * processor registers. - * Data breakpoints occur after the memory access has taken place. - * Breakpoints are disabled during execution @triggered, to avoid - * recursive traps and allow unhindered access to breakpointed memory. - * - * This sample code sets a breakpoint on pid_max and registers a callback - * function for writes to that variable. Note that it is not portable - * as written, because not all architectures support HW_BREAKPOINT_LEN_4. - * - * ---------------------------------------------------------------------- - * - * #include - * - * struct hw_breakpoint my_bp; - * - * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) - * { - * printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n"); - * dump_stack(); - * ............... - * } - * - * static struct hw_breakpoint my_bp; - * - * static int init_module(void) - * { - * ...................... - * my_bp.info.type = HW_BREAKPOINT_WRITE; - * my_bp.info.len = HW_BREAKPOINT_LEN_4; - * - * my_bp.installed = (void *)my_bp_installed; - * - * rc = register_kernel_hw_breakpoint(&my_bp); - * ...................... - * } - * - * static void cleanup_module(void) - * { - * ...................... - * unregister_kernel_hw_breakpoint(&my_bp); - * ...................... - * } - * - * ---------------------------------------------------------------------- - */ -struct hw_breakpoint { - void (*triggered)(struct hw_breakpoint *, struct pt_regs *); - struct arch_hw_breakpoint info; +enum { + HW_BREAKPOINT_LEN_1 = 1, + HW_BREAKPOINT_LEN_2 = 2, + HW_BREAKPOINT_LEN_4 = 4, + HW_BREAKPOINT_LEN_8 = 8, }; -/* - * len and type values are defined in include/asm/hw_breakpoint.h. - * Available values vary according to the architecture. On i386 the - * possibilities are: - * - * HW_BREAKPOINT_LEN_1 - * HW_BREAKPOINT_LEN_2 - * HW_BREAKPOINT_LEN_4 - * HW_BREAKPOINT_RW - * HW_BREAKPOINT_READ - * - * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the - * 1-, 2-, and 4-byte lengths may be unavailable. There also may be - * HW_BREAKPOINT_WRITE. You can use #ifdef to check at compile time. - */ +enum { + HW_BREAKPOINT_R = 1, + HW_BREAKPOINT_W = 2, + HW_BREAKPOINT_X = 4, +}; + +static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) +{ + return &bp->hw.info; +} + +static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) +{ + return bp->attr.bp_addr; +} + +static inline int hw_breakpoint_type(struct perf_event *bp) +{ + return bp->attr.bp_type; +} + +static inline int hw_breakpoint_len(struct perf_event *bp) +{ + return bp->attr.bp_len; +} + +#ifdef CONFIG_HAVE_HW_BREAKPOINT +extern struct perf_event * +register_user_hw_breakpoint(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + struct task_struct *tsk, + bool active); + +/* FIXME: only change from the attr, and don't unregister */ +extern struct perf_event * +modify_user_hw_breakpoint(struct perf_event *bp, + unsigned long addr, + int len, + int type, + perf_callback_t triggered, + struct task_struct *tsk, + bool active); -extern int register_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp); -extern int modify_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp); -extern void unregister_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp); /* * Kernel breakpoints are not associated with any particular thread. */ -extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp); -extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp); +extern struct perf_event * +register_wide_hw_breakpoint_cpu(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + int cpu, + bool active); + +extern struct perf_event ** +register_wide_hw_breakpoint(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + bool active); + +extern int register_perf_hw_breakpoint(struct perf_event *bp); +extern int __register_perf_hw_breakpoint(struct perf_event *bp); +extern void unregister_hw_breakpoint(struct perf_event *bp); +extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events); + +extern int reserve_bp_slot(struct perf_event *bp); +extern void release_bp_slot(struct perf_event *bp); + +extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk); + +#else /* !CONFIG_HAVE_HW_BREAKPOINT */ + +static inline struct perf_event * +register_user_hw_breakpoint(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + struct task_struct *tsk, + bool active) { return NULL; } +static inline struct perf_event * +modify_user_hw_breakpoint(struct perf_event *bp, + unsigned long addr, + int len, + int type, + perf_callback_t triggered, + struct task_struct *tsk, + bool active) { return NULL; } +static inline struct perf_event * +register_wide_hw_breakpoint_cpu(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + int cpu, + bool active) { return NULL; } +static inline struct perf_event ** +register_wide_hw_breakpoint(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + bool active) { return NULL; } +static inline int +register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } +static inline int +__register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } +static inline void unregister_hw_breakpoint(struct perf_event *bp) { } +static inline void +unregister_wide_hw_breakpoint(struct perf_event **cpu_events) { } +static inline int +reserve_bp_slot(struct perf_event *bp) {return -ENOSYS; } +static inline void release_bp_slot(struct perf_event *bp) { } + +static inline void flush_ptrace_hw_breakpoint(struct task_struct *tsk) { } -extern unsigned int hbp_kernel_pos; +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ -#endif /* __KERNEL__ */ -#endif /* _LINUX_HW_BREAKPOINT_H */ +#endif /* _LINUX_HW_BREAKPOINT_H */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8d54e6d25eeb..cead64ea6c15 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -18,6 +18,10 @@ #include #include +#ifdef CONFIG_HAVE_HW_BREAKPOINT +#include +#endif + /* * User-space ABI bits: */ @@ -31,6 +35,7 @@ enum perf_type_id { PERF_TYPE_TRACEPOINT = 2, PERF_TYPE_HW_CACHE = 3, PERF_TYPE_RAW = 4, + PERF_TYPE_BREAKPOINT = 5, PERF_TYPE_MAX, /* non-ABI */ }; @@ -207,6 +212,15 @@ struct perf_event_attr { __u32 wakeup_events; /* wakeup every n events */ __u32 wakeup_watermark; /* bytes before wakeup */ }; + + union { + struct { /* Hardware breakpoint info */ + __u64 bp_addr; + __u32 bp_type; + __u32 bp_len; + }; + }; + __u32 __reserved_2; __u64 __reserved_3; @@ -476,6 +490,11 @@ struct hw_perf_event { atomic64_t count; struct hrtimer hrtimer; }; +#ifdef CONFIG_HAVE_HW_BREAKPOINT + union { /* breakpoint */ + struct arch_hw_breakpoint info; + }; +#endif }; atomic64_t prev_count; u64 sample_period; @@ -588,7 +607,7 @@ struct perf_event { u64 tstamp_running; u64 tstamp_stopped; - struct perf_event_attr attr; + struct perf_event_attr attr; struct hw_perf_event hw; struct perf_event_context *ctx; @@ -643,6 +662,8 @@ struct perf_event { perf_callback_t callback; + perf_callback_t event_callback; + #endif /* CONFIG_PERF_EVENTS */ }; @@ -831,6 +852,7 @@ extern int sysctl_perf_event_sample_rate; extern void perf_event_init(void); extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size); +extern void perf_bp_event(struct perf_event *event, void *data); #ifndef perf_misc_flags #define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \ @@ -865,6 +887,8 @@ static inline int perf_event_task_enable(void) { return -EINVAL; } static inline void perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { } +static inline void +perf_bp_event(struct perf_event *event, void *data) { } static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_comm(struct task_struct *tsk) { } diff --git a/kernel/exit.c b/kernel/exit.c index e61891f80123..266f8920628a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -979,6 +980,10 @@ NORET_TYPE void do_exit(long code) proc_exit_connector(tsk); + /* + * FIXME: do that only when needed, using sched_exit tracepoint + */ + flush_ptrace_hw_breakpoint(tsk); /* * Flush inherited counters to the parent - before the parent * gets woken up by child-exit notifications. diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index c1f64e65a9f3..08f6d0163201 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -15,6 +15,7 @@ * * Copyright (C) 2007 Alan Stern * Copyright (C) IBM Corporation, 2009 + * Copyright (C) 2009, Frederic Weisbecker */ /* @@ -35,334 +36,242 @@ #include #include -#include +#include + #include #ifdef CONFIG_X86 #include #endif -/* - * Spinlock that protects all (un)register operations over kernel/user-space - * breakpoint requests - */ -static DEFINE_SPINLOCK(hw_breakpoint_lock); - -/* Array of kernel-space breakpoint structures */ -struct hw_breakpoint *hbp_kernel[HBP_NUM]; - -/* - * Per-processor copy of hbp_kernel[]. Used only when hbp_kernel is being - * modified but we need the older copy to handle any hbp exceptions. It will - * sync with hbp_kernel[] value after updation is done through IPIs. - */ -DEFINE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]); - -/* - * Kernel breakpoints grow downwards, starting from HBP_NUM - * 'hbp_kernel_pos' denotes lowest numbered breakpoint register occupied for - * kernel-space request. We will initialise it here and not in an __init - * routine because load_debug_registers(), which uses this variable can be - * called very early during CPU initialisation. - */ -unsigned int hbp_kernel_pos = HBP_NUM; -/* - * An array containing refcount of threads using a given bkpt register - * Accesses are synchronised by acquiring hw_breakpoint_lock - */ -unsigned int hbp_user_refcount[HBP_NUM]; +static atomic_t bp_slot; -/* - * Load the debug registers during startup of a CPU. - */ -void load_debug_registers(void) +int reserve_bp_slot(struct perf_event *bp) { - unsigned long flags; - struct task_struct *tsk = current; - - spin_lock_bh(&hw_breakpoint_lock); - - /* Prevent IPIs for new kernel breakpoint updates */ - local_irq_save(flags); - arch_update_kernel_hw_breakpoint(NULL); - local_irq_restore(flags); - - if (test_tsk_thread_flag(tsk, TIF_DEBUG)) - arch_install_thread_hw_breakpoint(tsk); - - spin_unlock_bh(&hw_breakpoint_lock); -} + if (atomic_inc_return(&bp_slot) == HBP_NUM) { + atomic_dec(&bp_slot); -/* - * Erase all the hardware breakpoint info associated with a thread. - * - * If tsk != current then tsk must not be usable (for example, a - * child being cleaned up from a failed fork). - */ -void flush_thread_hw_breakpoint(struct task_struct *tsk) -{ - int i; - struct thread_struct *thread = &(tsk->thread); - - spin_lock_bh(&hw_breakpoint_lock); - - /* The thread no longer has any breakpoints associated with it */ - clear_tsk_thread_flag(tsk, TIF_DEBUG); - for (i = 0; i < HBP_NUM; i++) { - if (thread->hbp[i]) { - hbp_user_refcount[i]--; - kfree(thread->hbp[i]); - thread->hbp[i] = NULL; - } + return -ENOSPC; } - arch_flush_thread_hw_breakpoint(tsk); - - /* Actually uninstall the breakpoints if necessary */ - if (tsk == current) - arch_uninstall_thread_hw_breakpoint(); - spin_unlock_bh(&hw_breakpoint_lock); + return 0; } -/* - * Copy the hardware breakpoint info from a thread to its cloned child. - */ -int copy_thread_hw_breakpoint(struct task_struct *tsk, - struct task_struct *child, unsigned long clone_flags) +void release_bp_slot(struct perf_event *bp) { - /* - * We will assume that breakpoint settings are not inherited - * and the child starts out with no debug registers set. - * But what about CLONE_PTRACE? - */ - clear_tsk_thread_flag(child, TIF_DEBUG); - - /* We will call flush routine since the debugregs are not inherited */ - arch_flush_thread_hw_breakpoint(child); - - return 0; + atomic_dec(&bp_slot); } -static int __register_user_hw_breakpoint(int pos, struct task_struct *tsk, - struct hw_breakpoint *bp) +int __register_perf_hw_breakpoint(struct perf_event *bp) { - struct thread_struct *thread = &(tsk->thread); - int rc; + int ret; - /* Do not overcommit. Fail if kernel has used the hbp registers */ - if (pos >= hbp_kernel_pos) - return -ENOSPC; + ret = reserve_bp_slot(bp); + if (ret) + return ret; - rc = arch_validate_hwbkpt_settings(bp, tsk); - if (rc) - return rc; + if (!bp->attr.disabled) + ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task); - thread->hbp[pos] = bp; - hbp_user_refcount[pos]++; + return ret; +} - arch_update_user_hw_breakpoint(pos, tsk); - /* - * Does it need to be installed right now? - * Otherwise it will get installed the next time tsk runs - */ - if (tsk == current) - arch_install_thread_hw_breakpoint(tsk); +int register_perf_hw_breakpoint(struct perf_event *bp) +{ + bp->callback = perf_bp_event; - return rc; + return __register_perf_hw_breakpoint(bp); } /* - * Modify the address of a hbp register already in use by the task - * Do not invoke this in-lieu of a __unregister_user_hw_breakpoint() + * Register a breakpoint bound to a task and a given cpu. + * If cpu is -1, the breakpoint is active for the task in every cpu + * If the task is -1, the breakpoint is active for every tasks in the given + * cpu. */ -static int __modify_user_hw_breakpoint(int pos, struct task_struct *tsk, - struct hw_breakpoint *bp) +static struct perf_event * +register_user_hw_breakpoint_cpu(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + pid_t pid, + int cpu, + bool active) { - struct thread_struct *thread = &(tsk->thread); - - if ((pos >= hbp_kernel_pos) || (arch_validate_hwbkpt_settings(bp, tsk))) - return -EINVAL; - - if (thread->hbp[pos] == NULL) - return -EINVAL; - - thread->hbp[pos] = bp; + struct perf_event_attr *attr; + struct perf_event *bp; + + attr = kzalloc(sizeof(*attr), GFP_KERNEL); + if (!attr) + return ERR_PTR(-ENOMEM); + + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(*attr); + attr->bp_addr = addr; + attr->bp_len = len; + attr->bp_type = type; /* - * 'pos' must be that of a hbp register already used by 'tsk' - * Otherwise arch_modify_user_hw_breakpoint() will fail + * Such breakpoints are used by debuggers to trigger signals when + * we hit the excepted memory op. We can't miss such events, they + * must be pinned. */ - arch_update_user_hw_breakpoint(pos, tsk); + attr->pinned = 1; - if (tsk == current) - arch_install_thread_hw_breakpoint(tsk); + if (!active) + attr->disabled = 1; - return 0; -} - -static void __unregister_user_hw_breakpoint(int pos, struct task_struct *tsk) -{ - hbp_user_refcount[pos]--; - tsk->thread.hbp[pos] = NULL; + bp = perf_event_create_kernel_counter(attr, cpu, pid, triggered); + kfree(attr); - arch_update_user_hw_breakpoint(pos, tsk); - - if (tsk == current) - arch_install_thread_hw_breakpoint(tsk); + return bp; } /** * register_user_hw_breakpoint - register a hardware breakpoint for user space + * @addr: is the memory address that triggers the breakpoint + * @len: the length of the access to the memory (1 byte, 2 bytes etc...) + * @type: the type of the access to the memory (read/write/exec) + * @triggered: callback to trigger when we hit the breakpoint * @tsk: pointer to 'task_struct' of the process to which the address belongs - * @bp: the breakpoint structure to register - * - * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and - * @bp->triggered must be set properly before invocation + * @active: should we activate it while registering it * */ -int register_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp) +struct perf_event * +register_user_hw_breakpoint(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + struct task_struct *tsk, + bool active) { - struct thread_struct *thread = &(tsk->thread); - int i, rc = -ENOSPC; - - spin_lock_bh(&hw_breakpoint_lock); - - for (i = 0; i < hbp_kernel_pos; i++) { - if (!thread->hbp[i]) { - rc = __register_user_hw_breakpoint(i, tsk, bp); - break; - } - } - if (!rc) - set_tsk_thread_flag(tsk, TIF_DEBUG); - - spin_unlock_bh(&hw_breakpoint_lock); - return rc; + return register_user_hw_breakpoint_cpu(addr, len, type, triggered, + tsk->pid, -1, active); } EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); /** * modify_user_hw_breakpoint - modify a user-space hardware breakpoint + * @bp: the breakpoint structure to modify + * @addr: is the memory address that triggers the breakpoint + * @len: the length of the access to the memory (1 byte, 2 bytes etc...) + * @type: the type of the access to the memory (read/write/exec) + * @triggered: callback to trigger when we hit the breakpoint * @tsk: pointer to 'task_struct' of the process to which the address belongs - * @bp: the breakpoint structure to unregister - * + * @active: should we activate it while registering it */ -int modify_user_hw_breakpoint(struct task_struct *tsk, struct hw_breakpoint *bp) +struct perf_event * +modify_user_hw_breakpoint(struct perf_event *bp, + unsigned long addr, + int len, + int type, + perf_callback_t triggered, + struct task_struct *tsk, + bool active) { - struct thread_struct *thread = &(tsk->thread); - int i, ret = -ENOENT; + /* + * FIXME: do it without unregistering + * - We don't want to lose our slot + * - If the new bp is incorrect, don't lose the older one + */ + unregister_hw_breakpoint(bp); - spin_lock_bh(&hw_breakpoint_lock); - for (i = 0; i < hbp_kernel_pos; i++) { - if (bp == thread->hbp[i]) { - ret = __modify_user_hw_breakpoint(i, tsk, bp); - break; - } - } - spin_unlock_bh(&hw_breakpoint_lock); - return ret; + return register_user_hw_breakpoint(addr, len, type, triggered, + tsk, active); } EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); /** - * unregister_user_hw_breakpoint - unregister a user-space hardware breakpoint - * @tsk: pointer to 'task_struct' of the process to which the address belongs + * unregister_hw_breakpoint - unregister a user-space hardware breakpoint * @bp: the breakpoint structure to unregister - * */ -void unregister_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp) +void unregister_hw_breakpoint(struct perf_event *bp) { - struct thread_struct *thread = &(tsk->thread); - int i, pos = -1, hbp_counter = 0; - - spin_lock_bh(&hw_breakpoint_lock); - for (i = 0; i < hbp_kernel_pos; i++) { - if (thread->hbp[i]) - hbp_counter++; - if (bp == thread->hbp[i]) - pos = i; - } - if (pos >= 0) { - __unregister_user_hw_breakpoint(pos, tsk); - hbp_counter--; - } - if (!hbp_counter) - clear_tsk_thread_flag(tsk, TIF_DEBUG); - - spin_unlock_bh(&hw_breakpoint_lock); + if (!bp) + return; + perf_event_release_kernel(bp); +} +EXPORT_SYMBOL_GPL(unregister_hw_breakpoint); + +static struct perf_event * +register_kernel_hw_breakpoint_cpu(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + int cpu, + bool active) +{ + return register_user_hw_breakpoint_cpu(addr, len, type, triggered, + -1, cpu, active); } -EXPORT_SYMBOL_GPL(unregister_user_hw_breakpoint); /** - * register_kernel_hw_breakpoint - register a hardware breakpoint for kernel space - * @bp: the breakpoint structure to register - * - * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and - * @bp->triggered must be set properly before invocation + * register_wide_hw_breakpoint - register a wide breakpoint in the kernel + * @addr: is the memory address that triggers the breakpoint + * @len: the length of the access to the memory (1 byte, 2 bytes etc...) + * @type: the type of the access to the memory (read/write/exec) + * @triggered: callback to trigger when we hit the breakpoint + * @active: should we activate it while registering it * + * @return a set of per_cpu pointers to perf events */ -int register_kernel_hw_breakpoint(struct hw_breakpoint *bp) +struct perf_event ** +register_wide_hw_breakpoint(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + bool active) { - int rc; + struct perf_event **cpu_events, **pevent, *bp; + long err; + int cpu; + + cpu_events = alloc_percpu(typeof(*cpu_events)); + if (!cpu_events) + return ERR_PTR(-ENOMEM); - rc = arch_validate_hwbkpt_settings(bp, NULL); - if (rc) - return rc; + for_each_possible_cpu(cpu) { + pevent = per_cpu_ptr(cpu_events, cpu); + bp = register_kernel_hw_breakpoint_cpu(addr, len, type, + triggered, cpu, active); - spin_lock_bh(&hw_breakpoint_lock); + *pevent = bp; - rc = -ENOSPC; - /* Check if we are over-committing */ - if ((hbp_kernel_pos > 0) && (!hbp_user_refcount[hbp_kernel_pos-1])) { - hbp_kernel_pos--; - hbp_kernel[hbp_kernel_pos] = bp; - on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1); - rc = 0; + if (IS_ERR(bp) || !bp) { + err = PTR_ERR(bp); + goto fail; + } } - spin_unlock_bh(&hw_breakpoint_lock); - return rc; + return cpu_events; + +fail: + for_each_possible_cpu(cpu) { + pevent = per_cpu_ptr(cpu_events, cpu); + if (IS_ERR(*pevent) || !*pevent) + break; + unregister_hw_breakpoint(*pevent); + } + free_percpu(cpu_events); + /* return the error if any */ + return ERR_PTR(err); } -EXPORT_SYMBOL_GPL(register_kernel_hw_breakpoint); /** - * unregister_kernel_hw_breakpoint - unregister a HW breakpoint for kernel space - * @bp: the breakpoint structure to unregister - * - * Uninstalls and unregisters @bp. + * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel + * @cpu_events: the per cpu set of events to unregister */ -void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp) +void unregister_wide_hw_breakpoint(struct perf_event **cpu_events) { - int i, j; - - spin_lock_bh(&hw_breakpoint_lock); - - /* Find the 'bp' in our list of breakpoints for kernel */ - for (i = hbp_kernel_pos; i < HBP_NUM; i++) - if (bp == hbp_kernel[i]) - break; + int cpu; + struct perf_event **pevent; - /* Check if we did not find a match for 'bp'. If so return early */ - if (i == HBP_NUM) { - spin_unlock_bh(&hw_breakpoint_lock); - return; + for_each_possible_cpu(cpu) { + pevent = per_cpu_ptr(cpu_events, cpu); + unregister_hw_breakpoint(*pevent); } - - /* - * We'll shift the breakpoints one-level above to compact if - * unregistration creates a hole - */ - for (j = i; j > hbp_kernel_pos; j--) - hbp_kernel[j] = hbp_kernel[j-1]; - - hbp_kernel[hbp_kernel_pos] = NULL; - on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1); - hbp_kernel_pos++; - - spin_unlock_bh(&hw_breakpoint_lock); + free_percpu(cpu_events); } -EXPORT_SYMBOL_GPL(unregister_kernel_hw_breakpoint); + static struct notifier_block hw_breakpoint_exceptions_nb = { .notifier_call = hw_breakpoint_exceptions_notify, @@ -374,5 +283,12 @@ static int __init init_hw_breakpoint(void) { return register_die_notifier(&hw_breakpoint_exceptions_nb); } - core_initcall(init_hw_breakpoint); + + +struct pmu perf_ops_bp = { + .enable = arch_install_hw_breakpoint, + .disable = arch_uninstall_hw_breakpoint, + .read = hw_breakpoint_pmu_read, + .unthrottle = hw_breakpoint_pmu_unthrottle +}; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 5087125e2a00..98dc56b2ebe4 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -4229,6 +4230,51 @@ static void perf_event_free_filter(struct perf_event *event) #endif /* CONFIG_EVENT_PROFILE */ +#ifdef CONFIG_HAVE_HW_BREAKPOINT +static void bp_perf_event_destroy(struct perf_event *event) +{ + release_bp_slot(event); +} + +static const struct pmu *bp_perf_event_init(struct perf_event *bp) +{ + int err; + /* + * The breakpoint is already filled if we haven't created the counter + * through perf syscall + * FIXME: manage to get trigerred to NULL if it comes from syscalls + */ + if (!bp->callback) + err = register_perf_hw_breakpoint(bp); + else + err = __register_perf_hw_breakpoint(bp); + if (err) + return ERR_PTR(err); + + bp->destroy = bp_perf_event_destroy; + + return &perf_ops_bp; +} + +void perf_bp_event(struct perf_event *bp, void *regs) +{ + /* TODO */ +} +#else +static void bp_perf_event_destroy(struct perf_event *event) +{ +} + +static const struct pmu *bp_perf_event_init(struct perf_event *bp) +{ + return NULL; +} + +void perf_bp_event(struct perf_event *bp, void *regs) +{ +} +#endif + atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; static void sw_perf_event_destroy(struct perf_event *event) @@ -4375,6 +4421,11 @@ perf_event_alloc(struct perf_event_attr *attr, pmu = tp_perf_event_init(event); break; + case PERF_TYPE_BREAKPOINT: + pmu = bp_perf_event_init(event); + break; + + default: break; } @@ -4686,7 +4737,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, ctx = find_get_context(pid, cpu); if (IS_ERR(ctx)) - return NULL ; + return NULL; event = perf_event_alloc(attr, cpu, ctx, NULL, NULL, callback, GFP_KERNEL); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 91c3d0e9a5a1..d72f06ff263f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -11,14 +11,11 @@ #include #include #include +#include #include #include -#ifdef CONFIG_KSYM_TRACER -#include -#endif - enum trace_type { __TRACE_FIRST_TYPE = 0, diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index e19747d4f860..c16a08f399df 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -372,11 +372,11 @@ FTRACE_ENTRY(ksym_trace, ksym_trace_entry, F_STRUCT( __field( unsigned long, ip ) __field( unsigned char, type ) - __array( char , ksym_name, KSYM_NAME_LEN ) __array( char , cmd, TASK_COMM_LEN ) + __field( unsigned long, addr ) ), - F_printk("ip: %pF type: %d ksym_name: %s cmd: %s", + F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s", (void *)__entry->ip, (unsigned int)__entry->type, - __entry->ksym_name, __entry->cmd) + (void *)__entry->addr, __entry->cmd) ); diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 6d5609c67378..fea83eeeef09 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -29,7 +29,11 @@ #include "trace_stat.h" #include "trace.h" -/* For now, let us restrict the no. of symbols traced simultaneously to number +#include +#include + +/* + * For now, let us restrict the no. of symbols traced simultaneously to number * of available hardware breakpoint registers. */ #define KSYM_TRACER_MAX HBP_NUM @@ -37,8 +41,10 @@ #define KSYM_TRACER_OP_LEN 3 /* rw- */ struct trace_ksym { - struct hw_breakpoint *ksym_hbp; + struct perf_event **ksym_hbp; unsigned long ksym_addr; + int type; + int len; #ifdef CONFIG_PROFILE_KSYM_TRACER unsigned long counter; #endif @@ -75,10 +81,11 @@ void ksym_collect_stats(unsigned long hbp_hit_addr) } #endif /* CONFIG_PROFILE_KSYM_TRACER */ -void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs) +void ksym_hbp_handler(struct perf_event *hbp, void *data) { struct ring_buffer_event *event; struct ksym_trace_entry *entry; + struct pt_regs *regs = data; struct ring_buffer *buffer; int pc; @@ -96,12 +103,12 @@ void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs) entry = ring_buffer_event_data(event); entry->ip = instruction_pointer(regs); - entry->type = hbp->info.type; - strlcpy(entry->ksym_name, hbp->info.name, KSYM_SYMBOL_LEN); + entry->type = hw_breakpoint_type(hbp); + entry->addr = hw_breakpoint_addr(hbp); strlcpy(entry->cmd, current->comm, TASK_COMM_LEN); #ifdef CONFIG_PROFILE_KSYM_TRACER - ksym_collect_stats(hbp->info.address); + ksym_collect_stats(hw_breakpoint_addr(hbp)); #endif /* CONFIG_PROFILE_KSYM_TRACER */ trace_buffer_unlock_commit(buffer, event, 0, pc); @@ -120,31 +127,21 @@ static int ksym_trace_get_access_type(char *str) int access = 0; if (str[0] == 'r') - access += 4; - else if (str[0] != '-') - return -EINVAL; + access |= HW_BREAKPOINT_R; if (str[1] == 'w') - access += 2; - else if (str[1] != '-') - return -EINVAL; + access |= HW_BREAKPOINT_W; - if (str[2] != '-') - return -EINVAL; + if (str[2] == 'x') + access |= HW_BREAKPOINT_X; switch (access) { - case 6: - access = HW_BREAKPOINT_RW; - break; - case 4: - access = -EINVAL; - break; - case 2: - access = HW_BREAKPOINT_WRITE; - break; + case HW_BREAKPOINT_W: + case HW_BREAKPOINT_W | HW_BREAKPOINT_R: + return access; + default: + return -EINVAL; } - - return access; } /* @@ -194,36 +191,33 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) if (!entry) return -ENOMEM; - entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); - if (!entry->ksym_hbp) - goto err; - - entry->ksym_hbp->info.name = kstrdup(ksymname, GFP_KERNEL); - if (!entry->ksym_hbp->info.name) - goto err; - - entry->ksym_hbp->info.type = op; - entry->ksym_addr = entry->ksym_hbp->info.address = addr; -#ifdef CONFIG_X86 - entry->ksym_hbp->info.len = HW_BREAKPOINT_LEN_4; -#endif - entry->ksym_hbp->triggered = (void *)ksym_hbp_handler; + entry->type = op; + entry->ksym_addr = addr; + entry->len = HW_BREAKPOINT_LEN_4; + + ret = -EAGAIN; + entry->ksym_hbp = register_wide_hw_breakpoint(entry->ksym_addr, + entry->len, entry->type, + ksym_hbp_handler, true); + if (IS_ERR(entry->ksym_hbp)) { + entry->ksym_hbp = NULL; + ret = PTR_ERR(entry->ksym_hbp); + } - ret = register_kernel_hw_breakpoint(entry->ksym_hbp); - if (ret < 0) { + if (!entry->ksym_hbp) { printk(KERN_INFO "ksym_tracer request failed. Try again" " later!!\n"); - ret = -EAGAIN; goto err; } + hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head); ksym_filter_entry_count++; + return 0; + err: - if (entry->ksym_hbp) - kfree(entry->ksym_hbp->info.name); - kfree(entry->ksym_hbp); kfree(entry); + return ret; } @@ -244,10 +238,10 @@ static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf, mutex_lock(&ksym_tracer_mutex); hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { - ret = trace_seq_printf(s, "%s:", entry->ksym_hbp->info.name); - if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE) + ret = trace_seq_printf(s, "%pS:", (void *)entry->ksym_addr); + if (entry->type == HW_BREAKPOINT_W) ret = trace_seq_puts(s, "-w-\n"); - else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW) + else if (entry->type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R)) ret = trace_seq_puts(s, "rw-\n"); WARN_ON_ONCE(!ret); } @@ -269,12 +263,10 @@ static void __ksym_trace_reset(void) mutex_lock(&ksym_tracer_mutex); hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head, ksym_hlist) { - unregister_kernel_hw_breakpoint(entry->ksym_hbp); + unregister_wide_hw_breakpoint(entry->ksym_hbp); ksym_filter_entry_count--; hlist_del_rcu(&(entry->ksym_hlist)); synchronize_rcu(); - kfree(entry->ksym_hbp->info.name); - kfree(entry->ksym_hbp); kfree(entry); } mutex_unlock(&ksym_tracer_mutex); @@ -327,7 +319,7 @@ static ssize_t ksym_trace_filter_write(struct file *file, hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { if (entry->ksym_addr == ksym_addr) { /* Check for malformed request: (6) */ - if (entry->ksym_hbp->info.type != op) + if (entry->type != op) changed = 1; else goto out; @@ -335,18 +327,21 @@ static ssize_t ksym_trace_filter_write(struct file *file, } } if (changed) { - unregister_kernel_hw_breakpoint(entry->ksym_hbp); - entry->ksym_hbp->info.type = op; + unregister_wide_hw_breakpoint(entry->ksym_hbp); + entry->type = op; if (op > 0) { - ret = register_kernel_hw_breakpoint(entry->ksym_hbp); - if (ret == 0) + entry->ksym_hbp = + register_wide_hw_breakpoint(entry->ksym_addr, + entry->len, entry->type, + ksym_hbp_handler, true); + if (IS_ERR(entry->ksym_hbp)) + entry->ksym_hbp = NULL; + if (!entry->ksym_hbp) goto out; } ksym_filter_entry_count--; hlist_del_rcu(&(entry->ksym_hlist)); synchronize_rcu(); - kfree(entry->ksym_hbp->info.name); - kfree(entry->ksym_hbp); kfree(entry); ret = 0; goto out; @@ -413,16 +408,16 @@ static enum print_line_t ksym_trace_output(struct trace_iterator *iter) trace_assign_type(field, entry); - ret = trace_seq_printf(s, "%11s-%-5d [%03d] %-30s ", field->cmd, - entry->pid, iter->cpu, field->ksym_name); + ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd, + entry->pid, iter->cpu, (char *)field->addr); if (!ret) return TRACE_TYPE_PARTIAL_LINE; switch (field->type) { - case HW_BREAKPOINT_WRITE: + case HW_BREAKPOINT_W: ret = trace_seq_printf(s, " W "); break; - case HW_BREAKPOINT_RW: + case HW_BREAKPOINT_R | HW_BREAKPOINT_W: ret = trace_seq_printf(s, " RW "); break; default: @@ -490,14 +485,13 @@ static int ksym_tracer_stat_show(struct seq_file *m, void *v) entry = hlist_entry(stat, struct trace_ksym, ksym_hlist); - if (entry->ksym_hbp) - access_type = entry->ksym_hbp->info.type; + access_type = entry->type; switch (access_type) { - case HW_BREAKPOINT_WRITE: + case HW_BREAKPOINT_W: seq_puts(m, " W "); break; - case HW_BREAKPOINT_RW: + case HW_BREAKPOINT_R | HW_BREAKPOINT_W: seq_puts(m, " RW "); break; default: diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 7179c12e4f0f..27c5072c2e6b 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -828,7 +828,8 @@ trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr) ksym_selftest_dummy = 0; /* Register the read-write tracing request */ - ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW, + ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, + HW_BREAKPOINT_R | HW_BREAKPOINT_W, (unsigned long)(&ksym_selftest_dummy)); if (ret < 0) { -- cgit v1.2.3 From eb647138acefc897c0eb6eddd5d3650966dfe627 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Sun, 8 Nov 2009 12:12:14 +0100 Subject: x86/PCI: Adjust GFP mask handling for coherent allocations Rather than forcing GFP flags and DMA mask to be inconsistent, GFP flags should be determined even for the fallback device through dma_alloc_coherent_mask()/dma_alloc_coherent_gfp_flags(). This restores 64-bit behavior as it was prior to commits 8965eb19386fdf5ccd0ef8b02593eb8560aa3416 and 4a367f3a9dbf2e7ffcee4702203479809236ee6e (not sure why there are two of them), where GFP_DMA was forced on for 32-bit, but not for 64-bit, with the slight adjustment that afaict even 32-bit doesn't need this without CONFIG_ISA. Signed-off-by: Jan Beulich Acked-by: Takashi Iwai LKML-Reference: <4AF18187020000780001D8AA@vpn.id2.novell.com> Signed-off-by: Ingo Molnar Signed-off-by: Jesse Barnes --- arch/x86/include/asm/dma-mapping.h | 10 +++++++--- arch/x86/kernel/pci-dma.c | 6 ++---- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 0ee770d23d0e..6a25d5d42836 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -14,6 +14,12 @@ #include #include +#ifdef CONFIG_ISA +# define ISA_DMA_BIT_MASK DMA_BIT_MASK(24) +#else +# define ISA_DMA_BIT_MASK DMA_BIT_MASK(32) +#endif + extern dma_addr_t bad_dma_address; extern int iommu_merge; extern struct device x86_dma_fallback_dev; @@ -124,10 +130,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) return memory; - if (!dev) { + if (!dev) dev = &x86_dma_fallback_dev; - gfp |= GFP_DMA; - } if (!is_device_dma_capable(dev)) return NULL; diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index b2a71dca5642..a6e804d16c35 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -45,12 +45,10 @@ int iommu_pass_through __read_mostly; dma_addr_t bad_dma_address __read_mostly = 0; EXPORT_SYMBOL(bad_dma_address); -/* Dummy device used for NULL arguments (normally ISA). Better would - be probably a smaller DMA mask, but this is bug-to-bug compatible - to older i386. */ +/* Dummy device used for NULL arguments (normally ISA). */ struct device x86_dma_fallback_dev = { .init_name = "fallback device", - .coherent_dma_mask = DMA_BIT_MASK(32), + .coherent_dma_mask = ISA_DMA_BIT_MASK, .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, }; EXPORT_SYMBOL(x86_dma_fallback_dev); -- cgit v1.2.3 From 46dc281b1bb02527195fe2ad50a3af6d7f7f7325 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 8 Nov 2009 18:53:56 +0300 Subject: x86, apic: Use PAGE_SIZE instead of numbers The whole page is reserved for IO-APIC fixmap due to non-cacheable requirement. So lets note this explicitly instead of playing with numbers. Signed-off-by: Cyrill Gorcunov Cc: Yinghai Lu Cc: Maciej W. Rozycki LKML-Reference: <20091108155356.GB25940@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 31e9db3c12ad..9ee1c1628c17 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -4111,7 +4111,7 @@ fake_ioapic_page: idx++; ioapic_res->start = ioapic_phys; - ioapic_res->end = ioapic_phys + (4 * 1024) - 1; + ioapic_res->end = ioapic_phys + PAGE_SIZE-1; ioapic_res++; } } -- cgit v1.2.3 From 4343fe1024e09e17667f95620ed3e69a7a5f4389 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 8 Nov 2009 18:54:31 +0300 Subject: x86, ioapic: Use snrpintf while set names for IO-APIC resourses We should be ready that one day MAX_IO_APICS may raise its number. To prevent memory overwrite we're to use safe snprintf while set IO-APIC resourse name. Signed-off-by: Cyrill Gorcunov Cc: Yinghai Lu LKML-Reference: <20091108155431.GC25940@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 9ee1c1628c17..24d1458a1822 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -4066,7 +4066,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics) for (i = 0; i < nr_ioapics; i++) { res[i].name = mem; res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; - sprintf(mem, "IOAPIC %u", i); + snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i); mem += IOAPIC_RESOURCE_NAME_SIZE; } -- cgit v1.2.3 From f4a70c55376683213229af7266dc57ad81aee354 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 8 Nov 2009 16:16:45 +0300 Subject: x86, apic: Get rid of apicid_to_cpu_present assign on 64-bit In fact it's never get used on x86-64 (for 64 bit platform we use differ technique to enumerate io-units). Reported-by: Stephen Rothwell Signed-off-by: Cyrill Gorcunov Cc: Peter Zijlstra LKML-Reference: <20091108131645.GD5300@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic_noop.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 9ab6ffb313ac..89629f622b60 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -162,7 +162,12 @@ struct apic apic_noop = { .cpu_to_logical_apicid = noop_cpu_to_logical_apicid, .cpu_present_to_apicid = default_cpu_present_to_apicid, + +#ifdef CONFIG_X86_32 .apicid_to_cpu_present = default_apicid_to_cpu_present, +#else + .apicid_to_cpu_present = NULL, +#endif .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, -- cgit v1.2.3 From fd650a6394b3242edf125ba9c4d500349a6d7178 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 9 Nov 2009 13:52:26 -0500 Subject: x86: Generate .byte code for some new instructions via gas macro It will take some time for binutils (gas) to support some newly added instructions, such as SSE4.1 instructions or the AES-NI instructions found in upcoming Intel CPU. To make the source code can be compiled by old binutils, .byte code is used instead of the assembly instruction. But the readability and flexibility of raw .byte code is not good. This patch solves the issue of raw .byte code via generating it via assembly instruction like gas macro. The syntax is as close as possible to real assembly instruction. Some helper macros such as MODRM is not a full feature implementation. It can be extended when necessary. Signed-off-by: Huang Ying Acked-by: H. Peter Anvin Signed-off-by: Herbert Xu --- arch/x86/include/asm/inst.h | 150 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 arch/x86/include/asm/inst.h (limited to 'arch/x86') diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h new file mode 100644 index 000000000000..14cf526091f9 --- /dev/null +++ b/arch/x86/include/asm/inst.h @@ -0,0 +1,150 @@ +/* + * Generate .byte code for some instructions not supported by old + * binutils. + */ +#ifndef X86_ASM_INST_H +#define X86_ASM_INST_H + +#ifdef __ASSEMBLY__ + + .macro XMM_NUM opd xmm + .ifc \xmm,%xmm0 + \opd = 0 + .endif + .ifc \xmm,%xmm1 + \opd = 1 + .endif + .ifc \xmm,%xmm2 + \opd = 2 + .endif + .ifc \xmm,%xmm3 + \opd = 3 + .endif + .ifc \xmm,%xmm4 + \opd = 4 + .endif + .ifc \xmm,%xmm5 + \opd = 5 + .endif + .ifc \xmm,%xmm6 + \opd = 6 + .endif + .ifc \xmm,%xmm7 + \opd = 7 + .endif + .ifc \xmm,%xmm8 + \opd = 8 + .endif + .ifc \xmm,%xmm9 + \opd = 9 + .endif + .ifc \xmm,%xmm10 + \opd = 10 + .endif + .ifc \xmm,%xmm11 + \opd = 11 + .endif + .ifc \xmm,%xmm12 + \opd = 12 + .endif + .ifc \xmm,%xmm13 + \opd = 13 + .endif + .ifc \xmm,%xmm14 + \opd = 14 + .endif + .ifc \xmm,%xmm15 + \opd = 15 + .endif + .endm + + .macro PFX_OPD_SIZE + .byte 0x66 + .endm + + .macro PFX_REX opd1 opd2 + .if (\opd1 | \opd2) & 8 + .byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) + .endif + .endm + + .macro MODRM mod opd1 opd2 + .byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3) + .endm + + .macro PSHUFB_XMM xmm1 xmm2 + XMM_NUM pshufb_opd1 \xmm1 + XMM_NUM pshufb_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX pshufb_opd1 pshufb_opd2 + .byte 0x0f, 0x38, 0x00 + MODRM 0xc0 pshufb_opd1 pshufb_opd2 + .endm + + .macro PCLMULQDQ imm8 xmm1 xmm2 + XMM_NUM clmul_opd1 \xmm1 + XMM_NUM clmul_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX clmul_opd1 clmul_opd2 + .byte 0x0f, 0x3a, 0x44 + MODRM 0xc0 clmul_opd1 clmul_opd2 + .byte \imm8 + .endm + + .macro AESKEYGENASSIST rcon xmm1 xmm2 + XMM_NUM aeskeygen_opd1 \xmm1 + XMM_NUM aeskeygen_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aeskeygen_opd1 aeskeygen_opd2 + .byte 0x0f, 0x3a, 0xdf + MODRM 0xc0 aeskeygen_opd1 aeskeygen_opd2 + .byte \rcon + .endm + + .macro AESIMC xmm1 xmm2 + XMM_NUM aesimc_opd1 \xmm1 + XMM_NUM aesimc_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aesimc_opd1 aesimc_opd2 + .byte 0x0f, 0x38, 0xdb + MODRM 0xc0 aesimc_opd1 aesimc_opd2 + .endm + + .macro AESENC xmm1 xmm2 + XMM_NUM aesenc_opd1 \xmm1 + XMM_NUM aesenc_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aesenc_opd1 aesenc_opd2 + .byte 0x0f, 0x38, 0xdc + MODRM 0xc0 aesenc_opd1 aesenc_opd2 + .endm + + .macro AESENCLAST xmm1 xmm2 + XMM_NUM aesenclast_opd1 \xmm1 + XMM_NUM aesenclast_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aesenclast_opd1 aesenclast_opd2 + .byte 0x0f, 0x38, 0xdd + MODRM 0xc0 aesenclast_opd1 aesenclast_opd2 + .endm + + .macro AESDEC xmm1 xmm2 + XMM_NUM aesdec_opd1 \xmm1 + XMM_NUM aesdec_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aesdec_opd1 aesdec_opd2 + .byte 0x0f, 0x38, 0xde + MODRM 0xc0 aesdec_opd1 aesdec_opd2 + .endm + + .macro AESDECLAST xmm1 xmm2 + XMM_NUM aesdeclast_opd1 \xmm1 + XMM_NUM aesdeclast_opd2 \xmm2 + PFX_OPD_SIZE + PFX_REX aesdeclast_opd1 aesdeclast_opd2 + .byte 0x0f, 0x38, 0xdf + MODRM 0xc0 aesdeclast_opd1 aesdeclast_opd2 + .endm +#endif + +#endif -- cgit v1.2.3 From 506f90eeae682dc96c11c7aefac0262b3a560b49 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 29 Oct 2009 14:45:52 +0100 Subject: x86, amd-ucode: Check UCODE_MAGIC before loading the container file Signed-off-by: Borislav Petkov Signed-off-by: Andreas Herrmann LKML-Reference: <20091029134552.GC30802@alberich.amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 366baa179913..f4c538b681ca 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -317,6 +317,12 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device) return UCODE_NFOUND; } + if (*(u32 *)firmware->data != UCODE_MAGIC) { + printk(KERN_ERR "microcode: invalid UCODE_MAGIC (0x%08x)\n", + *(u32 *)firmware->data); + return UCODE_ERROR; + } + ret = generic_load_microcode(cpu, firmware->data, firmware->size); release_firmware(firmware); -- cgit v1.2.3 From 6e18da75c28b592594fd632cf3e6eb09d3d078de Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 29 Oct 2009 14:47:42 +0100 Subject: x86, amd-ucode: Remove needless log messages Signed-off-by: Andreas Herrmann Cc: Borislav Petkov LKML-Reference: <20091029134742.GD30802@alberich.amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index f4c538b681ca..c043534fd986 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -109,12 +109,8 @@ static int get_matching_microcode(int cpu, void *mc, int rev) return 0; } - if (mc_header->processor_rev_id != equiv_cpu_id) { - printk(KERN_ERR "microcode: CPU%d: patch mismatch " - "(processor_rev_id: %x, equiv_cpu_id: %x)\n", - cpu, mc_header->processor_rev_id, equiv_cpu_id); + if (mc_header->processor_rev_id != equiv_cpu_id) return 0; - } /* ucode might be chipset specific -- currently we don't support this */ if (mc_header->nb_dev_id || mc_header->sb_dev_id) { @@ -185,9 +181,6 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); - printk(KERN_DEBUG "microcode: size %u, total_size %u\n", - size, total_size); - if (total_size > size || total_size > UCODE_MAX_SIZE) { printk(KERN_ERR "microcode: error: size mismatch\n"); return NULL; -- cgit v1.2.3 From 7abc07531383ac7f727cc9d44e1360a829f2082e Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Tue, 10 Nov 2009 01:06:59 +0300 Subject: x86: apic: Do not use stacked physid_mask_t We should not use physid_mask_t as a stack based variable in apic code. This type depends on MAX_APICS parameter which may be huge enough. Especially it became a problem with apic NOOP driver which is portable between 32 bit and 64 bit environment (where we have really huge MAX_APICS). So apic driver should operate with pointers and a caller in turn should aware of allocation physid_mask_t variable. As a side (but positive) effect -- we may use already implemented physid_set_mask_of_physid function eliminating default_apicid_to_cpu_present completely. Note that physids_coerce and physids_promote turned into static inline from macro (since macro hides the fact that parameter is being interpreted as unsigned long, make it explicit). Signed-off-by: Cyrill Gorcunov Cc: Yinghai Lu Cc: Maciej W. Rozycki Cc: Stephen Rothwell LKML-Reference: <20091109220659.GA5568@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 19 +++++++------------ arch/x86/include/asm/mpspec.h | 16 +++++++++------- arch/x86/kernel/apic/apic_noop.c | 18 ++++-------------- arch/x86/kernel/apic/bigsmp_32.c | 13 ++++--------- arch/x86/kernel/apic/es7000_32.c | 16 ++++++---------- arch/x86/kernel/apic/io_apic.c | 14 +++++++------- arch/x86/kernel/apic/numaq_32.c | 13 ++++++------- arch/x86/kernel/apic/probe_32.c | 2 +- arch/x86/kernel/apic/summit_32.c | 10 +++++----- arch/x86/kernel/visws_quirks.c | 2 +- 10 files changed, 50 insertions(+), 73 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 08a5f420e07b..b4ac2cdcb64f 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -297,20 +297,20 @@ struct apic { int disable_esr; int dest_logical; - unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); + unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid); unsigned long (*check_apicid_present)(int apicid); void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); void (*init_apic_ldr)(void); - physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); + void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); void (*setup_apic_routing)(void); int (*multi_timer_check)(int apic, int irq); int (*apicid_to_node)(int logical_apicid); int (*cpu_to_logical_apicid)(int cpu); int (*cpu_present_to_apicid)(int mps_cpu); - physid_mask_t (*apicid_to_cpu_present)(int phys_apicid); + void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); void (*setup_portio_remap)(void); int (*check_phys_apicid_present)(int phys_apicid); void (*enable_apic_mode)(void); @@ -534,9 +534,9 @@ default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, return (unsigned int)(mask1 & mask2 & mask3); } -static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid) +static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid) { - return physid_isset(apicid, bitmap); + return physid_isset(apicid, *map); } static inline unsigned long default_check_apicid_present(int bit) @@ -544,9 +544,9 @@ static inline unsigned long default_check_apicid_present(int bit) return physid_isset(bit, phys_cpu_present_map); } -static inline physid_mask_t default_ioapic_phys_id_map(physid_mask_t phys_map) +static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) { - return phys_map; + *retmap = *phys_map; } /* Mapping from cpu number to logical apicid */ @@ -585,11 +585,6 @@ extern int default_cpu_present_to_apicid(int mps_cpu); extern int default_check_phys_apicid_present(int phys_apicid); #endif -static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) -{ - return physid_mask_of_physid(phys_apicid); -} - #endif /* CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 79c94500c0bb..61d90b1331c3 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -163,14 +163,16 @@ typedef struct physid_mask physid_mask_t; #define physids_shift_left(d, s, n) \ bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS) -#define physids_coerce(map) ((map).mask[0]) +static inline unsigned long physids_coerce(physid_mask_t *map) +{ + return map->mask[0]; +} -#define physids_promote(physids) \ - ({ \ - physid_mask_t __physid_mask = PHYSID_MASK_NONE; \ - __physid_mask.mask[0] = physids; \ - __physid_mask; \ - }) +static inline void physids_promote(unsigned long physids, physid_mask_t *map) +{ + physids_clear(*map); + map->mask[0] = physids; +} /* Note: will create very large stack frames if physid_mask_t is big */ #define physid_mask_of_physid(physid) \ diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 89629f622b60..d9acc3bee0f4 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -54,11 +54,6 @@ static u64 noop_apic_icr_read(void) return 0; } -static physid_mask_t noop_ioapic_phys_id_map(physid_mask_t phys_map) -{ - return phys_map; -} - static int noop_cpu_to_logical_apicid(int cpu) { return 0; @@ -100,9 +95,9 @@ static const struct cpumask *noop_target_cpus(void) return cpumask_of(0); } -static unsigned long noop_check_apicid_used(physid_mask_t bitmap, int apicid) +static unsigned long noop_check_apicid_used(physid_mask_t *map, int apicid) { - return physid_isset(apicid, bitmap); + return physid_isset(apicid, *map); } static unsigned long noop_check_apicid_present(int bit) @@ -155,19 +150,14 @@ struct apic apic_noop = { .vector_allocation_domain = noop_vector_allocation_domain, .init_apic_ldr = noop_init_apic_ldr, - .ioapic_phys_id_map = noop_ioapic_phys_id_map, + .ioapic_phys_id_map = default_ioapic_phys_id_map, .setup_apic_routing = NULL, .multi_timer_check = NULL, .apicid_to_node = noop_apicid_to_node, .cpu_to_logical_apicid = noop_cpu_to_logical_apicid, .cpu_present_to_apicid = default_cpu_present_to_apicid, - -#ifdef CONFIG_X86_32 - .apicid_to_cpu_present = default_apicid_to_cpu_present, -#else - .apicid_to_cpu_present = NULL, -#endif + .apicid_to_cpu_present = physid_set_mask_of_physid, .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 77a06413b6b2..38dcecfa5818 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -35,7 +35,7 @@ static const struct cpumask *bigsmp_target_cpus(void) #endif } -static unsigned long bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid) +static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid) { return 0; } @@ -93,11 +93,6 @@ static int bigsmp_cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } -static physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid) -{ - return physid_mask_of_physid(phys_apicid); -} - /* Mapping from cpu number to logical apicid */ static inline int bigsmp_cpu_to_logical_apicid(int cpu) { @@ -106,10 +101,10 @@ static inline int bigsmp_cpu_to_logical_apicid(int cpu) return cpu_physical_id(cpu); } -static physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map) +static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) { /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0xFFL); + physids_promote(0xFFL, retmap); } static int bigsmp_check_phys_apicid_present(int phys_apicid) @@ -230,7 +225,7 @@ struct apic apic_bigsmp = { .apicid_to_node = bigsmp_apicid_to_node, .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid, .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, - .apicid_to_cpu_present = bigsmp_apicid_to_cpu_present, + .apicid_to_cpu_present = physid_set_mask_of_physid, .setup_portio_remap = NULL, .check_phys_apicid_present = bigsmp_check_phys_apicid_present, .enable_apic_mode = NULL, diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 89174f847b49..e85f8fb7f8e7 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -466,11 +466,11 @@ static const struct cpumask *es7000_target_cpus(void) return cpumask_of(smp_processor_id()); } -static unsigned long -es7000_check_apicid_used(physid_mask_t bitmap, int apicid) +static unsigned long es7000_check_apicid_used(physid_mask_t *map, int apicid) { return 0; } + static unsigned long es7000_check_apicid_present(int bit) { return physid_isset(bit, phys_cpu_present_map); @@ -539,14 +539,10 @@ static int es7000_cpu_present_to_apicid(int mps_cpu) static int cpu_id; -static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid) +static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap) { - physid_mask_t mask; - - mask = physid_mask_of_physid(cpu_id); + physid_set_mask_of_physid(cpu_id, retmap); ++cpu_id; - - return mask; } /* Mapping from cpu number to logical apicid */ @@ -561,10 +557,10 @@ static int es7000_cpu_to_logical_apicid(int cpu) #endif } -static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map) +static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) { /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0xff); + physids_promote(0xFFL, retmap); } static int es7000_check_phys_apicid_present(int cpu_physical_apicid) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 24d1458a1822..20ea8392bc57 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2031,7 +2031,7 @@ void __init setup_ioapic_ids_from_mpc(void) * This is broken; anything with a real cpu count has to * circumvent this idiocy regardless. */ - phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map); + apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map); /* * Set the IOAPIC ID to the value stored in the MPC table. @@ -2058,7 +2058,7 @@ void __init setup_ioapic_ids_from_mpc(void) * system must have a unique ID or we get lots of nice * 'stuck on smp_invalidate_needed IPI wait' messages. */ - if (apic->check_apicid_used(phys_id_present_map, + if (apic->check_apicid_used(&phys_id_present_map, mp_ioapics[apic_id].apicid)) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", apic_id, mp_ioapics[apic_id].apicid); @@ -2073,7 +2073,7 @@ void __init setup_ioapic_ids_from_mpc(void) mp_ioapics[apic_id].apicid = i; } else { physid_mask_t tmp; - tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid); + apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid, &tmp); apic_printk(APIC_VERBOSE, "Setting %d in the " "phys_id_present_map\n", mp_ioapics[apic_id].apicid); @@ -3904,7 +3904,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) */ if (physids_empty(apic_id_map)) - apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map); + apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map); spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(ioapic, 0); @@ -3920,10 +3920,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) * Every APIC in a system must have a unique ID or we get lots of nice * 'stuck on smp_invalidate_needed IPI wait' messages. */ - if (apic->check_apicid_used(apic_id_map, apic_id)) { + if (apic->check_apicid_used(&apic_id_map, apic_id)) { for (i = 0; i < get_physical_broadcast(); i++) { - if (!apic->check_apicid_used(apic_id_map, i)) + if (!apic->check_apicid_used(&apic_id_map, i)) break; } @@ -3936,7 +3936,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) apic_id = i; } - tmp = apic->apicid_to_cpu_present(apic_id); + apic->apicid_to_cpu_present(apic_id, &tmp); physids_or(apic_id_map, apic_id_map, tmp); if (reg_00.bits.ID != apic_id) { diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index efa00e2b8505..07cdbdcd7a92 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -334,10 +334,9 @@ static inline const struct cpumask *numaq_target_cpus(void) return cpu_all_mask; } -static inline unsigned long -numaq_check_apicid_used(physid_mask_t bitmap, int apicid) +static unsigned long numaq_check_apicid_used(physid_mask_t *map, int apicid) { - return physid_isset(apicid, bitmap); + return physid_isset(apicid, *map); } static inline unsigned long numaq_check_apicid_present(int bit) @@ -371,10 +370,10 @@ static inline int numaq_multi_timer_check(int apic, int irq) return apic != 0 && irq == 0; } -static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map) +static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) { /* We don't have a good way to do this yet - hack */ - return physids_promote(0xFUL); + return physids_promote(0xFUL, retmap); } static inline int numaq_cpu_to_logical_apicid(int cpu) @@ -402,12 +401,12 @@ static inline int numaq_apicid_to_node(int logical_apicid) return logical_apicid >> 4; } -static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) +static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap) { int node = numaq_apicid_to_node(logical_apicid); int cpu = __ffs(logical_apicid & 0xf); - return physid_mask_of_physid(cpu + 4*node); + physid_set_mask_of_physid(cpu + 4*node, retmap); } /* Where the IO area was mapped on multiquad, always 0 otherwise */ diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 0c0182cc947d..1a6559f6768c 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -108,7 +108,7 @@ struct apic apic_default = { .apicid_to_node = default_apicid_to_node, .cpu_to_logical_apicid = default_cpu_to_logical_apicid, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .apicid_to_cpu_present = default_apicid_to_cpu_present, + .apicid_to_cpu_present = physid_set_mask_of_physid, .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 645ecc4ff0be..9b419263d90d 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -183,7 +183,7 @@ static const struct cpumask *summit_target_cpus(void) return cpumask_of(0); } -static unsigned long summit_check_apicid_used(physid_mask_t bitmap, int apicid) +static unsigned long summit_check_apicid_used(physid_mask_t *map, int apicid) { return 0; } @@ -261,15 +261,15 @@ static int summit_cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } -static physid_mask_t summit_ioapic_phys_id_map(physid_mask_t phys_id_map) +static void summit_ioapic_phys_id_map(physid_mask_t *phys_id_map, physid_mask_t *retmap) { /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0x0F); + physids_promote(0x0FL, retmap); } -static physid_mask_t summit_apicid_to_cpu_present(int apicid) +static void summit_apicid_to_cpu_present(int apicid, physid_mask_t *retmap) { - return physid_mask_of_physid(0); + physid_set_mask_of_physid(0, retmap); } static int summit_check_phys_apicid_present(int physical_apicid) diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index f068553a1b17..cff70c86e18e 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -183,7 +183,7 @@ static void __init MP_processor_info(struct mpc_cpu *m) return; } - apic_cpus = apic->apicid_to_cpu_present(m->apicid); + apic->apicid_to_cpu_present(m->apicid, &apic_cpus); physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); /* * Validate version -- cgit v1.2.3 From a2202aa29289db64ca7988b12343158b67b27f10 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 10 Nov 2009 09:38:24 +0800 Subject: x86: Under BIOS control, restore AP's APIC_LVTTHMR to the BSP value On platforms where the BIOS handles the thermal monitor interrupt, APIC_LVTTHMR on each logical CPU is programmed to generate a SMI and OS must not touch it. Unfortunately AP bringup sequence using INIT-SIPI-SIPI clears all the LVT entries except the mask bit. Essentially this results in all LVT entries including the thermal monitoring interrupt set to masked (clearing the bios programmed value for APIC_LVTTHMR). And this leads to kernel take over the thermal monitoring interrupt on AP's but not on BSP (leaving the bios programmed value only on BSP). As a result of this, we have seen system hangs when the thermal monitoring interrupt is generated. Fix this by reading the initial value of thermal LVT entry on BSP and if bios has taken over the control, then program the same value on all AP's and leave the thermal monitoring interrupt control on all the logical cpu's to the bios. Signed-off-by: Yong Wang Reviewed-by: Suresh Siddha Cc: Borislav Petkov Cc: Arjan van de Ven LKML-Reference: <20091110013824.GA24940@ywang-moblin2.bj.intel.com> Signed-off-by: Ingo Molnar Cc: stable@kernel.org --- arch/x86/include/asm/mce.h | 9 +++++++++ arch/x86/kernel/cpu/common.c | 2 -- arch/x86/kernel/cpu/mcheck/mce.c | 5 +++-- arch/x86/kernel/cpu/mcheck/therm_throt.c | 29 ++++++++++++++++++++++++++++- arch/x86/kernel/setup.c | 3 +++ 5 files changed, 43 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 161485da6838..858baa061cfc 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -120,8 +120,10 @@ extern int mce_disabled; extern int mce_p5_enabled; #ifdef CONFIG_X86_MCE +int mcheck_init(void); void mcheck_cpu_init(struct cpuinfo_x86 *c); #else +static inline int mcheck_init(void) { return 0; } static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} #endif @@ -215,5 +217,12 @@ extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); void intel_init_thermal(struct cpuinfo_x86 *c); void mce_log_therm_throt_event(__u64 status); + +#ifdef CONFIG_X86_THERMAL_VECTOR +extern void mcheck_intel_therm_init(void); +#else +static inline void mcheck_intel_therm_init(void) { } +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4df69a38be57..9053be5d95cd 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -837,10 +837,8 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; } -#ifdef CONFIG_X86_MCE /* Init Machine Check Exception if available. */ mcheck_cpu_init(c); -#endif select_idle_routine(c); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 80801705edd7..0d4102031a4c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1655,13 +1655,14 @@ static int __init mcheck_enable(char *str) } __setup("mce", mcheck_enable); -static int __init mcheck_init(void) +int __init mcheck_init(void) { atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb); + mcheck_intel_therm_init(); + return 0; } -early_initcall(mcheck_init); /* * Sysfs support diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index b3a1dba75330..7f3cf36ed124 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -49,6 +49,8 @@ static DEFINE_PER_CPU(struct thermal_state, thermal_state); static atomic_t therm_throt_en = ATOMIC_INIT(0); +static u32 lvtthmr_init __read_mostly; + #ifdef CONFIG_SYSFS #define define_therm_throt_sysdev_one_ro(_name) \ static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) @@ -254,6 +256,18 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) ack_APIC_irq(); } +void mcheck_intel_therm_init(void) +{ + /* + * This function is only called on boot CPU. Save the init thermal + * LVT value on BSP and use that value to restore APs' thermal LVT + * entry BIOS programmed later + */ + if (cpu_has(&boot_cpu_data, X86_FEATURE_ACPI) && + cpu_has(&boot_cpu_data, X86_FEATURE_ACC)) + lvtthmr_init = apic_read(APIC_LVTTHMR); +} + void intel_init_thermal(struct cpuinfo_x86 *c) { unsigned int cpu = smp_processor_id(); @@ -270,7 +284,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c) * since it might be delivered via SMI already: */ rdmsr(MSR_IA32_MISC_ENABLE, l, h); - h = apic_read(APIC_LVTTHMR); + + /* + * The initial value of thermal LVT entries on all APs always reads + * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI + * sequence to them and LVT registers are reset to 0s except for + * the mask bits which are set to 1s when APs receive INIT IPI. + * Always restore the value that BIOS has programmed on AP based on + * BSP's info we saved since BIOS is always setting the same value + * for all threads/cores + */ + apic_write(APIC_LVTTHMR, lvtthmr_init); + + h = lvtthmr_init; + if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index e09f0e2c14b5..179c1f2aa457 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -109,6 +109,7 @@ #ifdef CONFIG_X86_64 #include #endif +#include /* * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. @@ -1024,6 +1025,8 @@ void __init setup_arch(char **cmdline_p) #endif #endif x86_init.oem.banner(); + + mcheck_init(); } #ifdef CONFIG_X86_32 -- cgit v1.2.3 From 41855b77547fa18d90ed6a5d322983d3fdab1959 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 9 Nov 2009 17:58:50 -0800 Subject: x86: GART: pci-gart_64.c: Use correct length in strncmp Signed-off-by: Joe Perches Cc: # .3x.x LKML-Reference: <1257818330.12852.72.camel@Joe-Laptop.home> Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index a9bcdf7c8801..eb46ab3f52b2 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -858,7 +858,7 @@ void __init gart_parse_options(char *p) #endif if (isdigit(*p) && get_option(&p, &arg)) iommu_size = arg; - if (!strncmp(p, "fullflush", 8)) + if (!strncmp(p, "fullflush", 9)) iommu_fullflush = 1; if (!strncmp(p, "nofullflush", 11)) iommu_fullflush = 0; -- cgit v1.2.3 From 83ea05ea69290b2e30da795527dbe304db1e2331 Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Tue, 10 Nov 2009 17:23:07 +0800 Subject: x86: pat: Clean up req_type special case for reserve_memtype() Commit: b6ff32d: x86, PAT: Consolidate code in pat_x_mtrr_type() and reserve_memtype() consolidated code in pat_x_mtrr_type() and reserve_memtype(), which removed the special case (req_type is -1) for the PAT-enabled part. We should also change comments and the PAT-disabled part. Signed-off-by: Xiaotian Feng Cc: Suresh Siddha Cc: Venkatesh Pallipadi LKML-Reference: <1257844987-7906-1-git-send-email-dfeng@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index e78cd0ec2bcf..81fb75344cd3 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -355,9 +355,6 @@ static int free_ram_pages_type(u64 start, u64 end) * - _PAGE_CACHE_UC_MINUS * - _PAGE_CACHE_UC * - * req_type will have a special case value '-1', when requester want to inherit - * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS. - * * If new_type is NULL, function will return an error if it cannot reserve the * region with req_type. If new_type is non-NULL, function will return * available type in new_type in case of no error. In case of any error @@ -377,9 +374,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, if (!pat_enabled) { /* This is identical to page table setting without PAT */ if (new_type) { - if (req_type == -1) - *new_type = _PAGE_CACHE_WB; - else if (req_type == _PAGE_CACHE_WC) + if (req_type == _PAGE_CACHE_WC) *new_type = _PAGE_CACHE_UC_MINUS; else *new_type = req_type & _PAGE_CACHE_MASK; -- cgit v1.2.3 From 2fb8f4e6a83dcaec15c1dd0ee8a6f618e7ece7f0 Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Tue, 10 Nov 2009 17:23:25 +0800 Subject: x86: pat: Remove ioremap_default() Commit: b6ff32d: x86, PAT: Consolidate code in pat_x_mtrr_type() and reserve_memtype() consolidated reserve_memtype() and pat_x_mtrr_type, this made ioremap_default() same as ioremap_cache(). Remove the redundant function and change the only caller to use ioremap_cache. Signed-off-by: Xiaotian Feng Cc: Suresh Siddha Cc: Venkatesh Pallipadi LKML-Reference: <1257845005-7938-1-git-send-email-dfeng@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 334e63ca7b2b..3af10dee0147 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -283,30 +283,6 @@ void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size) } EXPORT_SYMBOL(ioremap_cache); -static void __iomem *ioremap_default(resource_size_t phys_addr, - unsigned long size) -{ - unsigned long flags; - void __iomem *ret; - int err; - - /* - * - WB for WB-able memory and no other conflicting mappings - * - UC_MINUS for non-WB-able memory with no other conflicting mappings - * - Inherit from confliting mappings otherwise - */ - err = reserve_memtype(phys_addr, phys_addr + size, - _PAGE_CACHE_WB, &flags); - if (err < 0) - return NULL; - - ret = __ioremap_caller(phys_addr, size, flags, - __builtin_return_address(0)); - - free_memtype(phys_addr, phys_addr + size); - return ret; -} - void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size, unsigned long prot_val) { @@ -382,7 +358,7 @@ void *xlate_dev_mem_ptr(unsigned long phys) if (page_is_ram(start >> PAGE_SHIFT)) return __va(phys); - addr = (void __force *)ioremap_default(start, PAGE_SIZE); + addr = (void __force *)ioremap_cache(start, PAGE_SIZE); if (addr) addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); -- cgit v1.2.3 From 9f6b3c2c30cfbb1166ce7e74a8f9fd93ae19d2de Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 9 Nov 2009 21:03:43 +0100 Subject: hw-breakpoints: Fix broken a.out format dump Fix the broken a.out format dump. For now we only dump the ptrace breakpoints. TODO: Dump every perf breakpoints for the current thread, not only ptrace based ones. Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: "K. Prasad" --- arch/x86/include/asm/a.out-core.h | 10 ++-------- arch/x86/include/asm/debugreg.h | 2 ++ arch/x86/kernel/hw_breakpoint.c | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h index fc4685dd6e4d..7a15588e45d4 100644 --- a/arch/x86/include/asm/a.out-core.h +++ b/arch/x86/include/asm/a.out-core.h @@ -17,6 +17,7 @@ #include #include +#include /* * fill in the user structure for an a.out core dump @@ -32,14 +33,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) >> PAGE_SHIFT; dump->u_dsize -= dump->u_tsize; dump->u_ssize = 0; - dump->u_debugreg[0] = current->thread.debugreg[0]; - dump->u_debugreg[1] = current->thread.debugreg[1]; - dump->u_debugreg[2] = current->thread.debugreg[2]; - dump->u_debugreg[3] = current->thread.debugreg[3]; - dump->u_debugreg[4] = 0; - dump->u_debugreg[5] = 0; - dump->u_debugreg[6] = current->thread.debugreg6; - dump->u_debugreg[7] = current->thread.debugreg7; + aout_dump_debugregs(dump); if (dump->start_stack < TASK_SIZE) dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack)) diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 9a3333c91f9a..f1b673f08239 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -89,6 +89,8 @@ static inline void hw_breakpoint_disable(void) set_debugreg(0UL, 3); } +extern void aout_dump_debugregs(struct user *dump); + #ifdef CONFIG_KVM extern void hw_breakpoint_restore(void); #endif diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index e622620790bd..57dcee5fa958 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -375,6 +375,41 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp, return 0; } +/* + * Dump the debug register contents to the user. + * We can't dump our per cpu values because it + * may contain cpu wide breakpoint, something that + * doesn't belong to the current task. + * + * TODO: include non-ptrace user breakpoints (perf) + */ +void aout_dump_debugregs(struct user *dump) +{ + int i; + int dr7 = 0; + struct perf_event *bp; + struct arch_hw_breakpoint *info; + struct thread_struct *thread = ¤t->thread; + + for (i = 0; i < HBP_NUM; i++) { + bp = thread->ptrace_bps[i]; + + if (bp && !bp->attr.disabled) { + dump->u_debugreg[i] = bp->attr.bp_addr; + info = counter_arch_bp(bp); + dr7 |= encode_dr7(i, info->len, info->type); + } else { + dump->u_debugreg[i] = 0; + } + } + + dump->u_debugreg[4] = 0; + dump->u_debugreg[5] = 0; + dump->u_debugreg[6] = current->thread.debugreg6; + + dump->u_debugreg[7] = dr7; +} + /* * Release the user breakpoints used by ptrace */ -- cgit v1.2.3 From 59d8eb53ea9947db7cad8ebc31b0fb54f23a9851 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 10 Nov 2009 11:03:12 +0100 Subject: hw-breakpoints: Wrap in the KVM breakpoint active state check Wrap in the cpu dr7 check that tells if we have active breakpoints that need to be restored in the cpu. This wrapper makes the check more self-explainable and also reusable for any further other uses. Reported-by: Jan Kiszka Signed-off-by: Frederic Weisbecker Cc: Avi Kivity Cc: "K. Prasad" --- arch/x86/include/asm/debugreg.h | 5 +++++ arch/x86/kvm/x86.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index f1b673f08239..0f6e92af4227 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -89,6 +89,11 @@ static inline void hw_breakpoint_disable(void) set_debugreg(0UL, 3); } +static inline int hw_breakpoint_active(void) +{ + return __get_cpu_var(dr7) & DR_GLOBAL_ENABLE_MASK; +} + extern void aout_dump_debugregs(struct user *dump); #ifdef CONFIG_KVM diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 22dee7aa7813..3817220cc86b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3651,7 +3651,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) * care about the messed up debug address registers. But if * we have some of them active, restore the old state. */ - if (__get_cpu_var(dr7) & DR_GLOBAL_ENABLE_MASK) + if (hw_breakpoint_active()) hw_breakpoint_restore(); set_bit(KVM_REQ_KICK, &vcpu->requests); -- cgit v1.2.3 From d1c84f79a6ba992dc01e312c44a21496303874d6 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Tue, 10 Nov 2009 12:07:23 +0100 Subject: x86: ucode-amd: Load ucode-patches once and not separately of each CPU This also implies that corresponding log messages, e.g. platform microcode: firmware: requesting amd-ucode/microcode_amd.bin show up only once on module load and not when ucode is updated for each CPU. Signed-off-by: Andreas Herrmann Cc: dimm LKML-Reference: <20091110110723.GH30802@alberich.amd.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/microcode.h | 2 ++ arch/x86/kernel/microcode_amd.c | 24 +++++++++++++++++------- arch/x86/kernel/microcode_core.c | 6 ++++++ 3 files changed, 25 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index ef51b501e22a..c24ca9a56458 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -12,6 +12,8 @@ struct device; enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND }; struct microcode_ops { + void (*init)(struct device *device); + void (*fini)(void); enum ucode_state (*request_microcode_user) (int cpu, const void __user *buf, size_t size); diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index c043534fd986..75538f647193 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -33,6 +33,8 @@ MODULE_LICENSE("GPL v2"); #define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 #define UCODE_UCODE_TYPE 0x00000001 +const struct firmware *firmware; + struct equiv_cpu_entry { u32 installed_cpu; u32 fixed_errata_mask; @@ -301,14 +303,10 @@ generic_load_microcode(int cpu, const u8 *data, size_t size) static enum ucode_state request_microcode_fw(int cpu, struct device *device) { - const char *fw_name = "amd-ucode/microcode_amd.bin"; - const struct firmware *firmware; enum ucode_state ret; - if (request_firmware(&firmware, fw_name, device)) { - printk(KERN_ERR "microcode: failed to load file %s\n", fw_name); + if (firmware == NULL) return UCODE_NFOUND; - } if (*(u32 *)firmware->data != UCODE_MAGIC) { printk(KERN_ERR "microcode: invalid UCODE_MAGIC (0x%08x)\n", @@ -318,8 +316,6 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device) ret = generic_load_microcode(cpu, firmware->data, firmware->size); - release_firmware(firmware); - return ret; } @@ -339,7 +335,21 @@ static void microcode_fini_cpu_amd(int cpu) uci->mc = NULL; } +void init_microcode_amd(struct device *device) +{ + const char *fw_name = "amd-ucode/microcode_amd.bin"; + if (request_firmware(&firmware, fw_name, device)) + printk(KERN_ERR "microcode: failed to load file %s\n", fw_name); +} + +void fini_microcode_amd(void) +{ + release_firmware(firmware); +} + static struct microcode_ops microcode_amd_ops = { + .init = init_microcode_amd, + .fini = fini_microcode_amd, .request_microcode_user = request_microcode_user, .request_microcode_fw = request_microcode_fw, .collect_cpu_info = collect_cpu_info_amd, diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 378e9a8f1bf8..d2a816021d9f 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -520,6 +520,9 @@ static int __init microcode_init(void) return PTR_ERR(microcode_pdev); } + if (microcode_ops->init) + microcode_ops->init(µcode_pdev->dev); + get_online_cpus(); mutex_lock(µcode_mutex); @@ -563,6 +566,9 @@ static void __exit microcode_exit(void) platform_device_unregister(microcode_pdev); + if (microcode_ops->fini) + microcode_ops->fini(); + microcode_ops = NULL; pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); -- cgit v1.2.3 From 14c569425a0ae12cbeed72fdb8ebe78c48455dfd Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Tue, 10 Nov 2009 12:08:25 +0100 Subject: x86: ucode-amd: Don't warn when no ucode is available for a CPU revision There is no point in warning when there is no ucode available for a specific CPU revision. Currently the container-file, which provides the AMD ucode patches for OS load, contains only a few ucode patches. It's already clearly indicated by the printed patch_level whenever new ucode was available and an update happened. So the warning message is of no help but rather annoying on systems with many CPUs. Signed-off-by: Andreas Herrmann Cc: dimm LKML-Reference: <20091110110825.GI30802@alberich.amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 75538f647193..9f13324054ce 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -105,11 +105,8 @@ static int get_matching_microcode(int cpu, void *mc, int rev) i++; } - if (!equiv_cpu_id) { - printk(KERN_WARNING "microcode: CPU%d: cpu revision " - "not listed in equivalent cpu table\n", cpu); + if (!equiv_cpu_id) return 0; - } if (mc_header->processor_rev_id != equiv_cpu_id) return 0; -- cgit v1.2.3 From 1a74357066369be91e6f4f431621a00b052df964 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Tue, 10 Nov 2009 12:09:20 +0100 Subject: x86: ucode-amd: Convert printk(KERN_*...) to pr_*(...) Signed-off-by: Andreas Herrmann Cc: dimm LKML-Reference: <20091110110920.GJ30802@alberich.amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 9f13324054ce..26e33bd8485b 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -78,12 +78,12 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) memset(csig, 0, sizeof(*csig)); if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { - printk(KERN_WARNING "microcode: CPU%d: AMD CPU family 0x%x not " - "supported\n", cpu, c->x86); + pr_warning("microcode: CPU%d: AMD CPU family 0x%x not " + "supported\n", cpu, c->x86); return -1; } rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); - printk(KERN_INFO "microcode: CPU%d: patch_level=0x%x\n", cpu, csig->rev); + pr_info("microcode: CPU%d: patch_level=0x%x\n", cpu, csig->rev); return 0; } @@ -113,7 +113,7 @@ static int get_matching_microcode(int cpu, void *mc, int rev) /* ucode might be chipset specific -- currently we don't support this */ if (mc_header->nb_dev_id || mc_header->sb_dev_id) { - printk(KERN_ERR "microcode: CPU%d: loading of chipset " + pr_err(KERN_ERR "microcode: CPU%d: loading of chipset " "specific code not yet supported\n", cpu); return 0; } @@ -143,14 +143,12 @@ static int apply_microcode_amd(int cpu) /* check current patch id and patch's id for match */ if (rev != mc_amd->hdr.patch_id) { - printk(KERN_ERR "microcode: CPU%d: update failed " + pr_err("microcode: CPU%d: update failed " "(for patch_level=0x%x)\n", cpu, mc_amd->hdr.patch_id); return -1; } - printk(KERN_INFO "microcode: CPU%d: updated (new patch_level=0x%x)\n", - cpu, rev); - + pr_info("microcode: CPU%d: updated (new patch_level=0x%x)\n", cpu, rev); uci->cpu_sig.rev = rev; return 0; @@ -173,7 +171,7 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) return NULL; if (section_hdr[0] != UCODE_UCODE_TYPE) { - printk(KERN_ERR "microcode: error: invalid type field in " + pr_err("microcode: error: invalid type field in " "container file section header\n"); return NULL; } @@ -181,7 +179,7 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); if (total_size > size || total_size > UCODE_MAX_SIZE) { - printk(KERN_ERR "microcode: error: size mismatch\n"); + pr_err("microcode: error: size mismatch\n"); return NULL; } @@ -210,15 +208,14 @@ static int install_equiv_cpu_table(const u8 *buf) size = buf_pos[2]; if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { - printk(KERN_ERR "microcode: error: invalid type field in " + pr_err("microcode: error: invalid type field in " "container file section header\n"); return 0; } equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); if (!equiv_cpu_table) { - printk(KERN_ERR "microcode: failed to allocate " - "equivalent CPU table\n"); + pr_err("microcode: failed to allocate equivalent CPU table\n"); return 0; } @@ -251,8 +248,7 @@ generic_load_microcode(int cpu, const u8 *data, size_t size) offset = install_equiv_cpu_table(ucode_ptr); if (!offset) { - printk(KERN_ERR "microcode: failed to create " - "equivalent cpu table\n"); + pr_err("microcode: failed to create equivalent cpu table\n"); return UCODE_ERROR; } @@ -306,7 +302,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device) return UCODE_NFOUND; if (*(u32 *)firmware->data != UCODE_MAGIC) { - printk(KERN_ERR "microcode: invalid UCODE_MAGIC (0x%08x)\n", + pr_err("microcode: invalid UCODE_MAGIC (0x%08x)\n", *(u32 *)firmware->data); return UCODE_ERROR; } @@ -319,8 +315,8 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device) static enum ucode_state request_microcode_user(int cpu, const void __user *buf, size_t size) { - printk(KERN_INFO "microcode: AMD microcode update via " - "/dev/cpu/microcode not supported\n"); + pr_info("microcode: AMD microcode update via " + "/dev/cpu/microcode not supported\n"); return UCODE_ERROR; } @@ -336,7 +332,7 @@ void init_microcode_amd(struct device *device) { const char *fw_name = "amd-ucode/microcode_amd.bin"; if (request_firmware(&firmware, fw_name, device)) - printk(KERN_ERR "microcode: failed to load file %s\n", fw_name); + pr_err("microcode: failed to load file %s\n", fw_name); } void fini_microcode_amd(void) -- cgit v1.2.3 From d07c1be0693e0902d743160b8b638585b808f8ac Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:12 +0900 Subject: x86: Add iommu_init to x86_init_ops We call the detections functions of all the IOMMUs then all their initialization functions. The latter is pointless since we don't detect multiple different IOMMUs. What we need to do is calling the initialization function of the detected IOMMU. This adds iommu_init hook to x86_init_ops so if an IOMMU detection function can set its initialization function to the hook. Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com LKML-Reference: <1257849980-22640-2-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/x86_init.h | 9 +++++++++ arch/x86/kernel/pci-dma.c | 2 ++ arch/x86/kernel/x86_init.c | 5 +++++ 3 files changed, 16 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 66008ed80b7a..d8e71459f025 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -90,6 +90,14 @@ struct x86_init_timers { void (*timer_init)(void); }; +/** + * struct x86_init_iommu - platform specific iommu setup + * @iommu_init: platform specific iommu setup + */ +struct x86_init_iommu { + int (*iommu_init)(void); +}; + /** * struct x86_init_ops - functions for platform specific setup * @@ -101,6 +109,7 @@ struct x86_init_ops { struct x86_init_oem oem; struct x86_init_paging paging; struct x86_init_timers timers; + struct x86_init_iommu iommu; }; /** diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 839d49a669bc..a13478da533c 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -292,6 +292,8 @@ static int __init pci_iommu_init(void) dma_debug_add_bus(&pci_bus_type); #endif + x86_init.iommu.iommu_init(); + calgary_iommu_init(); intel_iommu_init(); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index bc9b230ef402..c46984d122dc 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -19,6 +19,7 @@ void __cpuinit x86_init_noop(void) { } void __init x86_init_uint_noop(unsigned int unused) { } void __init x86_init_pgd_noop(pgd_t *unused) { } +int __init iommu_init_noop(void) { return 0; } /* * The platform setup functions are preset with the default functions @@ -63,6 +64,10 @@ struct x86_init_ops x86_init __initdata = { .tsc_pre_init = x86_init_noop, .timer_init = hpet_time_init, }, + + .iommu = { + .iommu_init = iommu_init_noop, + }, }; struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { -- cgit v1.2.3 From d7b9f7be216b04ff9d108f856bc03d96e7b3439c Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:13 +0900 Subject: x86: Calgary: Convert detect_calgary() to use iommu_init hook This changes detect_calgary() to set init_calgary() to iommu_init hook if detect_calgary() finds the Calgary IOMMU. We can kill the code to check if we found the IOMMU in init_calgary() since detect_calgary() sets init_calgary() only when it found the IOMMU. Signed-off-by: FUJITA Tomonori Acked-by: Muli Ben-Yehuda Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com LKML-Reference: <1257849980-22640-3-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/calgary.h | 2 -- arch/x86/kernel/pci-calgary_64.c | 11 +++++------ arch/x86/kernel/pci-dma.c | 2 -- 3 files changed, 5 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h index b03bedb62aa7..0918654305af 100644 --- a/arch/x86/include/asm/calgary.h +++ b/arch/x86/include/asm/calgary.h @@ -62,10 +62,8 @@ struct cal_chipset_ops { extern int use_calgary; #ifdef CONFIG_CALGARY_IOMMU -extern int calgary_iommu_init(void); extern void detect_calgary(void); #else -static inline int calgary_iommu_init(void) { return 1; } static inline void detect_calgary(void) { return; } #endif diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 971a3bec47a8..47bd419ea4d2 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -46,6 +46,7 @@ #include #include #include +#include #ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT int use_calgary __read_mostly = 1; @@ -1344,6 +1345,8 @@ static void __init get_tce_space_from_tar(void) return; } +int __init calgary_iommu_init(void); + void __init detect_calgary(void) { int bus; @@ -1445,6 +1448,8 @@ void __init detect_calgary(void) /* swiotlb for devices that aren't behind the Calgary. */ if (max_pfn > MAX_DMA32_PFN) swiotlb = 1; + + x86_init.iommu.iommu_init = calgary_iommu_init; } return; @@ -1461,12 +1466,6 @@ int __init calgary_iommu_init(void) { int ret; - if (no_iommu || (swiotlb && !calgary_detected)) - return -ENODEV; - - if (!calgary_detected) - return -ENODEV; - /* ok, we're trying to use Calgary - let's roll */ printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n"); diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index a13478da533c..0224da88256a 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -294,8 +294,6 @@ static int __init pci_iommu_init(void) x86_init.iommu.iommu_init(); - calgary_iommu_init(); - intel_iommu_init(); amd_iommu_init(); -- cgit v1.2.3 From de957628ce7c84764ff41331111036b3ae5bad0f Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:14 +0900 Subject: x86: GART: Convert gart_iommu_hole_init() to use iommu_init hook This changes gart_iommu_hole_init() to set gart_iommu_init() to iommu_init hook if gart_iommu_hole_init() finds the GART IOMMU. We can kill the code to check if we found the IOMMU in gart_iommu_init() since gart_iommu_hole_init() sets gart_iommu_init() only when it found the IOMMU. Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com LKML-Reference: <1257849980-22640-4-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/gart.h | 5 +---- arch/x86/kernel/aperture_64.c | 2 ++ arch/x86/kernel/pci-dma.c | 2 -- arch/x86/kernel/pci-gart_64.c | 15 +++++---------- 4 files changed, 8 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h index 4fdd5b3f87b1..4ac5b0f33fc1 100644 --- a/arch/x86/include/asm/gart.h +++ b/arch/x86/include/asm/gart.h @@ -35,7 +35,7 @@ extern int gart_iommu_aperture_allowed; extern int gart_iommu_aperture_disabled; extern void early_gart_iommu_check(void); -extern void gart_iommu_init(void); +extern int gart_iommu_init(void); extern void __init gart_parse_options(char *); extern void gart_iommu_hole_init(void); @@ -47,9 +47,6 @@ extern void gart_iommu_hole_init(void); static inline void early_gart_iommu_check(void) { } -static inline void gart_iommu_init(void) -{ -} static inline void gart_parse_options(char *options) { } diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 128111d8ffe0..03933cf0b63c 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -28,6 +28,7 @@ #include #include #include +#include int gart_iommu_aperture; int gart_iommu_aperture_disabled __initdata; @@ -400,6 +401,7 @@ void __init gart_iommu_hole_init(void) iommu_detected = 1; gart_iommu_aperture = 1; + x86_init.iommu.iommu_init = gart_iommu_init; aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7; aper_size = (32 * 1024 * 1024) << aper_order; diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 0224da88256a..ecde8543537f 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -298,8 +298,6 @@ static int __init pci_iommu_init(void) amd_iommu_init(); - gart_iommu_init(); - no_iommu_init(); return 0; } diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index eb46ab3f52b2..0410bd30060d 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -709,7 +709,7 @@ static void gart_iommu_shutdown(void) } } -void __init gart_iommu_init(void) +int __init gart_iommu_init(void) { struct agp_kern_info info; unsigned long iommu_start; @@ -719,7 +719,7 @@ void __init gart_iommu_init(void) long i; if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) - return; + return 0; #ifndef CONFIG_AGP_AMD64 no_agp = 1; @@ -731,13 +731,6 @@ void __init gart_iommu_init(void) (agp_copy_info(agp_bridge, &info) < 0); #endif - if (swiotlb) - return; - - /* Did we detect a different HW IOMMU? */ - if (iommu_detected && !gart_iommu_aperture) - return; - if (no_iommu || (!force_iommu && max_pfn <= MAX_DMA32_PFN) || !gart_iommu_aperture || @@ -747,7 +740,7 @@ void __init gart_iommu_init(void) "but GART IOMMU not available.\n"); printk(KERN_WARNING "falling back to iommu=soft.\n"); } - return; + return 0; } /* need to map that range */ @@ -840,6 +833,8 @@ void __init gart_iommu_init(void) flush_gart(); dma_ops = &gart_dma_ops; x86_platform.iommu_shutdown = gart_iommu_shutdown; + + return 0; } void __init gart_parse_options(char *p) -- cgit v1.2.3 From ea1b0d3945c7374849235b6ecaea1191ee1d9d50 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:15 +0900 Subject: x86: amd_iommu: Convert amd_iommu_detect() to use iommu_init hook This changes amd_iommu_detect() to set amd_iommu_init to iommu_init hook if amd_iommu_detect() finds the AMD IOMMU. We can kill the code to check if we found the IOMMU in amd_iommu_init() since amd_iommu_detect() sets amd_iommu_init() only when it found the IOMMU. Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com LKML-Reference: <1257849980-22640-5-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/amd_iommu.h | 2 -- arch/x86/kernel/amd_iommu_init.c | 17 +++-------------- arch/x86/kernel/pci-dma.c | 2 -- 3 files changed, 3 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index 3604669f7b15..b8ef2ee93643 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -23,7 +23,6 @@ #include #ifdef CONFIG_AMD_IOMMU -extern int amd_iommu_init(void); extern int amd_iommu_init_dma_ops(void); extern int amd_iommu_init_passthrough(void); extern void amd_iommu_detect(void); @@ -32,7 +31,6 @@ extern void amd_iommu_flush_all_domains(void); extern void amd_iommu_flush_all_devices(void); extern void amd_iommu_apply_erratum_63(u16 devid); #else -static inline int amd_iommu_init(void) { return -ENODEV; } static inline void amd_iommu_detect(void) { } #endif diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 6acd43e9afd7..c41aabddaa2a 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -29,6 +29,7 @@ #include #include #include +#include /* * definitions for the ACPI scanning code @@ -1176,19 +1177,10 @@ static struct sys_device device_amd_iommu = { * functions. Finally it prints some information about AMD IOMMUs and * the driver state and enables the hardware. */ -int __init amd_iommu_init(void) +static int __init amd_iommu_init(void) { int i, ret = 0; - - if (no_iommu) { - printk(KERN_INFO "AMD-Vi disabled by kernel command line\n"); - return 0; - } - - if (!amd_iommu_detected) - return -ENODEV; - /* * First parse ACPI tables to find the largest Bus/Dev/Func * we need to handle. Upon this information the shared data @@ -1344,10 +1336,7 @@ void __init amd_iommu_detect(void) if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { iommu_detected = 1; amd_iommu_detected = 1; -#ifdef CONFIG_GART_IOMMU - gart_iommu_aperture_disabled = 1; - gart_iommu_aperture = 0; -#endif + x86_init.iommu.iommu_init = amd_iommu_init; } } diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index ecde8543537f..5ca44a9301a0 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -296,8 +296,6 @@ static int __init pci_iommu_init(void) intel_iommu_init(); - amd_iommu_init(); - no_iommu_init(); return 0; } -- cgit v1.2.3 From 9d5ce73a64be2be8112147a3e0b551ad9cd1247b Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:16 +0900 Subject: x86: intel-iommu: Convert detect_intel_iommu to use iommu_init hook This changes detect_intel_iommu() to set intel_iommu_init() to iommu_init hook if detect_intel_iommu() finds the IOMMU. Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com LKML-Reference: <1257849980-22640-6-git-send-email-fujita.tomonori@lab.ntt.co.jp> [ -v2: build fix for the !CONFIG_DMAR case ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 2 -- drivers/pci/dmar.c | 4 ++++ include/linux/dmar.h | 15 ++++----------- 3 files changed, 8 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 5ca44a9301a0..bed05e2e5890 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -294,8 +294,6 @@ static int __init pci_iommu_init(void) x86_init.iommu.iommu_init(); - intel_iommu_init(); - no_iommu_init(); return 0; } diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 22b02c6df854..bce9cd7c755a 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -616,6 +616,10 @@ void __init detect_intel_iommu(void) if (ret && !no_iommu && !iommu_detected && !swiotlb && !dmar_disabled) iommu_detected = 1; +#endif +#ifdef CONFIG_X86 + if (ret) + x86_init.iommu.iommu_init = intel_iommu_init; #endif } early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size); diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 4a2b162c256a..5de4c9e5856d 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -208,16 +208,9 @@ struct dmar_atsr_unit { u8 include_all:1; /* include all ports */ }; -/* Intel DMAR initialization functions */ extern int intel_iommu_init(void); -#else -static inline int intel_iommu_init(void) -{ -#ifdef CONFIG_INTR_REMAP - return dmar_dev_scope_init(); -#else - return -ENODEV; -#endif -} -#endif /* !CONFIG_DMAR */ +#else /* !CONFIG_DMAR: */ +static inline int intel_iommu_init(void) { return -ENODEV; } +#endif /* CONFIG_DMAR */ + #endif /* __DMAR_H__ */ -- cgit v1.2.3 From ad32e8cb86e7894aac51c8963eaa9f36bb8a4e14 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:19 +0900 Subject: swiotlb: Defer swiotlb init printing, export swiotlb_print_info() This enables us to avoid printing swiotlb memory info when we initialize swiotlb. After swiotlb initialization, we could find that we don't need swiotlb. This patch removes the code to print swiotlb memory info in swiotlb_init() and exports the function to do that. Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com Cc: tony.luck@intel.com Cc: benh@kernel.crashing.org LKML-Reference: <1257849980-22640-9-git-send-email-fujita.tomonori@lab.ntt.co.jp> [ -v2: merge up conflict ] Signed-off-by: Ingo Molnar --- arch/ia64/kernel/pci-swiotlb.c | 4 ++-- arch/powerpc/kernel/setup_32.c | 2 +- arch/powerpc/kernel/setup_64.c | 2 +- arch/x86/kernel/pci-swiotlb.c | 3 +-- include/linux/swiotlb.h | 4 ++-- lib/swiotlb.c | 15 ++++++++------- 6 files changed, 15 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c index 285aae8431c6..53292abf846c 100644 --- a/arch/ia64/kernel/pci-swiotlb.c +++ b/arch/ia64/kernel/pci-swiotlb.c @@ -41,7 +41,7 @@ struct dma_map_ops swiotlb_dma_ops = { void __init swiotlb_dma_init(void) { dma_ops = &swiotlb_dma_ops; - swiotlb_init(); + swiotlb_init(1); } void __init pci_swiotlb_init(void) @@ -51,7 +51,7 @@ void __init pci_swiotlb_init(void) swiotlb = 1; printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n"); machvec_init("dig"); - swiotlb_init(); + swiotlb_init(1); dma_ops = &swiotlb_dma_ops; #else panic("Unable to find Intel IOMMU"); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 53bcf3d792db..b152de3e64d4 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -345,7 +345,7 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_SWIOTLB if (ppc_swiotlb_enable) - swiotlb_init(); + swiotlb_init(1); #endif paging_init(); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 04f638d82fb3..df2c9e932b37 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -550,7 +550,7 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_SWIOTLB if (ppc_swiotlb_enable) - swiotlb_init(); + swiotlb_init(1); #endif paging_init(); diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index aaa6b7839f1e..ea20ef7ca523 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -52,8 +52,7 @@ void __init pci_swiotlb_init(void) if (swiotlb_force) swiotlb = 1; if (swiotlb) { - printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n"); - swiotlb_init(); + swiotlb_init(0); dma_ops = &swiotlb_dma_ops; } } diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 59bafa690290..eb9bdb4d4854 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -20,8 +20,7 @@ struct scatterlist; */ #define IO_TLB_SHIFT 11 -extern void -swiotlb_init(void); +extern void swiotlb_init(int verbose); extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size, @@ -94,4 +93,5 @@ extern void __init swiotlb_free(void); static inline void swiotlb_free(void) { } #endif +extern void swiotlb_print_info(void); #endif /* __LINUX_SWIOTLB_H */ diff --git a/lib/swiotlb.c b/lib/swiotlb.c index eee512b63f17..0c12d7cce300 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -123,8 +123,9 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, return phys_to_dma(hwdev, virt_to_phys(address)); } -static void swiotlb_print_info(unsigned long bytes) +void swiotlb_print_info(void) { + unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; phys_addr_t pstart, pend; pstart = virt_to_phys(io_tlb_start); @@ -142,7 +143,7 @@ static void swiotlb_print_info(unsigned long bytes) * structures for the software IO TLB used to implement the DMA API. */ void __init -swiotlb_init_with_default_size(size_t default_size) +swiotlb_init_with_default_size(size_t default_size, int verbose) { unsigned long i, bytes; @@ -178,14 +179,14 @@ swiotlb_init_with_default_size(size_t default_size) io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); if (!io_tlb_overflow_buffer) panic("Cannot allocate SWIOTLB overflow buffer!\n"); - - swiotlb_print_info(bytes); + if (verbose) + swiotlb_print_info(); } void __init -swiotlb_init(void) +swiotlb_init(int verbose) { - swiotlb_init_with_default_size(64 * (1<<20)); /* default to 64MB */ + swiotlb_init_with_default_size(64 * (1<<20), verbose); /* default to 64MB */ } /* @@ -262,7 +263,7 @@ swiotlb_late_init_with_default_size(size_t default_size) if (!io_tlb_overflow_buffer) goto cleanup4; - swiotlb_print_info(bytes); + swiotlb_print_info(); late_alloc = 1; -- cgit v1.2.3 From 75f1cdf1dda92cae037ec848ae63690d91913eac Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:20 +0900 Subject: x86: Handle HW IOMMU initialization failure gracefully If HW IOMMU initialization fails (Intel VT-d often does this, typically due to BIOS bugs), we fall back to nommu. It doesn't work for the majority since nowadays we have more than 4GB memory so we must use swiotlb instead of nommu. The problem is that it's too late to initialize swiotlb when HW IOMMU initialization fails. We need to allocate swiotlb memory earlier from bootmem allocator. Chris explained the issue in detail: http://marc.info/?l=linux-kernel&m=125657444317079&w=2 The current x86 IOMMU initialization sequence is too complicated and handling the above issue makes it more hacky. This patch changes x86 IOMMU initialization sequence to handle the above issue cleanly. The new x86 IOMMU initialization sequence are: 1. we initialize the swiotlb (and setting swiotlb to 1) in the case of (max_pfn > MAX_DMA32_PFN && !no_iommu). dma_ops is set to swiotlb_dma_ops or nommu_dma_ops. if swiotlb usage is forced by the boot option, we finish here. 2. we call the detection functions of all the IOMMUs 3. the detection function sets x86_init.iommu.iommu_init to the IOMMU initialization function (so we can avoid calling the initialization functions of all the IOMMUs needlessly). 4. if the IOMMU initialization function doesn't need to swiotlb then sets swiotlb to zero (e.g. the initialization is sucessful). 5. if we find that swiotlb is set to zero, we free swiotlb resource. Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com LKML-Reference: <1257849980-22640-10-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/iommu.h | 1 - arch/x86/kernel/amd_iommu.c | 2 +- arch/x86/kernel/amd_iommu_init.c | 2 +- arch/x86/kernel/aperture_64.c | 2 +- arch/x86/kernel/pci-calgary_64.c | 10 +--------- arch/x86/kernel/pci-dma.c | 21 +++++++++++++-------- arch/x86/kernel/pci-gart_64.c | 1 + arch/x86/kernel/pci-nommu.c | 9 --------- arch/x86/kernel/pci-swiotlb.c | 7 +++---- drivers/pci/dmar.c | 3 +-- drivers/pci/intel-iommu.c | 6 ++++-- lib/swiotlb.c | 4 +++- 12 files changed, 29 insertions(+), 39 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index 878b30715766..df42a712361f 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -2,7 +2,6 @@ #define _ASM_X86_IOMMU_H static inline void iommu_shutdown_noop(void) {} -extern void no_iommu_init(void); extern struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0285521e0a99..66237fde758f 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -2110,8 +2110,8 @@ int __init amd_iommu_init_dma_ops(void) prealloc_protection_domains(); iommu_detected = 1; - force_iommu = 1; bad_dma_address = 0; + swiotlb = 0; #ifdef CONFIG_GART_IOMMU gart_iommu_aperture_disabled = 1; gart_iommu_aperture = 0; diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c41aabddaa2a..0d4581e602a4 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1330,7 +1330,7 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table) void __init amd_iommu_detect(void) { - if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture)) + if (no_iommu || (iommu_detected && !gart_iommu_aperture)) return; if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 03933cf0b63c..e0dfb6856aa2 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -458,7 +458,7 @@ out: if (aper_alloc) { /* Got the aperture from the AGP bridge */ - } else if (swiotlb && !valid_agp) { + } else if (!valid_agp) { /* Do nothing */ } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) || force_iommu || diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 47bd419ea4d2..833f491440b9 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -1360,7 +1360,7 @@ void __init detect_calgary(void) * if the user specified iommu=off or iommu=soft or we found * another HW IOMMU already, bail out. */ - if (swiotlb || no_iommu || iommu_detected) + if (no_iommu || iommu_detected) return; if (!use_calgary) @@ -1445,10 +1445,6 @@ void __init detect_calgary(void) printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n", specified_table_size); - /* swiotlb for devices that aren't behind the Calgary. */ - if (max_pfn > MAX_DMA32_PFN) - swiotlb = 1; - x86_init.iommu.iommu_init = calgary_iommu_init; } return; @@ -1476,11 +1472,7 @@ int __init calgary_iommu_init(void) return ret; } - force_iommu = 1; bad_dma_address = 0x0; - /* dma_ops is set to swiotlb or nommu */ - if (!dma_ops) - dma_ops = &nommu_dma_ops; return 0; } diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index bed05e2e5890..a234e63c2656 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -124,24 +124,24 @@ static void __init dma32_free_bootmem(void) void __init pci_iommu_alloc(void) { + /* swiotlb is forced by the boot option */ + int use_swiotlb = swiotlb; #ifdef CONFIG_X86_64 /* free the range so iommu could get some range less than 4G */ dma32_free_bootmem(); #endif + pci_swiotlb_init(); + if (use_swiotlb) + return; - /* - * The order of these functions is important for - * fall-back/fail-over reasons - */ gart_iommu_hole_init(); detect_calgary(); detect_intel_iommu(); + /* needs to be called after gart_iommu_hole_init */ amd_iommu_detect(); - - pci_swiotlb_init(); } void *dma_generic_alloc_coherent(struct device *dev, size_t size, @@ -291,10 +291,15 @@ static int __init pci_iommu_init(void) #ifdef CONFIG_PCI dma_debug_add_bus(&pci_bus_type); #endif - x86_init.iommu.iommu_init(); - no_iommu_init(); + if (swiotlb) { + printk(KERN_INFO "PCI-DMA: " + "Using software bounce buffering for IO (SWIOTLB)\n"); + swiotlb_print_info(); + } else + swiotlb_free(); + return 0; } /* Must execute after PCI subsystem */ diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 0410bd30060d..919182e15d1e 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -833,6 +833,7 @@ int __init gart_iommu_init(void) flush_gart(); dma_ops = &gart_dma_ops; x86_platform.iommu_shutdown = gart_iommu_shutdown; + swiotlb = 0; return 0; } diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index a3933d4330cd..875e3822ae61 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -103,12 +103,3 @@ struct dma_map_ops nommu_dma_ops = { .sync_sg_for_device = nommu_sync_sg_for_device, .is_phys = 1, }; - -void __init no_iommu_init(void) -{ - if (dma_ops) - return; - - force_iommu = 0; /* no HW IOMMU */ - dma_ops = &nommu_dma_ops; -} diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index ea20ef7ca523..17ce4221bd03 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -46,13 +46,12 @@ void __init pci_swiotlb_init(void) { /* don't initialize swiotlb if iommu=off (no_iommu=1) */ #ifdef CONFIG_X86_64 - if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN)) + if (!no_iommu && max_pfn > MAX_DMA32_PFN) swiotlb = 1; #endif - if (swiotlb_force) - swiotlb = 1; if (swiotlb) { swiotlb_init(0); dma_ops = &swiotlb_dma_ops; - } + } else + dma_ops = &nommu_dma_ops; } diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index bce9cd7c755a..437399667e5a 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -613,8 +613,7 @@ void __init detect_intel_iommu(void) "x2apic and Intr-remapping.\n"); #endif #ifdef CONFIG_DMAR - if (ret && !no_iommu && !iommu_detected && !swiotlb && - !dmar_disabled) + if (ret && !no_iommu && !iommu_detected && !dmar_disabled) iommu_detected = 1; #endif #ifdef CONFIG_X86 diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index b1e97e682500..43d755a2e14a 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -3231,7 +3231,7 @@ int __init intel_iommu_init(void) * Check the need for DMA-remapping initialization now. * Above initialization will also be used by Interrupt-remapping. */ - if (no_iommu || swiotlb || dmar_disabled) + if (no_iommu || dmar_disabled) return -ENODEV; iommu_init_mempool(); @@ -3252,7 +3252,9 @@ int __init intel_iommu_init(void) "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n"); init_timer(&unmap_timer); - force_iommu = 1; +#ifdef CONFIG_SWIOTLB + swiotlb = 0; +#endif dma_ops = &intel_dma_ops; init_iommu_sysfs(); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 0c12d7cce300..e6755a0574fb 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -109,8 +109,10 @@ setup_io_tlb_npages(char *str) } if (*str == ',') ++str; - if (!strcmp(str, "force")) + if (!strcmp(str, "force")) { swiotlb_force = 1; + swiotlb = 1; + } return 1; } __setup("swiotlb=", setup_io_tlb_npages); -- cgit v1.2.3 From 72d03802b8b5c841ab1da82bff0652628cbadf60 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 21:35:17 +0900 Subject: x86, 32-bit: Fix swiotlb boot crash Ingo Molnar reported this boot crash: [ 8.655620] pata_amd 0000:00:06.0: version 0.4.1 [ 8.660286] BUG: unable to handle kernel NULL pointer dereference at 00000034 [ 8.663572] IP: [] dma_supported+0x3b/0xa4 [ 8.663572] *pde = 00000000 Initialize dma_ops properly in the 32-bit case. Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index a234e63c2656..63eebee80e75 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -129,6 +129,8 @@ void __init pci_iommu_alloc(void) #ifdef CONFIG_X86_64 /* free the range so iommu could get some range less than 4G */ dma32_free_bootmem(); +#else + dma_ops = &nommu_dma_ops; #endif pci_swiotlb_init(); if (use_swiotlb) -- cgit v1.2.3 From b4941a9a606f0131559cc040b64e8437ac7b32c5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 10 Nov 2009 14:37:58 +0100 Subject: x86: Add iommu_init to x86_init_ops, fix build Most of the time x86_init.h is included in pci-dma.c - but not always, leading to this rare build failure: arch/x86/kernel/pci-dma.c:296: error: 'x86_init' undeclared (first use in this function) So include asm/x86_init.h explicitly. Cc: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com LKML-Reference: <1257849980-22640-2-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 63eebee80e75..f79870e89266 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -11,6 +11,7 @@ #include #include #include +#include static int forbid_dac __read_mostly; -- cgit v1.2.3 From 85160b92fbd35321104819283c91bfed2b553e3c Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 10 Nov 2009 13:49:24 -0500 Subject: x86: Add new Intel CPU cache size descriptors The latest rev of Intel doc AP-485 details new cache descriptors that we don't yet support. 12MB, 18MB and 24MB 24-way assoc L3 caches. Signed-off-by: Dave Jones LKML-Reference: <20091110184924.GA20337@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 804c40e2bc3e..14103924b627 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -102,6 +102,9 @@ static const struct _cache_table __cpuinitconst cache_table[] = { 0xe2, LVL_3, 2048 }, /* 16-way set assoc, 64 byte line size */ { 0xe3, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ { 0xe4, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ + { 0xea, LVL_3, 12288 }, /* 24-way set assoc, 64 byte line size */ + { 0xeb, LVL_3, 18432 }, /* 24-way set assoc, 64 byte line size */ + { 0xec, LVL_3, 24576 }, /* 24-way set assoc, 64 byte line size */ { 0x00, 0, 0} }; -- cgit v1.2.3 From e02e0e1a130b9ca37c5186d38ad4b3aaf58bb149 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 10 Nov 2009 15:01:20 -0500 Subject: x86: Fix typo in Intel CPU cache size descriptor I double-checked the datasheet. One of the existing descriptors has a typo: it should be 2MB not 2038 KB. Signed-off-by: Dave Jones Cc: # .3x.x: 85160b9: x86: Add new Intel CPU cache size descriptors Cc: # .3x.x LKML-Reference: <20091110200120.GA27090@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 14103924b627..8178d0352935 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -94,7 +94,7 @@ static const struct _cache_table __cpuinitconst cache_table[] = { 0xd1, LVL_3, 1024 }, /* 4-way set assoc, 64 byte line size */ { 0xd2, LVL_3, 2048 }, /* 4-way set assoc, 64 byte line size */ { 0xd6, LVL_3, 1024 }, /* 8-way set assoc, 64 byte line size */ - { 0xd7, LVL_3, 2038 }, /* 8-way set assoc, 64 byte line size */ + { 0xd7, LVL_3, 2048 }, /* 8-way set assoc, 64 byte line size */ { 0xd8, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */ { 0xdc, LVL_3, 2048 }, /* 12-way set assoc, 64 byte line size */ { 0xdd, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */ -- cgit v1.2.3 From 200a9ae2801bc725f2c41ab13f6e0fb1610d2fb6 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Tue, 10 Nov 2009 13:58:35 -0600 Subject: x86: Remove asm/apicnum.h arch/x86/include/asm/apicnum.h is not referenced anywhere anymore. Its definitions appear in apicdef.h. Remove it. Signed-off-by: Dimitri Sivanich Acked-by: Cyrill Gorcunov Acked-by: Mike Travis LKML-Reference: <20091110195835.GA4393@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apicnum.h | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 arch/x86/include/asm/apicnum.h (limited to 'arch/x86') diff --git a/arch/x86/include/asm/apicnum.h b/arch/x86/include/asm/apicnum.h deleted file mode 100644 index 82f613c607ce..000000000000 --- a/arch/x86/include/asm/apicnum.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _ASM_X86_APICNUM_H -#define _ASM_X86_APICNUM_H - -/* define MAX_IO_APICS */ -#ifdef CONFIG_X86_32 -# define MAX_IO_APICS 64 -#else -# define MAX_IO_APICS 128 -# define MAX_LOCAL_APIC 32768 -#endif - -#endif /* _ASM_X86_APICNUM_H */ -- cgit v1.2.3 From e84446de5cccd90de7d7ec46527d3b343b022a09 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 10 Nov 2009 15:46:52 -0800 Subject: x86 VSDO: Fix Kconfig help COMPAT_VDSO has 2 help text blocks, but kconfig only uses the last one found, so merge the 2 blocks. It would be real nice if kconfig would warn about this. Signed-off-by: Randy Dunlap LKML-Reference: <4AF9FB6C.70003@oracle.com> Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 72ace9515a07..618dedeb31fc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1602,7 +1602,7 @@ config COMPAT_VDSO depends on X86_32 || IA32_EMULATION ---help--- Map the 32-bit VDSO to the predictable old-style address too. - ---help--- + Say N here if you are running a sufficiently recent glibc version (2.3.3 or later), to remove the high-mapped VDSO mapping and to exclusively use the randomized VDSO. -- cgit v1.2.3 From ce6b5d768c79b9d5dd6345c033bae781d5ca9b8e Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 11 Nov 2009 15:51:25 +0800 Subject: x86: Mark the thermal init functions __init Mark the thermal init functions __init so that the init memory can be freed. Signed-off-by: Yong Wang LKML-Reference: <20091111075125.GA17900@ywang-moblin2.bj.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/therm_throt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 7f3cf36ed124..8a73d5c12a05 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -256,7 +256,7 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) ack_APIC_irq(); } -void mcheck_intel_therm_init(void) +void __init mcheck_intel_therm_init(void) { /* * This function is only called on boot CPU. Save the init thermal @@ -268,7 +268,7 @@ void mcheck_intel_therm_init(void) lvtthmr_init = apic_read(APIC_LVTTHMR); } -void intel_init_thermal(struct cpuinfo_x86 *c) +void __init intel_init_thermal(struct cpuinfo_x86 *c) { unsigned int cpu = smp_processor_id(); int tm2 = 0; -- cgit v1.2.3 From b18485e7acfe1a634615d1c628ef644c0d58d472 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 12 Nov 2009 00:03:28 +0900 Subject: swiotlb: Remove the swiotlb variable usage POWERPC doesn't expect it to be used. This fixes the linux-next build failure reported by Stephen Rothwell: lib/swiotlb.c: In function 'setup_io_tlb_npages': lib/swiotlb.c:114: error: 'swiotlb' undeclared (first use in this function) Reported-by: Stephen Rothwell Signed-off-by: FUJITA Tomonori Cc: peterz@infradead.org LKML-Reference: <20091112000258F.fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/swiotlb.h | 5 +++-- arch/x86/kernel/pci-dma.c | 5 +---- arch/x86/kernel/pci-swiotlb.c | 13 ++++++++++++- lib/swiotlb.c | 5 ++--- 4 files changed, 18 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h index b9e4e20174fb..940f13a213f8 100644 --- a/arch/x86/include/asm/swiotlb.h +++ b/arch/x86/include/asm/swiotlb.h @@ -9,11 +9,12 @@ extern int swiotlb_force; #ifdef CONFIG_SWIOTLB extern int swiotlb; -extern void pci_swiotlb_init(void); +extern int pci_swiotlb_init(void); #else #define swiotlb 0 -static inline void pci_swiotlb_init(void) +static inline int pci_swiotlb_init(void) { + return 0; } #endif diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index f79870e89266..0b11bf18f540 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -125,16 +125,13 @@ static void __init dma32_free_bootmem(void) void __init pci_iommu_alloc(void) { - /* swiotlb is forced by the boot option */ - int use_swiotlb = swiotlb; #ifdef CONFIG_X86_64 /* free the range so iommu could get some range less than 4G */ dma32_free_bootmem(); #else dma_ops = &nommu_dma_ops; #endif - pci_swiotlb_init(); - if (use_swiotlb) + if (pci_swiotlb_init()) return; gart_iommu_hole_init(); diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 17ce4221bd03..a6e5d0ffa3a7 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -42,16 +42,27 @@ static struct dma_map_ops swiotlb_dma_ops = { .dma_supported = NULL, }; -void __init pci_swiotlb_init(void) +/* + * pci_swiotlb_init - initialize swiotlb if necessary + * + * This returns non-zero if we are forced to use swiotlb (by the boot + * option). + */ +int __init pci_swiotlb_init(void) { /* don't initialize swiotlb if iommu=off (no_iommu=1) */ #ifdef CONFIG_X86_64 if (!no_iommu && max_pfn > MAX_DMA32_PFN) swiotlb = 1; #endif + if (swiotlb_force) + swiotlb = 1; + if (swiotlb) { swiotlb_init(0); dma_ops = &swiotlb_dma_ops; } else dma_ops = &nommu_dma_ops; + + return swiotlb_force; } diff --git a/lib/swiotlb.c b/lib/swiotlb.c index e6755a0574fb..795472d8ae24 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -109,10 +109,9 @@ setup_io_tlb_npages(char *str) } if (*str == ',') ++str; - if (!strcmp(str, "force")) { + if (!strcmp(str, "force")) swiotlb_force = 1; - swiotlb = 1; - } + return 1; } __setup("swiotlb=", setup_io_tlb_npages); -- cgit v1.2.3 From 9f15226e75583547aaf542c6be4bdac1060dd425 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 11 Nov 2009 20:03:29 +0100 Subject: x86, ucode-amd: Ensure ucode update on suspend/resume after CPU off/online cycle When switching a CPU offline/online and then doing suspend/resume, ucode is not updated on this CPU. This is due to the microcode_fini_cpu() call which frees uci->mc when setting the CPU offline: static void microcode_fini_cpu_amd(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; vfree(uci->mc); uci->mc = NULL; } When the CPU is set online uci->mc is still NULL because no ucode update is required. Finally this prevents ucode update when resuming after suspend: static enum ucode_state microcode_resume_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; if (!uci->mc) return UCODE_NFOUND; ... } Fix is to check whether uci->mc is valid before microcode_resume_cpu() is called. Signed-off-by: Andreas Herrmann Cc: dimm LKML-Reference: <20091111190329.GF18592@alberich.amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index d2a816021d9f..adf234061540 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -393,7 +393,7 @@ static enum ucode_state microcode_update_cpu(int cpu) struct ucode_cpu_info *uci = ucode_cpu_info + cpu; enum ucode_state ustate; - if (uci->valid) + if (uci->valid && uci->mc) ustate = microcode_resume_cpu(cpu); else ustate = microcode_init_cpu(cpu); -- cgit v1.2.3 From cffd377e5879ea58522224a785a083f201afd80e Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 12 Nov 2009 15:52:40 +0900 Subject: x86, mce: Fix __init annotations The intel_init_thermal() is called from resume path, so it cannot be marked as __init. OTOH mce_banks_init() is only called from __mcheck_cpu_cap_init() which is marked as __cpuinit, so it can be also marked as __cpuinit. Signed-off-by: Hidetoshi Seto Acked-by: Yong Wang LKML-Reference: <4AFBB0B8.2070501@jp.fujitsu.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 4 ++-- arch/x86/kernel/cpu/mcheck/therm_throt.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 0d4102031a4c..5f277cad2ed7 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1201,7 +1201,7 @@ int mce_notify_irq(void) } EXPORT_SYMBOL_GPL(mce_notify_irq); -static int mce_banks_init(void) +static int __cpuinit __mcheck_cpu_mce_banks_init(void) { int i; @@ -1242,7 +1242,7 @@ static int __cpuinit __mcheck_cpu_cap_init(void) WARN_ON(banks != 0 && b != banks); banks = b; if (!mce_banks) { - int err = mce_banks_init(); + int err = __mcheck_cpu_mce_banks_init(); if (err) return err; diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 8a73d5c12a05..4fef985fc221 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -268,7 +268,7 @@ void __init mcheck_intel_therm_init(void) lvtthmr_init = apic_read(APIC_LVTTHMR); } -void __init intel_init_thermal(struct cpuinfo_x86 *c) +void intel_init_thermal(struct cpuinfo_x86 *c) { unsigned int cpu = smp_processor_id(); int tm2 = 0; -- cgit v1.2.3 From db48cccc7c709ccfa7cb4ac702bc27c216bffee7 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Thu, 12 Nov 2009 11:25:34 +0900 Subject: perf_event, x86: Annotate init functions and data Annotate init functions and data with __init and __initconst. Signed-off-by: Hiroshi Shimamoto Cc: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: <4AFB721E.8070203@ct.jp.nec.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 2e20bca3cca1..bd8743024204 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -245,7 +245,7 @@ static u64 __read_mostly hw_cache_event_ids [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX]; -static const u64 nehalem_hw_cache_event_ids +static __initconst u64 nehalem_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -336,7 +336,7 @@ static const u64 nehalem_hw_cache_event_ids }, }; -static const u64 core2_hw_cache_event_ids +static __initconst u64 core2_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -427,7 +427,7 @@ static const u64 core2_hw_cache_event_ids }, }; -static const u64 atom_hw_cache_event_ids +static __initconst u64 atom_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -536,7 +536,7 @@ static u64 intel_pmu_raw_event(u64 hw_event) return hw_event & CORE_EVNTSEL_MASK; } -static const u64 amd_hw_cache_event_ids +static __initconst u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -1964,7 +1964,7 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = { .priority = 1 }; -static struct x86_pmu p6_pmu = { +static __initconst struct x86_pmu p6_pmu = { .name = "p6", .handle_irq = p6_pmu_handle_irq, .disable_all = p6_pmu_disable_all, @@ -1992,7 +1992,7 @@ static struct x86_pmu p6_pmu = { .get_event_idx = intel_get_event_idx, }; -static struct x86_pmu intel_pmu = { +static __initconst struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, .disable_all = intel_pmu_disable_all, @@ -2016,7 +2016,7 @@ static struct x86_pmu intel_pmu = { .get_event_idx = intel_get_event_idx, }; -static struct x86_pmu amd_pmu = { +static __initconst struct x86_pmu amd_pmu = { .name = "AMD", .handle_irq = amd_pmu_handle_irq, .disable_all = amd_pmu_disable_all, @@ -2037,7 +2037,7 @@ static struct x86_pmu amd_pmu = { .get_event_idx = gen_get_event_idx, }; -static int p6_pmu_init(void) +static __init int p6_pmu_init(void) { switch (boot_cpu_data.x86_model) { case 1: @@ -2071,7 +2071,7 @@ static int p6_pmu_init(void) return 0; } -static int intel_pmu_init(void) +static __init int intel_pmu_init(void) { union cpuid10_edx edx; union cpuid10_eax eax; @@ -2144,7 +2144,7 @@ static int intel_pmu_init(void) return 0; } -static int amd_pmu_init(void) +static __init int amd_pmu_init(void) { /* Performance-monitoring supported from K7 and later: */ if (boot_cpu_data.x86 < 6) -- cgit v1.2.3 From 24a065624dcdd91e8bfd0f14113feb91c7ed11ca Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 3 Apr 2009 05:33:18 -0700 Subject: sysctl x86: Remove dead binary sysctl support Now that sys_sysctl is a generic wrapper around /proc/sys .ctl_name and .strategy members of sysctl tables are dead code. Remove them. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Signed-off-by: Eric W. Biederman --- arch/x86/kernel/vsyscall_64.c | 2 +- arch/x86/vdso/vdso32-setup.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 8cb4974ff599..e02d92d12bcd 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -237,7 +237,7 @@ static ctl_table kernel_table2[] = { }; static ctl_table kernel_root_table2[] = { - { .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555, + { .procname = "kernel", .mode = 0555, .child = kernel_table2 }, {} }; diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 58bc00f68b12..02b442e92007 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -393,7 +393,6 @@ static ctl_table abi_table2[] = { static ctl_table abi_root_table2[] = { { - .ctl_name = CTL_ABI, .procname = "abi", .mode = 0555, .child = abi_table2 -- cgit v1.2.3 From 15cd8812ab2ce62a2f779e93a8398bdad752291a Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 12 Nov 2009 18:15:43 -0500 Subject: x86: Remove the CPU cache size printk's They aren't really useful, and they pollute the dmesg output a lot (especially on machines with many cores). Also the same information can be trivially found out from userspace. Reported-by: Mike Travis Signed-off-by: Dave Jones Acked-by: H. Peter Anvin Cc: Andi Kleen Cc: Heiko Carstens Cc: Roland Dreier Cc: Randy Dunlap Cc: Tejun Heo Cc: Greg Kroah-Hartman Cc: Yinghai Lu Cc: David Rientjes Cc: Steven Rostedt Cc: Rusty Russell Cc: Hidetoshi Seto Cc: Jack Steiner Cc: Frederic Weisbecker LKML-Reference: <20091112231542.GA7129@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 804c40e2bc3e..0df4c2b7107f 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -488,22 +488,6 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) #endif } - if (trace) - printk(KERN_INFO "CPU: Trace cache: %dK uops", trace); - else if (l1i) - printk(KERN_INFO "CPU: L1 I cache: %dK", l1i); - - if (l1d) - printk(KERN_CONT ", L1 D cache: %dK\n", l1d); - else - printk(KERN_CONT "\n"); - - if (l2) - printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); - - if (l3) - printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); - c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); return l2; -- cgit v1.2.3 From d9b263528e01bfbaf716b51f38606b3dfe5ac1e9 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Fri, 13 Nov 2009 14:57:00 -0500 Subject: x86, setup: Store the boot cursor state Add a field to store the boot cursor state and implement this for VGA on x86. This can then be used to set the default policy for the boot console. Signed-off-by: Matthew Garrett LKML-Reference: <1258142222-16092-1-git-send-email-mjg@redhat.com> Signed-off-by: H. Peter Anvin --- arch/x86/boot/video.c | 6 ++++++ include/linux/screen_info.h | 5 ++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c index d42da3802499..f767164cd5df 100644 --- a/arch/x86/boot/video.c +++ b/arch/x86/boot/video.c @@ -27,6 +27,12 @@ static void store_cursor_position(void) boot_params.screen_info.orig_x = oreg.dl; boot_params.screen_info.orig_y = oreg.dh; + + if (oreg.ch & 0x20) + boot_params.screen_info.flags |= VIDEO_FLAGS_NOCURSOR; + + if ((oreg.ch & 0x1f) > (oreg.cl & 0x1f)) + boot_params.screen_info.flags |= VIDEO_FLAGS_NOCURSOR; } static void store_video_mode(void) diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h index 1ee2c05142f6..899fbb487c94 100644 --- a/include/linux/screen_info.h +++ b/include/linux/screen_info.h @@ -14,7 +14,8 @@ struct screen_info { __u16 orig_video_page; /* 0x04 */ __u8 orig_video_mode; /* 0x06 */ __u8 orig_video_cols; /* 0x07 */ - __u16 unused2; /* 0x08 */ + __u8 flags; /* 0x08 */ + __u8 unused2; /* 0x09 */ __u16 orig_video_ega_bx;/* 0x0a */ __u16 unused3; /* 0x0c */ __u8 orig_video_lines; /* 0x0e */ @@ -65,6 +66,8 @@ struct screen_info { #define VIDEO_TYPE_EFI 0x70 /* EFI graphic mode */ +#define VIDEO_FLAGS_NOCURSOR (1 << 0) /* The video mode has no cursor set */ + #ifdef __KERNEL__ extern struct screen_info screen_info; -- cgit v1.2.3 From 0388423dba2217b4e5b6c61690b0506d13b25a49 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 13 Nov 2009 15:30:00 -0500 Subject: x86: Minimise printk spew from per-vendor init code In the default case where the kernel supports all CPU vendors, we currently print out a bunch of not useful messages on every system. 32-bit: KERNEL supported cpus: Intel GenuineIntel AMD AuthenticAMD NSC Geode by NSC Cyrix CyrixInstead Centaur CentaurHauls Transmeta GenuineTMx86 Transmeta TransmetaCPU UMC UMC UMC UMC 64-bit: KERNEL supported cpus: Intel GenuineIntel AMD AuthenticAMD Centaur CentaurHauls Given that "what CPUs does the kernel support" isn't useful for the "support everything" case, we can suppress these printk's. Signed-off-by: Dave Jones LKML-Reference: <20091113203000.GA19160@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cc25c2b4a567..617a29f95b3c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -656,6 +656,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) void __init early_cpu_init(void) { +#ifdef PROCESSOR_SELECT const struct cpu_dev *const *cdev; int count = 0; @@ -676,7 +677,7 @@ void __init early_cpu_init(void) cpudev->c_ident[j]); } } - +#endif early_identify_cpu(&boot_cpu_data); } -- cgit v1.2.3 From b01c845f0f2e3f9e54e6a78d5d56895f5b95e818 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 13 Nov 2009 14:38:26 -0800 Subject: x86: Remove CPU cache size output for non-Intel too As Dave Jones said about the output in intel_cacheinfo.c: "They aren't useful, and pollute the dmesg output a lot (especially on machines with many cores). Also the same information can be trivially found out from userspace." Give the generic display_cacheinfo() function the same treatment. Signed-off-by: Roland Dreier Acked-by: Dave Jones Cc: Mike Travis Cc: Andi Kleen Cc: Heiko Carstens Cc: Randy Dunlap Cc: Tejun Heo Cc: Greg Kroah-Hartman Cc: Yinghai Lu Cc: David Rientjes Cc: Steven Rostedt Cc: Rusty Russell Cc: Hidetoshi Seto Cc: Jack Steiner Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 617a29f95b3c..9db1e2425c27 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -391,8 +391,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) if (n >= 0x80000005) { cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); - printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); c->x86_cache_size = (ecx>>24) + (edx>>24); #ifdef CONFIG_X86_64 /* On K8 L1 TLB is inclusive, so don't count it */ @@ -422,9 +420,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) #endif c->x86_cache_size = l2size; - - printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - l2size, ecx & 0xFF); } void __cpuinit detect_ht(struct cpuinfo_x86 *c) -- cgit v1.2.3 From 31c997cac76e62918858a432fff6e43fd48425f9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 14 Nov 2009 10:34:41 +0100 Subject: x86: Fix cpu_devs[] initialization in early_cpu_init() Yinghai Lu noticed that this commit: 0388423: x86: Minimise printk spew from per-vendor init code mistakenly left out the initialization of cpu_devs[] in the !PROCESSOR_SELECT case. Fix it. Reported-by: Yinghai Lu Cc: Dave Jones LKML-Reference: <20091113203000.GA19160@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9db1e2425c27..61242a56c2d6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -651,28 +651,34 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) void __init early_cpu_init(void) { -#ifdef PROCESSOR_SELECT const struct cpu_dev *const *cdev; int count = 0; +#ifdef PROCESSOR_SELECT printk(KERN_INFO "KERNEL supported cpus:\n"); +#endif + for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { const struct cpu_dev *cpudev = *cdev; - unsigned int j; if (count >= X86_VENDOR_NUM) break; cpu_devs[count] = cpudev; count++; - for (j = 0; j < 2; j++) { - if (!cpudev->c_ident[j]) - continue; - printk(KERN_INFO " %s %s\n", cpudev->c_vendor, - cpudev->c_ident[j]); +#ifdef PROCESSOR_SELECT + { + unsigned int j; + + for (j = 0; j < 2; j++) { + if (!cpudev->c_ident[j]) + continue; + printk(KERN_INFO " %s %s\n", cpudev->c_vendor, + cpudev->c_ident[j]); + } } - } #endif + } early_identify_cpu(&boot_cpu_data); } -- cgit v1.2.3 From 68efa37df779b3e04280598e8b5b3a1919b65fee Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 14 Nov 2009 01:35:29 +0100 Subject: hw-breakpoints, x86: Fix modular KVM build This build error: arch/x86/kvm/x86.c:3655: error: implicit declaration of function 'hw_breakpoint_restore' Happens because in the CONFIG_KVM=m case there's no 'CONFIG_KVM' define in the kernel - it's CONFIG_KVM_MODULE in that case. Make the prototype available unconditionally. Cc: Frederic Weisbecker Cc: Prasad LKML-Reference: <1258114575-32655-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/debugreg.h | 2 -- arch/x86/kernel/hw_breakpoint.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 0f6e92af4227..fdabd8435765 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -96,9 +96,7 @@ static inline int hw_breakpoint_active(void) extern void aout_dump_debugregs(struct user *dump); -#ifdef CONFIG_KVM extern void hw_breakpoint_restore(void); -#endif #endif /* __KERNEL__ */ diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 57dcee5fa958..752daebe91c6 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -43,6 +43,7 @@ /* Per cpu debug control register value */ DEFINE_PER_CPU(unsigned long, dr7); +EXPORT_PER_CPU_SYMBOL(dr7); /* Per cpu debug address registers values */ static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]); @@ -409,6 +410,7 @@ void aout_dump_debugregs(struct user *dump) dump->u_debugreg[7] = dr7; } +EXPORT_SYMBOL_GPL(aout_dump_debugregs); /* * Release the user breakpoints used by ptrace @@ -424,7 +426,6 @@ void flush_ptrace_hw_breakpoint(struct task_struct *tsk) } } -#ifdef CONFIG_KVM void hw_breakpoint_restore(void) { set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0); @@ -435,7 +436,6 @@ void hw_breakpoint_restore(void) set_debugreg(__get_cpu_var(dr7), 7); } EXPORT_SYMBOL_GPL(hw_breakpoint_restore); -#endif /* * Handle debug exception notifications. -- cgit v1.2.3 From a3b28ee1090072092e2be043c24df94230e725b2 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sat, 14 Nov 2009 20:46:36 +0900 Subject: x86: Set dma_ops to nommu_dma_ops by default We set dma_ops to nommu_dma_ops at two different places for x86_32 and x86_64. This unifies them by setting dma_ops to nommu_dma_ops by default. Signed-off-by: FUJITA Tomonori LKML-Reference: <1258199198-16657-2-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 4 +--- arch/x86/kernel/pci-swiotlb.c | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 0b11bf18f540..f170b5364b41 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -15,7 +15,7 @@ static int forbid_dac __read_mostly; -struct dma_map_ops *dma_ops; +struct dma_map_ops *dma_ops = &nommu_dma_ops; EXPORT_SYMBOL(dma_ops); static int iommu_sac_force __read_mostly; @@ -128,8 +128,6 @@ void __init pci_iommu_alloc(void) #ifdef CONFIG_X86_64 /* free the range so iommu could get some range less than 4G */ dma32_free_bootmem(); -#else - dma_ops = &nommu_dma_ops; #endif if (pci_swiotlb_init()) return; diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index a6e5d0ffa3a7..e36e71daa44c 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -61,8 +61,7 @@ int __init pci_swiotlb_init(void) if (swiotlb) { swiotlb_init(0); dma_ops = &swiotlb_dma_ops; - } else - dma_ops = &nommu_dma_ops; + } return swiotlb_force; } -- cgit v1.2.3 From 94a15564ac63af6bb2ff8d4d04f86d5e7ee0278a Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sat, 14 Nov 2009 20:46:37 +0900 Subject: x86: Move iommu_shutdown_noop to x86_init.c iommu_init_noop() is in arch/x86/kernel/x86_init.c but iommu_shutdown_noop() in arch/x86/include/asm/iommu.h. This moves iommu_shutdown_noop() to x86_init.c for consistency. Signed-off-by: FUJITA Tomonori LKML-Reference: <1258199198-16657-3-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/iommu.h | 1 - arch/x86/kernel/x86_init.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index df42a712361f..345c99cef152 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -1,7 +1,6 @@ #ifndef _ASM_X86_IOMMU_H #define _ASM_X86_IOMMU_H -static inline void iommu_shutdown_noop(void) {} extern struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index c46984d122dc..80f3ae24b974 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -20,6 +20,7 @@ void __cpuinit x86_init_noop(void) { } void __init x86_init_uint_noop(unsigned int unused) { } void __init x86_init_pgd_noop(pgd_t *unused) { } int __init iommu_init_noop(void) { return 0; } +void __init iommu_shutdown_noop(void) { } /* * The platform setup functions are preset with the default functions -- cgit v1.2.3 From 6959450e567c1f17d3ce8489099fc56c3721d577 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sat, 14 Nov 2009 20:46:38 +0900 Subject: swiotlb: Remove duplicate swiotlb_force extern declarations Signed-off-by: FUJITA Tomonori Cc: tony.luck@intel.com LKML-Reference: <1258199198-16657-4-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/swiotlb.h | 2 -- arch/x86/include/asm/swiotlb.h | 4 ---- include/linux/swiotlb.h | 2 ++ 3 files changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/include/asm/swiotlb.h b/arch/ia64/include/asm/swiotlb.h index dcbaea7ce128..f0acde68aaea 100644 --- a/arch/ia64/include/asm/swiotlb.h +++ b/arch/ia64/include/asm/swiotlb.h @@ -4,8 +4,6 @@ #include #include -extern int swiotlb_force; - #ifdef CONFIG_SWIOTLB extern int swiotlb; extern void pci_swiotlb_init(void); diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h index 940f13a213f8..87ffcb12a1b8 100644 --- a/arch/x86/include/asm/swiotlb.h +++ b/arch/x86/include/asm/swiotlb.h @@ -3,10 +3,6 @@ #include -/* SWIOTLB interface */ - -extern int swiotlb_force; - #ifdef CONFIG_SWIOTLB extern int swiotlb; extern int pci_swiotlb_init(void); diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index eb9bdb4d4854..febedcf67c7e 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -7,6 +7,8 @@ struct device; struct dma_attrs; struct scatterlist; +extern int swiotlb_force; + /* * Maximum allowable number of contiguous slabs to map, * must be a power of 2. What is the appropriate value ? -- cgit v1.2.3 From f4131c6259b46bd84dcfcd3bb9ed08e99e2875a4 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sat, 14 Nov 2009 21:26:50 +0900 Subject: x86: Make calgary_iommu_init() static This makes calgary_iommu_init() static and moves it to remove the forward declaration. Signed-off-by: FUJITA Tomonori Cc: muli@il.ibm.com LKML-Reference: <20091114212603U.fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-calgary_64.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 833f491440b9..c84ad037f586 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -1345,7 +1345,24 @@ static void __init get_tce_space_from_tar(void) return; } -int __init calgary_iommu_init(void); +static int __init calgary_iommu_init(void) +{ + int ret; + + /* ok, we're trying to use Calgary - let's roll */ + printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n"); + + ret = calgary_init(); + if (ret) { + printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " + "falling back to no_iommu\n", ret); + return ret; + } + + bad_dma_address = 0x0; + + return 0; +} void __init detect_calgary(void) { @@ -1458,25 +1475,6 @@ cleanup: } } -int __init calgary_iommu_init(void) -{ - int ret; - - /* ok, we're trying to use Calgary - let's roll */ - printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n"); - - ret = calgary_init(); - if (ret) { - printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " - "falling back to no_iommu\n", ret); - return ret; - } - - bad_dma_address = 0x0; - - return 0; -} - static int __init calgary_parse_options(char *p) { unsigned int bridge; -- cgit v1.2.3 From 14722485830fe6baba738b91d96f06fbd6cf7a18 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 13 Nov 2009 11:56:24 +0000 Subject: x86-64: __copy_from_user_inatomic() adjustments This v2.6.26 commit: ad2fc2c: x86: fix copy_user on x86 rendered __copy_from_user_inatomic() identical to copy_user_generic(), yet didn't make the former just call the latter from an inline function. Furthermore, this v2.6.19 commit: b885808: [PATCH] Add proper sparse __user casts to __copy_to_user_inatomic converted the return type of __copy_to_user_inatomic() from unsigned long to int, but didn't do the same to __copy_from_user_inatomic(). Signed-off-by: Jan Beulich Cc: Linus Torvalds Cc: Alexander Viro Cc: Arjan van de Ven Cc: Andi Kleen Cc: LKML-Reference: <4AFD5778020000780001F8F4@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess_64.h | 7 +++++-- arch/x86/kernel/x8664_ksyms_64.c | 1 - arch/x86/lib/copy_user_64.S | 6 ------ 3 files changed, 5 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index ce6fec7ce38d..7adebacaa325 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -193,8 +193,11 @@ __must_check long strlen_user(const char __user *str); __must_check unsigned long clear_user(void __user *mem, unsigned long len); __must_check unsigned long __clear_user(void __user *mem, unsigned long len); -__must_check long __copy_from_user_inatomic(void *dst, const void __user *src, - unsigned size); +static __must_check __always_inline int +__copy_from_user_inatomic(void *dst, const void __user *src, unsigned size) +{ + return copy_user_generic(dst, (__force const void *)src, size); +} static __must_check __always_inline int __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size) diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index a0cdd8cc1d67..cd54276b6be8 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -32,7 +32,6 @@ EXPORT_SYMBOL(copy_user_generic); EXPORT_SYMBOL(__copy_user_nocache); EXPORT_SYMBOL(_copy_from_user); EXPORT_SYMBOL(copy_to_user); -EXPORT_SYMBOL(__copy_from_user_inatomic); EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(clear_page); diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 4be3c415b3e9..39369985f1cb 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -96,12 +96,6 @@ ENTRY(copy_user_generic) CFI_ENDPROC ENDPROC(copy_user_generic) -ENTRY(__copy_from_user_inatomic) - CFI_STARTPROC - ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string - CFI_ENDPROC -ENDPROC(__copy_from_user_inatomic) - .section .fixup,"ax" /* must zero dest */ ENTRY(bad_from_user) -- cgit v1.2.3 From dc186ad741c12ae9ecac8b89e317ef706fdaf8f6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 Nov 2009 01:09:48 +0900 Subject: workqueue: Add debugobjects support Add debugobject support to track the life time of work_structs. While at it, remove duplicate definition of INIT_DELAYED_WORK_ON_STACK(). Signed-off-by: Thomas Gleixner Signed-off-by: Tejun Heo --- arch/x86/kernel/smpboot.c | 4 +- include/linux/workqueue.h | 38 ++++++++++---- kernel/workqueue.c | 131 ++++++++++++++++++++++++++++++++++++++++++++-- lib/Kconfig.debug | 8 +++ 4 files changed, 166 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 565ebc65920e..ba43dfed353d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -687,7 +687,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), }; - INIT_WORK(&c_idle.work, do_fork_idle); + INIT_WORK_ON_STACK(&c_idle.work, do_fork_idle); alternatives_smp_switch(1); @@ -713,6 +713,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) if (IS_ERR(c_idle.idle)) { printk("failed fork for CPU %d\n", cpu); + destroy_work_on_stack(&c_idle.work); return PTR_ERR(c_idle.idle); } @@ -831,6 +832,7 @@ do_rest: smpboot_restore_warm_reset_vector(); } + destroy_work_on_stack(&c_idle.work); return boot_error; } diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index cf24c20de9e4..9466e860d8c2 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -25,6 +25,7 @@ typedef void (*work_func_t)(struct work_struct *work); struct work_struct { atomic_long_t data; #define WORK_STRUCT_PENDING 0 /* T if work item pending execution */ +#define WORK_STRUCT_STATIC 1 /* static initializer (debugobjects) */ #define WORK_STRUCT_FLAG_MASK (3UL) #define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK) struct list_head entry; @@ -35,6 +36,7 @@ struct work_struct { }; #define WORK_DATA_INIT() ATOMIC_LONG_INIT(0) +#define WORK_DATA_STATIC_INIT() ATOMIC_LONG_INIT(2) struct delayed_work { struct work_struct work; @@ -63,7 +65,7 @@ struct execute_work { #endif #define __WORK_INITIALIZER(n, f) { \ - .data = WORK_DATA_INIT(), \ + .data = WORK_DATA_STATIC_INIT(), \ .entry = { &(n).entry, &(n).entry }, \ .func = (f), \ __WORK_INIT_LOCKDEP_MAP(#n, &(n)) \ @@ -91,6 +93,14 @@ struct execute_work { #define PREPARE_DELAYED_WORK(_work, _func) \ PREPARE_WORK(&(_work)->work, (_func)) +#ifdef CONFIG_DEBUG_OBJECTS_WORK +extern void __init_work(struct work_struct *work, int onstack); +extern void destroy_work_on_stack(struct work_struct *work); +#else +static inline void __init_work(struct work_struct *work, int onstack) { } +static inline void destroy_work_on_stack(struct work_struct *work) { } +#endif + /* * initialize all of a work item in one go * @@ -99,24 +109,36 @@ struct execute_work { * to generate better code. */ #ifdef CONFIG_LOCKDEP -#define INIT_WORK(_work, _func) \ +#define __INIT_WORK(_work, _func, _onstack) \ do { \ static struct lock_class_key __key; \ \ + __init_work((_work), _onstack); \ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ lockdep_init_map(&(_work)->lockdep_map, #_work, &__key, 0);\ INIT_LIST_HEAD(&(_work)->entry); \ PREPARE_WORK((_work), (_func)); \ } while (0) #else -#define INIT_WORK(_work, _func) \ +#define __INIT_WORK(_work, _func, _onstack) \ do { \ + __init_work((_work), _onstack); \ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ INIT_LIST_HEAD(&(_work)->entry); \ PREPARE_WORK((_work), (_func)); \ } while (0) #endif +#define INIT_WORK(_work, _func) \ + do { \ + __INIT_WORK((_work), (_func), 0); \ + } while (0) + +#define INIT_WORK_ON_STACK(_work, _func) \ + do { \ + __INIT_WORK((_work), (_func), 1); \ + } while (0) + #define INIT_DELAYED_WORK(_work, _func) \ do { \ INIT_WORK(&(_work)->work, (_func)); \ @@ -125,22 +147,16 @@ struct execute_work { #define INIT_DELAYED_WORK_ON_STACK(_work, _func) \ do { \ - INIT_WORK(&(_work)->work, (_func)); \ + INIT_WORK_ON_STACK(&(_work)->work, (_func)); \ init_timer_on_stack(&(_work)->timer); \ } while (0) -#define INIT_DELAYED_WORK_DEFERRABLE(_work, _func) \ +#define INIT_DELAYED_WORK_DEFERRABLE(_work, _func) \ do { \ INIT_WORK(&(_work)->work, (_func)); \ init_timer_deferrable(&(_work)->timer); \ } while (0) -#define INIT_DELAYED_WORK_ON_STACK(_work, _func) \ - do { \ - INIT_WORK(&(_work)->work, (_func)); \ - init_timer_on_stack(&(_work)->timer); \ - } while (0) - /** * work_pending - Find out whether a work item is currently pending * @work: The work item in question diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 12328147132c..ddad63fbb61b 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -68,6 +68,116 @@ struct workqueue_struct { #endif }; +#ifdef CONFIG_DEBUG_OBJECTS_WORK + +static struct debug_obj_descr work_debug_descr; + +/* + * fixup_init is called when: + * - an active object is initialized + */ +static int work_fixup_init(void *addr, enum debug_obj_state state) +{ + struct work_struct *work = addr; + + switch (state) { + case ODEBUG_STATE_ACTIVE: + cancel_work_sync(work); + debug_object_init(work, &work_debug_descr); + return 1; + default: + return 0; + } +} + +/* + * fixup_activate is called when: + * - an active object is activated + * - an unknown object is activated (might be a statically initialized object) + */ +static int work_fixup_activate(void *addr, enum debug_obj_state state) +{ + struct work_struct *work = addr; + + switch (state) { + + case ODEBUG_STATE_NOTAVAILABLE: + /* + * This is not really a fixup. The work struct was + * statically initialized. We just make sure that it + * is tracked in the object tracker. + */ + if (test_bit(WORK_STRUCT_STATIC, work_data_bits(work))) { + debug_object_init(work, &work_debug_descr); + debug_object_activate(work, &work_debug_descr); + return 0; + } + WARN_ON_ONCE(1); + return 0; + + case ODEBUG_STATE_ACTIVE: + WARN_ON(1); + + default: + return 0; + } +} + +/* + * fixup_free is called when: + * - an active object is freed + */ +static int work_fixup_free(void *addr, enum debug_obj_state state) +{ + struct work_struct *work = addr; + + switch (state) { + case ODEBUG_STATE_ACTIVE: + cancel_work_sync(work); + debug_object_free(work, &work_debug_descr); + return 1; + default: + return 0; + } +} + +static struct debug_obj_descr work_debug_descr = { + .name = "work_struct", + .fixup_init = work_fixup_init, + .fixup_activate = work_fixup_activate, + .fixup_free = work_fixup_free, +}; + +static inline void debug_work_activate(struct work_struct *work) +{ + debug_object_activate(work, &work_debug_descr); +} + +static inline void debug_work_deactivate(struct work_struct *work) +{ + debug_object_deactivate(work, &work_debug_descr); +} + +void __init_work(struct work_struct *work, int onstack) +{ + if (onstack) + debug_object_init_on_stack(work, &work_debug_descr); + else + debug_object_init(work, &work_debug_descr); +} +EXPORT_SYMBOL_GPL(__init_work); + +void destroy_work_on_stack(struct work_struct *work) +{ + debug_object_free(work, &work_debug_descr); +} +EXPORT_SYMBOL_GPL(destroy_work_on_stack); + +#else +static inline void debug_work_activate(struct work_struct *work) { } +static inline void debug_work_deactivate(struct work_struct *work) { } +#endif + /* Serializes the accesses to the list of workqueues. */ static DEFINE_SPINLOCK(workqueue_lock); static LIST_HEAD(workqueues); @@ -145,6 +255,7 @@ static void __queue_work(struct cpu_workqueue_struct *cwq, { unsigned long flags; + debug_work_activate(work); spin_lock_irqsave(&cwq->lock, flags); insert_work(cwq, work, &cwq->worklist); spin_unlock_irqrestore(&cwq->lock, flags); @@ -280,6 +391,7 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq) struct lockdep_map lockdep_map = work->lockdep_map; #endif trace_workqueue_execution(cwq->thread, work); + debug_work_deactivate(work); cwq->current_work = work; list_del_init(cwq->worklist.next); spin_unlock_irq(&cwq->lock); @@ -350,11 +462,18 @@ static void wq_barrier_func(struct work_struct *work) static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, struct wq_barrier *barr, struct list_head *head) { - INIT_WORK(&barr->work, wq_barrier_func); + /* + * debugobject calls are safe here even with cwq->lock locked + * as we know for sure that this will not trigger any of the + * checks and call back into the fixup functions where we + * might deadlock. + */ + INIT_WORK_ON_STACK(&barr->work, wq_barrier_func); __set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work)); init_completion(&barr->done); + debug_work_activate(&barr->work); insert_work(cwq, &barr->work, head); } @@ -372,8 +491,10 @@ static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq) } spin_unlock_irq(&cwq->lock); - if (active) + if (active) { wait_for_completion(&barr.done); + destroy_work_on_stack(&barr.work); + } return active; } @@ -451,6 +572,7 @@ out: return 0; wait_for_completion(&barr.done); + destroy_work_on_stack(&barr.work); return 1; } EXPORT_SYMBOL_GPL(flush_work); @@ -485,6 +607,7 @@ static int try_to_grab_pending(struct work_struct *work) */ smp_rmb(); if (cwq == get_wq_data(work)) { + debug_work_deactivate(work); list_del_init(&work->entry); ret = 1; } @@ -507,8 +630,10 @@ static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq, } spin_unlock_irq(&cwq->lock); - if (unlikely(running)) + if (unlikely(running)) { wait_for_completion(&barr.done); + destroy_work_on_stack(&barr.work); + } } static void wait_on_work(struct work_struct *work) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 234ceb10861f..c91f0519d493 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -298,6 +298,14 @@ config DEBUG_OBJECTS_TIMERS timer routines to track the life time of timer objects and validate the timer operations. +config DEBUG_OBJECTS_WORK + bool "Debug work objects" + depends on DEBUG_OBJECTS + help + If you say Y here, additional code will be inserted into the + work queue routines to track the life time of work objects and + validate the work operations. + config DEBUG_OBJECTS_ENABLE_DEFAULT int "debug_objects bootup default value (0-1)" range 0 1 -- cgit v1.2.3 From 62ad33f67003b9a7b6013f0511579b9805e11626 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Mon, 16 Nov 2009 11:44:30 +0900 Subject: x86: Don't put iommu_shutdown_noop() in init section It causes kernel panic on shutdown or reboot. Signed-off-by: Hiroshi Shimamoto Acked-by: FUJITA Tomonori LKML-Reference: <4B00BC8E.50801@ct.jp.nec.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/x86_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 80f3ae24b974..d11c5ff7c65e 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -20,7 +20,7 @@ void __cpuinit x86_init_noop(void) { } void __init x86_init_uint_noop(unsigned int unused) { } void __init x86_init_pgd_noop(pgd_t *unused) { } int __init iommu_init_noop(void) { return 0; } -void __init iommu_shutdown_noop(void) { } +void iommu_shutdown_noop(void) { } /* * The platform setup functions are preset with the default functions -- cgit v1.2.3 From 411462f62a65eeae7f451c6eb7a38b9d8759c61a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 Nov 2009 11:52:39 +0100 Subject: x86: Fix printk format due to variable type change clockevents.mult became u32. Fix the printk format. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 894aa97f0717..cf4ee5195c5e 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -662,7 +662,7 @@ static int __init calibrate_APIC_clock(void) calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); - apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult); + apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult); apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", calibration_result); -- cgit v1.2.3 From 303fc0870f8fbfabe260c5c32b18e53458d597ea Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Thu, 12 Nov 2009 13:09:31 -0500 Subject: x86: AMD Northbridge: Verify NB's node is online Fix panic seen on some IBM and HP systems on 2.6.32-rc6: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] find_next_bit+0x77/0x9c [...] [] cpumask_next_and+0x2e/0x3b [] pci_device_probe+0x8e/0xf5 [] ? driver_sysfs_add+0x47/0x6c [] driver_probe_device+0xd9/0x1f9 [] __driver_attach+0x58/0x7c [] ? __driver_attach+0x0/0x7c [] bus_for_each_dev+0x54/0x89 [] driver_attach+0x19/0x1b [] bus_add_driver+0xd3/0x23d [] driver_register+0x98/0x109 [] __pci_register_driver+0x63/0xd3 [] ? up_read+0x26/0x2a [] ? k8temp_init+0x0/0x20 [k8temp] [] k8temp_init+0x1e/0x20 [k8temp] [] do_one_initcall+0x6d/0x185 [] sys_init_module+0xd3/0x236 [] system_call_fastpath+0x16/0x1b I put in a printk and commented out the set_dev_node() call when and got this output: quirk_amd_nb_node: current numa_node = 0x0, would set to val & 7 = 0x0 quirk_amd_nb_node: current numa_node = 0x0, would set to val & 7 = 0x1 quirk_amd_nb_node: current numa_node = 0x0, would set to val & 7 = 0x2 quirk_amd_nb_node: current numa_node = 0x0, would set to val & 7 = 0x3 I.e. the issue appears to be that the HW has set val to a valid value, however, the system is only configured for a single node -- 0, the others are offline. Check to see if the node is actually online before setting the numa node for an AMD northbridge in quirk_amd_nb_node(). Signed-off-by: Prarit Bhargava Cc: bhavna.sarathy@amd.com Cc: jbarnes@virtuousgeek.org Cc: andreas.herrmann3@amd.com LKML-Reference: <20091112180933.12532.98685.sendpatchset@prarit.bos.redhat.com> [ v2: clean up the code and add comments ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/quirks.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 6c3b2c6fd772..18093d7498f0 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -499,6 +499,7 @@ static void __init quirk_amd_nb_node(struct pci_dev *dev) { struct pci_dev *nb_ht; unsigned int devfn; + u32 node; u32 val; devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0); @@ -507,7 +508,13 @@ static void __init quirk_amd_nb_node(struct pci_dev *dev) return; pci_read_config_dword(nb_ht, 0x60, &val); - set_dev_node(&dev->dev, val & 7); + node = val & 7; + /* + * Some hardware may return an invalid node ID, + * so check it first: + */ + if (node_online(node)) + set_dev_node(&dev->dev, node); pci_dev_put(nb_ht); } -- cgit v1.2.3 From 3c93ca00eeeb774c7dd666cc7286a9e90c53e998 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 16 Nov 2009 15:42:18 +0100 Subject: x86: Add missing might_fault() checks to copy_{to,from}_user() On x86-64, copy_[to|from]_user() rely on assembly routines that never call might_fault(), making us missing various lockdep checks. This doesn't apply to __copy_from,to_user() that explicitly handle these calls, neither is it a problem in x86-32 where copy_to,from_user() rely on the "__" prefixed versions that also call might_fault(). Signed-off-by: Frederic Weisbecker Cc: Arjan van de Ven Cc: Linus Torvalds Cc: Nick Piggin Cc: Peter Zijlstra LKML-Reference: <1258382538-30979-1-git-send-email-fweisbec@gmail.com> [ v2: fix module export ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess_64.h | 10 +++++++++- arch/x86/kernel/x8664_ksyms_64.c | 2 +- arch/x86/lib/copy_user_64.S | 4 ++-- 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 7adebacaa325..46324c6a4f6e 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -19,7 +19,7 @@ __must_check unsigned long copy_user_generic(void *to, const void *from, unsigned len); __must_check unsigned long -copy_to_user(void __user *to, const void *from, unsigned len); +_copy_to_user(void __user *to, const void *from, unsigned len); __must_check unsigned long _copy_from_user(void *to, const void __user *from, unsigned len); __must_check unsigned long @@ -32,6 +32,7 @@ static inline unsigned long __must_check copy_from_user(void *to, int sz = __compiletime_object_size(to); int ret = -EFAULT; + might_fault(); if (likely(sz == -1 || sz >= n)) ret = _copy_from_user(to, from, n); #ifdef CONFIG_DEBUG_VM @@ -41,6 +42,13 @@ static inline unsigned long __must_check copy_from_user(void *to, return ret; } +static __always_inline __must_check +int copy_to_user(void __user *dst, const void *src, unsigned size) +{ + might_fault(); + + return _copy_to_user(dst, src, size); +} static __always_inline __must_check int __copy_from_user(void *dst, const void __user *src, unsigned size) diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index cd54276b6be8..a1029769b6f2 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -31,7 +31,7 @@ EXPORT_SYMBOL(__put_user_8); EXPORT_SYMBOL(copy_user_generic); EXPORT_SYMBOL(__copy_user_nocache); EXPORT_SYMBOL(_copy_from_user); -EXPORT_SYMBOL(copy_to_user); +EXPORT_SYMBOL(_copy_to_user); EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(clear_page); diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 39369985f1cb..cf889d4e076a 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -65,7 +65,7 @@ .endm /* Standard copy_to_user with segment limit checking */ -ENTRY(copy_to_user) +ENTRY(_copy_to_user) CFI_STARTPROC GET_THREAD_INFO(%rax) movq %rdi,%rcx @@ -75,7 +75,7 @@ ENTRY(copy_to_user) jae bad_to_user ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string CFI_ENDPROC -ENDPROC(copy_to_user) +ENDPROC(_copy_to_user) /* Standard copy_from_user with segment limit checking */ ENTRY(_copy_from_user) -- cgit v1.2.3 From e79c65a97c01d5da4317f44f9f98b3814e091a43 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 16 Nov 2009 18:14:26 +0300 Subject: x86: io-apic: IO-APIC MMIO should not fail on resource insertion If IO-APIC base address is 1K aligned we should not fail on resourse insertion procedure. For this sake we define IO_APIC_SLOT_SIZE constant which should cover all IO-APIC direct accessible registers. An example of a such configuration is there http://marc.info/?l=linux-kernel&m=118114792006520 | | Quoting the message | | IOAPIC[0]: apic_id 2, version 32, address 0xfec00000, GSI 0-23 | IOAPIC[1]: apic_id 3, version 32, address 0xfec80000, GSI 24-47 | IOAPIC[2]: apic_id 4, version 32, address 0xfec80400, GSI 48-71 | IOAPIC[3]: apic_id 5, version 32, address 0xfec84000, GSI 72-95 | IOAPIC[4]: apic_id 8, version 32, address 0xfec84400, GSI 96-119 | Reported-by: "Maciej W. Rozycki" Signed-off-by: Cyrill Gorcunov Acked-by: Yinghai Lu LKML-Reference: <20091116151426.GC5653@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apicdef.h | 6 ++++++ arch/x86/kernel/apic/io_apic.c | 11 +++++------ 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 3b62da926de9..7fe3b3060f08 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -11,6 +11,12 @@ #define IO_APIC_DEFAULT_PHYS_BASE 0xfec00000 #define APIC_DEFAULT_PHYS_BASE 0xfee00000 +/* + * This is the IO-APIC register space as specified + * by Intel docs: + */ +#define IO_APIC_SLOT_SIZE 1024 + #define APIC_ID 0x20 #define APIC_LVR 0x30 diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 20ea8392bc57..ff237199fa23 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -4100,18 +4100,17 @@ void __init ioapic_init_mappings(void) #ifdef CONFIG_X86_32 fake_ioapic_page: #endif - ioapic_phys = (unsigned long) - alloc_bootmem_pages(PAGE_SIZE); + ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE); ioapic_phys = __pa(ioapic_phys); } set_fixmap_nocache(idx, ioapic_phys); - apic_printk(APIC_VERBOSE, - "mapped IOAPIC to %08lx (%08lx)\n", - __fix_to_virt(idx), ioapic_phys); + apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n", + __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK), + ioapic_phys); idx++; ioapic_res->start = ioapic_phys; - ioapic_res->end = ioapic_phys + PAGE_SIZE-1; + ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1; ioapic_res++; } } -- cgit v1.2.3 From d65ff75fbe6f8ac7c17f18e4108521898468822c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 16 Nov 2009 18:06:18 -0500 Subject: x86: Add verbose option to insn decoder test Add verbose option to insn decoder test. This dumps decoded instruction when building kernel with V=1. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Stephen Rothwell Cc: Randy Dunlap Cc: Jim Keniston Cc: Stephen Rothwell LKML-Reference: <20091116230618.5250.18762.stgit@harusame> Signed-off-by: Ingo Molnar --- arch/x86/tools/Makefile | 9 +++++- arch/x86/tools/test_get_len.c | 74 ++++++++++++++++++++++++++++++++++++------- 2 files changed, 71 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile index 5e295d95dc25..4688f90ce5a2 100644 --- a/arch/x86/tools/Makefile +++ b/arch/x86/tools/Makefile @@ -1,6 +1,13 @@ PHONY += posttest + +ifeq ($(KBUILD_VERBOSE),1) + postest_verbose = -v +else + postest_verbose = +endif + quiet_cmd_posttest = TEST $@ - cmd_posttest = $(OBJDUMP) -d -j .text $(objtree)/vmlinux | awk -f $(srctree)/arch/x86/tools/distill.awk | $(obj)/test_get_len $(CONFIG_64BIT) + cmd_posttest = $(OBJDUMP) -d -j .text $(objtree)/vmlinux | awk -f $(srctree)/arch/x86/tools/distill.awk | $(obj)/test_get_len -$(CONFIG_64BIT) $(posttest_verbose) posttest: $(obj)/test_get_len vmlinux $(call cmd,posttest) diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c index 376d33852191..5743e5128d35 100644 --- a/arch/x86/tools/test_get_len.c +++ b/arch/x86/tools/test_get_len.c @@ -20,6 +20,7 @@ #include #include #include +#include #define unlikely(cond) (cond) @@ -36,11 +37,16 @@ */ const char *prog; +static int verbose; +static int x86_64; static void usage(void) { fprintf(stderr, "Usage: objdump -d a.out | awk -f distill.awk |" - " %s [y|n](64bit flag)\n", prog); + " %s [-y|-n] [-v] \n", prog); + fprintf(stderr, "\t-y 64bit mode\n"); + fprintf(stderr, "\t-n 32bit mode\n"); + fprintf(stderr, "\t-v verbose mode\n"); exit(1); } @@ -50,6 +56,56 @@ static void malformed_line(const char *line, int line_nr) exit(3); } +static void dump_field(FILE *fp, const char *name, const char *indent, + struct insn_field *field) +{ + fprintf(fp, "%s.%s = {\n", indent, name); + fprintf(fp, "%s\t.value = %d, bytes[] = {%x, %x, %x, %x},\n", + indent, field->value, field->bytes[0], field->bytes[1], + field->bytes[2], field->bytes[3]); + fprintf(fp, "%s\t.got = %d, .nbytes = %d},\n", indent, + field->got, field->nbytes); +} + +static void dump_insn(FILE *fp, struct insn *insn) +{ + fprintf(fp, "Instruction = { \n"); + dump_field(fp, "prefixes", "\t", &insn->prefixes); + dump_field(fp, "rex_prefix", "\t", &insn->rex_prefix); + dump_field(fp, "vex_prefix", "\t", &insn->vex_prefix); + dump_field(fp, "opcode", "\t", &insn->opcode); + dump_field(fp, "modrm", "\t", &insn->modrm); + dump_field(fp, "sib", "\t", &insn->sib); + dump_field(fp, "displacement", "\t", &insn->displacement); + dump_field(fp, "immediate1", "\t", &insn->immediate1); + dump_field(fp, "immediate2", "\t", &insn->immediate2); + fprintf(fp, "\t.attr = %x, .opnd_bytes = %d, .addr_bytes = %d,\n", + insn->attr, insn->opnd_bytes, insn->addr_bytes); + fprintf(fp, "\t.length = %d, .x86_64 = %d, .kaddr = %p}\n", + insn->length, insn->x86_64, insn->kaddr); +} + +static void parse_args(int argc, char **argv) +{ + int c; + prog = argv[0]; + while ((c = getopt(argc, argv, "ynv")) != -1) { + switch (c) { + case 'y': + x86_64 = 1; + break; + case 'n': + x86_64 = 0; + break; + case 'v': + verbose = 1; + break; + default: + usage(); + } + } +} + #define BUFSIZE 256 int main(int argc, char **argv) @@ -57,15 +113,9 @@ int main(int argc, char **argv) char line[BUFSIZE]; unsigned char insn_buf[16]; struct insn insn; - int insns = 0; - int x86_64 = 0; - - prog = argv[0]; - if (argc > 2) - usage(); + int insns = 0, c; - if (argc == 2 && argv[1][0] == 'y') - x86_64 = 1; + parse_args(argc, argv); while (fgets(line, BUFSIZE, stdin)) { char copy[BUFSIZE], *s, *tab1, *tab2; @@ -97,8 +147,10 @@ int main(int argc, char **argv) if (insn.length != nb) { fprintf(stderr, "Error: %s", line); fprintf(stderr, "Error: objdump says %d bytes, but " - "insn_get_length() says %d (attr:%x)\n", nb, - insn.length, insn.attr); + "insn_get_length() says %d\n", nb, + insn.length); + if (verbose) + dump_insn(stderr, &insn); exit(2); } } -- cgit v1.2.3 From 35039eb6b199749943547c8572be6604edf00229 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 16 Nov 2009 18:06:24 -0500 Subject: x86: Show symbol name if insn decoder test failed Show symbol name if insn decoder test find a difference. This will help us to find out where the issue is. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Stephen Rothwell Cc: Randy Dunlap Cc: Jim Keniston Cc: Stephen Rothwell LKML-Reference: <20091116230624.5250.49813.stgit@harusame> Signed-off-by: Ingo Molnar --- arch/x86/tools/distill.awk | 5 +++++ arch/x86/tools/test_get_len.c | 10 +++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/tools/distill.awk b/arch/x86/tools/distill.awk index d433619bb866..c13c0ee48ab4 100644 --- a/arch/x86/tools/distill.awk +++ b/arch/x86/tools/distill.awk @@ -15,6 +15,11 @@ BEGIN { fwait_str="9b\tfwait" } +/^ *[0-9a-f]+ <[^>]*>:/ { + # Symbol entry + printf("%s%s\n", $2, $1) +} + /^ *[0-9a-f]+:/ { if (split($0, field, "\t") < 3) { # This is a continuation of the same insn. diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c index 5743e5128d35..af75e07217ba 100644 --- a/arch/x86/tools/test_get_len.c +++ b/arch/x86/tools/test_get_len.c @@ -110,7 +110,7 @@ static void parse_args(int argc, char **argv) int main(int argc, char **argv) { - char line[BUFSIZE]; + char line[BUFSIZE], sym[BUFSIZE] = ""; unsigned char insn_buf[16]; struct insn insn; int insns = 0, c; @@ -122,6 +122,12 @@ int main(int argc, char **argv) int nb = 0; unsigned int b; + if (line[0] == '<') { + /* Symbol line */ + strcpy(sym, line); + continue; + } + insns++; memset(insn_buf, 0, 16); strcpy(copy, line); @@ -145,6 +151,8 @@ int main(int argc, char **argv) insn_init(&insn, insn_buf, x86_64); insn_get_length(&insn); if (insn.length != nb) { + fprintf(stderr, "Error: %s found a difference at %s\n", + prog, sym); fprintf(stderr, "Error: %s", line); fprintf(stderr, "Error: objdump says %d bytes, but " "insn_get_length() says %d\n", nb, -- cgit v1.2.3 From 42109197eb7c01080eea6d9cd48ca23cbc3c566c Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sun, 15 Nov 2009 21:19:52 +0900 Subject: x86: gart: Add own dma_mapping_error function GART IOMMU is the only user of bad_dma_address variable. This patch converts GART to use the newer mechanism, fill in ->mapping_error() in struct dma_map_ops, to make dma_mapping_error() work in IOMMU specific way. Signed-off-by: FUJITA Tomonori Acked-by: Jesse Barnes Cc: muli@il.ibm.com Cc: joerg.roedel@amd.com LKML-Reference: <1258287594-8777-2-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 919182e15d1e..61c4d1e41a6b 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -47,6 +47,8 @@ static unsigned long iommu_pages; /* .. and in pages */ static u32 *iommu_gatt_base; /* Remapping table */ +static dma_addr_t bad_dma_addr; + /* * If this is disabled the IOMMU will use an optimized flushing strategy * of only flushing when an mapping is reused. With it true the GART is @@ -217,7 +219,7 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, if (panic_on_overflow) panic("dma_map_area overflow %lu bytes\n", size); iommu_full(dev, size, dir); - return bad_dma_address; + return bad_dma_addr; } for (i = 0; i < npages; i++) { @@ -303,7 +305,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, if (nonforced_iommu(dev, addr, s->length)) { addr = dma_map_area(dev, addr, s->length, dir, 0); - if (addr == bad_dma_address) { + if (addr == bad_dma_addr) { if (i > 0) gart_unmap_sg(dev, sg, i, dir, NULL); nents = 0; @@ -456,7 +458,7 @@ error: iommu_full(dev, pages << PAGE_SHIFT, dir); for_each_sg(sg, s, nents, i) - s->dma_address = bad_dma_address; + s->dma_address = bad_dma_addr; return 0; } @@ -480,7 +482,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, DMA_BIDIRECTIONAL, align_mask); flush_gart(); - if (paddr != bad_dma_address) { + if (paddr != bad_dma_addr) { *dma_addr = paddr; return page_address(page); } @@ -500,6 +502,11 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr, free_pages((unsigned long)vaddr, get_order(size)); } +static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return (dma_addr == bad_dma_addr); +} + static int no_agp; static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) @@ -687,6 +694,7 @@ static struct dma_map_ops gart_dma_ops = { .unmap_page = gart_unmap_page, .alloc_coherent = gart_alloc_coherent, .free_coherent = gart_free_coherent, + .mapping_error = gart_mapping_error, }; static void gart_iommu_shutdown(void) @@ -785,7 +793,7 @@ int __init gart_iommu_init(void) iommu_start = aper_size - iommu_size; iommu_bus_base = info.aper_base + iommu_start; - bad_dma_address = iommu_bus_base; + bad_dma_addr = iommu_bus_base; iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); /* -- cgit v1.2.3 From 8fd524b355daef0945692227e726fb444cebcd4f Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sun, 15 Nov 2009 21:19:53 +0900 Subject: x86: Kill bad_dma_address variable This kills bad_dma_address variable, the old mechanism to enable IOMMU drivers to make dma_mapping_error() work in IOMMU's specific way. bad_dma_address variable was introduced to enable IOMMU drivers to make dma_mapping_error() work in IOMMU's specific way. However, it can't handle systems that use both swiotlb and HW IOMMU. SO we introduced dma_map_ops->mapping_error to solve that case. Intel VT-d, GART, and swiotlb already use dma_map_ops->mapping_error. Calgary, AMD IOMMU, and nommu use zero for an error dma address. This adds DMA_ERROR_CODE and converts them to use it (as SPARC and POWER does). Signed-off-by: FUJITA Tomonori Acked-by: Jesse Barnes Cc: muli@il.ibm.com Cc: joerg.roedel@amd.com LKML-Reference: <1258287594-8777-3-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/dma-mapping.h | 5 +++-- arch/x86/kernel/amd_iommu.c | 21 ++++++++++----------- arch/x86/kernel/pci-calgary_64.c | 22 ++++++++++------------ arch/x86/kernel/pci-dma.c | 3 --- arch/x86/kernel/pci-nommu.c | 2 +- 5 files changed, 24 insertions(+), 29 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 6a25d5d42836..0f6c02f3b7d4 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -20,7 +20,8 @@ # define ISA_DMA_BIT_MASK DMA_BIT_MASK(32) #endif -extern dma_addr_t bad_dma_address; +#define DMA_ERROR_CODE 0 + extern int iommu_merge; extern struct device x86_dma_fallback_dev; extern int panic_on_overflow; @@ -48,7 +49,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) if (ops->mapping_error) return ops->mapping_error(dev, dma_addr); - return (dma_addr == bad_dma_address); + return (dma_addr == DMA_ERROR_CODE); } #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 66237fde758f..093bd526c949 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -928,7 +928,7 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, } if (unlikely(address == -1)) - address = bad_dma_address; + address = DMA_ERROR_CODE; WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); @@ -1544,7 +1544,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, pte = dma_ops_get_pte(dom, address); if (!pte) - return bad_dma_address; + return DMA_ERROR_CODE; __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; @@ -1625,7 +1625,7 @@ static dma_addr_t __map_single(struct device *dev, retry: address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, dma_mask); - if (unlikely(address == bad_dma_address)) { + if (unlikely(address == DMA_ERROR_CODE)) { /* * setting next_address here will let the address * allocator only scan the new allocated range in the @@ -1646,7 +1646,7 @@ retry: start = address; for (i = 0; i < pages; ++i) { ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir); - if (ret == bad_dma_address) + if (ret == DMA_ERROR_CODE) goto out_unmap; paddr += PAGE_SIZE; @@ -1674,7 +1674,7 @@ out_unmap: dma_ops_free_addresses(dma_dom, address, pages); - return bad_dma_address; + return DMA_ERROR_CODE; } /* @@ -1690,7 +1690,7 @@ static void __unmap_single(struct amd_iommu *iommu, dma_addr_t i, start; unsigned int pages; - if ((dma_addr == bad_dma_address) || + if ((dma_addr == DMA_ERROR_CODE) || (dma_addr + size > dma_dom->aperture_size)) return; @@ -1732,7 +1732,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page, INC_STATS_COUNTER(cnt_map_single); if (!check_device(dev)) - return bad_dma_address; + return DMA_ERROR_CODE; dma_mask = *dev->dma_mask; @@ -1743,12 +1743,12 @@ static dma_addr_t map_page(struct device *dev, struct page *page, return (dma_addr_t)paddr; if (!dma_ops_domain(domain)) - return bad_dma_address; + return DMA_ERROR_CODE; spin_lock_irqsave(&domain->lock, flags); addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, dma_mask); - if (addr == bad_dma_address) + if (addr == DMA_ERROR_CODE) goto out; iommu_completion_wait(iommu); @@ -1957,7 +1957,7 @@ static void *alloc_coherent(struct device *dev, size_t size, *dma_addr = __map_single(dev, iommu, domain->priv, paddr, size, DMA_BIDIRECTIONAL, true, dma_mask); - if (*dma_addr == bad_dma_address) { + if (*dma_addr == DMA_ERROR_CODE) { spin_unlock_irqrestore(&domain->lock, flags); goto out_free; } @@ -2110,7 +2110,6 @@ int __init amd_iommu_init_dma_ops(void) prealloc_protection_domains(); iommu_detected = 1; - bad_dma_address = 0; swiotlb = 0; #ifdef CONFIG_GART_IOMMU gart_iommu_aperture_disabled = 1; diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index c84ad037f586..af9f436096a2 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -245,7 +245,7 @@ static unsigned long iommu_range_alloc(struct device *dev, if (panic_on_overflow) panic("Calgary: fix the allocator.\n"); else - return bad_dma_address; + return DMA_ERROR_CODE; } } @@ -261,11 +261,11 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, void *vaddr, unsigned int npages, int direction) { unsigned long entry; - dma_addr_t ret = bad_dma_address; + dma_addr_t ret = DMA_ERROR_CODE; entry = iommu_range_alloc(dev, tbl, npages); - if (unlikely(entry == bad_dma_address)) + if (unlikely(entry == DMA_ERROR_CODE)) goto error; /* set the return dma address */ @@ -280,7 +280,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, error: printk(KERN_WARNING "Calgary: failed to allocate %u pages in " "iommu %p\n", npages, tbl); - return bad_dma_address; + return DMA_ERROR_CODE; } static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, @@ -291,8 +291,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, unsigned long flags; /* were we called with bad_dma_address? */ - badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE); - if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) { + badend = DMA_ERROR_CODE + (EMERGENCY_PAGES * PAGE_SIZE); + if (unlikely((dma_addr >= DMA_ERROR_CODE) && (dma_addr < badend))) { WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA " "address 0x%Lx\n", dma_addr); return; @@ -374,7 +374,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE); entry = iommu_range_alloc(dev, tbl, npages); - if (entry == bad_dma_address) { + if (entry == DMA_ERROR_CODE) { /* makes sure unmap knows to stop */ s->dma_length = 0; goto error; @@ -392,7 +392,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, error: calgary_unmap_sg(dev, sg, nelems, dir, NULL); for_each_sg(sg, s, nelems, i) { - sg->dma_address = bad_dma_address; + sg->dma_address = DMA_ERROR_CODE; sg->dma_length = 0; } return 0; @@ -447,7 +447,7 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, /* set up tces to cover the allocated range */ mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); - if (mapping == bad_dma_address) + if (mapping == DMA_ERROR_CODE) goto free; *dma_handle = mapping; return ret; @@ -728,7 +728,7 @@ static void __init calgary_reserve_regions(struct pci_dev *dev) struct iommu_table *tbl = pci_iommu(dev->bus); /* reserve EMERGENCY_PAGES from bad_dma_address and up */ - iommu_range_reserve(tbl, bad_dma_address, EMERGENCY_PAGES); + iommu_range_reserve(tbl, DMA_ERROR_CODE, EMERGENCY_PAGES); /* avoid the BIOS/VGA first 640KB-1MB region */ /* for CalIOC2 - avoid the entire first MB */ @@ -1359,8 +1359,6 @@ static int __init calgary_iommu_init(void) return ret; } - bad_dma_address = 0x0; - return 0; } diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index bf621b9ee26e..afcc58b69c7c 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -43,9 +43,6 @@ int iommu_detected __read_mostly = 0; */ int iommu_pass_through __read_mostly; -dma_addr_t bad_dma_address __read_mostly = 0; -EXPORT_SYMBOL(bad_dma_address); - /* Dummy device used for NULL arguments (normally ISA). */ struct device x86_dma_fallback_dev = { .init_name = "fallback device", diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 875e3822ae61..22be12b60a8f 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -33,7 +33,7 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page, dma_addr_t bus = page_to_phys(page) + offset; WARN_ON(size == 0); if (!check_addr("map_single", dev, bus, size)) - return bad_dma_address; + return DMA_ERROR_CODE; flush_write_buffers(); return bus; } -- cgit v1.2.3 From 1f7564ca831a00b21bb493ef174c845b2ba9e64d Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sun, 15 Nov 2009 21:19:54 +0900 Subject: x86: Calgary: Remove unnecessary DMA_ERROR_CODE usage This cleans up iommu_alloc() a bit and removes unnecessary DMA_ERROR_CODE usage. Signed-off-by: FUJITA Tomonori Acked-by: Jesse Barnes Cc: muli@il.ibm.com Cc: joerg.roedel@amd.com LKML-Reference: <1258287594-8777-4-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-calgary_64.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index af9f436096a2..849a0995d970 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -261,12 +261,15 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, void *vaddr, unsigned int npages, int direction) { unsigned long entry; - dma_addr_t ret = DMA_ERROR_CODE; + dma_addr_t ret; entry = iommu_range_alloc(dev, tbl, npages); - if (unlikely(entry == DMA_ERROR_CODE)) - goto error; + if (unlikely(entry == DMA_ERROR_CODE)) { + printk(KERN_WARNING "Calgary: failed to allocate %u pages in " + "iommu %p\n", npages, tbl); + return DMA_ERROR_CODE; + } /* set the return dma address */ ret = (entry << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK); @@ -274,13 +277,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, /* put the TCEs in the HW table */ tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK, direction); - return ret; - -error: - printk(KERN_WARNING "Calgary: failed to allocate %u pages in " - "iommu %p\n", npages, tbl); - return DMA_ERROR_CODE; } static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, -- cgit v1.2.3 From 123bf0e2eddcda36a33bdfc87aa1fb07229f07b5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 15 Nov 2009 21:19:52 +0900 Subject: x86: gart: Clean up the code a bit Clean up various small stylistic details in the GART code. No functionality changed. Cc: FUJITA Tomonori Cc: Jesse Barnes Cc: muli@il.ibm.com Cc: joerg.roedel@amd.com LKML-Reference: <1258287594-8777-2-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 116 ++++++++++++++++++++++-------------------- 1 file changed, 61 insertions(+), 55 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 61c4d1e41a6b..e6a0d402f171 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -95,7 +95,7 @@ static unsigned long alloc_iommu(struct device *dev, int size, base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), PAGE_SIZE) >> PAGE_SHIFT; - boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, + boundary_size = ALIGN((u64)dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; spin_lock_irqsave(&iommu_bitmap_lock, flags); @@ -297,7 +297,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, int i; #ifdef CONFIG_IOMMU_DEBUG - printk(KERN_DEBUG "dma_map_sg overflow\n"); + pr_debug("dma_map_sg overflow\n"); #endif for_each_sg(sg, s, nents, i) { @@ -392,12 +392,14 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, if (!dev) dev = &x86_dma_fallback_dev; - out = 0; - start = 0; - start_sg = sgmap = sg; - seg_size = 0; - max_seg_size = dma_get_max_seg_size(dev); - ps = NULL; /* shut up gcc */ + out = 0; + start = 0; + start_sg = sg; + sgmap = sg; + seg_size = 0; + max_seg_size = dma_get_max_seg_size(dev); + ps = NULL; /* shut up gcc */ + for_each_sg(sg, s, nents, i) { dma_addr_t addr = sg_phys(s); @@ -420,11 +422,12 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, sgmap, pages, need) < 0) goto error; out++; - seg_size = 0; - sgmap = sg_next(sgmap); - pages = 0; - start = i; - start_sg = s; + + seg_size = 0; + sgmap = sg_next(sgmap); + pages = 0; + start = i; + start_sg = s; } } @@ -523,7 +526,7 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) iommu_size -= round_up(a, PMD_PAGE_SIZE) - a; if (iommu_size < 64*1024*1024) { - printk(KERN_WARNING + pr_warning( "PCI-DMA: Warning: Small IOMMU %luMB." " Consider increasing the AGP aperture in BIOS\n", iommu_size >> 20); @@ -578,28 +581,32 @@ void set_up_gart_resume(u32 aper_order, u32 aper_alloc) aperture_alloc = aper_alloc; } -static int gart_resume(struct sys_device *dev) +static void gart_fixup_northbridges(struct sys_device *dev) { - printk(KERN_INFO "PCI-DMA: Resuming GART IOMMU\n"); + int i; - if (fix_up_north_bridges) { - int i; + if (!fix_up_north_bridges) + return; - printk(KERN_INFO "PCI-DMA: Restoring GART aperture settings\n"); + pr_info("PCI-DMA: Restoring GART aperture settings\n"); - for (i = 0; i < num_k8_northbridges; i++) { - struct pci_dev *dev = k8_northbridges[i]; + for (i = 0; i < num_k8_northbridges; i++) { + struct pci_dev *dev = k8_northbridges[i]; - /* - * Don't enable translations just yet. That is the next - * step. Restore the pre-suspend aperture settings. - */ - pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, - aperture_order << 1); - pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, - aperture_alloc >> 25); - } + /* + * Don't enable translations just yet. That is the next + * step. Restore the pre-suspend aperture settings. + */ + pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, aperture_order << 1); + pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25); } +} + +static int gart_resume(struct sys_device *dev) +{ + pr_info("PCI-DMA: Resuming GART IOMMU\n"); + + gart_fixup_northbridges(dev); enable_gart_translations(); @@ -612,15 +619,14 @@ static int gart_suspend(struct sys_device *dev, pm_message_t state) } static struct sysdev_class gart_sysdev_class = { - .name = "gart", - .suspend = gart_suspend, - .resume = gart_resume, + .name = "gart", + .suspend = gart_suspend, + .resume = gart_resume, }; static struct sys_device device_gart = { - .id = 0, - .cls = &gart_sysdev_class, + .cls = &gart_sysdev_class, }; /* @@ -635,7 +641,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info) void *gatt; int i, error; - printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); + pr_info("PCI-DMA: Disabling AGP.\n"); + aper_size = aper_base = info->aper_size = 0; dev = NULL; for (i = 0; i < num_k8_northbridges; i++) { @@ -653,6 +660,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) } if (!aper_base) goto nommu; + info->aper_base = aper_base; info->aper_size = aper_size >> 20; @@ -675,14 +683,14 @@ static __init int init_k8_gatt(struct agp_kern_info *info) flush_gart(); - printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", + pr_info("PCI-DMA: aperture base @ %x size %u KB\n", aper_base, aper_size>>10); return 0; nommu: /* Should not happen anymore */ - printk(KERN_WARNING "PCI-DMA: More than 4GB of RAM and no IOMMU\n" + pr_warning("PCI-DMA: More than 4GB of RAM and no IOMMU\n" "falling back to iommu=soft.\n"); return -1; } @@ -744,23 +752,23 @@ int __init gart_iommu_init(void) !gart_iommu_aperture || (no_agp && init_k8_gatt(&info) < 0)) { if (max_pfn > MAX_DMA32_PFN) { - printk(KERN_WARNING "More than 4GB of memory " - "but GART IOMMU not available.\n"); - printk(KERN_WARNING "falling back to iommu=soft.\n"); + pr_warning("More than 4GB of memory but GART IOMMU not available.\n"); + pr_warning("falling back to iommu=soft.\n"); } return 0; } /* need to map that range */ - aper_size = info.aper_size << 20; - aper_base = info.aper_base; - end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); + aper_size = info.aper_size << 20; + aper_base = info.aper_base; + end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); + if (end_pfn > max_low_pfn_mapped) { start_pfn = (aper_base>>PAGE_SHIFT); init_memory_mapping(start_pfn<> PAGE_SHIFT; @@ -775,8 +783,7 @@ int __init gart_iommu_init(void) ret = dma_debug_resize_entries(iommu_pages); if (ret) - printk(KERN_DEBUG - "PCI-DMA: Cannot trace all the entries\n"); + pr_debug("PCI-DMA: Cannot trace all the entries\n"); } #endif @@ -786,15 +793,14 @@ int __init gart_iommu_init(void) */ iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES); - agp_memory_reserved = iommu_size; - printk(KERN_INFO - "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", + pr_info("PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", iommu_size >> 20); - iommu_start = aper_size - iommu_size; - iommu_bus_base = info.aper_base + iommu_start; - bad_dma_addr = iommu_bus_base; - iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); + agp_memory_reserved = iommu_size; + iommu_start = aper_size - iommu_size; + iommu_bus_base = info.aper_base + iommu_start; + bad_dma_addr = iommu_bus_base; + iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); /* * Unmap the IOMMU part of the GART. The alias of the page is @@ -816,7 +822,7 @@ int __init gart_iommu_init(void) * the pages as Not-Present: */ wbinvd(); - + /* * Now all caches are flushed and we can safely enable * GART hardware. Doing it early leaves the possibility -- cgit v1.2.3 From 8cc2361bd00e87aab2827a3996a71fe9b2c9f9c4 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Tue, 17 Nov 2009 08:06:38 +0100 Subject: x86: ucode-amd: Move family check to microcde_amd.c's init function ... to avoid useless trial to load firmware on systems with unsupported AMD CPUs. Signed-off-by: Andreas Herrmann Cc: Dmitry Adamushko Cc: Mike Travis Cc: Tigran Aivazian Cc: Borislav Petkov Cc: Andreas Mohr Cc: Jack Steiner LKML-Reference: <20091117070638.GA27691@alberich.amd.com> [ v2: changed BUG_ON() to WARN_ON() ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 26e33bd8485b..63123d902103 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -34,6 +34,7 @@ MODULE_LICENSE("GPL v2"); #define UCODE_UCODE_TYPE 0x00000001 const struct firmware *firmware; +static int supported_cpu; struct equiv_cpu_entry { u32 installed_cpu; @@ -73,15 +74,12 @@ static struct equiv_cpu_entry *equiv_cpu_table; static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) { - struct cpuinfo_x86 *c = &cpu_data(cpu); u32 dummy; - memset(csig, 0, sizeof(*csig)); - if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { - pr_warning("microcode: CPU%d: AMD CPU family 0x%x not " - "supported\n", cpu, c->x86); + if (!supported_cpu) return -1; - } + + memset(csig, 0, sizeof(*csig)); rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); pr_info("microcode: CPU%d: patch_level=0x%x\n", cpu, csig->rev); return 0; @@ -331,6 +329,17 @@ static void microcode_fini_cpu_amd(int cpu) void init_microcode_amd(struct device *device) { const char *fw_name = "amd-ucode/microcode_amd.bin"; + struct cpuinfo_x86 *c = &boot_cpu_data; + + WARN_ON(c->x86_vendor != X86_VENDOR_AMD); + + if (c->x86 < 0x10) { + pr_warning("microcode: AMD CPU family 0x%x not supported\n", + c->x86); + return; + } + supported_cpu = 1; + if (request_firmware(&firmware, fw_name, device)) pr_err("microcode: failed to load file %s\n", fw_name); } -- cgit v1.2.3 From 0696b711e4be45fa104c12329f617beb29c03f78 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Tue, 17 Nov 2009 13:49:50 +0800 Subject: timekeeping: Fix clock_gettime vsyscall time warp Since commit 0a544198 "timekeeping: Move NTP adjusted clock multiplier to struct timekeeper" the clock multiplier of vsyscall is updated with the unmodified clock multiplier of the clock source and not with the NTP adjusted multiplier of the timekeeper. This causes user space observerable time warps: new CLOCK-warp maximum: 120 nsecs, 00000025c337c537 -> 00000025c337c4bf Add a new argument "mult" to update_vsyscall() and hand in the timekeeping internal NTP adjusted multiplier. Signed-off-by: Lin Ming Cc: "Zhang Yanmin" Cc: Martin Schwidefsky Cc: Benjamin Herrenschmidt Cc: Tony Luck LKML-Reference: <1258436990.17765.83.camel@minggr.sh.intel.com> Signed-off-by: Thomas Gleixner --- arch/ia64/kernel/time.c | 4 ++-- arch/powerpc/kernel/time.c | 5 +++-- arch/s390/kernel/time.c | 3 ++- arch/x86/kernel/vsyscall_64.c | 5 +++-- include/linux/clocksource.h | 6 ++++-- kernel/time/timekeeping.c | 6 +++--- 6 files changed, 17 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 4990495d7531..a35c661e5e89 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -473,7 +473,7 @@ void update_vsyscall_tz(void) { } -void update_vsyscall(struct timespec *wall, struct clocksource *c) +void update_vsyscall(struct timespec *wall, struct clocksource *c, u32 mult) { unsigned long flags; @@ -481,7 +481,7 @@ void update_vsyscall(struct timespec *wall, struct clocksource *c) /* copy fsyscall clock data */ fsyscall_gtod_data.clk_mask = c->mask; - fsyscall_gtod_data.clk_mult = c->mult; + fsyscall_gtod_data.clk_mult = mult; fsyscall_gtod_data.clk_shift = c->shift; fsyscall_gtod_data.clk_fsys_mmio = c->fsys_mmio; fsyscall_gtod_data.clk_cycle_last = c->cycle_last; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index a136a11c490d..39713312fbc7 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -828,7 +828,8 @@ static cycle_t timebase_read(struct clocksource *cs) return (cycle_t)get_tb(); } -void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) +void update_vsyscall(struct timespec *wall_time, struct clocksource *clock, + u32 mult) { u64 t2x, stamp_xsec; @@ -841,7 +842,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) /* XXX this assumes clock->shift == 22 */ /* 4611686018 ~= 2^(20+64-22) / 1e9 */ - t2x = (u64) clock->mult * 4611686018ULL; + t2x = (u64) mult * 4611686018ULL; stamp_xsec = (u64) xtime.tv_nsec * XSEC_PER_SEC; do_div(stamp_xsec, 1000000000); stamp_xsec += (u64) xtime.tv_sec * XSEC_PER_SEC; diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 34162a0b2caa..68e1ecf5ebab 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -214,7 +214,8 @@ struct clocksource * __init clocksource_default_clock(void) return &clocksource_tod; } -void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) +void update_vsyscall(struct timespec *wall_time, struct clocksource *clock, + u32 mult) { if (clock != &clocksource_tod) return; diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 8cb4974ff599..62f39d79b775 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -73,7 +73,8 @@ void update_vsyscall_tz(void) write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); } -void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) +void update_vsyscall(struct timespec *wall_time, struct clocksource *clock, + u32 mult) { unsigned long flags; @@ -82,7 +83,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) vsyscall_gtod_data.clock.vread = clock->vread; vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; vsyscall_gtod_data.clock.mask = clock->mask; - vsyscall_gtod_data.clock.mult = clock->mult; + vsyscall_gtod_data.clock.mult = mult; vsyscall_gtod_data.clock.shift = clock->shift; vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 83d2fbd81b93..95e4995d9987 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -280,10 +280,12 @@ extern struct clocksource * __init __weak clocksource_default_clock(void); extern void clocksource_mark_unstable(struct clocksource *cs); #ifdef CONFIG_GENERIC_TIME_VSYSCALL -extern void update_vsyscall(struct timespec *ts, struct clocksource *c); +extern void +update_vsyscall(struct timespec *ts, struct clocksource *c, u32 mult); extern void update_vsyscall_tz(void); #else -static inline void update_vsyscall(struct timespec *ts, struct clocksource *c) +static inline void +update_vsyscall(struct timespec *ts, struct clocksource *c, u32 mult) { } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index c3a4e2907eaa..2a6d3e3e2c3e 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -177,7 +177,7 @@ void timekeeping_leap_insert(int leapsecond) { xtime.tv_sec += leapsecond; wall_to_monotonic.tv_sec -= leapsecond; - update_vsyscall(&xtime, timekeeper.clock); + update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); } #ifdef CONFIG_GENERIC_TIME @@ -337,7 +337,7 @@ int do_settimeofday(struct timespec *tv) timekeeper.ntp_error = 0; ntp_clear(); - update_vsyscall(&xtime, timekeeper.clock); + update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); write_sequnlock_irqrestore(&xtime_lock, flags); @@ -811,7 +811,7 @@ void update_wall_time(void) update_xtime_cache(nsecs); /* check to see if there is a new clocksource to use */ - update_vsyscall(&xtime, timekeeper.clock); + update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); } /** -- cgit v1.2.3 From f7f3cad06080f14f60b1453af94463ff68ea2739 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Sat, 24 Oct 2009 17:25:38 +0200 Subject: [CPUFREQ] longhaul: select Longhaul version 2 for capable CPUs There is a typo in the longhaul detection code so only Longhaul v1 or Longhaul v3 is selected. The Longhaul v2 is not selected even for CPUs which are capable of. Tested on PCChips Giga Pro board. Frequency changes work and the Longhaul v2 detects that the board is not capable of changing CPU voltage. Signed-off-by: Krzysztof Helt Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/longhaul.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index ce2ed3e4aad9..cabd2fa3fc93 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -813,7 +813,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) memcpy(eblcr, samuel2_eblcr, sizeof(samuel2_eblcr)); break; case 1 ... 15: - longhaul_version = TYPE_LONGHAUL_V1; + longhaul_version = TYPE_LONGHAUL_V2; if (c->x86_mask < 8) { cpu_model = CPU_SAMUEL2; cpuname = "C3 'Samuel 2' [C5B]"; -- cgit v1.2.3 From c53614ec17fe6296a696aa4ac71a799814bb50c1 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Tue, 6 Oct 2009 17:36:53 +0200 Subject: [CPUFREQ] powernow-k8: Fix test in get_transition_latency() Not makes it a bool before the comparison. Signed-off-by: Roel Kluin Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 6394aa5c7985..3f12dabeab52 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1022,7 +1022,7 @@ static int get_transition_latency(struct powernow_k8_data *data) * set it to 1 to avoid problems in the future. * For all others it's a BIOS bug. */ - if (!boot_cpu_data.x86 == 0x11) + if (boot_cpu_data.x86 != 0x11) printk(KERN_ERR FW_WARN PFX "Invalid zero transition " "latency\n"); max_latency = 1; -- cgit v1.2.3 From 293afe44d75abce4252db76cbb303a7de4297ce1 Mon Sep 17 00:00:00 2001 From: John Villalovos Date: Fri, 25 Sep 2009 13:30:08 -0400 Subject: [CPUFREQ] acpi-cpufreq: blacklist Intel 0f68: Fix HT detection and put in notification message Removing the SMT/HT check, since the Errata doesn't mention Hyper-Threading. Adding in a printk, so that the user knows why acpi-cpufreq refuses to load. Also, once system is blacklisted, don't repeat checks to see if blacklisted. This also causes the message to only be printed once, rather than for each CPU. Signed-off-by: John L. Villalovos Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 7d5c3b0ea8da..8b581d3905cb 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -526,15 +526,21 @@ static const struct dmi_system_id sw_any_bug_dmi_table[] = { static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) { - /* http://www.intel.com/Assets/PDF/specupdate/314554.pdf + /* Intel Xeon Processor 7100 Series Specification Update + * http://www.intel.com/Assets/PDF/specupdate/314554.pdf * AL30: A Machine Check Exception (MCE) Occurring during an * Enhanced Intel SpeedStep Technology Ratio Change May Cause - * Both Processor Cores to Lock Up when HT is enabled*/ + * Both Processor Cores to Lock Up. */ if (c->x86_vendor == X86_VENDOR_INTEL) { if ((c->x86 == 15) && (c->x86_model == 6) && - (c->x86_mask == 8) && smt_capable()) + (c->x86_mask == 8)) { + printk(KERN_INFO "acpi-cpufreq: Intel(R) " + "Xeon(R) 7100 Errata AL30, processors may " + "lock up on frequency changes: disabling " + "acpi-cpufreq.\n"); return -ENODEV; + } } return 0; } @@ -549,13 +555,18 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) unsigned int result = 0; struct cpuinfo_x86 *c = &cpu_data(policy->cpu); struct acpi_processor_performance *perf; +#ifdef CONFIG_SMP + static int blacklisted; +#endif dprintk("acpi_cpufreq_cpu_init\n"); #ifdef CONFIG_SMP - result = acpi_cpufreq_blacklist(c); - if (result) - return result; + if (blacklisted) + return blacklisted; + blacklisted = acpi_cpufreq_blacklist(c); + if (blacklisted) + return blacklisted; #endif data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL); -- cgit v1.2.3 From 8dca15e40889e5d5e9655b03ba79c26200f760ce Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 2 Nov 2009 23:35:30 -0800 Subject: [CPUFREQ] speedstep-ich: fix error caused by 394122ab144dae4b276d74644a2f11c44a60ac5c "[CPUFREQ] cpumask: avoid playing with cpus_allowed in speedstep-ich.c" changed the code to mistakenly pass the current cpu as the "processor" argument of speedstep_get_frequency(), whereas it should be the type of the processor. Addresses http://bugzilla.kernel.org/show_bug.cgi?id=14340 Based on a patch by Dave Mueller. Signed-off-by: Rusty Russell Acked-by: Dominik Brodowski Reported-by: Dave Mueller Cc: Signed-off-by: Andrew Morton Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 6911e91fb4f6..3ae5a7a3a500 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -232,28 +232,23 @@ static unsigned int speedstep_detect_chipset(void) return 0; } -struct get_freq_data { - unsigned int speed; - unsigned int processor; -}; - -static void get_freq_data(void *_data) +static void get_freq_data(void *_speed) { - struct get_freq_data *data = _data; + unsigned int *speed = _speed; - data->speed = speedstep_get_frequency(data->processor); + *speed = speedstep_get_frequency(speedstep_processor); } static unsigned int speedstep_get(unsigned int cpu) { - struct get_freq_data data = { .processor = cpu }; + unsigned int speed; /* You're supposed to ensure CPU is online. */ - if (smp_call_function_single(cpu, get_freq_data, &data, 1) != 0) + if (smp_call_function_single(cpu, get_freq_data, &speed, 1) != 0) BUG(); - dprintk("detected %u kHz as current frequency\n", data.speed); - return data.speed; + dprintk("detected %u kHz as current frequency\n", speed); + return speed; } /** -- cgit v1.2.3 From 6dbfe5a57db3564adf7b2a65068e40f1b4a0d2db Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 17 Nov 2009 18:27:18 +0100 Subject: x86: Fixup last users of irq_chip->typename The typename member of struct irq_chip was kept for migration purposes and is obsolete since more than 2 years. Fix up the leftovers. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/visws_quirks.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index f068553a1b17..f084dfd97fcb 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -486,7 +486,7 @@ static void end_cobalt_irq(unsigned int irq) } static struct irq_chip cobalt_irq_type = { - .typename = "Cobalt-APIC", + .name = "Cobalt-APIC", .startup = startup_cobalt_irq, .shutdown = disable_cobalt_irq, .enable = enable_cobalt_irq, @@ -523,7 +523,7 @@ static void end_piix4_master_irq(unsigned int irq) } static struct irq_chip piix4_master_irq_type = { - .typename = "PIIX4-master", + .name = "PIIX4-master", .startup = startup_piix4_master_irq, .ack = ack_cobalt_irq, .end = end_piix4_master_irq, @@ -531,7 +531,7 @@ static struct irq_chip piix4_master_irq_type = { static struct irq_chip piix4_virtual_irq_type = { - .typename = "PIIX4-virtual", + .name = "PIIX4-virtual", .shutdown = disable_8259A_irq, .enable = enable_8259A_irq, .disable = disable_8259A_irq, -- cgit v1.2.3 From 070e5c3f9989a72076e83fdd5ede3f0f3eb17264 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 Nov 2009 12:27:47 +0100 Subject: x86: vmiclock: Fix printk format clockevents.mult became u32. Fix the printk format. Pointed-out-by: Randy Dunlap Signed-off-by: Thomas Gleixner --- arch/x86/kernel/vmiclock_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index 611b9e2360d3..74c92bb194df 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -226,7 +226,7 @@ static void __devinit vmi_time_init_clockevent(void) evt->min_delta_ns = clockevent_delta2ns(1, evt); evt->cpumask = cpumask_of(cpu); - printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n", + printk(KERN_WARNING "vmi: registering clock event %s. mult=%u shift=%u\n", evt->name, evt->mult, evt->shift); clockevents_register_device(evt); } -- cgit v1.2.3 From ce64c62074d945fe5f8a7f01bdc30125f994ea67 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 16 Nov 2009 18:06:31 -0500 Subject: x86: Instruction decoder test should generate build warning Since some instructions are not decoded correctly by older versions of objdump, it may cause false positive error in insn decoder posttest. This changes build error of insn decoder test to build warning. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Stephen Rothwell Cc: Randy Dunlap Cc: Jim Keniston Cc: Stephen Rothwell LKML-Reference: <20091116230631.5250.41579.stgit@harusame> Signed-off-by: Ingo Molnar --- arch/x86/tools/test_get_len.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c index af75e07217ba..d8214dc03fa7 100644 --- a/arch/x86/tools/test_get_len.c +++ b/arch/x86/tools/test_get_len.c @@ -114,6 +114,7 @@ int main(int argc, char **argv) unsigned char insn_buf[16]; struct insn insn; int insns = 0, c; + int warnings = 0; parse_args(argc, argv); @@ -151,18 +152,22 @@ int main(int argc, char **argv) insn_init(&insn, insn_buf, x86_64); insn_get_length(&insn); if (insn.length != nb) { - fprintf(stderr, "Error: %s found a difference at %s\n", + warnings++; + fprintf(stderr, "Warning: %s found difference at %s\n", prog, sym); - fprintf(stderr, "Error: %s", line); - fprintf(stderr, "Error: objdump says %d bytes, but " + fprintf(stderr, "Warning: %s", line); + fprintf(stderr, "Warning: objdump says %d bytes, but " "insn_get_length() says %d\n", nb, insn.length); if (verbose) dump_insn(stderr, &insn); - exit(2); } } - fprintf(stderr, "Succeed: decoded and checked %d instructions\n", - insns); + if (warnings) + fprintf(stderr, "Warning: decoded and checked %d" + " instructions with %d warnings\n", insns, warnings); + else + fprintf(stderr, "Succeed: decoded and checked %d" + " instructions\n", insns); return 0; } -- cgit v1.2.3 From 746357d6a526d6da9d89a2ec645b28406e959c2e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 Nov 2009 12:01:43 +0100 Subject: x86: Prevent GCC 4.4.x (pentium-mmx et al) function prologue wreckage When the kernel is compiled with -pg for tracing GCC 4.4.x inserts stack alignment of a function _before_ the mcount prologue if the -march=pentium-mmx is set and -mtune=generic is not set. This breaks the assumption of the function graph tracer which expects that the mcount prologue push %ebp mov %esp, %ebp is the first stack operation in a function because it needs to modify the function return address on the stack to trap into the tracer before returning to the real caller. The generated code is: push %edi lea 0x8(%esp),%edi and $0xfffffff0,%esp pushl -0x4(%edi) push %ebp mov %esp,%ebp so the tracer modifies the copy of the return address which is stored after the stack alignment and therefor does not trap the return which in turn breaks the call chain logic of the tracer and leads to a kernel panic. Aside of the fact that the generated code is horrible for no good reason other -march -mtune options generate the expected: push %ebp mov %esp,%ebp and $0xfffffff0,%esp which does the same and keeps everything intact. After some experimenting we found out that this problem is restricted to gcc4.4.x and to the following -march settings: i586, pentium, pentium-mmx, k6, k6-2, k6-3, winchip-c6, winchip2, c3, geode By adding -mtune=generic the code generator produces always the expected code. So forcing -mtune=generic when CONFIG_FUNCTION_GRAPH_TRACER=y is not pretty, but at the moment the only way to prevent that the kernel trips over gcc-shrooms induced code madness. Most distro kernels have CONFIG_X86_GENERIC=y anyway which forces -mtune=generic as well so it will not impact those. References: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=42109 http://lkml.org/lkml/2009/11/19/17 Signed-off-by: Thomas Gleixner LKML-Reference: Cc: Linus Torvalds Cc: Andrew Morton Cc: Ingo Molnar Cc: Peter Zijlstra Cc: H. Peter Anvin Cc: Steven Rostedt Cc: Frederic Weisbecker , Cc: Jeff Law Cc: gcc@gcc.gnu.org Cc: David Daney Cc: Andrew Haley Cc: Richard Guenther Cc: stable@kernel.org --- arch/x86/Makefile_32.cpu | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index 30e9a264f69d..df7fdf811997 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu @@ -46,6 +46,12 @@ cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx # cpu entries cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686)) +# Work around the pentium-mmx code generator madness of gcc4.4.x which +# does stack alignment by generating horrible code _before_ the mcount +# prologue (push %ebp, mov %esp, %ebp) which breaks the function graph +# tracer assumptions +cflags-$(CONFIG_FUNCTION_GRAPH_TRACER) += $(call cc-option,-mtune=generic) + # Bug fix for binutils: this option is required in order to keep # binutils from generating NOPL instructions against our will. ifneq ($(CONFIG_X86_P6_NOP),y) -- cgit v1.2.3 From 80509e27e40d7554e576405ed9f5b7966c567112 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 20 Nov 2009 12:13:08 -0500 Subject: x86: Fix insn decoder test typos Fix postest_verbose to posttest_verbose, and add posttest_64bit option for CONFIG_64BIT != y, since old command just passed '-' instead of '-n' when CONFIG_64BIT is not set. Signed-off-by: Masami Hiramatsu Cc: Ingo Molnar Cc: Stephen Rothwell Cc: Randy Dunlap Cc: Jim Keniston LKML-Reference: <20091120171307.6715.66099.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: H. Peter Anvin --- arch/x86/tools/Makefile | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile index 4688f90ce5a2..c80b0792cd83 100644 --- a/arch/x86/tools/Makefile +++ b/arch/x86/tools/Makefile @@ -1,13 +1,19 @@ PHONY += posttest ifeq ($(KBUILD_VERBOSE),1) - postest_verbose = -v + posttest_verbose = -v else - postest_verbose = + posttest_verbose = +endif + +ifeq ($(CONFIG_64BIT),y) + posttest_64bit = -y +else + posttest_64bit = -n endif quiet_cmd_posttest = TEST $@ - cmd_posttest = $(OBJDUMP) -d -j .text $(objtree)/vmlinux | awk -f $(srctree)/arch/x86/tools/distill.awk | $(obj)/test_get_len -$(CONFIG_64BIT) $(posttest_verbose) + cmd_posttest = $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(srctree)/arch/x86/tools/distill.awk | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose) posttest: $(obj)/test_get_len vmlinux $(call cmd,posttest) -- cgit v1.2.3 From 6f5f67267dc4faecd9cba63894de92ca92a608b8 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 20 Nov 2009 12:13:14 -0500 Subject: x86: insn decoder test checks objdump version Check objdump version before using it for insn decoder build test, because some older objdump can't decode AVX code correctly. Signed-off-by: Masami Hiramatsu Cc: Ingo Molnar Cc: Stephen Rothwell Cc: Randy Dunlap Cc: Jim Keniston LKML-Reference: <20091120171314.6715.30390.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: H. Peter Anvin --- arch/x86/tools/Makefile | 5 ++++- arch/x86/tools/chkobjdump.awk | 23 +++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 arch/x86/tools/chkobjdump.awk (limited to 'arch/x86') diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile index c80b0792cd83..f82082677337 100644 --- a/arch/x86/tools/Makefile +++ b/arch/x86/tools/Makefile @@ -12,8 +12,11 @@ else posttest_64bit = -n endif +distill_awk = $(srctree)/arch/x86/tools/distill.awk +chkobjdump = $(srctree)/arch/x86/tools/chkobjdump.awk + quiet_cmd_posttest = TEST $@ - cmd_posttest = $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(srctree)/arch/x86/tools/distill.awk | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose) + cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose) posttest: $(obj)/test_get_len vmlinux $(call cmd,posttest) diff --git a/arch/x86/tools/chkobjdump.awk b/arch/x86/tools/chkobjdump.awk new file mode 100644 index 000000000000..0d13cd9fdcff --- /dev/null +++ b/arch/x86/tools/chkobjdump.awk @@ -0,0 +1,23 @@ +# GNU objdump version checker +# +# Usage: +# objdump -v | awk -f chkobjdump.awk +BEGIN { + # objdump version 2.19 or later is OK for the test. + od_ver = 2; + od_sver = 19; +} + +/^GNU/ { + split($4, ver, "."); + if (ver[1] > od_ver || + (ver[1] == od_ver && ver[2] >= od_sver)) { + exit 1; + } else { + printf("Warning: objdump version %s is older than %d.%d\n", + $4, od_ver, od_sver); + print("Warning: Skipping posttest."); + # Logic is inverted, because we just skip test without error. + exit 0; + } +} -- cgit v1.2.3 From e670761f12f4069d204f433bf547d9c679a4fd05 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 21 Nov 2009 00:23:37 -0800 Subject: x86: apic: Remove not needed #ifdef Suresh made dmar_table_init() already have that protection. Signed-off-by: Yinghai Lu LKML-Reference: <4B07A739.3030104@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 4c689f45b238..ad8c75b9e453 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1377,14 +1377,11 @@ void __init enable_IR_x2apic(void) unsigned long flags; struct IO_APIC_route_entry **ioapic_entries = NULL; int ret, x2apic_enabled = 0; - int dmar_table_init_ret = 0; + int dmar_table_init_ret; -#ifdef CONFIG_INTR_REMAP dmar_table_init_ret = dmar_table_init(); - if (dmar_table_init_ret) - pr_debug("dmar_table_init() failed with %d:\n", - dmar_table_init_ret); -#endif + if (dmar_table_init_ret && !x2apic_supported()) + return; ioapic_entries = alloc_ioapic_entries(); if (!ioapic_entries) { -- cgit v1.2.3 From 37ef2a3029fde884808ff1b369677abc7dd9a79a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 21 Nov 2009 00:23:37 -0800 Subject: x86: Re-get cfg_new in case reuse/move irq_desc When irq_desc is moved, we need to make sure to use the right cfg_new. Signed-off-by: Yinghai Lu LKML-Reference: <4B07A739.3030104@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ff237199fa23..085e60e303cf 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3186,6 +3186,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) continue; desc_new = move_irq_desc(desc_new, node); + cfg_new = desc_new->chip_data; if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) irq = new; -- cgit v1.2.3 From 163d3866cfa79aa5945f1ee5e43fb3ed1455f75c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 21 Nov 2009 00:23:37 -0800 Subject: x86: apic: Print out SRAT table APIC id in hex Make it consistent with APIC MADT print out, for big systems APIC id in hex is more readable. Signed-off-by: Yinghai Lu LKML-Reference: <4B07A739.3030104@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/mm/srat_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index dbb5381f7b3b..9d7ce96e5a5c 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -136,7 +136,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) apicid_to_node[apic_id] = node; node_set(node, cpu_nodes_parsed); acpi_numa = 1; - printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", + printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n", pxm, apic_id, node); } @@ -170,7 +170,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) apicid_to_node[apic_id] = node; node_set(node, cpu_nodes_parsed); acpi_numa = 1; - printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", + printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n", pxm, apic_id, node); } -- cgit v1.2.3 From 6e3d8330ae2c4b2c11a9577a0130d2ecda1c610d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 23 Nov 2009 10:19:20 +0100 Subject: perf events: Do not generate function trace entries in perf code Decreases perf overhead when function tracing is enabled, by about 50%. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 1 + kernel/Makefile | 1 + 2 files changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 68537e957a9b..1d2cb383410e 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -5,6 +5,7 @@ # Don't trace early stages of a secondary CPU boot ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_common.o = -pg +CFLAGS_REMOVE_perf_event.o = -pg endif # Make sure load_percpu_segment has no stackprotector diff --git a/kernel/Makefile b/kernel/Makefile index 17b575ec7d07..6b7ce8173dfd 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -21,6 +21,7 @@ CFLAGS_REMOVE_mutex-debug.o = -pg CFLAGS_REMOVE_rtmutex-debug.o = -pg CFLAGS_REMOVE_cgroup-debug.o = -pg CFLAGS_REMOVE_sched_clock.o = -pg +CFLAGS_REMOVE_perf_event.o = -pg endif obj-$(CONFIG_FREEZER) += freezer.o -- cgit v1.2.3 From 0e7810be30f66e9f430c4ce2cd3b14634211690f Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 20 Nov 2009 14:00:14 +0000 Subject: x86: Suppress stack overrun message for init_task init_task doesn't get its stack end location set to STACK_END_MAGIC, and hence the message is confusing rather than helpful in this case. Signed-off-by: Jan Beulich LKML-Reference: <4B06AEFE02000078000211F4@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index f4cee9028cf0..071eee604147 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -658,7 +658,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, show_fault_oops(regs, error_code, address); stackend = end_of_stack(tsk); - if (*stackend != STACK_END_MAGIC) + if (tsk != &init_task && *stackend != STACK_END_MAGIC) printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); tsk->thread.cr2 = address; -- cgit v1.2.3 From 9f800de38b05d84809e89f16671d636a140eede7 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 23 Nov 2009 12:45:25 +0100 Subject: x86/amd-iommu: un__init iommu_setup_msi This function may be called on the resume path and can not be dropped after booting. Cc: stable@kernel.org Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 0d4581e602a4..72bdbdac9b48 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -926,7 +926,7 @@ static int __init init_iommu_all(struct acpi_table_header *table) * ****************************************************************************/ -static int __init iommu_setup_msi(struct amd_iommu *iommu) +static int iommu_setup_msi(struct amd_iommu *iommu) { int r; -- cgit v1.2.3 From b369e521237d6ef21c453f3ac4f4b8577ec14f87 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 23 Nov 2009 19:54:06 +0800 Subject: crypto: aesni-intel - Use gas macro for AES-NI instructions Old binutils do not support AES-NI instructions, to make kernel can be compiled by them, .byte code is used instead of AES-NI assembly instructions. But the readability and flexibility of raw .byte code is not good. So corresponding assembly instruction like gas macro is used instead. Signed-off-by: Huang Ying Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 517 +++++++++++++------------------------- 1 file changed, 173 insertions(+), 344 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index eb0566e83319..20bb0e1ac681 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -16,6 +16,7 @@ */ #include +#include .text @@ -122,103 +123,72 @@ ENTRY(aesni_set_key) movups 0x10(%rsi), %xmm2 # other user key movaps %xmm2, (%rcx) add $0x10, %rcx - # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01 + AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 call _key_expansion_256a - # aeskeygenassist $0x1, %xmm0, %xmm1 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01 + AESKEYGENASSIST 0x1 %xmm0 %xmm1 call _key_expansion_256b - # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02 + AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 call _key_expansion_256a - # aeskeygenassist $0x2, %xmm0, %xmm1 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02 + AESKEYGENASSIST 0x2 %xmm0 %xmm1 call _key_expansion_256b - # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04 + AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3 call _key_expansion_256a - # aeskeygenassist $0x4, %xmm0, %xmm1 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04 + AESKEYGENASSIST 0x4 %xmm0 %xmm1 call _key_expansion_256b - # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08 + AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4 call _key_expansion_256a - # aeskeygenassist $0x8, %xmm0, %xmm1 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08 + AESKEYGENASSIST 0x8 %xmm0 %xmm1 call _key_expansion_256b - # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10 + AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5 call _key_expansion_256a - # aeskeygenassist $0x10, %xmm0, %xmm1 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10 + AESKEYGENASSIST 0x10 %xmm0 %xmm1 call _key_expansion_256b - # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20 + AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6 call _key_expansion_256a - # aeskeygenassist $0x20, %xmm0, %xmm1 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20 + AESKEYGENASSIST 0x20 %xmm0 %xmm1 call _key_expansion_256b - # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40 + AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7 call _key_expansion_256a jmp .Ldec_key .Lenc_key192: movq 0x10(%rsi), %xmm2 # other user key - # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01 + AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 call _key_expansion_192a - # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02 + AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 call _key_expansion_192b - # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04 + AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3 call _key_expansion_192a - # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08 + AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4 call _key_expansion_192b - # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10 + AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5 call _key_expansion_192a - # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20 + AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6 call _key_expansion_192b - # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40 + AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7 call _key_expansion_192a - # aeskeygenassist $0x80, %xmm2, %xmm1 # round 8 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x80 + AESKEYGENASSIST 0x80 %xmm2 %xmm1 # round 8 call _key_expansion_192b jmp .Ldec_key .Lenc_key128: - # aeskeygenassist $0x1, %xmm0, %xmm1 # round 1 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01 + AESKEYGENASSIST 0x1 %xmm0 %xmm1 # round 1 call _key_expansion_128 - # aeskeygenassist $0x2, %xmm0, %xmm1 # round 2 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02 + AESKEYGENASSIST 0x2 %xmm0 %xmm1 # round 2 call _key_expansion_128 - # aeskeygenassist $0x4, %xmm0, %xmm1 # round 3 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04 + AESKEYGENASSIST 0x4 %xmm0 %xmm1 # round 3 call _key_expansion_128 - # aeskeygenassist $0x8, %xmm0, %xmm1 # round 4 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08 + AESKEYGENASSIST 0x8 %xmm0 %xmm1 # round 4 call _key_expansion_128 - # aeskeygenassist $0x10, %xmm0, %xmm1 # round 5 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10 + AESKEYGENASSIST 0x10 %xmm0 %xmm1 # round 5 call _key_expansion_128 - # aeskeygenassist $0x20, %xmm0, %xmm1 # round 6 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20 + AESKEYGENASSIST 0x20 %xmm0 %xmm1 # round 6 call _key_expansion_128 - # aeskeygenassist $0x40, %xmm0, %xmm1 # round 7 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x40 + AESKEYGENASSIST 0x40 %xmm0 %xmm1 # round 7 call _key_expansion_128 - # aeskeygenassist $0x80, %xmm0, %xmm1 # round 8 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x80 + AESKEYGENASSIST 0x80 %xmm0 %xmm1 # round 8 call _key_expansion_128 - # aeskeygenassist $0x1b, %xmm0, %xmm1 # round 9 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x1b + AESKEYGENASSIST 0x1b %xmm0 %xmm1 # round 9 call _key_expansion_128 - # aeskeygenassist $0x36, %xmm0, %xmm1 # round 10 - .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x36 + AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10 call _key_expansion_128 .Ldec_key: sub $0x10, %rcx @@ -231,8 +201,7 @@ ENTRY(aesni_set_key) .align 4 .Ldec_key_loop: movaps (%rdi), %xmm0 - # aesimc %xmm0, %xmm1 - .byte 0x66, 0x0f, 0x38, 0xdb, 0xc8 + AESIMC %xmm0 %xmm1 movaps %xmm1, (%rsi) add $0x10, %rdi sub $0x10, %rsi @@ -274,51 +243,37 @@ _aesni_enc1: je .Lenc192 add $0x20, TKEYP movaps -0x60(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps -0x50(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE .align 4 .Lenc192: movaps -0x40(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps -0x30(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE .align 4 .Lenc128: movaps -0x20(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps -0x10(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps (TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps 0x10(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps 0x20(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps 0x30(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps 0x40(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps 0x50(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps 0x60(TKEYP), KEY - # aesenc KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + AESENC KEY STATE movaps 0x70(TKEYP), KEY - # aesenclast KEY, STATE # last round - .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2 + AESENCLAST KEY STATE ret /* @@ -353,135 +308,79 @@ _aesni_enc4: je .L4enc192 add $0x20, TKEYP movaps -0x60(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps -0x50(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 #.align 4 .L4enc192: movaps -0x40(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps -0x30(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 #.align 4 .L4enc128: movaps -0x20(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps -0x10(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps (TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps 0x10(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps 0x20(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps 0x30(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps 0x40(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps 0x50(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps 0x60(TKEYP), KEY - # aesenc KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 - # aesenc KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 - # aesenc KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xea - # aesenc KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 movaps 0x70(TKEYP), KEY - # aesenclast KEY, STATE1 # last round - .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2 - # aesenclast KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdd, 0xe2 - # aesenclast KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdd, 0xea - # aesenclast KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdd, 0xf2 + AESENCLAST KEY STATE1 # last round + AESENCLAST KEY STATE2 + AESENCLAST KEY STATE3 + AESENCLAST KEY STATE4 ret /* @@ -518,51 +417,37 @@ _aesni_dec1: je .Ldec192 add $0x20, TKEYP movaps -0x60(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps -0x50(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE .align 4 .Ldec192: movaps -0x40(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps -0x30(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE .align 4 .Ldec128: movaps -0x20(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps -0x10(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps (TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps 0x10(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps 0x20(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps 0x30(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps 0x40(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps 0x50(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps 0x60(TKEYP), KEY - # aesdec KEY, STATE - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + AESDEC KEY STATE movaps 0x70(TKEYP), KEY - # aesdeclast KEY, STATE # last round - .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2 + AESDECLAST KEY STATE ret /* @@ -597,135 +482,79 @@ _aesni_dec4: je .L4dec192 add $0x20, TKEYP movaps -0x60(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps -0x50(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 .align 4 .L4dec192: movaps -0x40(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps -0x30(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 .align 4 .L4dec128: movaps -0x20(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps -0x10(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps (TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps 0x10(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps 0x20(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps 0x30(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps 0x40(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps 0x50(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps 0x60(TKEYP), KEY - # aesdec KEY, STATE1 - .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 - # aesdec KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 - # aesdec KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xde, 0xea - # aesdec KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 movaps 0x70(TKEYP), KEY - # aesdeclast KEY, STATE1 # last round - .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2 - # aesdeclast KEY, STATE2 - .byte 0x66, 0x0f, 0x38, 0xdf, 0xe2 - # aesdeclast KEY, STATE3 - .byte 0x66, 0x0f, 0x38, 0xdf, 0xea - # aesdeclast KEY, STATE4 - .byte 0x66, 0x0f, 0x38, 0xdf, 0xf2 + AESDECLAST KEY STATE1 # last round + AESDECLAST KEY STATE2 + AESDECLAST KEY STATE3 + AESDECLAST KEY STATE4 ret /* -- cgit v1.2.3 From be831297716036de5b24308447ecb69f1706a846 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 23 Nov 2009 12:50:00 +0100 Subject: x86/amd-iommu: attach devices to pre-allocated domains early For some devices the ACPI table may define unity map requirements which must me met when the IOMMU is enabled. So we need to attach devices to their domains as early as possible so that these mappings are in place when needed. This patch assigns the domains right after they are allocated. Otherwise this can result in I/O page faults before a driver binds to a device and BIOS is still using it. Cc: stable@kernel.org Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 093bd526c949..b74b21247584 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -2047,10 +2047,10 @@ static void prealloc_protection_domains(void) struct pci_dev *dev = NULL; struct dma_ops_domain *dma_dom; struct amd_iommu *iommu; - u16 devid; + u16 devid, __devid; while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - devid = calc_devid(dev->bus->number, dev->devfn); + __devid = devid = calc_devid(dev->bus->number, dev->devfn); if (devid > amd_iommu_last_bdf) continue; devid = amd_iommu_alias_table[devid]; @@ -2065,6 +2065,10 @@ static void prealloc_protection_domains(void) init_unity_mappings_for_device(dma_dom, devid); dma_dom->target_dev = devid; + attach_device(iommu, &dma_dom->domain, devid); + if (__devid != devid) + attach_device(iommu, &dma_dom->domain, __devid); + list_add_tail(&dma_dom->list, &iommu_pd_list); } } -- cgit v1.2.3 From 564ec0ec05ac6ee409bde81f7ef27a3dadbf3a6a Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 23 Nov 2009 19:55:22 +0800 Subject: crypto: ghash-clmulni-intel - Use gas macro for PCLMULQDQ-NI and PSHUFB Old binutils do not support PCLMULQDQ-NI and PSHUFB, to make kernel can be compiled by them, .byte code is used instead of assembly instructions. But the readability and flexibility of raw .byte code is not good. So corresponding assembly instruction like gas macro is used instead. Signed-off-by: Huang Ying Signed-off-by: Herbert Xu --- arch/x86/crypto/ghash-clmulni-intel_asm.S | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S index 59584982fb75..1528dc4886cf 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_asm.S +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -17,7 +17,7 @@ */ #include -#include +#include .align 16 .Lbswap_mask: @@ -56,12 +56,9 @@ __clmul_gf128mul_ble: pxor DATA, T2 pxor SHASH, T3 - # pclmulqdq $0x00, SHASH, DATA # DATA = a0 * b0 - .byte 0x66, 0x0f, 0x3a, 0x44, 0xc1, 0x00 - # pclmulqdq $0x11, SHASH, T1 # T1 = a1 * b1 - .byte 0x66, 0x0f, 0x3a, 0x44, 0xd1, 0x11 - # pclmulqdq $0x00, T3, T2 # T2 = (a1 + a0) * (b1 + b0) - .byte 0x66, 0x0f, 0x3a, 0x44, 0xdc, 0x00 + PCLMULQDQ 0x00 SHASH DATA # DATA = a0 * b0 + PCLMULQDQ 0x11 SHASH T1 # T1 = a1 * b1 + PCLMULQDQ 0x00 T3 T2 # T2 = (a1 + a0) * (b1 + b0) pxor DATA, T2 pxor T1, T2 # T2 = a0 * b1 + a1 * b0 @@ -101,11 +98,9 @@ ENTRY(clmul_ghash_mul) movups (%rdi), DATA movups (%rsi), SHASH movaps .Lbswap_mask, BSWAP - # pshufb BSWAP, DATA - PSHUFB_XMM5_XMM0 + PSHUFB_XMM BSWAP DATA call __clmul_gf128mul_ble - # pshufb BSWAP, DATA - .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 + PSHUFB_XMM BSWAP DATA movups DATA, (%rdi) ret @@ -119,21 +114,18 @@ ENTRY(clmul_ghash_update) movaps .Lbswap_mask, BSWAP movups (%rdi), DATA movups (%rcx), SHASH - # pshufb BSWAP, DATA - PSHUFB_XMM5_XMM0 + PSHUFB_XMM BSWAP DATA .align 4 .Lupdate_loop: movups (%rsi), IN1 - # pshufb BSWAP, IN1 - PSHUFB_XMM5_XMM6 + PSHUFB_XMM BSWAP IN1 pxor IN1, DATA call __clmul_gf128mul_ble sub $16, %rdx add $16, %rsi cmp $16, %rdx jge .Lupdate_loop - # pshufb BSWAP, DATA - PSHUFB_XMM5_XMM0 + PSHUFB_XMM BSWAP DATA movups DATA, (%rdi) .Lupdate_just_ret: ret @@ -146,8 +138,7 @@ ENTRY(clmul_ghash_update) ENTRY(clmul_ghash_setkey) movaps .Lbswap_mask, BSWAP movups (%rsi), %xmm0 - # pshufb BSWAP, %xmm0 - PSHUFB_XMM5_XMM0 + PSHUFB_XMM BSWAP %xmm0 movaps %xmm0, %xmm1 psllq $1, %xmm0 psrlq $63, %xmm1 -- cgit v1.2.3 From 68ee87164e73f68cf09070043c97e7f61e6966d4 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Mon, 23 Nov 2009 20:19:47 +0800 Subject: crypto: ghash-clmulni-intel - Put proper .data section in place Lbswap_mask, Lpoly and Ltwo_one should clearly belong to .data section, not .text. Signed-off-by: Jiri Kosina Signed-off-by: Herbert Xu --- arch/x86/crypto/ghash-clmulni-intel_asm.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S index 1528dc4886cf..1eb7f90cb7b9 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_asm.S +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -19,6 +19,8 @@ #include #include +.data + .align 16 .Lbswap_mask: .octa 0x000102030405060708090a0b0c0d0e0f -- cgit v1.2.3 From ba6909b719a5ccc0c8100d2895bb7ff557b2eeae Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 23 Nov 2009 21:17:13 +0530 Subject: hw-breakpoint: Attribute authorship of hw-breakpoint related files Attribute authorship to developers of hw-breakpoint related files. Signed-off-by: K.Prasad Cc: Alan Stern Cc: Frederic Weisbecker LKML-Reference: <20091123154713.GA5593@in.ibm.com> [ v2: moved it to latest -tip ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/hw_breakpoint.c | 4 ++++ kernel/hw_breakpoint.c | 4 ++++ samples/hw_breakpoint/data_breakpoint.c | 2 ++ 3 files changed, 10 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 752daebe91c6..4d267fb77828 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -16,6 +16,10 @@ * Copyright (C) 2007 Alan Stern * Copyright (C) 2009 IBM Corporation * Copyright (C) 2009 Frederic Weisbecker + * + * Authors: Alan Stern + * K.Prasad + * Frederic Weisbecker */ /* diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index b6d6fa273eeb..c16662268d75 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -18,6 +18,10 @@ * Copyright (C) 2009, Frederic Weisbecker * * Thanks to Ingo Molnar for his many suggestions. + * + * Authors: Alan Stern + * K.Prasad + * Frederic Weisbecker */ /* diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c index 5bc9819a819e..95063818bcf4 100644 --- a/samples/hw_breakpoint/data_breakpoint.c +++ b/samples/hw_breakpoint/data_breakpoint.c @@ -23,6 +23,8 @@ * that variable. * * Copyright (C) IBM Corporation, 2009 + * + * Author: K.Prasad */ #include /* Needed by all modules */ #include /* Needed for KERN_INFO */ -- cgit v1.2.3 From 0444c9bd0cf4e0eb946a7fcaf34765accfa9404a Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 20 Nov 2009 14:03:05 +0000 Subject: x86: Tighten conditionals on MCE related statistics irq_thermal_count is only being maintained when X86_THERMAL_VECTOR, and both X86_THERMAL_VECTOR and X86_MCE_THRESHOLD don't need extra wrapping in X86_MCE conditionals. Signed-off-by: Jan Beulich Cc: Hidetoshi Seto Cc: Yong Wang Cc: Suresh Siddha Cc: Andi Kleen Cc: Borislav Petkov Cc: Arjan van de Ven LKML-Reference: <4B06AFA902000078000211F8@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hardirq.h | 6 +++--- arch/x86/kernel/irq.c | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 82e3e8f01043..108eb6fd1ae7 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -20,11 +20,11 @@ typedef struct { unsigned int irq_call_count; unsigned int irq_tlb_count; #endif -#ifdef CONFIG_X86_MCE +#ifdef CONFIG_X86_THERMAL_VECTOR unsigned int irq_thermal_count; -# ifdef CONFIG_X86_MCE_THRESHOLD +#endif +#ifdef CONFIG_X86_MCE_THRESHOLD unsigned int irq_threshold_count; -# endif #endif } ____cacheline_aligned irq_cpustat_t; diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 04bbd5278568..19212cb01558 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -92,17 +92,17 @@ static int show_other_interrupts(struct seq_file *p, int prec) seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); seq_printf(p, " TLB shootdowns\n"); #endif -#ifdef CONFIG_X86_MCE +#ifdef CONFIG_X86_THERMAL_VECTOR seq_printf(p, "%*s: ", prec, "TRM"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); seq_printf(p, " Thermal event interrupts\n"); -# ifdef CONFIG_X86_MCE_THRESHOLD +#endif +#ifdef CONFIG_X86_MCE_THRESHOLD seq_printf(p, "%*s: ", prec, "THR"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); seq_printf(p, " Threshold APIC interrupts\n"); -# endif #endif #ifdef CONFIG_X86_MCE seq_printf(p, "%*s: ", prec, "MCE"); @@ -194,11 +194,11 @@ u64 arch_irq_stat_cpu(unsigned int cpu) sum += irq_stats(cpu)->irq_call_count; sum += irq_stats(cpu)->irq_tlb_count; #endif -#ifdef CONFIG_X86_MCE +#ifdef CONFIG_X86_THERMAL_VECTOR sum += irq_stats(cpu)->irq_thermal_count; -# ifdef CONFIG_X86_MCE_THRESHOLD +#endif +#ifdef CONFIG_X86_MCE_THRESHOLD sum += irq_stats(cpu)->irq_threshold_count; -# endif #endif #ifdef CONFIG_X86_MCE sum += per_cpu(mce_exception_count, cpu); -- cgit v1.2.3 From 581f202bcd60acbc3af1f5faa429e570c512f8a3 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Fri, 20 Nov 2009 15:48:26 -0600 Subject: x86: UV RTC: Always enable RTC clocksource Always enable the RTC clocksource on UV systems. Signed-off-by: Dimitri Sivanich LKML-Reference: <20091120214826.GA20016@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/uv_time.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c index 3da7b1d8bfd3..3c84aa001c11 100644 --- a/arch/x86/kernel/uv_time.c +++ b/arch/x86/kernel/uv_time.c @@ -74,7 +74,6 @@ struct uv_rtc_timer_head { */ static struct uv_rtc_timer_head **blade_info __read_mostly; -static int uv_rtc_enable; static int uv_rtc_evt_enable; /* @@ -335,14 +334,6 @@ static void uv_rtc_interrupt(void) ced->event_handler(ced); } -static int __init uv_enable_rtc(char *str) -{ - uv_rtc_enable = 1; - - return 1; -} -__setup("uvrtc", uv_enable_rtc); - static int __init uv_enable_evt_rtc(char *str) { uv_rtc_evt_enable = 1; @@ -364,12 +355,16 @@ static __init int uv_rtc_setup_clock(void) { int rc; - if (!uv_rtc_enable || !is_uv_system() || x86_platform_ipi_callback) + if (!is_uv_system()) return -ENODEV; clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second, clocksource_uv.shift); + /* If single blade, prefer tsc */ + if (uv_num_possible_blades() == 1) + clocksource_uv.rating = 250; + rc = clocksource_register(&clocksource_uv); if (rc) printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc); @@ -377,7 +372,7 @@ static __init int uv_rtc_setup_clock(void) printk(KERN_INFO "UV RTC clocksource registered freq %lu MHz\n", sn_rtc_cycles_per_second/(unsigned long)1E6); - if (rc || !uv_rtc_evt_enable) + if (rc || !uv_rtc_evt_enable || x86_platform_ipi_callback) return rc; /* Setup and register clockevents */ -- cgit v1.2.3 From 27c13ecec4d8856687b50b959e1146845b478f95 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 21 Nov 2009 14:01:45 +0100 Subject: x86, cpu: mv display_cacheinfo -> cpu_detect_cache_sizes display_cacheinfo() doesn't display anything anymore and it is used to detect CPU cache sizes. Rename it accordingly. Signed-off-by: Borislav Petkov LKML-Reference: <20091121130145.GA31357@liondog.tnic> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/amd.c | 2 +- arch/x86/kernel/cpu/centaur.c | 2 +- arch/x86/kernel/cpu/common.c | 4 ++-- arch/x86/kernel/cpu/cpu.h | 2 +- arch/x86/kernel/cpu/cyrix.c | 2 +- arch/x86/kernel/cpu/transmeta.c | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index c910a716a71c..7128b3799cec 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -535,7 +535,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) } } - display_cacheinfo(c); + cpu_detect_cache_sizes(c); /* Multi core CPU? */ if (c->extended_cpuid_level >= 0x80000008) { diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index c95e831bb095..e58d978e0758 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -294,7 +294,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_REP_GOOD); } - display_cacheinfo(c); + cpu_detect_cache_sizes(c); } enum { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 61242a56c2d6..9bf845dc8055 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -61,7 +61,7 @@ void __init setup_cpu_local_masks(void) static void __cpuinit default_init(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_64 - display_cacheinfo(c); + cpu_detect_cache_sizes(c); #else /* Not much we can do here... */ /* Check if at least it has cpuid */ @@ -383,7 +383,7 @@ static void __cpuinit get_model_name(struct cpuinfo_x86 *c) } } -void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) +void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c) { unsigned int n, dummy, ebx, ecx, edx, l2size; diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 6de9a908e400..3624e8a0f71b 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -32,6 +32,6 @@ struct cpu_dev { extern const struct cpu_dev *const __x86_cpu_dev_start[], *const __x86_cpu_dev_end[]; -extern void display_cacheinfo(struct cpuinfo_x86 *c); +extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); #endif diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 19807b89f058..4fbd384fb645 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -373,7 +373,7 @@ static void __cpuinit init_nsc(struct cpuinfo_x86 *c) /* Handle the GX (Formally known as the GX2) */ if (c->x86 == 5 && c->x86_model == 5) - display_cacheinfo(c); + cpu_detect_cache_sizes(c); else init_cyrix(c); } diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index bb62b3e5caad..28000743bbb0 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -26,7 +26,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) early_init_transmeta(c); - display_cacheinfo(c); + cpu_detect_cache_sizes(c); /* Print CMS and CPU revision */ max = cpuid_eax(0x80860000); -- cgit v1.2.3 From 1261a02a0c0ab8e643125705f0d1d83e5090e4d1 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 24 Nov 2009 05:27:18 -0800 Subject: perf_events, x86: Fix validate_event bug The validate_event() was failing on valid event combinations. The function was assuming that if x86_schedule_event() returned 0, it meant error. But x86_schedule_event() returns the counter index and 0 is a perfectly valid value. An error is returned if the function returns a negative value. Furthermore, validate_event() was also failing for event groups because the event->pmu was not set until after hw_perf_event_init(). Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: paulus@samba.org Cc: perfmon2-devel@lists.sourceforge.net Cc: eranian@gmail.com LKML-Reference: <4b0bdf36.1818d00a.07cc.25ae@mx.google.com> Signed-off-by: Ingo Molnar -- arch/x86/kernel/cpu/perf_event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) --- arch/x86/kernel/cpu/perf_event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index bd8743024204..c1bbed1021d9 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2229,10 +2229,10 @@ validate_event(struct cpu_hw_events *cpuc, struct perf_event *event) { struct hw_perf_event fake_event = event->hw; - if (event->pmu != &pmu) + if (event->pmu && event->pmu != &pmu) return 0; - return x86_schedule_event(cpuc, &fake_event); + return x86_schedule_event(cpuc, &fake_event) >= 0; } static int validate_group(struct perf_event *event) -- cgit v1.2.3 From d77b81974521c82fa6fda38dfff1b491dcc62a32 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 24 Nov 2009 16:53:00 +0100 Subject: [CPUFREQ] Enable ACPI PDC handshake for VIA/Centaur CPUs In commit 0de51088e6a82bc8413d3ca9e28bbca2788b5b53, we introduced the use of acpi-cpufreq on VIA/Centaur CPU's by removing a vendor check for VENDOR_INTEL. However, as it turns out, at least the Nano CPU's also need the PDC (processor driver capabilities) handshake in order to activate the methods required for acpi-cpufreq. Since arch_acpi_processor_init_pdc() contains another vendor check for Intel, the PDC is not initialized on VIA CPU's. The resulting behavior of a current mainline kernel on such systems is: acpi-cpufreq loads and it indicates CPU frequency changes. However, the CPU stays at a single frequency This trivial patch ensures that init_intel_pdc() is called on Intel and VIA/Centaur CPU's alike. Signed-off-by: Harald Welte Signed-off-by: Dave Jones --- arch/x86/kernel/acpi/processor.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c index d296f4a195c9..d85d1b2432ba 100644 --- a/arch/x86/kernel/acpi/processor.c +++ b/arch/x86/kernel/acpi/processor.c @@ -79,7 +79,8 @@ void arch_acpi_processor_init_pdc(struct acpi_processor *pr) struct cpuinfo_x86 *c = &cpu_data(pr->id); pr->pdc = NULL; - if (c->x86_vendor == X86_VENDOR_INTEL) + if (c->x86_vendor == X86_VENDOR_INTEL || + c->x86_vendor == X86_VENDOR_CENTAUR) init_intel_pdc(pr, c); return; -- cgit v1.2.3 From b8cbe7e82ec8b55d7bbdde66fc69e788fde00dc6 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 3 Nov 2009 14:57:56 +1030 Subject: [CPUFREQ] cpumask: don't put a cpumask on the stack in x86...cpufreq/powernow-k8.c It's still mugging the current process's cpumask, but as comment in 1ff6e97f1d says, it's not a trivial fix. So, at least we can use a cpumask_var_t to do the Wrong Thing the Right Way :) Signed-off-by: Rusty Russell To: cpufreq@vger.kernel.org Cc: Mark Langsdorf Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 3f12dabeab52..f30d25383940 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1118,7 +1118,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) { - cpumask_t oldmask; + cpumask_var_t oldmask; struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); u32 checkfid; u32 checkvid; @@ -1131,9 +1131,13 @@ static int powernowk8_target(struct cpufreq_policy *pol, checkfid = data->currfid; checkvid = data->currvid; - /* only run on specific CPU from here on */ - oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); + /* only run on specific CPU from here on. */ + /* This is poor form: use a workqueue or smp_call_function_single */ + if (!alloc_cpumask_var(&oldmask, GFP_KERNEL)) + return -ENOMEM; + + cpumask_copy(oldmask, tsk_cpumask(current)); + set_cpus_allowed_ptr(current, cpumask_of(pol->cpu)); if (smp_processor_id() != pol->cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); @@ -1193,7 +1197,8 @@ static int powernowk8_target(struct cpufreq_policy *pol, ret = 0; err_out: - set_cpus_allowed_ptr(current, &oldmask); + set_cpus_allowed_ptr(current, oldmask); + free_cpumask_var(oldmask); return ret; } -- cgit v1.2.3 From db2820dd5445a44b4726f15a2bc89b9ded2503eb Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Sun, 25 Oct 2009 19:45:57 +0100 Subject: [CPUFREQ] powernow-k6: set transition latency value so ondemand governor can be used Set the transition latency to value smaller than CPUFREQ_ETERNAL so governors other than "performance" work (like the "ondemand" one). The value is found in "AMD PowerNow! Technology Platform Design Guide for Embedded Processors" dated December 2000 (AMD doc #24267A). There is the answer to one of FAQs on page 40 which states that suggested complete transition period is 200 us. Tested on K6-2+ CPU with K6-3 core (model 13, stepping 4). Signed-off-by: Krzysztof Helt Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c index f10dea409f40..cb01dac267d3 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c @@ -164,7 +164,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) } /* cpuinfo and default policy values */ - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + policy->cpuinfo.transition_latency = 200000; policy->cur = busfreq * max_multiplier; result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio); -- cgit v1.2.3 From 1cce76c2ac60df40b02bf747982fb3f00e68f50a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 17 Nov 2009 14:39:53 -0800 Subject: [CPUFREQ] use an enum for speedstep processor identification The "unsigned int processor" everywhere confused Rusty, leading to breakage when he passed in smp_processor_id(). Signed-off-by: Rusty Russell Acked-by: Dominik Brodowski Signed-off-by: Andrew Morton Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 2 +- arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | 6 +++--- arch/x86/kernel/cpu/cpufreq/speedstep-lib.h | 24 ++++++++++++------------ arch/x86/kernel/cpu/cpufreq/speedstep-smi.c | 2 +- 4 files changed, 17 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 3ae5a7a3a500..2ce8e0b5cc54 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -39,7 +39,7 @@ static struct pci_dev *speedstep_chipset_dev; /* speedstep_processor */ -static unsigned int speedstep_processor; +static enum speedstep_processor speedstep_processor; static u32 pmbase; diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index f4c290b8482f..ad0083abfa23 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c @@ -34,7 +34,7 @@ static int relaxed_check; * GET PROCESSOR CORE SPEED IN KHZ * *********************************************************************/ -static unsigned int pentium3_get_frequency(unsigned int processor) +static unsigned int pentium3_get_frequency(enum speedstep_processor processor) { /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */ struct { @@ -227,7 +227,7 @@ static unsigned int pentium4_get_frequency(void) /* Warning: may get called from smp_call_function_single. */ -unsigned int speedstep_get_frequency(unsigned int processor) +unsigned int speedstep_get_frequency(enum speedstep_processor processor) { switch (processor) { case SPEEDSTEP_CPU_PCORE: @@ -380,7 +380,7 @@ EXPORT_SYMBOL_GPL(speedstep_detect_processor); * DETECT SPEEDSTEP SPEEDS * *********************************************************************/ -unsigned int speedstep_get_freqs(unsigned int processor, +unsigned int speedstep_get_freqs(enum speedstep_processor processor, unsigned int *low_speed, unsigned int *high_speed, unsigned int *transition_latency, diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h index 2b6c04e5a304..70d9cea1219d 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h @@ -11,18 +11,18 @@ /* processors */ - -#define SPEEDSTEP_CPU_PIII_C_EARLY 0x00000001 /* Coppermine core */ -#define SPEEDSTEP_CPU_PIII_C 0x00000002 /* Coppermine core */ -#define SPEEDSTEP_CPU_PIII_T 0x00000003 /* Tualatin core */ -#define SPEEDSTEP_CPU_P4M 0x00000004 /* P4-M */ - +enum speedstep_processor { + SPEEDSTEP_CPU_PIII_C_EARLY = 0x00000001, /* Coppermine core */ + SPEEDSTEP_CPU_PIII_C = 0x00000002, /* Coppermine core */ + SPEEDSTEP_CPU_PIII_T = 0x00000003, /* Tualatin core */ + SPEEDSTEP_CPU_P4M = 0x00000004, /* P4-M */ /* the following processors are not speedstep-capable and are not auto-detected * in speedstep_detect_processor(). However, their speed can be detected using * the speedstep_get_frequency() call. */ -#define SPEEDSTEP_CPU_PM 0xFFFFFF03 /* Pentium M */ -#define SPEEDSTEP_CPU_P4D 0xFFFFFF04 /* desktop P4 */ -#define SPEEDSTEP_CPU_PCORE 0xFFFFFF05 /* Core */ + SPEEDSTEP_CPU_PM = 0xFFFFFF03, /* Pentium M */ + SPEEDSTEP_CPU_P4D = 0xFFFFFF04, /* desktop P4 */ + SPEEDSTEP_CPU_PCORE = 0xFFFFFF05, /* Core */ +}; /* speedstep states -- only two of them */ @@ -31,10 +31,10 @@ /* detect a speedstep-capable processor */ -extern unsigned int speedstep_detect_processor (void); +extern enum speedstep_processor speedstep_detect_processor(void); /* detect the current speed (in khz) of the processor */ -extern unsigned int speedstep_get_frequency(unsigned int processor); +extern unsigned int speedstep_get_frequency(enum speedstep_processor processor); /* detect the low and high speeds of the processor. The callback @@ -42,7 +42,7 @@ extern unsigned int speedstep_get_frequency(unsigned int processor); * SPEEDSTEP_LOW; the second argument is zero so that no * cpufreq_notify_transition calls are initiated. */ -extern unsigned int speedstep_get_freqs(unsigned int processor, +extern unsigned int speedstep_get_freqs(enum speedstep_processor processor, unsigned int *low_speed, unsigned int *high_speed, unsigned int *transition_latency, diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c index befea088e4f5..04d73c114e49 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c @@ -35,7 +35,7 @@ static int smi_cmd; static unsigned int smi_sig; /* info about the processor */ -static unsigned int speedstep_processor; +static enum speedstep_processor speedstep_processor; /* * There are only two frequency states for each processor. Values -- cgit v1.2.3 From e2f74f355e9e2914483db10c05d70e69e0b7ae04 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Thu, 19 Nov 2009 12:31:01 +0100 Subject: [ACPI/CPUFREQ] Introduce bios_limit per cpu cpufreq sysfs interface This interface is mainly intended (and implemented) for ACPI _PPC BIOS frequency limitations, but other cpufreq drivers can also use it for similar use-cases. Why is this needed: Currently it's not obvious why cpufreq got limited. People see cpufreq/scaling_max_freq reduced, but this could have happened by: - any userspace prog writing to scaling_max_freq - thermal limitations - hardware (_PPC in ACPI case) limitiations Therefore export bios_limit (in kHz) to: - Point the user that it's the BIOS (broken or intended) which limits frequency - Export it as a sysfs interface for userspace progs. While this was a rarely used feature on laptops, there will appear more and more server implemenations providing "Green IT" features like allowing the service processor to limit the frequency. People want to know about HW/BIOS frequency limitations. All ACPI P-state driven cpufreq drivers are covered with this patch: - powernow-k8 - powernow-k7 - acpi-cpufreq Tested with a patched DSDT which limits the first two cores (_PPC returns 1) via _PPC, exposed by bios_limit: # echo 2200000 >cpu2/cpufreq/scaling_max_freq # cat cpu*/cpufreq/scaling_max_freq 2600000 2600000 2200000 2200000 # #scaling_max_freq shows general user/thermal/BIOS limitations # cat cpu*/cpufreq/bios_limit 2600000 2600000 2800000 2800000 # #bios_limit only shows the HW/BIOS limitation CC: Pallipadi Venkatesh CC: Len Brown CC: davej@codemonkey.org.uk CC: linux@dominikbrodowski.net Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- Documentation/cpu-freq/user-guide.txt | 11 +++++++++++ arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 17 +++++++++-------- arch/x86/kernel/cpu/cpufreq/powernow-k7.c | 19 +++++++++++-------- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 17 +++++++++-------- drivers/acpi/processor_perflib.c | 13 +++++++++++++ drivers/cpufreq/cpufreq.c | 21 +++++++++++++++++++++ include/acpi/processor.h | 6 ++++++ include/linux/cpufreq.h | 1 + 8 files changed, 81 insertions(+), 24 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt index 2a5b850847c0..04f6b32993e6 100644 --- a/Documentation/cpu-freq/user-guide.txt +++ b/Documentation/cpu-freq/user-guide.txt @@ -203,6 +203,17 @@ scaling_cur_freq : Current frequency of the CPU as determined by the frequency the kernel thinks the CPU runs at. +bios_limit : If the BIOS tells the OS to limit a CPU to + lower frequencies, the user can read out the + maximum available frequency from this file. + This typically can happen through (often not + intended) BIOS settings, restrictions + triggered through a service processor or other + BIOS/HW based implementations. + This does not cover thermal ACPI limitations + which can be detected through the generic + thermal driver. + If you have selected the "userspace" governor which allows you to set the CPU operating frequency to a specific value, you can read out the current frequency in diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 8b581d3905cb..d2e7c77c1ea4 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -764,14 +764,15 @@ static struct freq_attr *acpi_cpufreq_attr[] = { }; static struct cpufreq_driver acpi_cpufreq_driver = { - .verify = acpi_cpufreq_verify, - .target = acpi_cpufreq_target, - .init = acpi_cpufreq_cpu_init, - .exit = acpi_cpufreq_cpu_exit, - .resume = acpi_cpufreq_resume, - .name = "acpi-cpufreq", - .owner = THIS_MODULE, - .attr = acpi_cpufreq_attr, + .verify = acpi_cpufreq_verify, + .target = acpi_cpufreq_target, + .bios_limit = acpi_processor_get_bios_limit, + .init = acpi_cpufreq_cpu_init, + .exit = acpi_cpufreq_cpu_exit, + .resume = acpi_cpufreq_resume, + .name = "acpi-cpufreq", + .owner = THIS_MODULE, + .attr = acpi_cpufreq_attr, }; static int __init acpi_cpufreq_init(void) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index d47c775eb0ab..9a97116f89e5 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -714,14 +714,17 @@ static struct freq_attr *powernow_table_attr[] = { }; static struct cpufreq_driver powernow_driver = { - .verify = powernow_verify, - .target = powernow_target, - .get = powernow_get, - .init = powernow_cpu_init, - .exit = powernow_cpu_exit, - .name = "powernow-k7", - .owner = THIS_MODULE, - .attr = powernow_table_attr, + .verify = powernow_verify, + .target = powernow_target, + .get = powernow_get, +#ifdef CONFIG_X86_POWERNOW_K7_ACPI + .bios_limit = acpi_processor_get_bios_limit, +#endif + .init = powernow_cpu_init, + .exit = powernow_cpu_exit, + .name = "powernow-k7", + .owner = THIS_MODULE, + .attr = powernow_table_attr, }; static int __init powernow_init(void) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index f30d25383940..a9df9441a9a2 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1398,14 +1398,15 @@ static struct freq_attr *powernow_k8_attr[] = { }; static struct cpufreq_driver cpufreq_amd64_driver = { - .verify = powernowk8_verify, - .target = powernowk8_target, - .init = powernowk8_cpu_init, - .exit = __devexit_p(powernowk8_cpu_exit), - .get = powernowk8_get, - .name = "powernow-k8", - .owner = THIS_MODULE, - .attr = powernow_k8_attr, + .verify = powernowk8_verify, + .target = powernowk8_target, + .bios_limit = acpi_processor_get_bios_limit, + .init = powernowk8_cpu_init, + .exit = __devexit_p(powernowk8_cpu_exit), + .get = powernowk8_get, + .name = "powernow-k8", + .owner = THIS_MODULE, + .attr = powernow_k8_attr, }; /* driver entry point for init */ diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 8ba0ed0b9ddb..01e366d2b6fb 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -167,6 +167,19 @@ int acpi_processor_ppc_has_changed(struct acpi_processor *pr) return cpufreq_update_policy(pr->id); } +int acpi_processor_get_bios_limit(int cpu, unsigned int *limit) +{ + struct acpi_processor *pr; + + pr = per_cpu(processors, cpu); + if (!pr || !pr->performance || !pr->performance->state_count) + return -ENODEV; + *limit = pr->performance->states[pr->performance_platform_limit]. + core_frequency * 1000; + return 0; +} +EXPORT_SYMBOL(acpi_processor_get_bios_limit); + void acpi_processor_ppc_init(void) { if (!cpufreq_register_notifier diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 5b9b1c8c4950..f20668c09ce0 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -647,6 +647,21 @@ static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf) return policy->governor->show_setspeed(policy, buf); } +/** + * show_scaling_driver - show the current cpufreq HW/BIOS limitation + */ +static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf) +{ + unsigned int limit; + int ret; + if (cpufreq_driver->bios_limit) { + ret = cpufreq_driver->bios_limit(policy->cpu, &limit); + if (!ret) + return sprintf(buf, "%u\n", limit); + } + return sprintf(buf, "%u\n", policy->cpuinfo.max_freq); +} + #define define_one_ro(_name) \ static struct freq_attr _name = \ __ATTR(_name, 0444, show_##_name, NULL) @@ -666,6 +681,7 @@ define_one_ro(cpuinfo_transition_latency); define_one_ro(scaling_available_governors); define_one_ro(scaling_driver); define_one_ro(scaling_cur_freq); +define_one_ro(bios_limit); define_one_ro(related_cpus); define_one_ro(affected_cpus); define_one_rw(scaling_min_freq); @@ -905,6 +921,11 @@ static int cpufreq_add_dev_interface(unsigned int cpu, if (ret) goto err_out_kobj_put; } + if (cpufreq_driver->bios_limit) { + ret = sysfs_create_file(&policy->kobj, &bios_limit.attr); + if (ret) + goto err_out_kobj_put; + } spin_lock_irqsave(&cpufreq_driver_lock, flags); for_each_cpu(j, policy->cpus) { diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 740ac3ad8fd0..8b668ead6d6e 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -295,6 +295,7 @@ static inline void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx void acpi_processor_ppc_init(void); void acpi_processor_ppc_exit(void); int acpi_processor_ppc_has_changed(struct acpi_processor *pr); +extern int acpi_processor_get_bios_limit(int cpu, unsigned int *limit); #else static inline void acpi_processor_ppc_init(void) { @@ -316,6 +317,11 @@ static inline int acpi_processor_ppc_has_changed(struct acpi_processor *pr) } return 0; } +static inline int acpi_processor_get_bios_limit(int cpu, unsigned int *limit) +{ + return -ENODEV; +} + #endif /* CONFIG_CPU_FREQ */ /* in processor_throttling.c */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 79a2340d83cd..4de02b10007f 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -232,6 +232,7 @@ struct cpufreq_driver { /* optional */ unsigned int (*getavg) (struct cpufreq_policy *policy, unsigned int cpu); + int (*bios_limit) (int cpu, unsigned int *limit); int (*exit) (struct cpufreq_policy *policy); int (*suspend) (struct cpufreq_policy *policy, pm_message_t pmsg); -- cgit v1.2.3 From 2ed7a806d864bde5903b73da1c65b0316b21efd3 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Mon, 16 Nov 2009 14:21:13 -0700 Subject: x86/PCI: remove early PCI pr_debug statements commit db635adc turned -DDEBUG for x86/pci on when CONFIG_PCI_DEBUG is set. In general, I agree with that change. However, it exposes a bunch of very low level PCI debugging in the early x86 path, such as: 0 reading 2 from a: ffff 1 reading 2 from a: ffff 2 reading 2 from a: ffff 3 reading 2 from a: 300 3 reading 2 from 0: 1002 3 reading 2 from 2: 515e These statements add a lot of noise to the boot and aren't likely to be necessary even when handling random upstream bug reports. [In contrast, statements such as these: pci 0000:02:04.0: found [14e4:164a] class 000200 header type 00 pci 0000:02:04.0: reg 10: [mem 0xf8000000-0xf9ffffff 64bit] pci 0000:02:04.0: reg 30: [mem 0x00000000-0x0001ffff pref] are indeed useful when remote debugging users' machines] Remove the noisy printks and save electrons everywhere. Cc: Bjorn Helgaas Cc: Yinghai Lu Cc: Andi Kleen Cc: Ingo Molnar Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- arch/x86/pci/early.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c index aaf26ae58cd5..d1067d539bee 100644 --- a/arch/x86/pci/early.c +++ b/arch/x86/pci/early.c @@ -12,8 +12,6 @@ u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset) u32 v; outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); v = inl(0xcfc); - if (v != 0xffffffff) - pr_debug("%x reading 4 from %x: %x\n", slot, offset, v); return v; } @@ -22,7 +20,6 @@ u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset) u8 v; outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); v = inb(0xcfc + (offset&3)); - pr_debug("%x reading 1 from %x: %x\n", slot, offset, v); return v; } @@ -31,28 +28,24 @@ u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset) u16 v; outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); v = inw(0xcfc + (offset&2)); - pr_debug("%x reading 2 from %x: %x\n", slot, offset, v); return v; } void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, u32 val) { - pr_debug("%x writing to %x: %x\n", slot, offset, val); outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); outl(val, 0xcfc); } void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val) { - pr_debug("%x writing to %x: %x\n", slot, offset, val); outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); outb(val, 0xcfc + (offset&3)); } void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val) { - pr_debug("%x writing to %x: %x\n", slot, offset, val); outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); outw(val, 0xcfc + (offset&2)); } -- cgit v1.2.3 From 7b7a78594292d540720485544ad1043b71de14e0 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 17 Nov 2009 23:19:53 +0100 Subject: PCI: fix comment typo in bus_numa.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: André Goddard Rosa Signed-off-by: Jiri Kosina Signed-off-by: Jesse Barnes --- arch/x86/pci/bus_numa.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h index 4ff126a3e887..730369f392a3 100644 --- a/arch/x86/pci/bus_numa.h +++ b/arch/x86/pci/bus_numa.h @@ -2,7 +2,7 @@ /* * sub bus (transparent) will use entres from 3 to store extra from - * root, so need to make sure we have enought slot there, Should we + * root, so need to make sure we have enough slot there, Should we * increase PCI_BUS_NUM_RESOURCES? */ #define RES_NUM 16 -- cgit v1.2.3 From 67f241f4579651ea4335b58967c8880c0a378249 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 11 Nov 2009 22:27:40 -0800 Subject: x86/pci: seperate x86_pci_rootbus_res_quirks from amd_bus.c Those functions are used by intel_bus.c so seperate them to another file. and make amd_bus a bit smaller. Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- arch/x86/pci/Makefile | 2 +- arch/x86/pci/amd_bus.c | 99 ----------------------------------------------- arch/x86/pci/bus_numa.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/pci/bus_numa.h | 1 + 4 files changed, 103 insertions(+), 100 deletions(-) create mode 100644 arch/x86/pci/bus_numa.c (limited to 'arch/x86') diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index d8a0a6279a4d..564b008a51c7 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -15,7 +15,7 @@ obj-$(CONFIG_X86_NUMAQ) += numaq_32.o obj-y += common.o early.o obj-y += amd_bus.o -obj-$(CONFIG_X86_64) += intel_bus.o +obj-$(CONFIG_X86_64) += bus_numa.o intel_bus.o ifeq ($(CONFIG_PCI_DEBUG),y) EXTRA_CFLAGS += -DDEBUG diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 995f36096a42..95ecbd495955 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -6,8 +6,6 @@ #ifdef CONFIG_X86_64 #include -#include -#include #endif #include "bus_numa.h" @@ -19,54 +17,6 @@ #ifdef CONFIG_X86_64 -int pci_root_num; -struct pci_root_info pci_root_info[PCI_ROOT_NR]; -static int found_all_numa_early; - -void x86_pci_root_bus_res_quirks(struct pci_bus *b) -{ - int i; - int j; - struct pci_root_info *info; - - /* don't go for it if _CRS is used already */ - if (b->resource[0] != &ioport_resource || - b->resource[1] != &iomem_resource) - return; - - if (!pci_root_num) - return; - - /* for amd, if only one root bus, don't need to do anything */ - if (pci_root_num < 2 && found_all_numa_early) - return; - - for (i = 0; i < pci_root_num; i++) { - if (pci_root_info[i].bus_min == b->number) - break; - } - - if (i == pci_root_num) - return; - - printk(KERN_DEBUG "PCI: peer root bus %02x res updated from pci conf\n", - b->number); - - info = &pci_root_info[i]; - for (j = 0; j < info->res_num; j++) { - struct resource *res; - struct resource *root; - - res = &info->res[j]; - b->resource[j] = res; - if (res->flags & IORESOURCE_IO) - root = &ioport_resource; - else - root = &iomem_resource; - insert_resource(root, res); - } -} - #define RANGE_NUM 16 struct res_range { @@ -119,55 +69,6 @@ static void __init update_range(struct res_range *range, size_t start, } } -void __init update_res(struct pci_root_info *info, size_t start, - size_t end, unsigned long flags, int merge) -{ - int i; - struct resource *res; - - if (start > end) - return; - - if (!merge) - goto addit; - - /* try to merge it with old one */ - for (i = 0; i < info->res_num; i++) { - size_t final_start, final_end; - size_t common_start, common_end; - - res = &info->res[i]; - if (res->flags != flags) - continue; - - common_start = max((size_t)res->start, start); - common_end = min((size_t)res->end, end); - if (common_start > common_end + 1) - continue; - - final_start = min((size_t)res->start, start); - final_end = max((size_t)res->end, end); - - res->start = final_start; - res->end = final_end; - return; - } - -addit: - - /* need to add that */ - if (info->res_num >= RES_NUM) - return; - - res = &info->res[info->res_num]; - res->name = info->name; - res->flags = flags; - res->start = start; - res->end = end; - res->child = NULL; - info->res_num++; -} - struct pci_hostbridge_probe { u32 bus; u32 slot; diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c new file mode 100644 index 000000000000..145df00e0387 --- /dev/null +++ b/arch/x86/pci/bus_numa.c @@ -0,0 +1,101 @@ +#include +#include + +#include "bus_numa.h" + +int pci_root_num; +struct pci_root_info pci_root_info[PCI_ROOT_NR]; +int found_all_numa_early; + +void x86_pci_root_bus_res_quirks(struct pci_bus *b) +{ + int i; + int j; + struct pci_root_info *info; + + /* don't go for it if _CRS is used already */ + if (b->resource[0] != &ioport_resource || + b->resource[1] != &iomem_resource) + return; + + if (!pci_root_num) + return; + + /* for amd, if only one root bus, don't need to do anything */ + if (pci_root_num < 2 && found_all_numa_early) + return; + + for (i = 0; i < pci_root_num; i++) { + if (pci_root_info[i].bus_min == b->number) + break; + } + + if (i == pci_root_num) + return; + + printk(KERN_DEBUG "PCI: peer root bus %02x res updated from pci conf\n", + b->number); + + info = &pci_root_info[i]; + for (j = 0; j < info->res_num; j++) { + struct resource *res; + struct resource *root; + + res = &info->res[j]; + b->resource[j] = res; + if (res->flags & IORESOURCE_IO) + root = &ioport_resource; + else + root = &iomem_resource; + insert_resource(root, res); + } +} + +void __init update_res(struct pci_root_info *info, size_t start, + size_t end, unsigned long flags, int merge) +{ + int i; + struct resource *res; + + if (start > end) + return; + + if (!merge) + goto addit; + + /* try to merge it with old one */ + for (i = 0; i < info->res_num; i++) { + size_t final_start, final_end; + size_t common_start, common_end; + + res = &info->res[i]; + if (res->flags != flags) + continue; + + common_start = max((size_t)res->start, start); + common_end = min((size_t)res->end, end); + if (common_start > common_end + 1) + continue; + + final_start = min((size_t)res->start, start); + final_end = max((size_t)res->end, end); + + res->start = final_start; + res->end = final_end; + return; + } + +addit: + + /* need to add that */ + if (info->res_num >= RES_NUM) + return; + + res = &info->res[info->res_num]; + res->name = info->name; + res->flags = flags; + res->start = start; + res->end = end; + res->child = NULL; + info->res_num++; +} diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h index 730369f392a3..adbc23fe82ac 100644 --- a/arch/x86/pci/bus_numa.h +++ b/arch/x86/pci/bus_numa.h @@ -20,6 +20,7 @@ struct pci_root_info { #define PCI_ROOT_NR 4 extern int pci_root_num; extern struct pci_root_info pci_root_info[PCI_ROOT_NR]; +extern int found_all_numa_early; extern void update_res(struct pci_root_info *info, size_t start, size_t end, unsigned long flags, int merge); -- cgit v1.2.3 From 5663b1b963183e98ece3e77e471da833bb5ad2ff Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:33:37 -0700 Subject: x86/PCI: MMCONFIG: remove unused definitions Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/mmconfig-shared.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 02642773c29d..9bf04bcfb9c2 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -23,10 +23,6 @@ #define PREFIX "PCI: " -/* aperture is up to 256MB but BIOS may reserve less */ -#define MMCONFIG_APER_MIN (2 * 1024*1024) -#define MMCONFIG_APER_MAX (256 * 1024*1024) - /* Indicate if the mmcfg resources have been placed into the resource table. */ static int __initdata pci_mmcfg_resources_inserted; -- cgit v1.2.3 From e823d6ff581c5d1d76aa8c73a202d7d1419d34b8 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:33:42 -0700 Subject: x86/PCI: MMCONFIG: count MCFG structures with local variable Use a local variable, not pci_mmcfg_config_num, to count MCFG entries. No functional change, but simplifies future changes. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/mmconfig-shared.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 9bf04bcfb9c2..fbadb89c71eb 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -555,7 +555,7 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) { struct acpi_table_mcfg *mcfg; unsigned long i; - int config_size; + int entries, config_size; if (!header) return -EINVAL; @@ -564,17 +564,18 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) /* how many config structures do we have */ pci_mmcfg_config_num = 0; + entries = 0; i = header->length - sizeof(struct acpi_table_mcfg); while (i >= sizeof(struct acpi_mcfg_allocation)) { - ++pci_mmcfg_config_num; + entries++; i -= sizeof(struct acpi_mcfg_allocation); }; - if (pci_mmcfg_config_num == 0) { + if (entries == 0) { printk(KERN_ERR PREFIX "MMCONFIG has no entries\n"); return -ENODEV; } - config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config); + config_size = entries * sizeof(*pci_mmcfg_config); pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL); if (!pci_mmcfg_config) { printk(KERN_WARNING PREFIX @@ -583,8 +584,9 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) } memcpy(pci_mmcfg_config, &mcfg[1], config_size); + pci_mmcfg_config_num = entries; - for (i = 0; i < pci_mmcfg_config_num; ++i) { + for (i = 0; i < entries; i++) { if (acpi_mcfg_check_entry(mcfg, &pci_mmcfg_config[i])) { kfree(pci_mmcfg_config); pci_mmcfg_config_num = 0; -- cgit v1.2.3 From d3578ef7aab5b9bb874d085609b3ed5d9abffc48 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:33:47 -0700 Subject: x86/PCI: MMCONFIG: step through MCFG table, not pci_mmcfg_config[] Step through the ACPI MCFG table, not pci_mmcfg_config[]. No functional change, but simplifies future patches that encapsulate pci_mmcfg_config[]. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/mmconfig-shared.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index fbadb89c71eb..7a7b6ba3abbb 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -554,6 +554,7 @@ static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg, static int __init pci_parse_mcfg(struct acpi_table_header *header) { struct acpi_table_mcfg *mcfg; + struct acpi_mcfg_allocation *cfg_table, *cfg; unsigned long i; int entries, config_size; @@ -586,8 +587,10 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) memcpy(pci_mmcfg_config, &mcfg[1], config_size); pci_mmcfg_config_num = entries; + cfg_table = (struct acpi_mcfg_allocation *) &mcfg[1]; for (i = 0; i < entries; i++) { - if (acpi_mcfg_check_entry(mcfg, &pci_mmcfg_config[i])) { + cfg = &cfg_table[i]; + if (acpi_mcfg_check_entry(mcfg, cfg)) { kfree(pci_mmcfg_config); pci_mmcfg_config_num = 0; return -ENODEV; -- cgit v1.2.3 From 7da7d360ae025158d09aab18d66f5d2fe3c02252 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:33:53 -0700 Subject: x86/PCI: MMCONFIG: centralize MCFG structure management This patch encapsulate pci_mmcfg_config[] updates. All alloc/free is now done in pci_mmconfig_add() and free_all_mcfg(), so all updates to pci_mmcfg_config[] and pci_mmcfg_config_num are in those two functions. This replaces the previous sequence of extend_mmcfg(), fill_one_mmcfg() with the single pci_mmconfig_add() interface. This interface is currently static but will eventually be used in the host bridge hot-add path. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/mmconfig-shared.c | 85 +++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 46 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 7a7b6ba3abbb..62a8ecd96980 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -26,14 +26,24 @@ /* Indicate if the mmcfg resources have been placed into the resource table. */ static int __initdata pci_mmcfg_resources_inserted; -static __init int extend_mmcfg(int num) +static __init void free_all_mmcfg(void) +{ + pci_mmcfg_arch_free(); + pci_mmcfg_config_num = 0; + kfree(pci_mmcfg_config); + pci_mmcfg_config = NULL; +} + +static __init struct acpi_mcfg_allocation *pci_mmconfig_add(int segment, + int start, int end, u64 addr) { struct acpi_mcfg_allocation *new; - int new_num = pci_mmcfg_config_num + num; + int new_num = pci_mmcfg_config_num + 1; + int i = pci_mmcfg_config_num; new = kzalloc(sizeof(pci_mmcfg_config[0]) * new_num, GFP_KERNEL); if (!new) - return -1; + return NULL; if (pci_mmcfg_config) { memcpy(new, pci_mmcfg_config, @@ -42,18 +52,13 @@ static __init int extend_mmcfg(int num) } pci_mmcfg_config = new; - return 0; -} - -static __init void fill_one_mmcfg(u64 addr, int segment, int start, int end) -{ - int i = pci_mmcfg_config_num; - pci_mmcfg_config_num++; pci_mmcfg_config[i].address = addr; pci_mmcfg_config[i].pci_segment = segment; pci_mmcfg_config[i].start_bus_number = start; pci_mmcfg_config[i].end_bus_number = end; + + return &pci_mmcfg_config[i]; } static const char __init *pci_mmcfg_e7520(void) @@ -65,11 +70,9 @@ static const char __init *pci_mmcfg_e7520(void) if (win == 0x0000 || win == 0xf000) return NULL; - if (extend_mmcfg(1) == -1) + if (pci_mmconfig_add(0, 0, 255, win << 16) == NULL) return NULL; - fill_one_mmcfg(win << 16, 0, 0, 255); - return "Intel Corporation E7520 Memory Controller Hub"; } @@ -111,11 +114,9 @@ static const char __init *pci_mmcfg_intel_945(void) if ((pciexbar & mask) >= 0xf0000000U) return NULL; - if (extend_mmcfg(1) == -1) + if (pci_mmconfig_add(0, 0, (len >> 20) - 1, pciexbar & mask) == NULL) return NULL; - fill_one_mmcfg(pciexbar & mask, 0, 0, (len >> 20) - 1); - return "Intel Corporation 945G/GZ/P/PL Express Memory Controller Hub"; } @@ -124,7 +125,7 @@ static const char __init *pci_mmcfg_amd_fam10h(void) u32 low, high, address; u64 base, msr; int i; - unsigned segnbits = 0, busnbits; + unsigned segnbits = 0, busnbits, end_bus; if (!(pci_probe & PCI_CHECK_ENABLE_AMD_MMCONF)) return NULL; @@ -158,11 +159,13 @@ static const char __init *pci_mmcfg_amd_fam10h(void) busnbits = 8; } - if (extend_mmcfg(1 << segnbits) == -1) - return NULL; - + end_bus = (1 << busnbits) - 1; for (i = 0; i < (1 << segnbits); i++) - fill_one_mmcfg(base + (1<<28) * i, i, 0, (1 << busnbits) - 1); + if (pci_mmconfig_add(i, 0, end_bus, + base + (1<<28) * i) == NULL) { + free_all_mmcfg(); + return NULL; + } return "AMD Family 10h NB"; } @@ -210,16 +213,14 @@ static const char __init *pci_mmcfg_nvidia_mcp55(void) if (!(extcfg & extcfg_enable_mask)) continue; - if (extend_mmcfg(1) == -1) - continue; - size_index = (extcfg & extcfg_size_mask) >> extcfg_size_shift; base = extcfg & extcfg_base_mask[size_index]; /* base could > 4G */ base <<= extcfg_base_lshift; start = (extcfg & extcfg_start_mask) >> extcfg_start_shift; end = start + extcfg_sizebus[size_index] - 1; - fill_one_mmcfg(base, 0, start, end); + if (pci_mmconfig_add(0, start, end, base) == NULL) + continue; mcp55_mmconf_found++; } @@ -303,8 +304,7 @@ static int __init pci_mmcfg_check_hostbridge(void) if (!raw_pci_ops) return 0; - pci_mmcfg_config_num = 0; - pci_mmcfg_config = NULL; + free_all_mmcfg(); for (i = 0; i < ARRAY_SIZE(pci_mmcfg_probes); i++) { bus = pci_mmcfg_probes[i].bus; @@ -516,10 +516,7 @@ static void __init pci_mmcfg_reject_broken(int early) reject: printk(KERN_INFO "PCI: Not using MMCONFIG.\n"); - pci_mmcfg_arch_free(); - kfree(pci_mmcfg_config); - pci_mmcfg_config = NULL; - pci_mmcfg_config_num = 0; + free_all_mmcfg(); } static int __initdata known_bridge; @@ -556,7 +553,7 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) struct acpi_table_mcfg *mcfg; struct acpi_mcfg_allocation *cfg_table, *cfg; unsigned long i; - int entries, config_size; + int entries; if (!header) return -EINVAL; @@ -564,7 +561,7 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) mcfg = (struct acpi_table_mcfg *)header; /* how many config structures do we have */ - pci_mmcfg_config_num = 0; + free_all_mmcfg(); entries = 0; i = header->length - sizeof(struct acpi_table_mcfg); while (i >= sizeof(struct acpi_mcfg_allocation)) { @@ -576,25 +573,21 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) return -ENODEV; } - config_size = entries * sizeof(*pci_mmcfg_config); - pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL); - if (!pci_mmcfg_config) { - printk(KERN_WARNING PREFIX - "No memory for MCFG config tables\n"); - return -ENOMEM; - } - - memcpy(pci_mmcfg_config, &mcfg[1], config_size); - pci_mmcfg_config_num = entries; - cfg_table = (struct acpi_mcfg_allocation *) &mcfg[1]; for (i = 0; i < entries; i++) { cfg = &cfg_table[i]; if (acpi_mcfg_check_entry(mcfg, cfg)) { - kfree(pci_mmcfg_config); - pci_mmcfg_config_num = 0; + free_all_mmcfg(); return -ENODEV; } + + if (pci_mmconfig_add(cfg->pci_segment, cfg->start_bus_number, + cfg->end_bus_number, cfg->address) == NULL) { + printk(KERN_WARNING PREFIX + "no memory for MCFG entries\n"); + free_all_mmcfg(); + return -ENOMEM; + } } return 0; -- cgit v1.2.3 From 463a5df175e3ceed684397ee2f8a3eb523d835a0 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:33:58 -0700 Subject: x86/PCI: MMCONFIG: simplify tests for empty pci_mmcfg_config table We never set pci_mmcfg_config unless we increment pci_mmcfg_config_num, so there's no need to test both pci_mmcfg_config_num and pci_mmcfg_config. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/mmconfig-shared.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 62a8ecd96980..a0cc4d2efb8a 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -472,7 +472,6 @@ static void __init pci_mmcfg_reject_broken(int early) int i; if ((pci_mmcfg_config_num == 0) || - (pci_mmcfg_config == NULL) || (pci_mmcfg_config[0].address == 0)) return; @@ -618,7 +617,6 @@ static void __init __pci_mmcfg_init(int early) pci_mmcfg_reject_broken(early); if ((pci_mmcfg_config_num == 0) || - (pci_mmcfg_config == NULL) || (pci_mmcfg_config[0].address == 0)) return; @@ -652,7 +650,6 @@ static int __init pci_mmcfg_late_insert_resources(void) if ((pci_mmcfg_resources_inserted == 1) || (pci_probe & PCI_PROBE_MMCONF) == 0 || (pci_mmcfg_config_num == 0) || - (pci_mmcfg_config == NULL) || (pci_mmcfg_config[0].address == 0)) return 1; -- cgit v1.2.3 From f7ca69848786bb99fdfafb511791b078c298438e Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:34:03 -0700 Subject: x86/PCI: MMCONFIG: reject MMCONFIG apertures at address zero Since all MMCONFIG regions go through pci_mmconfig_add(), we can test the address once there. If the caller supplies an address of zero, we never insert it in the pci_mmcfg_config[] table, so no need to test it elsewhere. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/mmconfig-shared.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index a0cc4d2efb8a..067a2cfed15c 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -41,6 +41,9 @@ static __init struct acpi_mcfg_allocation *pci_mmconfig_add(int segment, int new_num = pci_mmcfg_config_num + 1; int i = pci_mmcfg_config_num; + if (addr == 0) + return NULL; + new = kzalloc(sizeof(pci_mmcfg_config[0]) * new_num, GFP_KERNEL); if (!new) return NULL; @@ -471,8 +474,7 @@ static void __init pci_mmcfg_reject_broken(int early) typeof(pci_mmcfg_config[0]) *cfg; int i; - if ((pci_mmcfg_config_num == 0) || - (pci_mmcfg_config[0].address == 0)) + if (pci_mmcfg_config_num == 0) return; for (i = 0; i < pci_mmcfg_config_num; i++) { @@ -616,8 +618,7 @@ static void __init __pci_mmcfg_init(int early) pci_mmcfg_reject_broken(early); - if ((pci_mmcfg_config_num == 0) || - (pci_mmcfg_config[0].address == 0)) + if (pci_mmcfg_config_num == 0) return; if (pci_mmcfg_arch_init()) @@ -649,8 +650,7 @@ static int __init pci_mmcfg_late_insert_resources(void) */ if ((pci_mmcfg_resources_inserted == 1) || (pci_probe & PCI_PROBE_MMCONF) == 0 || - (pci_mmcfg_config_num == 0) || - (pci_mmcfg_config[0].address == 0)) + (pci_mmcfg_config_num == 0)) return 1; /* -- cgit v1.2.3 From df5eb1d67e8074dfbc23cf396c556116728187b3 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:34:08 -0700 Subject: x86/PCI: MMCONFIG: add PCI_MMCFG_BUS_OFFSET() to factor common expression This factors out the common "bus << 20" expression used when computing the MMCONFIG address. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/include/asm/pci_x86.h | 2 ++ arch/x86/pci/mmconfig-shared.c | 16 ++++++++-------- arch/x86/pci/mmconfig_32.c | 2 +- arch/x86/pci/mmconfig_64.c | 15 +++++++-------- 4 files changed, 18 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index b399988eee3a..7d94a235ec82 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -124,6 +124,8 @@ extern void __init pci_mmcfg_arch_free(void); extern struct acpi_mcfg_allocation *pci_mmcfg_config; extern int pci_mmcfg_config_num; +#define PCI_MMCFG_BUS_OFFSET(bus) ((bus) << 20) + /* * AMD Fam10h CPUs are buggy, and cannot access MMIO config space * on their northbrige except through the * %eax register. As such, you MUST diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 067a2cfed15c..4820f0e8c594 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -355,8 +355,9 @@ static void __init pci_mmcfg_insert_resources(void) snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, "PCI MMCONFIG %u [%02x-%02x]", cfg->pci_segment, cfg->start_bus_number, cfg->end_bus_number); - res->start = cfg->address + (cfg->start_bus_number << 20); - res->end = res->start + (num_buses << 20) - 1; + res->start = cfg->address + + PCI_MMCFG_BUS_OFFSET(cfg->start_bus_number); + res->end = res->start + PCI_MMCFG_BUS_OFFSET(num_buses) - 1; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; insert_resource(&iomem_resource, res); names += PCI_MMCFG_RESOURCE_NAME_LEN; @@ -478,15 +479,14 @@ static void __init pci_mmcfg_reject_broken(int early) return; for (i = 0; i < pci_mmcfg_config_num; i++) { - int valid = 0; + int num_buses, valid = 0; u64 addr, size; cfg = &pci_mmcfg_config[i]; - addr = cfg->start_bus_number; - addr <<= 20; - addr += cfg->address; - size = cfg->end_bus_number + 1 - cfg->start_bus_number; - size <<= 20; + addr = cfg->address + + PCI_MMCFG_BUS_OFFSET(cfg->start_bus_number); + num_buses = cfg->end_bus_number - cfg->start_bus_number + 1; + size = PCI_MMCFG_BUS_OFFSET(num_buses); printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx " "segment %hu buses %u - %u\n", i, (unsigned long)cfg->address, cfg->pci_segment, diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index f10a7e94a84c..8c19df89ad75 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c @@ -47,7 +47,7 @@ static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) */ static void pci_exp_set_dev_base(unsigned int base, int bus, int devfn) { - u32 dev_base = base | (bus << 20) | (devfn << 12); + u32 dev_base = base | PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12); int cpu = smp_processor_id(); if (dev_base != mmcfg_last_accessed_device || cpu != mmcfg_last_accessed_cpu) { diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index 94349f8b2f96..8588711924cc 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -43,7 +43,7 @@ static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned i addr = get_virt(seg, bus); if (!addr) return NULL; - return addr + ((bus << 20) | (devfn << 12)); + return addr + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12)); } static int pci_mmcfg_read(unsigned int seg, unsigned int bus, @@ -113,17 +113,16 @@ static void __iomem * __init mcfg_ioremap(struct acpi_mcfg_allocation *cfg) { void __iomem *addr; u64 start, size; + int num_buses; - start = cfg->start_bus_number; - start <<= 20; - start += cfg->address; - size = cfg->end_bus_number + 1 - cfg->start_bus_number; - size <<= 20; + start = cfg->address + PCI_MMCFG_BUS_OFFSET(cfg->start_bus_number); + num_buses = cfg->end_bus_number - cfg->start_bus_number + 1; + size = PCI_MMCFG_BUS_OFFSET(num_buses); addr = ioremap_nocache(start, size); if (addr) { printk(KERN_INFO "PCI: Using MMCONFIG at %Lx - %Lx\n", start, start + size - 1); - addr -= cfg->start_bus_number << 20; + addr -= PCI_MMCFG_BUS_OFFSET(cfg->start_bus_number); } return addr; } @@ -162,7 +161,7 @@ void __init pci_mmcfg_arch_free(void) for (i = 0; i < pci_mmcfg_config_num; ++i) { if (pci_mmcfg_virt[i].virt) { - iounmap(pci_mmcfg_virt[i].virt + (pci_mmcfg_virt[i].cfg->start_bus_number << 20)); + iounmap(pci_mmcfg_virt[i].virt + PCI_MMCFG_BUS_OFFSET(pci_mmcfg_virt[i].cfg->start_bus_number)); pci_mmcfg_virt[i].virt = NULL; pci_mmcfg_virt[i].cfg = NULL; } -- cgit v1.2.3 From d215a9c8b46e55a1d3bc1cd907c943ef95938a0e Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:34:13 -0700 Subject: x86/PCI: MMCONFIG: use a private structure rather than the ACPI MCFG one This adds a struct pci_mmcfg_region with a little more information than the struct acpi_mcfg_allocation used previously. The acpi_mcfg structure is defined by the spec, so we can't change it. To begin with, struct pci_mmcfg_region is basically the same as the ACPI MCFG version, but future patches will add more information. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/include/asm/pci_x86.h | 9 ++++++++- arch/x86/pci/mmconfig-shared.c | 10 +++++----- arch/x86/pci/mmconfig_32.c | 2 +- arch/x86/pci/mmconfig_64.c | 6 +++--- 4 files changed, 17 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 7d94a235ec82..3a2ca5f69521 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -118,10 +118,17 @@ extern int __init pcibios_init(void); /* pci-mmconfig.c */ +struct pci_mmcfg_region { + u64 address; + u16 pci_segment; + u8 start_bus_number; + u8 end_bus_number; +}; + extern int __init pci_mmcfg_arch_init(void); extern void __init pci_mmcfg_arch_free(void); -extern struct acpi_mcfg_allocation *pci_mmcfg_config; +extern struct pci_mmcfg_region *pci_mmcfg_config; extern int pci_mmcfg_config_num; #define PCI_MMCFG_BUS_OFFSET(bus) ((bus) << 20) diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 4820f0e8c594..5f7afdd1e2d6 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -34,10 +34,10 @@ static __init void free_all_mmcfg(void) pci_mmcfg_config = NULL; } -static __init struct acpi_mcfg_allocation *pci_mmconfig_add(int segment, - int start, int end, u64 addr) +static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, + int end, u64 addr) { - struct acpi_mcfg_allocation *new; + struct pci_mmcfg_region *new; int new_num = pci_mmcfg_config_num + 1; int i = pci_mmcfg_config_num; @@ -349,7 +349,7 @@ static void __init pci_mmcfg_insert_resources(void) names = (void *)&res[pci_mmcfg_config_num]; for (i = 0; i < pci_mmcfg_config_num; i++, res++) { - struct acpi_mcfg_allocation *cfg = &pci_mmcfg_config[i]; + struct pci_mmcfg_region *cfg = &pci_mmcfg_config[i]; num_buses = cfg->end_bus_number - cfg->start_bus_number + 1; res->name = names; snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, @@ -523,7 +523,7 @@ reject: static int __initdata known_bridge; /* The physical address of the MMCONFIG aperture. Set from ACPI tables. */ -struct acpi_mcfg_allocation *pci_mmcfg_config; +struct pci_mmcfg_region *pci_mmcfg_config; int pci_mmcfg_config_num; static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg, diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index 8c19df89ad75..3936eced993c 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c @@ -27,7 +27,7 @@ static int mmcfg_last_accessed_cpu; */ static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) { - struct acpi_mcfg_allocation *cfg; + struct pci_mmcfg_region *cfg; int cfg_num; for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) { diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index 8588711924cc..7a6231c3335e 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -14,14 +14,14 @@ /* Static virtual mapping of the MMCONFIG aperture */ struct mmcfg_virt { - struct acpi_mcfg_allocation *cfg; + struct pci_mmcfg_region *cfg; char __iomem *virt; }; static struct mmcfg_virt *pci_mmcfg_virt; static char __iomem *get_virt(unsigned int seg, unsigned bus) { - struct acpi_mcfg_allocation *cfg; + struct pci_mmcfg_region *cfg; int cfg_num; for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) { @@ -109,7 +109,7 @@ static struct pci_raw_ops pci_mmcfg = { .write = pci_mmcfg_write, }; -static void __iomem * __init mcfg_ioremap(struct acpi_mcfg_allocation *cfg) +static void __iomem * __init mcfg_ioremap(struct pci_mmcfg_region *cfg) { void __iomem *addr; u64 start, size; -- cgit v1.2.3 From d7e6b66fe87c9f42480d73fc314aecaeae84ca6b Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:34:18 -0700 Subject: x86/PCI: MMCONFIG: rename pci_mmcfg_region structure members This only renames the struct pci_mmcfg_region members; no functional change. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/include/asm/pci_x86.h | 6 ++--- arch/x86/pci/mmconfig-shared.c | 50 +++++++++++++++++++++--------------------- arch/x86/pci/mmconfig_32.c | 6 ++--- arch/x86/pci/mmconfig_64.c | 16 +++++++------- 4 files changed, 39 insertions(+), 39 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 3a2ca5f69521..a752d618f196 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -120,9 +120,9 @@ extern int __init pcibios_init(void); struct pci_mmcfg_region { u64 address; - u16 pci_segment; - u8 start_bus_number; - u8 end_bus_number; + u16 segment; + u8 start_bus; + u8 end_bus; }; extern int __init pci_mmcfg_arch_init(void); diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 5f7afdd1e2d6..5479fbb2d6ab 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -57,9 +57,9 @@ static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, pci_mmcfg_config_num++; pci_mmcfg_config[i].address = addr; - pci_mmcfg_config[i].pci_segment = segment; - pci_mmcfg_config[i].start_bus_number = start; - pci_mmcfg_config[i].end_bus_number = end; + pci_mmcfg_config[i].segment = segment; + pci_mmcfg_config[i].start_bus = start; + pci_mmcfg_config[i].end_bus = end; return &pci_mmcfg_config[i]; } @@ -260,8 +260,8 @@ static int __init cmp_mmcfg(const void *x1, const void *x2) const typeof(pci_mmcfg_config[0]) *m2 = x2; int start1, start2; - start1 = m1->start_bus_number; - start2 = m2->start_bus_number; + start1 = m1->start_bus; + start2 = m2->start_bus; return start1 - start2; } @@ -279,8 +279,8 @@ static void __init pci_mmcfg_check_end_bus_number(void) if (pci_mmcfg_config_num > 0) { i = pci_mmcfg_config_num - 1; cfg = &pci_mmcfg_config[i]; - if (cfg->end_bus_number < cfg->start_bus_number) - cfg->end_bus_number = 255; + if (cfg->end_bus < cfg->start_bus) + cfg->end_bus = 255; } /* don't overlap please */ @@ -288,11 +288,11 @@ static void __init pci_mmcfg_check_end_bus_number(void) cfg = &pci_mmcfg_config[i]; cfgx = &pci_mmcfg_config[i+1]; - if (cfg->end_bus_number < cfg->start_bus_number) - cfg->end_bus_number = 255; + if (cfg->end_bus < cfg->start_bus) + cfg->end_bus = 255; - if (cfg->end_bus_number >= cfgx->start_bus_number) - cfg->end_bus_number = cfgx->start_bus_number - 1; + if (cfg->end_bus >= cfgx->start_bus) + cfg->end_bus = cfgx->start_bus - 1; } } @@ -350,13 +350,13 @@ static void __init pci_mmcfg_insert_resources(void) names = (void *)&res[pci_mmcfg_config_num]; for (i = 0; i < pci_mmcfg_config_num; i++, res++) { struct pci_mmcfg_region *cfg = &pci_mmcfg_config[i]; - num_buses = cfg->end_bus_number - cfg->start_bus_number + 1; + num_buses = cfg->end_bus - cfg->start_bus + 1; res->name = names; snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, - "PCI MMCONFIG %u [%02x-%02x]", cfg->pci_segment, - cfg->start_bus_number, cfg->end_bus_number); + "PCI MMCONFIG %u [%02x-%02x]", cfg->segment, + cfg->start_bus, cfg->end_bus); res->start = cfg->address + - PCI_MMCFG_BUS_OFFSET(cfg->start_bus_number); + PCI_MMCFG_BUS_OFFSET(cfg->start_bus); res->end = res->start + PCI_MMCFG_BUS_OFFSET(num_buses) - 1; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; insert_resource(&iomem_resource, res); @@ -457,13 +457,13 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved, valid = 1; if (old_size != size) { - /* update end_bus_number */ - cfg->end_bus_number = cfg->start_bus_number + ((size>>20) - 1); + /* update end_bus */ + cfg->end_bus = cfg->start_bus + ((size>>20) - 1); printk(KERN_NOTICE "PCI: updated MCFG configuration %d: base %lx " "segment %hu buses %u - %u\n", - i, (unsigned long)cfg->address, cfg->pci_segment, - (unsigned int)cfg->start_bus_number, - (unsigned int)cfg->end_bus_number); + i, (unsigned long)cfg->address, cfg->segment, + (unsigned int)cfg->start_bus, + (unsigned int)cfg->end_bus); } } @@ -484,14 +484,14 @@ static void __init pci_mmcfg_reject_broken(int early) cfg = &pci_mmcfg_config[i]; addr = cfg->address + - PCI_MMCFG_BUS_OFFSET(cfg->start_bus_number); - num_buses = cfg->end_bus_number - cfg->start_bus_number + 1; + PCI_MMCFG_BUS_OFFSET(cfg->start_bus); + num_buses = cfg->end_bus - cfg->start_bus + 1; size = PCI_MMCFG_BUS_OFFSET(num_buses); printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx " "segment %hu buses %u - %u\n", - i, (unsigned long)cfg->address, cfg->pci_segment, - (unsigned int)cfg->start_bus_number, - (unsigned int)cfg->end_bus_number); + i, (unsigned long)cfg->address, cfg->segment, + (unsigned int)cfg->start_bus, + (unsigned int)cfg->end_bus); if (!early && !acpi_disabled) valid = is_mmconf_reserved(is_acpi_reserved, addr, size, i, cfg, 0); diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index 3936eced993c..a3cee532c935 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c @@ -32,9 +32,9 @@ static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) { cfg = &pci_mmcfg_config[cfg_num]; - if (cfg->pci_segment == seg && - (cfg->start_bus_number <= bus) && - (cfg->end_bus_number >= bus)) + if (cfg->segment == seg && + (cfg->start_bus <= bus) && + (cfg->end_bus >= bus)) return cfg->address; } diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index 7a6231c3335e..fdf08f97131b 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -26,9 +26,9 @@ static char __iomem *get_virt(unsigned int seg, unsigned bus) for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) { cfg = pci_mmcfg_virt[cfg_num].cfg; - if (cfg->pci_segment == seg && - (cfg->start_bus_number <= bus) && - (cfg->end_bus_number >= bus)) + if (cfg->segment == seg && + (cfg->start_bus <= bus) && + (cfg->end_bus >= bus)) return pci_mmcfg_virt[cfg_num].virt; } @@ -115,14 +115,14 @@ static void __iomem * __init mcfg_ioremap(struct pci_mmcfg_region *cfg) u64 start, size; int num_buses; - start = cfg->address + PCI_MMCFG_BUS_OFFSET(cfg->start_bus_number); - num_buses = cfg->end_bus_number - cfg->start_bus_number + 1; + start = cfg->address + PCI_MMCFG_BUS_OFFSET(cfg->start_bus); + num_buses = cfg->end_bus - cfg->start_bus + 1; size = PCI_MMCFG_BUS_OFFSET(num_buses); addr = ioremap_nocache(start, size); if (addr) { printk(KERN_INFO "PCI: Using MMCONFIG at %Lx - %Lx\n", start, start + size - 1); - addr -= PCI_MMCFG_BUS_OFFSET(cfg->start_bus_number); + addr -= PCI_MMCFG_BUS_OFFSET(cfg->start_bus); } return addr; } @@ -143,7 +143,7 @@ int __init pci_mmcfg_arch_init(void) if (!pci_mmcfg_virt[i].virt) { printk(KERN_ERR "PCI: Cannot map mmconfig aperture for " "segment %d\n", - pci_mmcfg_config[i].pci_segment); + pci_mmcfg_config[i].segment); pci_mmcfg_arch_free(); return 0; } @@ -161,7 +161,7 @@ void __init pci_mmcfg_arch_free(void) for (i = 0; i < pci_mmcfg_config_num; ++i) { if (pci_mmcfg_virt[i].virt) { - iounmap(pci_mmcfg_virt[i].virt + PCI_MMCFG_BUS_OFFSET(pci_mmcfg_virt[i].cfg->start_bus_number)); + iounmap(pci_mmcfg_virt[i].virt + PCI_MMCFG_BUS_OFFSET(pci_mmcfg_virt[i].cfg->start_bus)); pci_mmcfg_virt[i].virt = NULL; pci_mmcfg_virt[i].cfg = NULL; } -- cgit v1.2.3 From 95cf1cf0c5a767feb811dfed298b95b1df8824c7 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:34:24 -0700 Subject: x86/PCI: MMCONFIG: use pointer to simplify pci_mmcfg_config[] structure access No functional change, but simplifies a future patch to convert the table to a list. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/mmconfig-shared.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 5479fbb2d6ab..28ac9f58a986 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -54,12 +54,14 @@ static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, kfree(pci_mmcfg_config); } pci_mmcfg_config = new; - pci_mmcfg_config_num++; - pci_mmcfg_config[i].address = addr; - pci_mmcfg_config[i].segment = segment; - pci_mmcfg_config[i].start_bus = start; - pci_mmcfg_config[i].end_bus = end; + + new = &pci_mmcfg_config[i]; + + new->address = addr; + new->segment = segment; + new->start_bus = start; + new->end_bus = end; return &pci_mmcfg_config[i]; } -- cgit v1.2.3 From 56ddf4d3cf04e80254d3d721c6bea2f8ec44c41a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:34:29 -0700 Subject: x86/PCI: MMCONFIG: add resource to struct pci_mmcfg_region This patch adds a resource and corresponding name to the MMCONFIG structure. This makes allocation simpler (we can allocate the resource and name at the same time we allocate the pci_mmcfg_region), and gives us a way to hang onto the resource after inserting it. This will be needed so we can release and free it when hot-removing a host bridge. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/include/asm/pci_x86.h | 5 ++++ arch/x86/pci/mmconfig-shared.c | 64 ++++++++++++++++++++++-------------------- 2 files changed, 38 insertions(+), 31 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index a752d618f196..a6d42c10b017 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -118,11 +118,16 @@ extern int __init pcibios_init(void); /* pci-mmconfig.c */ +/* "PCI MMCONFIG %04x [bus %02x-%02x]" */ +#define PCI_MMCFG_RESOURCE_NAME_LEN (22 + 4 + 2 + 2) + struct pci_mmcfg_region { + struct resource res; u64 address; u16 segment; u8 start_bus; u8 end_bus; + char name[PCI_MMCFG_RESOURCE_NAME_LEN]; }; extern int __init pci_mmcfg_arch_init(void); diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 28ac9f58a986..ba3aa3697418 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -28,7 +28,15 @@ static int __initdata pci_mmcfg_resources_inserted; static __init void free_all_mmcfg(void) { + int i; + struct pci_mmcfg_region *cfg; + pci_mmcfg_arch_free(); + for (i = 0; i < pci_mmcfg_config_num; i++) { + cfg = &pci_mmcfg_config[i]; + if (cfg->res.parent) + release_resource(&cfg->res); + } pci_mmcfg_config_num = 0; kfree(pci_mmcfg_config); pci_mmcfg_config = NULL; @@ -40,6 +48,8 @@ static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, struct pci_mmcfg_region *new; int new_num = pci_mmcfg_config_num + 1; int i = pci_mmcfg_config_num; + int num_buses; + struct resource *res; if (addr == 0) return NULL; @@ -63,6 +73,15 @@ static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, new->start_bus = start; new->end_bus = end; + num_buses = end - start + 1; + res = &new->res; + res->start = addr + PCI_MMCFG_BUS_OFFSET(start); + res->end = addr + PCI_MMCFG_BUS_OFFSET(num_buses) - 1; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + snprintf(new->name, PCI_MMCFG_RESOURCE_NAME_LEN, + "PCI MMCONFIG %04x [bus %02x-%02x]", segment, start, end); + res->name = new->name; + return &pci_mmcfg_config[i]; } @@ -336,33 +355,12 @@ static int __init pci_mmcfg_check_hostbridge(void) static void __init pci_mmcfg_insert_resources(void) { -#define PCI_MMCFG_RESOURCE_NAME_LEN 24 int i; - struct resource *res; - char *names; - unsigned num_buses; - - res = kcalloc(PCI_MMCFG_RESOURCE_NAME_LEN + sizeof(*res), - pci_mmcfg_config_num, GFP_KERNEL); - if (!res) { - printk(KERN_ERR "PCI: Unable to allocate MMCONFIG resources\n"); - return; - } + struct pci_mmcfg_region *cfg; - names = (void *)&res[pci_mmcfg_config_num]; - for (i = 0; i < pci_mmcfg_config_num; i++, res++) { - struct pci_mmcfg_region *cfg = &pci_mmcfg_config[i]; - num_buses = cfg->end_bus - cfg->start_bus + 1; - res->name = names; - snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, - "PCI MMCONFIG %u [%02x-%02x]", cfg->segment, - cfg->start_bus, cfg->end_bus); - res->start = cfg->address + - PCI_MMCFG_BUS_OFFSET(cfg->start_bus); - res->end = res->start + PCI_MMCFG_BUS_OFFSET(num_buses) - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; - insert_resource(&iomem_resource, res); - names += PCI_MMCFG_RESOURCE_NAME_LEN; + for (i = 0; i < pci_mmcfg_config_num; i++) { + cfg = &pci_mmcfg_config[i]; + insert_resource(&iomem_resource, &cfg->res); } /* Mark that the resources have been inserted. */ @@ -444,7 +442,7 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved, typeof(pci_mmcfg_config[0]) *cfg, int with_e820) { u64 old_size = size; - int valid = 0; + int valid = 0, num_buses; while (!is_reserved(addr, addr + size, E820_RESERVED)) { size >>= 1; @@ -461,6 +459,12 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved, if (old_size != size) { /* update end_bus */ cfg->end_bus = cfg->start_bus + ((size>>20) - 1); + num_buses = cfg->end_bus - cfg->start_bus + 1; + cfg->res.end = cfg->res.start + + PCI_MMCFG_BUS_OFFSET(num_buses) - 1; + snprintf(cfg->name, PCI_MMCFG_RESOURCE_NAME_LEN, + "PCI MMCONFIG %04x [bus %02x-%02x]", + cfg->segment, cfg->start_bus, cfg->end_bus); printk(KERN_NOTICE "PCI: updated MCFG configuration %d: base %lx " "segment %hu buses %u - %u\n", i, (unsigned long)cfg->address, cfg->segment, @@ -481,14 +485,12 @@ static void __init pci_mmcfg_reject_broken(int early) return; for (i = 0; i < pci_mmcfg_config_num; i++) { - int num_buses, valid = 0; + int valid = 0; u64 addr, size; cfg = &pci_mmcfg_config[i]; - addr = cfg->address + - PCI_MMCFG_BUS_OFFSET(cfg->start_bus); - num_buses = cfg->end_bus - cfg->start_bus + 1; - size = PCI_MMCFG_BUS_OFFSET(num_buses); + addr = cfg->res.start; + size = resource_size(&cfg->res); printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx " "segment %hu buses %u - %u\n", i, (unsigned long)cfg->address, cfg->segment, -- cgit v1.2.3 From 2f2a8b9c90279e75f87aaf322a948bdced27e89f Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:34:34 -0700 Subject: x86/PCI: MMCONFIG: trivial is_mmconf_reserved() interface simplification Since pci_mmcfg_region contains the struct resource, no need to pass the pci_mmcfg_region *and* the resource start/size. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/mmconfig-shared.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index ba3aa3697418..90422b4a7c91 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -438,9 +438,10 @@ static int __init is_acpi_reserved(u64 start, u64 end, unsigned not_used) typedef int (*check_reserved_t)(u64 start, u64 end, unsigned type); static int __init is_mmconf_reserved(check_reserved_t is_reserved, - u64 addr, u64 size, int i, - typeof(pci_mmcfg_config[0]) *cfg, int with_e820) + int i, typeof(pci_mmcfg_config[0]) *cfg, int with_e820) { + u64 addr = cfg->res.start; + u64 size = resource_size(&cfg->res); u64 old_size = size; int valid = 0, num_buses; @@ -486,11 +487,8 @@ static void __init pci_mmcfg_reject_broken(int early) for (i = 0; i < pci_mmcfg_config_num; i++) { int valid = 0; - u64 addr, size; cfg = &pci_mmcfg_config[i]; - addr = cfg->res.start; - size = resource_size(&cfg->res); printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx " "segment %hu buses %u - %u\n", i, (unsigned long)cfg->address, cfg->segment, @@ -498,7 +496,7 @@ static void __init pci_mmcfg_reject_broken(int early) (unsigned int)cfg->end_bus); if (!early && !acpi_disabled) - valid = is_mmconf_reserved(is_acpi_reserved, addr, size, i, cfg, 0); + valid = is_mmconf_reserved(is_acpi_reserved, i, cfg, 0); if (valid) continue; @@ -511,7 +509,7 @@ static void __init pci_mmcfg_reject_broken(int early) /* Don't try to do this check unless configuration type 1 is available. how about type 2 ?*/ if (raw_pci_ops) - valid = is_mmconf_reserved(e820_all_mapped, addr, size, i, cfg, 1); + valid = is_mmconf_reserved(e820_all_mapped, i, cfg, 1); if (!valid) goto reject; -- cgit v1.2.3 From 3f0f5503926f7447615f083c2d57545a83b6357c Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:34:39 -0700 Subject: x86/PCI: MMCONFIG: add virtual address to struct pci_mmcfg_region The virtual address is only used for x86_64, but it's so much simpler to manage it as part of the pci_mmcfg_region that I think it's worth wasting a pointer per MMCONFIG region on x86_32. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/include/asm/pci_x86.h | 1 + arch/x86/pci/mmconfig_64.c | 45 +++++++++++++----------------------------- 2 files changed, 15 insertions(+), 31 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index a6d42c10b017..7aa2ed8f25ab 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -124,6 +124,7 @@ extern int __init pcibios_init(void); struct pci_mmcfg_region { struct resource res; u64 address; + char __iomem *virt; u16 segment; u8 start_bus; u8 end_bus; diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index fdf08f97131b..78fa05c6c04d 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -12,24 +12,17 @@ #include #include -/* Static virtual mapping of the MMCONFIG aperture */ -struct mmcfg_virt { - struct pci_mmcfg_region *cfg; - char __iomem *virt; -}; -static struct mmcfg_virt *pci_mmcfg_virt; - static char __iomem *get_virt(unsigned int seg, unsigned bus) { + int i; struct pci_mmcfg_region *cfg; - int cfg_num; - for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) { - cfg = pci_mmcfg_virt[cfg_num].cfg; + for (i = 0; i < pci_mmcfg_config_num; ++i) { + cfg = &pci_mmcfg_config[i]; if (cfg->segment == seg && (cfg->start_bus <= bus) && (cfg->end_bus >= bus)) - return pci_mmcfg_virt[cfg_num].virt; + return cfg->virt; } /* Fall back to type 0 */ @@ -130,20 +123,15 @@ static void __iomem * __init mcfg_ioremap(struct pci_mmcfg_region *cfg) int __init pci_mmcfg_arch_init(void) { int i; - pci_mmcfg_virt = kzalloc(sizeof(*pci_mmcfg_virt) * - pci_mmcfg_config_num, GFP_KERNEL); - if (pci_mmcfg_virt == NULL) { - printk(KERN_ERR "PCI: Can not allocate memory for mmconfig structures\n"); - return 0; - } + struct pci_mmcfg_region *cfg; for (i = 0; i < pci_mmcfg_config_num; ++i) { - pci_mmcfg_virt[i].cfg = &pci_mmcfg_config[i]; - pci_mmcfg_virt[i].virt = mcfg_ioremap(&pci_mmcfg_config[i]); - if (!pci_mmcfg_virt[i].virt) { + cfg = &pci_mmcfg_config[i]; + cfg->virt = mcfg_ioremap(cfg); + if (!cfg->virt) { printk(KERN_ERR "PCI: Cannot map mmconfig aperture for " "segment %d\n", - pci_mmcfg_config[i].segment); + cfg->segment); pci_mmcfg_arch_free(); return 0; } @@ -155,18 +143,13 @@ int __init pci_mmcfg_arch_init(void) void __init pci_mmcfg_arch_free(void) { int i; - - if (pci_mmcfg_virt == NULL) - return; + struct pci_mmcfg_region *cfg; for (i = 0; i < pci_mmcfg_config_num; ++i) { - if (pci_mmcfg_virt[i].virt) { - iounmap(pci_mmcfg_virt[i].virt + PCI_MMCFG_BUS_OFFSET(pci_mmcfg_virt[i].cfg->start_bus)); - pci_mmcfg_virt[i].virt = NULL; - pci_mmcfg_virt[i].cfg = NULL; + cfg = &pci_mmcfg_config[i]; + if (cfg->virt) { + iounmap(cfg->virt + PCI_MMCFG_BUS_OFFSET(cfg->start_bus)); + cfg->virt = NULL; } } - - kfree(pci_mmcfg_virt); - pci_mmcfg_virt = NULL; } -- cgit v1.2.3 From 987c367b4e93be6826394e7c9cc14d28bb5c8810 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:34:44 -0700 Subject: x86/PCI: MMCONFIG: remove typeof so we can use a list This replaces "typeof(pci_mmcfg_config[0])" with the actual type because I plan to convert pci_mmcfg_config to a list, and then "pci_mmcfg_config[0]" won't mean anything. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/mmconfig-shared.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 90422b4a7c91..6eeeac0d25f4 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -277,8 +277,8 @@ static struct pci_mmcfg_hostbridge_probe pci_mmcfg_probes[] __initdata = { static int __init cmp_mmcfg(const void *x1, const void *x2) { - const typeof(pci_mmcfg_config[0]) *m1 = x1; - const typeof(pci_mmcfg_config[0]) *m2 = x2; + const struct pci_mmcfg_region *m1 = x1; + const struct pci_mmcfg_region *m2 = x2; int start1, start2; start1 = m1->start_bus; @@ -290,7 +290,7 @@ static int __init cmp_mmcfg(const void *x1, const void *x2) static void __init pci_mmcfg_check_end_bus_number(void) { int i; - typeof(pci_mmcfg_config[0]) *cfg, *cfgx; + struct pci_mmcfg_region *cfg, *cfgx; /* sort them at first */ sort(pci_mmcfg_config, pci_mmcfg_config_num, @@ -438,7 +438,7 @@ static int __init is_acpi_reserved(u64 start, u64 end, unsigned not_used) typedef int (*check_reserved_t)(u64 start, u64 end, unsigned type); static int __init is_mmconf_reserved(check_reserved_t is_reserved, - int i, typeof(pci_mmcfg_config[0]) *cfg, int with_e820) + int i, struct pci_mmcfg_region *cfg, int with_e820) { u64 addr = cfg->res.start; u64 size = resource_size(&cfg->res); @@ -479,7 +479,7 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved, static void __init pci_mmcfg_reject_broken(int early) { - typeof(pci_mmcfg_config[0]) *cfg; + struct pci_mmcfg_region *cfg; int i; if (pci_mmcfg_config_num == 0) -- cgit v1.2.3 From ff097ddd4aeac790fd51d013c79c2f18ec9a7117 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:34:49 -0700 Subject: x86/PCI: MMCONFIG: manage pci_mmcfg_region as a list, not a table This changes pci_mmcfg_region from a table to a list, to make it easier to add and remove MMCONFIG regions for PCI host bridge hotplug. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/include/asm/pci_x86.h | 4 +- arch/x86/pci/mmconfig-shared.c | 106 ++++++++++++++++------------------------- arch/x86/pci/mmconfig_32.c | 5 +- arch/x86/pci/mmconfig_64.c | 13 ++--- 4 files changed, 47 insertions(+), 81 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 7aa2ed8f25ab..0b7c316a70c3 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -122,6 +122,7 @@ extern int __init pcibios_init(void); #define PCI_MMCFG_RESOURCE_NAME_LEN (22 + 4 + 2 + 2) struct pci_mmcfg_region { + struct list_head list; struct resource res; u64 address; char __iomem *virt; @@ -134,8 +135,7 @@ struct pci_mmcfg_region { extern int __init pci_mmcfg_arch_init(void); extern void __init pci_mmcfg_arch_free(void); -extern struct pci_mmcfg_region *pci_mmcfg_config; -extern int pci_mmcfg_config_num; +extern struct list_head pci_mmcfg_list; #define PCI_MMCFG_BUS_OFFSET(bus) ((bus) << 20) diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 6eeeac0d25f4..2709aa81801d 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -26,53 +25,58 @@ /* Indicate if the mmcfg resources have been placed into the resource table. */ static int __initdata pci_mmcfg_resources_inserted; +LIST_HEAD(pci_mmcfg_list); + static __init void free_all_mmcfg(void) { - int i; - struct pci_mmcfg_region *cfg; + struct pci_mmcfg_region *cfg, *tmp; pci_mmcfg_arch_free(); - for (i = 0; i < pci_mmcfg_config_num; i++) { - cfg = &pci_mmcfg_config[i]; + list_for_each_entry_safe(cfg, tmp, &pci_mmcfg_list, list) { if (cfg->res.parent) release_resource(&cfg->res); + list_del(&cfg->list); + kfree(cfg); } - pci_mmcfg_config_num = 0; - kfree(pci_mmcfg_config); - pci_mmcfg_config = NULL; +} + +static __init void list_add_sorted(struct pci_mmcfg_region *new) +{ + struct pci_mmcfg_region *cfg; + + /* keep list sorted by segment and starting bus number */ + list_for_each_entry(cfg, &pci_mmcfg_list, list) { + if (cfg->segment > new->segment || + (cfg->segment == new->segment && + cfg->start_bus >= new->start_bus)) { + list_add_tail(&new->list, &cfg->list); + return; + } + } + list_add_tail(&new->list, &pci_mmcfg_list); } static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, int end, u64 addr) { struct pci_mmcfg_region *new; - int new_num = pci_mmcfg_config_num + 1; - int i = pci_mmcfg_config_num; int num_buses; struct resource *res; if (addr == 0) return NULL; - new = kzalloc(sizeof(pci_mmcfg_config[0]) * new_num, GFP_KERNEL); + new = kzalloc(sizeof(*new), GFP_KERNEL); if (!new) return NULL; - if (pci_mmcfg_config) { - memcpy(new, pci_mmcfg_config, - sizeof(pci_mmcfg_config[0]) * new_num); - kfree(pci_mmcfg_config); - } - pci_mmcfg_config = new; - pci_mmcfg_config_num++; - - new = &pci_mmcfg_config[i]; - new->address = addr; new->segment = segment; new->start_bus = start; new->end_bus = end; + list_add_sorted(new); + num_buses = end - start + 1; res = &new->res; res->start = addr + PCI_MMCFG_BUS_OFFSET(start); @@ -82,7 +86,7 @@ static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, "PCI MMCONFIG %04x [bus %02x-%02x]", segment, start, end); res->name = new->name; - return &pci_mmcfg_config[i]; + return new; } static const char __init *pci_mmcfg_e7520(void) @@ -214,7 +218,7 @@ static const char __init *pci_mmcfg_nvidia_mcp55(void) /* * do check if amd fam10h already took over */ - if (!acpi_disabled || pci_mmcfg_config_num || mcp55_checked) + if (!acpi_disabled || !list_empty(&pci_mmcfg_list) || mcp55_checked) return NULL; mcp55_checked = true; @@ -275,44 +279,26 @@ static struct pci_mmcfg_hostbridge_probe pci_mmcfg_probes[] __initdata = { 0x0369, pci_mmcfg_nvidia_mcp55 }, }; -static int __init cmp_mmcfg(const void *x1, const void *x2) -{ - const struct pci_mmcfg_region *m1 = x1; - const struct pci_mmcfg_region *m2 = x2; - int start1, start2; - - start1 = m1->start_bus; - start2 = m2->start_bus; - - return start1 - start2; -} - static void __init pci_mmcfg_check_end_bus_number(void) { - int i; struct pci_mmcfg_region *cfg, *cfgx; - /* sort them at first */ - sort(pci_mmcfg_config, pci_mmcfg_config_num, - sizeof(pci_mmcfg_config[0]), cmp_mmcfg, NULL); - /* last one*/ - if (pci_mmcfg_config_num > 0) { - i = pci_mmcfg_config_num - 1; - cfg = &pci_mmcfg_config[i]; + cfg = list_entry(pci_mmcfg_list.prev, typeof(*cfg), list); + if (cfg) if (cfg->end_bus < cfg->start_bus) cfg->end_bus = 255; - } - /* don't overlap please */ - for (i = 0; i < pci_mmcfg_config_num - 1; i++) { - cfg = &pci_mmcfg_config[i]; - cfgx = &pci_mmcfg_config[i+1]; + if (list_is_singular(&pci_mmcfg_list)) + return; + /* don't overlap please */ + list_for_each_entry(cfg, &pci_mmcfg_list, list) { if (cfg->end_bus < cfg->start_bus) cfg->end_bus = 255; - if (cfg->end_bus >= cfgx->start_bus) + cfgx = list_entry(cfg->list.next, typeof(*cfg), list); + if (cfg != cfgx && cfg->end_bus >= cfgx->start_bus) cfg->end_bus = cfgx->start_bus - 1; } } @@ -350,18 +336,15 @@ static int __init pci_mmcfg_check_hostbridge(void) /* some end_bus_number is crazy, fix it */ pci_mmcfg_check_end_bus_number(); - return pci_mmcfg_config_num != 0; + return !list_empty(&pci_mmcfg_list); } static void __init pci_mmcfg_insert_resources(void) { - int i; struct pci_mmcfg_region *cfg; - for (i = 0; i < pci_mmcfg_config_num; i++) { - cfg = &pci_mmcfg_config[i]; + list_for_each_entry(cfg, &pci_mmcfg_list, list) insert_resource(&iomem_resource, &cfg->res); - } /* Mark that the resources have been inserted. */ pci_mmcfg_resources_inserted = 1; @@ -482,18 +465,15 @@ static void __init pci_mmcfg_reject_broken(int early) struct pci_mmcfg_region *cfg; int i; - if (pci_mmcfg_config_num == 0) - return; - - for (i = 0; i < pci_mmcfg_config_num; i++) { + list_for_each_entry(cfg, &pci_mmcfg_list, list) { int valid = 0; - cfg = &pci_mmcfg_config[i]; printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx " "segment %hu buses %u - %u\n", i, (unsigned long)cfg->address, cfg->segment, (unsigned int)cfg->start_bus, (unsigned int)cfg->end_bus); + i++; if (!early && !acpi_disabled) valid = is_mmconf_reserved(is_acpi_reserved, i, cfg, 0); @@ -524,10 +504,6 @@ reject: static int __initdata known_bridge; -/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */ -struct pci_mmcfg_region *pci_mmcfg_config; -int pci_mmcfg_config_num; - static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg, struct acpi_mcfg_allocation *cfg) { @@ -620,7 +596,7 @@ static void __init __pci_mmcfg_init(int early) pci_mmcfg_reject_broken(early); - if (pci_mmcfg_config_num == 0) + if (list_empty(&pci_mmcfg_list)) return; if (pci_mmcfg_arch_init()) @@ -652,7 +628,7 @@ static int __init pci_mmcfg_late_insert_resources(void) */ if ((pci_mmcfg_resources_inserted == 1) || (pci_probe & PCI_PROBE_MMCONF) == 0 || - (pci_mmcfg_config_num == 0)) + list_empty(&pci_mmcfg_list)) return 1; /* diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index a3cee532c935..c04523e09649 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c @@ -28,15 +28,12 @@ static int mmcfg_last_accessed_cpu; static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) { struct pci_mmcfg_region *cfg; - int cfg_num; - for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) { - cfg = &pci_mmcfg_config[cfg_num]; + list_for_each_entry(cfg, &pci_mmcfg_list, list) if (cfg->segment == seg && (cfg->start_bus <= bus) && (cfg->end_bus >= bus)) return cfg->address; - } /* Fall back to type 0 */ return 0; diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index 78fa05c6c04d..ed1f479b4d0e 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -14,16 +14,13 @@ static char __iomem *get_virt(unsigned int seg, unsigned bus) { - int i; struct pci_mmcfg_region *cfg; - for (i = 0; i < pci_mmcfg_config_num; ++i) { - cfg = &pci_mmcfg_config[i]; + list_for_each_entry(cfg, &pci_mmcfg_list, list) if (cfg->segment == seg && (cfg->start_bus <= bus) && (cfg->end_bus >= bus)) return cfg->virt; - } /* Fall back to type 0 */ return NULL; @@ -122,11 +119,9 @@ static void __iomem * __init mcfg_ioremap(struct pci_mmcfg_region *cfg) int __init pci_mmcfg_arch_init(void) { - int i; struct pci_mmcfg_region *cfg; - for (i = 0; i < pci_mmcfg_config_num; ++i) { - cfg = &pci_mmcfg_config[i]; + list_for_each_entry(cfg, &pci_mmcfg_list, list) { cfg->virt = mcfg_ioremap(cfg); if (!cfg->virt) { printk(KERN_ERR "PCI: Cannot map mmconfig aperture for " @@ -142,11 +137,9 @@ int __init pci_mmcfg_arch_init(void) void __init pci_mmcfg_arch_free(void) { - int i; struct pci_mmcfg_region *cfg; - for (i = 0; i < pci_mmcfg_config_num; ++i) { - cfg = &pci_mmcfg_config[i]; + list_for_each_entry(cfg, &pci_mmcfg_list, list) { if (cfg->virt) { iounmap(cfg->virt + PCI_MMCFG_BUS_OFFSET(cfg->start_bus)); cfg->virt = NULL; -- cgit v1.2.3 From ba2afbabfc44d6322e8607c004f37868ff786cf8 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:34:54 -0700 Subject: x86/PCI: MMCONFIG: add pci_mmconfig_remove() to remove MMCONFIG region This is only used internally now, but eventually will be used in the hot-remove path to remove the MMCONFIG region associated with a host bridge. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/mmconfig-shared.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 2709aa81801d..392f8fe16955 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -27,17 +27,21 @@ static int __initdata pci_mmcfg_resources_inserted; LIST_HEAD(pci_mmcfg_list); +static __init void pci_mmconfig_remove(struct pci_mmcfg_region *cfg) +{ + if (cfg->res.parent) + release_resource(&cfg->res); + list_del(&cfg->list); + kfree(cfg); +} + static __init void free_all_mmcfg(void) { struct pci_mmcfg_region *cfg, *tmp; pci_mmcfg_arch_free(); - list_for_each_entry_safe(cfg, tmp, &pci_mmcfg_list, list) { - if (cfg->res.parent) - release_resource(&cfg->res); - list_del(&cfg->list); - kfree(cfg); - } + list_for_each_entry_safe(cfg, tmp, &pci_mmcfg_list, list) + pci_mmconfig_remove(cfg); } static __init void list_add_sorted(struct pci_mmcfg_region *new) -- cgit v1.2.3 From 8c57786ad3d921713c7ad8e44132aa537a1d0fec Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:34:59 -0700 Subject: x86/PCI: MMCONFIG: clean up printks No functional change; just tidy up printks and make them more consistent with the rest of PCI. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/mmconfig-shared.c | 46 +++++++++++++++++++----------------------- arch/x86/pci/mmconfig_64.c | 12 +++++------ 2 files changed, 26 insertions(+), 32 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 392f8fe16955..71d69b88fa33 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -90,6 +90,10 @@ static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, "PCI MMCONFIG %04x [bus %02x-%02x]", segment, start, end); res->name = new->name; + printk(KERN_INFO PREFIX "MMCONFIG for domain %04x [bus %02x-%02x] at " + "%pR (base %#lx)\n", segment, start, end, &new->res, + (unsigned long) addr); + return new; } @@ -333,7 +337,7 @@ static int __init pci_mmcfg_check_hostbridge(void) name = pci_mmcfg_probes[i].probe(); if (name) - printk(KERN_INFO "PCI: Found %s with MMCONFIG support.\n", + printk(KERN_INFO PREFIX "%s with MMCONFIG support\n", name); } @@ -425,7 +429,7 @@ static int __init is_acpi_reserved(u64 start, u64 end, unsigned not_used) typedef int (*check_reserved_t)(u64 start, u64 end, unsigned type); static int __init is_mmconf_reserved(check_reserved_t is_reserved, - int i, struct pci_mmcfg_region *cfg, int with_e820) + struct pci_mmcfg_region *cfg, int with_e820) { u64 addr = cfg->res.start; u64 size = resource_size(&cfg->res); @@ -439,9 +443,9 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved, } if (size >= (16UL<<20) || size == old_size) { - printk(KERN_NOTICE - "PCI: MCFG area at %Lx reserved in %s\n", - addr, with_e820?"E820":"ACPI motherboard resources"); + printk(KERN_INFO PREFIX "MMCONFIG at %pR reserved in %s\n", + &cfg->res, + with_e820 ? "E820" : "ACPI motherboard resources"); valid = 1; if (old_size != size) { @@ -453,11 +457,11 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved, snprintf(cfg->name, PCI_MMCFG_RESOURCE_NAME_LEN, "PCI MMCONFIG %04x [bus %02x-%02x]", cfg->segment, cfg->start_bus, cfg->end_bus); - printk(KERN_NOTICE "PCI: updated MCFG configuration %d: base %lx " - "segment %hu buses %u - %u\n", - i, (unsigned long)cfg->address, cfg->segment, - (unsigned int)cfg->start_bus, - (unsigned int)cfg->end_bus); + printk(KERN_INFO PREFIX + "MMCONFIG for %04x [bus%02x-%02x] " + "at %pR (base %#lx) (size reduced!)\n", + cfg->segment, cfg->start_bus, cfg->end_bus, + &cfg->res, (unsigned long) cfg->address); } } @@ -467,33 +471,25 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved, static void __init pci_mmcfg_reject_broken(int early) { struct pci_mmcfg_region *cfg; - int i; list_for_each_entry(cfg, &pci_mmcfg_list, list) { int valid = 0; - printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx " - "segment %hu buses %u - %u\n", - i, (unsigned long)cfg->address, cfg->segment, - (unsigned int)cfg->start_bus, - (unsigned int)cfg->end_bus); - i++; - if (!early && !acpi_disabled) - valid = is_mmconf_reserved(is_acpi_reserved, i, cfg, 0); + valid = is_mmconf_reserved(is_acpi_reserved, cfg, 0); if (valid) continue; if (!early) - printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not" - " reserved in ACPI motherboard resources\n", - cfg->address); + printk(KERN_ERR FW_BUG PREFIX + "MMCONFIG at %pR not reserved in " + "ACPI motherboard resources\n", &cfg->res); /* Don't try to do this check unless configuration type 1 is available. how about type 2 ?*/ if (raw_pci_ops) - valid = is_mmconf_reserved(e820_all_mapped, i, cfg, 1); + valid = is_mmconf_reserved(e820_all_mapped, cfg, 1); if (!valid) goto reject; @@ -502,7 +498,7 @@ static void __init pci_mmcfg_reject_broken(int early) return; reject: - printk(KERN_INFO "PCI: Not using MMCONFIG.\n"); + printk(KERN_INFO PREFIX "not using MMCONFIG\n"); free_all_mmcfg(); } @@ -525,7 +521,7 @@ static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg, return 0; } - printk(KERN_ERR PREFIX "MCFG region for %04x:%02x-%02x at %#llx " + printk(KERN_ERR PREFIX "MCFG region for %04x [bus %02x-%02x] at %#llx " "is above 4GB, ignored\n", cfg->pci_segment, cfg->start_bus_number, cfg->end_bus_number, cfg->address); return -EINVAL; diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index ed1f479b4d0e..cfa6cdb6d262 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -12,6 +12,8 @@ #include #include +#define PREFIX "PCI: " + static char __iomem *get_virt(unsigned int seg, unsigned bus) { struct pci_mmcfg_region *cfg; @@ -109,11 +111,8 @@ static void __iomem * __init mcfg_ioremap(struct pci_mmcfg_region *cfg) num_buses = cfg->end_bus - cfg->start_bus + 1; size = PCI_MMCFG_BUS_OFFSET(num_buses); addr = ioremap_nocache(start, size); - if (addr) { - printk(KERN_INFO "PCI: Using MMCONFIG at %Lx - %Lx\n", - start, start + size - 1); + if (addr) addr -= PCI_MMCFG_BUS_OFFSET(cfg->start_bus); - } return addr; } @@ -124,9 +123,8 @@ int __init pci_mmcfg_arch_init(void) list_for_each_entry(cfg, &pci_mmcfg_list, list) { cfg->virt = mcfg_ioremap(cfg); if (!cfg->virt) { - printk(KERN_ERR "PCI: Cannot map mmconfig aperture for " - "segment %d\n", - cfg->segment); + printk(KERN_ERR PREFIX "can't map MMCONFIG at %pR\n", + &cfg->res); pci_mmcfg_arch_free(); return 0; } -- cgit v1.2.3 From f6e1d8cc38b3776038fb15d3acc82ed8bb552f82 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 13 Nov 2009 17:35:04 -0700 Subject: x86/PCI: MMCONFIG: add lookup function This patch factors out the search for an MMCONFIG region, which was previously implemented in both mmconfig_32 and mmconfig_64. No functional change. Reviewed-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/include/asm/pci_x86.h | 1 + arch/x86/pci/mmconfig-shared.c | 12 ++++++++++++ arch/x86/pci/mmconfig_32.c | 11 +++-------- arch/x86/pci/mmconfig_64.c | 23 ++++------------------- 4 files changed, 20 insertions(+), 27 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 0b7c316a70c3..b4bf9a942ed0 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -134,6 +134,7 @@ struct pci_mmcfg_region { extern int __init pci_mmcfg_arch_init(void); extern void __init pci_mmcfg_arch_free(void); +extern struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus); extern struct list_head pci_mmcfg_list; diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 71d69b88fa33..b19d1e54201e 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -97,6 +97,18 @@ static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, return new; } +struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus) +{ + struct pci_mmcfg_region *cfg; + + list_for_each_entry(cfg, &pci_mmcfg_list, list) + if (cfg->segment == segment && + cfg->start_bus <= bus && bus <= cfg->end_bus) + return cfg; + + return NULL; +} + static const char __init *pci_mmcfg_e7520(void) { u32 win; diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index c04523e09649..90d5fd476ed4 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c @@ -27,15 +27,10 @@ static int mmcfg_last_accessed_cpu; */ static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) { - struct pci_mmcfg_region *cfg; + struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus); - list_for_each_entry(cfg, &pci_mmcfg_list, list) - if (cfg->segment == seg && - (cfg->start_bus <= bus) && - (cfg->end_bus >= bus)) - return cfg->address; - - /* Fall back to type 0 */ + if (cfg) + return cfg->address; return 0; } diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index cfa6cdb6d262..e783841bd1d7 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -14,28 +14,13 @@ #define PREFIX "PCI: " -static char __iomem *get_virt(unsigned int seg, unsigned bus) -{ - struct pci_mmcfg_region *cfg; - - list_for_each_entry(cfg, &pci_mmcfg_list, list) - if (cfg->segment == seg && - (cfg->start_bus <= bus) && - (cfg->end_bus >= bus)) - return cfg->virt; - - /* Fall back to type 0 */ - return NULL; -} - static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) { - char __iomem *addr; + struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus); - addr = get_virt(seg, bus); - if (!addr) - return NULL; - return addr + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12)); + if (cfg && cfg->virt) + return cfg->virt + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12)); + return NULL; } static int pci_mmcfg_read(unsigned int seg, unsigned int bus, -- cgit v1.2.3 From 2263576cfc6e8f6ab038126c3254404b9fcb1c33 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Fri, 13 Nov 2009 10:06:08 +0800 Subject: ACPICA: Add post-order callback to acpi_walk_namespace The existing interface only has a pre-order callback. This change adds an additional parameter for a post-order callback which will be more useful for bus scans. ACPICA BZ 779. Also update the external calls to acpi_walk_namespace. http://www.acpica.org/bugzilla/show_bug.cgi?id=779 Signed-off-by: Lin Ming Signed-off-by: Bob Moore Signed-off-by: Len Brown --- arch/ia64/sn/kernel/io_acpi_init.c | 2 +- arch/x86/kernel/cpu/cpufreq/longhaul.c | 2 +- drivers/acpi/acpi_memhotplug.c | 4 +- drivers/acpi/acpica/acnamesp.h | 3 +- drivers/acpi/acpica/dsinit.c | 2 +- drivers/acpi/acpica/evgpeblk.c | 8 +- drivers/acpi/acpica/evregion.c | 6 +- drivers/acpi/acpica/nsdump.c | 4 +- drivers/acpi/acpica/nsdumpdv.c | 3 +- drivers/acpi/acpica/nsinit.c | 8 +- drivers/acpi/acpica/nswalk.c | 200 +++++++++++++++++++-------------- drivers/acpi/acpica/nsxfeval.c | 32 +++--- drivers/acpi/container.c | 4 +- drivers/acpi/dock.c | 8 +- drivers/acpi/ec.c | 2 +- drivers/acpi/glue.c | 2 +- drivers/acpi/pci_slot.c | 8 +- drivers/acpi/processor_core.c | 4 +- drivers/acpi/scan.c | 2 +- drivers/acpi/video_detect.c | 6 +- drivers/gpu/drm/i915/intel_lvds.c | 2 +- drivers/i2c/busses/i2c-scmi.c | 2 +- drivers/pci/hotplug/acpi_pcihp.c | 2 +- drivers/pci/hotplug/acpiphp_glue.c | 22 ++-- drivers/pci/hotplug/acpiphp_ibm.c | 2 +- drivers/platform/x86/intel_menlow.c | 2 +- drivers/platform/x86/sony-laptop.c | 2 +- drivers/platform/x86/thinkpad_acpi.c | 2 +- include/acpi/acpixf.h | 3 +- 29 files changed, 197 insertions(+), 152 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/sn/kernel/io_acpi_init.c b/arch/ia64/sn/kernel/io_acpi_init.c index fd50ff94302b..66f633bff059 100644 --- a/arch/ia64/sn/kernel/io_acpi_init.c +++ b/arch/ia64/sn/kernel/io_acpi_init.c @@ -390,7 +390,7 @@ sn_acpi_get_pcidev_info(struct pci_dev *dev, struct pcidev_info **pcidev_info, pcidev_match.handle = NULL; acpi_walk_namespace(ACPI_TYPE_DEVICE, rootbus_handle, ACPI_UINT32_MAX, - find_matching_device, &pcidev_match, NULL); + find_matching_device, NULL, &pcidev_match, NULL); if (!pcidev_match.handle) { printk(KERN_ERR diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index cabd2fa3fc93..7e7eea4f8261 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -885,7 +885,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) /* Find ACPI data for processor */ acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, &longhaul_walk_callback, + ACPI_UINT32_MAX, &longhaul_walk_callback, NULL, NULL, (void *)&pr); /* Check ACPI support for C3 state */ diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 28ccdbc05ac8..3597d73f28f6 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -537,7 +537,7 @@ static int __init acpi_memory_device_init(void) status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, - acpi_memory_register_notify_handler, + acpi_memory_register_notify_handler, NULL, NULL, NULL); if (ACPI_FAILURE(status)) { @@ -561,7 +561,7 @@ static void __exit acpi_memory_device_exit(void) */ status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, - acpi_memory_deregister_notify_handler, + acpi_memory_deregister_notify_handler, NULL, NULL, NULL); if (ACPI_FAILURE(status)) diff --git a/drivers/acpi/acpica/acnamesp.h b/drivers/acpi/acpica/acnamesp.h index 09a2764c734b..168e60893a2a 100644 --- a/drivers/acpi/acpica/acnamesp.h +++ b/drivers/acpi/acpica/acnamesp.h @@ -104,7 +104,8 @@ acpi_ns_walk_namespace(acpi_object_type type, acpi_handle start_object, u32 max_depth, u32 flags, - acpi_walk_callback user_function, + acpi_walk_callback pre_order_visit, + acpi_walk_callback post_order_visit, void *context, void **return_value); struct acpi_namespace_node *acpi_ns_get_next_node(struct acpi_namespace_node diff --git a/drivers/acpi/acpica/dsinit.c b/drivers/acpi/acpica/dsinit.c index 3aae13f30c5e..f23fa0be6fc2 100644 --- a/drivers/acpi/acpica/dsinit.c +++ b/drivers/acpi/acpica/dsinit.c @@ -192,7 +192,7 @@ acpi_ds_initialize_objects(u32 table_index, status = acpi_ns_walk_namespace(ACPI_TYPE_ANY, start_node, ACPI_UINT32_MAX, ACPI_NS_WALK_UNLOCK, acpi_ds_init_one_object, - &info, NULL); + NULL, &info, NULL); if (ACPI_FAILURE(status)) { ACPI_EXCEPTION((AE_INFO, status, "During WalkNamespace")); } diff --git a/drivers/acpi/acpica/evgpeblk.c b/drivers/acpi/acpica/evgpeblk.c index a60aaa7635f3..247920900187 100644 --- a/drivers/acpi/acpica/evgpeblk.c +++ b/drivers/acpi/acpica/evgpeblk.c @@ -945,8 +945,8 @@ acpi_ev_create_gpe_block(struct acpi_namespace_node *gpe_device, status = acpi_ns_walk_namespace(ACPI_TYPE_METHOD, gpe_device, ACPI_UINT32_MAX, ACPI_NS_WALK_NO_UNLOCK, - acpi_ev_save_method_info, gpe_block, - NULL); + acpi_ev_save_method_info, NULL, + gpe_block, NULL); /* Return the new block */ @@ -1022,8 +1022,8 @@ acpi_ev_initialize_gpe_block(struct acpi_namespace_node *gpe_device, status = acpi_ns_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, ACPI_NS_WALK_UNLOCK, - acpi_ev_match_prw_and_gpe, &gpe_info, - NULL); + acpi_ev_match_prw_and_gpe, NULL, + &gpe_info, NULL); } /* diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c index c9fa040725d4..582b0af01e99 100644 --- a/drivers/acpi/acpica/evregion.c +++ b/drivers/acpi/acpica/evregion.c @@ -1025,8 +1025,8 @@ acpi_ev_install_space_handler(struct acpi_namespace_node * node, */ status = acpi_ns_walk_namespace(ACPI_TYPE_ANY, node, ACPI_UINT32_MAX, ACPI_NS_WALK_UNLOCK, - acpi_ev_install_handler, handler_obj, - NULL); + acpi_ev_install_handler, NULL, + handler_obj, NULL); unlock_and_exit: return_ACPI_STATUS(status); @@ -1062,7 +1062,7 @@ acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, */ status = acpi_ns_walk_namespace(ACPI_TYPE_ANY, node, ACPI_UINT32_MAX, ACPI_NS_WALK_UNLOCK, acpi_ev_reg_run, - &space_id, NULL); + NULL, &space_id, NULL); return_ACPI_STATUS(status); } diff --git a/drivers/acpi/acpica/nsdump.c b/drivers/acpi/acpica/nsdump.c index 2bad613db73a..2deb986861ca 100644 --- a/drivers/acpi/acpica/nsdump.c +++ b/drivers/acpi/acpica/nsdump.c @@ -634,8 +634,8 @@ acpi_ns_dump_objects(acpi_object_type type, (void)acpi_ns_walk_namespace(type, start_handle, max_depth, ACPI_NS_WALK_NO_UNLOCK | ACPI_NS_WALK_TEMP_NODES, - acpi_ns_dump_one_object, (void *)&info, - NULL); + acpi_ns_dump_one_object, NULL, + (void *)&info, NULL); } #endif /* ACPI_FUTURE_USAGE */ diff --git a/drivers/acpi/acpica/nsdumpdv.c b/drivers/acpi/acpica/nsdumpdv.c index 0fe87f1aef16..36be7f0e97ec 100644 --- a/drivers/acpi/acpica/nsdumpdv.c +++ b/drivers/acpi/acpica/nsdumpdv.c @@ -131,7 +131,8 @@ void acpi_ns_dump_root_devices(void) status = acpi_ns_walk_namespace(ACPI_TYPE_DEVICE, sys_bus_handle, ACPI_UINT32_MAX, ACPI_NS_WALK_NO_UNLOCK, - acpi_ns_dump_one_device, NULL, NULL); + acpi_ns_dump_one_device, NULL, NULL, + NULL); } #endif diff --git a/drivers/acpi/acpica/nsinit.c b/drivers/acpi/acpica/nsinit.c index 1d5b360eb25b..4f8abac231d2 100644 --- a/drivers/acpi/acpica/nsinit.c +++ b/drivers/acpi/acpica/nsinit.c @@ -96,7 +96,7 @@ acpi_status acpi_ns_initialize_objects(void) /* Walk entire namespace from the supplied root */ status = acpi_walk_namespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, acpi_ns_init_one_object, + ACPI_UINT32_MAX, acpi_ns_init_one_object, NULL, &info, NULL); if (ACPI_FAILURE(status)) { ACPI_EXCEPTION((AE_INFO, status, "During WalkNamespace")); @@ -156,7 +156,8 @@ acpi_status acpi_ns_initialize_devices(void) status = acpi_ns_walk_namespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, FALSE, - acpi_ns_find_ini_methods, &info, NULL); + acpi_ns_find_ini_methods, NULL, &info, + NULL); if (ACPI_FAILURE(status)) { goto error_exit; } @@ -189,7 +190,8 @@ acpi_status acpi_ns_initialize_devices(void) status = acpi_ns_walk_namespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, FALSE, - acpi_ns_init_one_device, &info, NULL); + acpi_ns_init_one_device, NULL, &info, + NULL); ACPI_FREE(info.evaluate_info); if (ACPI_FAILURE(status)) { diff --git a/drivers/acpi/acpica/nswalk.c b/drivers/acpi/acpica/nswalk.c index 35539df5c75d..d7e6b52b4482 100644 --- a/drivers/acpi/acpica/nswalk.c +++ b/drivers/acpi/acpica/nswalk.c @@ -165,24 +165,27 @@ struct acpi_namespace_node *acpi_ns_get_next_node_typed(acpi_object_type type, * max_depth - Depth to which search is to reach * Flags - Whether to unlock the NS before invoking * the callback routine - * user_function - Called when an object of "Type" is found - * Context - Passed to user function - * return_value - from the user_function if terminated early. - * Otherwise, returns NULL. + * pre_order_visit - Called during tree pre-order visit + * when an object of "Type" is found + * post_order_visit - Called during tree post-order visit + * when an object of "Type" is found + * Context - Passed to user function(s) above + * return_value - from the user_function if terminated + * early. Otherwise, returns NULL. * RETURNS: Status * * DESCRIPTION: Performs a modified depth-first walk of the namespace tree, * starting (and ending) at the node specified by start_handle. - * The user_function is called whenever a node that matches - * the type parameter is found. If the user function returns + * The callback function is called whenever a node that matches + * the type parameter is found. If the callback function returns * a non-zero value, the search is terminated immediately and * this value is returned to the caller. * * The point of this procedure is to provide a generic namespace * walk routine that can be called from multiple places to - * provide multiple services; the User Function can be tailored - * to each task, whether it is a print function, a compare - * function, etc. + * provide multiple services; the callback function(s) can be + * tailored to each task, whether it is a print function, + * a compare function, etc. * ******************************************************************************/ @@ -191,7 +194,8 @@ acpi_ns_walk_namespace(acpi_object_type type, acpi_handle start_node, u32 max_depth, u32 flags, - acpi_walk_callback user_function, + acpi_walk_callback pre_order_visit, + acpi_walk_callback post_order_visit, void *context, void **return_value) { acpi_status status; @@ -200,6 +204,7 @@ acpi_ns_walk_namespace(acpi_object_type type, struct acpi_namespace_node *parent_node; acpi_object_type child_type; u32 level; + u8 node_previously_visited = FALSE; ACPI_FUNCTION_TRACE(ns_walk_namespace); @@ -212,7 +217,7 @@ acpi_ns_walk_namespace(acpi_object_type type, /* Null child means "get first node" */ parent_node = start_node; - child_node = NULL; + child_node = acpi_ns_get_next_node(parent_node, NULL); child_type = ACPI_TYPE_ANY; level = 1; @@ -221,102 +226,129 @@ acpi_ns_walk_namespace(acpi_object_type type, * started. When Level is zero, the loop is done because we have * bubbled up to (and passed) the original parent handle (start_entry) */ - while (level > 0) { + while (level > 0 && child_node) { + status = AE_OK; - /* Get the next node in this scope. Null if not found */ + /* Found next child, get the type if we are not searching for ANY */ - status = AE_OK; - child_node = acpi_ns_get_next_node(parent_node, child_node); - if (child_node) { + if (type != ACPI_TYPE_ANY) { + child_type = child_node->type; + } - /* Found next child, get the type if we are not searching for ANY */ + /* + * Ignore all temporary namespace nodes (created during control + * method execution) unless told otherwise. These temporary nodes + * can cause a race condition because they can be deleted during + * the execution of the user function (if the namespace is + * unlocked before invocation of the user function.) Only the + * debugger namespace dump will examine the temporary nodes. + */ + if ((child_node->flags & ANOBJ_TEMPORARY) && + !(flags & ACPI_NS_WALK_TEMP_NODES)) { + status = AE_CTRL_DEPTH; + } - if (type != ACPI_TYPE_ANY) { - child_type = child_node->type; - } + /* Type must match requested type */ + else if (child_type == type) { /* - * Ignore all temporary namespace nodes (created during control - * method execution) unless told otherwise. These temporary nodes - * can cause a race condition because they can be deleted during - * the execution of the user function (if the namespace is - * unlocked before invocation of the user function.) Only the - * debugger namespace dump will examine the temporary nodes. + * Found a matching node, invoke the user callback function. + * Unlock the namespace if flag is set. */ - if ((child_node->flags & ANOBJ_TEMPORARY) && - !(flags & ACPI_NS_WALK_TEMP_NODES)) { - status = AE_CTRL_DEPTH; + if (flags & ACPI_NS_WALK_UNLOCK) { + mutex_status = + acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); + if (ACPI_FAILURE(mutex_status)) { + return_ACPI_STATUS(mutex_status); + } } - /* Type must match requested type */ - - else if (child_type == type) { - /* - * Found a matching node, invoke the user callback function. - * Unlock the namespace if flag is set. - */ - if (flags & ACPI_NS_WALK_UNLOCK) { - mutex_status = - acpi_ut_release_mutex - (ACPI_MTX_NAMESPACE); - if (ACPI_FAILURE(mutex_status)) { - return_ACPI_STATUS - (mutex_status); - } + /* + * Invoke the user function, either pre-order or post-order + * or both. + */ + if (!node_previously_visited) { + if (pre_order_visit) { + status = + pre_order_visit(child_node, level, + context, + return_value); } + } else { + if (post_order_visit) { + status = + post_order_visit(child_node, level, + context, + return_value); + } + } - status = - user_function(child_node, level, context, - return_value); - - if (flags & ACPI_NS_WALK_UNLOCK) { - mutex_status = - acpi_ut_acquire_mutex - (ACPI_MTX_NAMESPACE); - if (ACPI_FAILURE(mutex_status)) { - return_ACPI_STATUS - (mutex_status); - } + if (flags & ACPI_NS_WALK_UNLOCK) { + mutex_status = + acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE); + if (ACPI_FAILURE(mutex_status)) { + return_ACPI_STATUS(mutex_status); } + } - switch (status) { - case AE_OK: - case AE_CTRL_DEPTH: + switch (status) { + case AE_OK: + case AE_CTRL_DEPTH: - /* Just keep going */ - break; + /* Just keep going */ + break; - case AE_CTRL_TERMINATE: + case AE_CTRL_TERMINATE: - /* Exit now, with OK status */ + /* Exit now, with OK status */ - return_ACPI_STATUS(AE_OK); + return_ACPI_STATUS(AE_OK); - default: + default: - /* All others are valid exceptions */ + /* All others are valid exceptions */ - return_ACPI_STATUS(status); - } + return_ACPI_STATUS(status); + } + } + + /* + * Depth first search: Attempt to go down another level in the + * namespace if we are allowed to. Don't go any further if we have + * reached the caller specified maximum depth or if the user + * function has specified that the maximum depth has been reached. + */ + if (!node_previously_visited && + (level < max_depth) && (status != AE_CTRL_DEPTH)) { + if (child_node->child) { + + /* There is at least one child of this node, visit it */ + + level++; + parent_node = child_node; + child_node = + acpi_ns_get_next_node(parent_node, NULL); + continue; } + } - /* - * Depth first search: Attempt to go down another level in the - * namespace if we are allowed to. Don't go any further if we have - * reached the caller specified maximum depth or if the user - * function has specified that the maximum depth has been reached. - */ - if ((level < max_depth) && (status != AE_CTRL_DEPTH)) { - if (child_node->child) { + /* No more children, re-visit this node */ - /* There is at least one child of this node, visit it */ + if (!node_previously_visited) { + node_previously_visited = TRUE; + continue; + } - level++; - parent_node = child_node; - child_node = NULL; - } - } - } else { + /* No more children, visit peers */ + + child_node = acpi_ns_get_next_node(parent_node, child_node); + if (child_node) { + node_previously_visited = FALSE; + } + + /* No peers, re-visit parent */ + + else { /* * No more children of this node (acpi_ns_get_next_node failed), go * back upwards in the namespace tree to the node's parent. @@ -324,6 +356,8 @@ acpi_ns_walk_namespace(acpi_object_type type, level--; child_node = parent_node; parent_node = acpi_ns_get_parent_node(parent_node); + + node_previously_visited = TRUE; } } diff --git a/drivers/acpi/acpica/nsxfeval.c b/drivers/acpi/acpica/nsxfeval.c index 4929dbdbc8f0..f2bd1da77001 100644 --- a/drivers/acpi/acpica/nsxfeval.c +++ b/drivers/acpi/acpica/nsxfeval.c @@ -433,8 +433,11 @@ static void acpi_ns_resolve_references(struct acpi_evaluate_info *info) * PARAMETERS: Type - acpi_object_type to search for * start_object - Handle in namespace where search begins * max_depth - Depth to which search is to reach - * user_function - Called when an object of "Type" is found - * Context - Passed to user function + * pre_order_visit - Called during tree pre-order visit + * when an object of "Type" is found + * post_order_visit - Called during tree post-order visit + * when an object of "Type" is found + * Context - Passed to user function(s) above * return_value - Location where return value of * user_function is put if terminated early * @@ -443,16 +446,16 @@ static void acpi_ns_resolve_references(struct acpi_evaluate_info *info) * * DESCRIPTION: Performs a modified depth-first walk of the namespace tree, * starting (and ending) at the object specified by start_handle. - * The user_function is called whenever an object that matches - * the type parameter is found. If the user function returns + * The callback function is called whenever an object that matches + * the type parameter is found. If the callback function returns * a non-zero value, the search is terminated immediately and this * value is returned to the caller. * * The point of this procedure is to provide a generic namespace * walk routine that can be called from multiple places to - * provide multiple services; the User Function can be tailored - * to each task, whether it is a print function, a compare - * function, etc. + * provide multiple services; the callback function(s) can be + * tailored to each task, whether it is a print function, + * a compare function, etc. * ******************************************************************************/ @@ -460,7 +463,8 @@ acpi_status acpi_walk_namespace(acpi_object_type type, acpi_handle start_object, u32 max_depth, - acpi_walk_callback user_function, + acpi_walk_callback pre_order_visit, + acpi_walk_callback post_order_visit, void *context, void **return_value) { acpi_status status; @@ -469,7 +473,8 @@ acpi_walk_namespace(acpi_object_type type, /* Parameter validation */ - if ((type > ACPI_TYPE_LOCAL_MAX) || (!max_depth) || (!user_function)) { + if ((type > ACPI_TYPE_LOCAL_MAX) || + (!max_depth) || (!pre_order_visit && !post_order_visit)) { return_ACPI_STATUS(AE_BAD_PARAMETER); } @@ -501,8 +506,9 @@ acpi_walk_namespace(acpi_object_type type, } status = acpi_ns_walk_namespace(type, start_object, max_depth, - ACPI_NS_WALK_UNLOCK, user_function, - context, return_value); + ACPI_NS_WALK_UNLOCK, pre_order_visit, + post_order_visit, context, + return_value); (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); @@ -681,8 +687,8 @@ acpi_get_devices(const char *HID, status = acpi_ns_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, ACPI_NS_WALK_UNLOCK, - acpi_ns_get_device_callback, &info, - return_value); + acpi_ns_get_device_callback, NULL, + &info, return_value); (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); return_ACPI_STATUS(status); diff --git a/drivers/acpi/container.c b/drivers/acpi/container.c index 642bb305cb65..5faf6c21257d 100644 --- a/drivers/acpi/container.c +++ b/drivers/acpi/container.c @@ -258,7 +258,7 @@ static int __init acpi_container_init(void) acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, - container_walk_namespace_cb, &action, NULL); + container_walk_namespace_cb, NULL, &action, NULL); return (0); } @@ -271,7 +271,7 @@ static void __exit acpi_container_exit(void) acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, - container_walk_namespace_cb, &action, NULL); + container_walk_namespace_cb, NULL, &action, NULL); acpi_bus_unregister_driver(&acpi_container_driver); diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index 7338b6a3e049..30be3c148f7e 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -1030,8 +1030,8 @@ static int dock_add(acpi_handle handle) /* Find dependent devices */ acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, find_dock_devices, dock_station, - NULL); + ACPI_UINT32_MAX, find_dock_devices, NULL, + dock_station, NULL); /* add the dock station as a device dependent on itself */ dd = alloc_dock_dependent_device(handle); @@ -1127,11 +1127,11 @@ static int __init dock_init(void) /* look for a dock station */ acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, find_dock, NULL, NULL); + ACPI_UINT32_MAX, find_dock, NULL, NULL, NULL); /* look for bay */ acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, find_bay, NULL, NULL); + ACPI_UINT32_MAX, find_bay, NULL, NULL, NULL); if (!dock_station_count) { printk(KERN_INFO PREFIX "No dock devices found.\n"); return 0; diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index baef28c1e630..75b147f5c8fd 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -820,7 +820,7 @@ static int acpi_ec_add(struct acpi_device *device) /* Find and register all query methods */ acpi_walk_namespace(ACPI_TYPE_METHOD, ec->handle, 1, - acpi_ec_register_query_methods, ec, NULL); + acpi_ec_register_query_methods, NULL, ec, NULL); if (!first_ec) first_ec = ec; diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index c6645f26224b..4c8fcff662cf 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -113,7 +113,7 @@ acpi_handle acpi_get_child(acpi_handle parent, acpi_integer address) if (!parent) return NULL; acpi_walk_namespace(ACPI_TYPE_DEVICE, parent, - 1, do_acpi_find_child, &find, NULL); + 1, do_acpi_find_child, NULL, &find, NULL); return find.handle; } diff --git a/drivers/acpi/pci_slot.c b/drivers/acpi/pci_slot.c index 45da2bae36c8..11f219743204 100644 --- a/drivers/acpi/pci_slot.c +++ b/drivers/acpi/pci_slot.c @@ -219,12 +219,12 @@ walk_p2p_bridge(acpi_handle handle, u32 lvl, void *context, void **rv) dbg("p2p bridge walk, pci_bus = %x\n", dev->subordinate->number); status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, - user_function, &child_context, NULL); + user_function, NULL, &child_context, NULL); if (ACPI_FAILURE(status)) goto out; status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, - walk_p2p_bridge, &child_context, NULL); + walk_p2p_bridge, NULL, &child_context, NULL); out: pci_dev_put(dev); return AE_OK; @@ -277,12 +277,12 @@ walk_root_bridge(acpi_handle handle, acpi_walk_callback user_function) dbg("root bridge walk, pci_bus = %x\n", pci_bus->number); status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, - user_function, &context, NULL); + user_function, NULL, &context, NULL); if (ACPI_FAILURE(status)) return status; status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, - walk_p2p_bridge, &context, NULL); + walk_p2p_bridge, NULL, &context, NULL); if (ACPI_FAILURE(status)) err("%s: walk_p2p_bridge failure - %d\n", __func__, status); diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index ec742a4e5635..cb4283f5a79d 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -1102,7 +1102,7 @@ void acpi_processor_install_hotplug_notify(void) acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, - processor_walk_namespace_cb, &action, NULL); + processor_walk_namespace_cb, NULL, &action, NULL); #endif register_hotcpu_notifier(&acpi_cpu_notifier); } @@ -1115,7 +1115,7 @@ void acpi_processor_uninstall_hotplug_notify(void) acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, - processor_walk_namespace_cb, &action, NULL); + processor_walk_namespace_cb, NULL, &action, NULL); #endif unregister_hotcpu_notifier(&acpi_cpu_notifier); } diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 14a7481c97d7..ff9f6226085d 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1332,7 +1332,7 @@ static int acpi_bus_scan(acpi_handle handle, struct acpi_bus_ops *ops, status = acpi_bus_check_add(handle, 0, ops, &device); if (ACPI_SUCCESS(status)) acpi_walk_namespace(ACPI_TYPE_ANY, handle, ACPI_UINT32_MAX, - acpi_bus_check_add, ops, &device); + acpi_bus_check_add, NULL, ops, &device); if (child) *child = device; diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 575593a8b4e6..8c1b431616df 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -101,7 +101,7 @@ long acpi_is_video_device(struct acpi_device *device) /* Only check for backlight functionality if one of the above hit. */ if (video_caps) acpi_walk_namespace(ACPI_TYPE_DEVICE, device->handle, - ACPI_UINT32_MAX, acpi_backlight_cap_match, + ACPI_UINT32_MAX, acpi_backlight_cap_match, NULL, &video_caps, NULL); return video_caps; @@ -151,7 +151,7 @@ long acpi_video_get_capabilities(acpi_handle graphics_handle) if (!graphics_handle) { /* Only do the global walk through all graphics devices once */ acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, find_video, + ACPI_UINT32_MAX, find_video, NULL, &caps, NULL); /* There might be boot param flags set already... */ acpi_video_support |= caps; @@ -173,7 +173,7 @@ long acpi_video_get_capabilities(acpi_handle graphics_handle) return 0; } acpi_walk_namespace(ACPI_TYPE_DEVICE, graphics_handle, - ACPI_UINT32_MAX, find_video, + ACPI_UINT32_MAX, find_video, NULL, &caps, NULL); } ACPI_DEBUG_PRINT((ACPI_DB_INFO, "We have 0x%lX video support %s %s\n", diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 05598ae10c4b..eb365021bb5a 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -899,7 +899,7 @@ static int intel_lid_present(void) acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, - check_lid_device, &lid_present, NULL); + check_lid_device, NULL, &lid_present, NULL); return lid_present; } diff --git a/drivers/i2c/busses/i2c-scmi.c b/drivers/i2c/busses/i2c-scmi.c index b4a55d407bf5..365e0becaf12 100644 --- a/drivers/i2c/busses/i2c-scmi.c +++ b/drivers/i2c/busses/i2c-scmi.c @@ -363,7 +363,7 @@ static int acpi_smbus_cmi_add(struct acpi_device *device) smbus_cmi->cap_write = 0; acpi_walk_namespace(ACPI_TYPE_METHOD, smbus_cmi->handle, 1, - acpi_smbus_cmi_query_methods, smbus_cmi, NULL); + acpi_smbus_cmi_query_methods, NULL, smbus_cmi, NULL); if (smbus_cmi->cap_info == 0) goto err; diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c index a73028ec52e5..0f32571b94df 100644 --- a/drivers/pci/hotplug/acpi_pcihp.c +++ b/drivers/pci/hotplug/acpi_pcihp.c @@ -471,7 +471,7 @@ int acpi_pci_detect_ejectable(acpi_handle handle) return found; acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1, - check_hotplug, (void *)&found, NULL); + check_hotplug, NULL, (void *)&found, NULL); return found; } EXPORT_SYMBOL_GPL(acpi_pci_detect_ejectable); diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 58d25a163a8b..df1b0ea089d1 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -266,7 +266,7 @@ static int detect_ejectable_slots(acpi_handle handle) int found = acpi_pci_detect_ejectable(handle); if (!found) { acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, - is_pci_dock_device, (void *)&found, NULL); + is_pci_dock_device, NULL, (void *)&found, NULL); } return found; } @@ -281,7 +281,7 @@ static void init_bridge_misc(struct acpiphp_bridge *bridge) /* register all slot objects under this bridge */ status = acpi_walk_namespace(ACPI_TYPE_DEVICE, bridge->handle, (u32)1, - register_slot, bridge, NULL); + register_slot, NULL, bridge, NULL); if (ACPI_FAILURE(status)) { list_del(&bridge->list); return; @@ -447,7 +447,7 @@ find_p2p_bridge(acpi_handle handle, u32 lvl, void *context, void **rv) /* search P2P bridges under this p2p bridge */ status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, - find_p2p_bridge, NULL, NULL); + find_p2p_bridge, NULL, NULL, NULL); if (ACPI_FAILURE(status)) warn("find_p2p_bridge failed (error code = 0x%x)\n", status); @@ -485,7 +485,7 @@ static int add_bridge(acpi_handle handle) /* search P2P bridges under this host bridge */ status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, - find_p2p_bridge, NULL, NULL); + find_p2p_bridge, NULL, NULL, NULL); if (ACPI_FAILURE(status)) warn("find_p2p_bridge failed (error code = 0x%x)\n", status); @@ -573,7 +573,7 @@ cleanup_p2p_bridge(acpi_handle handle, u32 lvl, void *context, void **rv) /* cleanup p2p bridges under this P2P bridge in a depth-first manner */ acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, - cleanup_p2p_bridge, NULL, NULL); + cleanup_p2p_bridge, NULL, NULL, NULL); bridge = acpiphp_handle_to_bridge(handle); if (bridge) @@ -589,7 +589,7 @@ static void remove_bridge(acpi_handle handle) /* cleanup p2p bridges under this host bridge in a depth-first manner */ acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, - (u32)1, cleanup_p2p_bridge, NULL, NULL); + (u32)1, cleanup_p2p_bridge, NULL, NULL, NULL); /* * On root bridges with hotplug slots directly underneath (ie, @@ -778,7 +778,7 @@ static int acpiphp_configure_ioapics(acpi_handle handle) { ioapic_add(handle, 0, NULL, NULL); acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, - ACPI_UINT32_MAX, ioapic_add, NULL, NULL); + ACPI_UINT32_MAX, ioapic_add, NULL, NULL, NULL); return 0; } @@ -786,7 +786,7 @@ static int acpiphp_unconfigure_ioapics(acpi_handle handle) { ioapic_remove(handle, 0, NULL, NULL); acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, - ACPI_UINT32_MAX, ioapic_remove, NULL, NULL); + ACPI_UINT32_MAX, ioapic_remove, NULL, NULL, NULL); return 0; } @@ -1367,7 +1367,7 @@ static void handle_hotplug_event_bridge(acpi_handle handle, u32 type, void *cont bridge = acpiphp_handle_to_bridge(handle); if (type == ACPI_NOTIFY_BUS_CHECK) { acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, ACPI_UINT32_MAX, - count_sub_bridges, &num_sub_bridges, NULL); + count_sub_bridges, NULL, &num_sub_bridges, NULL); } if (!bridge && !num_sub_bridges) { @@ -1388,7 +1388,7 @@ static void handle_hotplug_event_bridge(acpi_handle handle, u32 type, void *cont } if (num_sub_bridges) acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, - ACPI_UINT32_MAX, check_sub_bridges, NULL, NULL); + ACPI_UINT32_MAX, check_sub_bridges, NULL, NULL, NULL); break; case ACPI_NOTIFY_DEVICE_CHECK: @@ -1512,7 +1512,7 @@ int __init acpiphp_glue_init(void) int num = 0; acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, find_root_bridges, &num, NULL); + ACPI_UINT32_MAX, find_root_bridges, NULL, &num, NULL); if (num <= 0) return -1; diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c index e7be66dbac21..aa5df485f8cf 100644 --- a/drivers/pci/hotplug/acpiphp_ibm.c +++ b/drivers/pci/hotplug/acpiphp_ibm.c @@ -434,7 +434,7 @@ static int __init ibm_acpiphp_init(void) dbg("%s\n", __func__); if (acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, ibm_find_acpi_device, + ACPI_UINT32_MAX, ibm_find_acpi_device, NULL, &ibm_acpi_handle, NULL) != FOUND_APCI) { err("%s: acpi_walk_namespace failed\n", __func__); retval = -ENODEV; diff --git a/drivers/platform/x86/intel_menlow.c b/drivers/platform/x86/intel_menlow.c index 29432a50be45..f0a90a6bf396 100644 --- a/drivers/platform/x86/intel_menlow.c +++ b/drivers/platform/x86/intel_menlow.c @@ -510,7 +510,7 @@ static int __init intel_menlow_module_init(void) /* Looking for sensors in each ACPI thermal zone */ status = acpi_walk_namespace(ACPI_TYPE_THERMAL, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, - intel_menlow_register_sensor, NULL, NULL); + intel_menlow_register_sensor, NULL, NULL, NULL); if (ACPI_FAILURE(status)) return -ENODEV; diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index a2a742c8ff7e..7a2cc8a5c975 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -1203,7 +1203,7 @@ static int sony_nc_add(struct acpi_device *device) if (debug) { status = acpi_walk_namespace(ACPI_TYPE_METHOD, sony_nc_acpi_handle, - 1, sony_walk_callback, NULL, NULL); + 1, sony_walk_callback, NULL, NULL, NULL); if (ACPI_FAILURE(status)) { printk(KERN_WARNING DRV_PFX "unable to walk acpi resources\n"); result = -ENODEV; diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index d93108d148fc..22cb50fe2b2c 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -1087,7 +1087,7 @@ static int __init tpacpi_check_std_acpi_brightness_support(void) */ status = acpi_walk_namespace(ACPI_TYPE_METHOD, vid_handle, 3, - tpacpi_acpi_walk_find_bcl, NULL, + tpacpi_acpi_walk_find_bcl, NULL, NULL, &bcl_ptr); if (ACPI_SUCCESS(status) && bcl_levels > 2) { diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 0e7efeacf6cb..f9b8b28ad802 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -154,7 +154,8 @@ acpi_status acpi_walk_namespace(acpi_object_type type, acpi_handle start_object, u32 max_depth, - acpi_walk_callback user_function, + acpi_walk_callback pre_order_visit, + acpi_walk_callback post_order_visit, void *context, void **return_value); acpi_status -- cgit v1.2.3 From 273bee27fa9f79d94b78c83506016f2e41e78983 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 25 Nov 2009 08:46:28 +0900 Subject: x86: Fix iommu=soft boot option iommu=soft boot option forces the kernel to use swiotlb. ( This has the side-effect of enabling the swiotlb over the GART if this boot option is provided. This is the desired behavior of the swiotlb boot option and works like that for all other hw-IOMMU drivers. ) Signed-off-by: FUJITA Tomonori Cc: yinghai@kernel.org LKML-Reference: <20091125084611O.fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-swiotlb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index e36e71daa44c..e3c0a66b9e77 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -50,6 +50,8 @@ static struct dma_map_ops swiotlb_dma_ops = { */ int __init pci_swiotlb_init(void) { + int use_swiotlb = swiotlb | swiotlb_force; + /* don't initialize swiotlb if iommu=off (no_iommu=1) */ #ifdef CONFIG_X86_64 if (!no_iommu && max_pfn > MAX_DMA32_PFN) @@ -63,5 +65,5 @@ int __init pci_swiotlb_init(void) dma_ops = &swiotlb_dma_ops; } - return swiotlb_force; + return use_swiotlb; } -- cgit v1.2.3 From 28b4e0d86acf59ae3bc422921138a4958458326e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 25 Nov 2009 22:24:44 +0900 Subject: x86: Rename global percpu symbol dr7 to cpu_dr7 Percpu symbols now occupy the same namespace as other global symbols and as such short global symbols without subsystem prefix tend to collide with local variables. dr7 percpu variable used by x86 was hit by this. Rename it to cpu_dr7. The rename also makes it more consistent with its fellow cpu_debugreg percpu variable. Signed-off-by: Tejun Heo Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Rusty Russell Cc: Christoph Lameter Cc: Linus Torvalds , Cc: Andrew Morton LKML-Reference: <20091125115856.GA17856@elte.hu> Signed-off-by: Ingo Molnar Reported-by: Stephen Rothwell --- arch/x86/include/asm/debugreg.h | 4 ++-- arch/x86/kernel/hw_breakpoint.c | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index fdabd8435765..8240f76b531e 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -75,7 +75,7 @@ */ #ifdef __KERNEL__ -DECLARE_PER_CPU(unsigned long, dr7); +DECLARE_PER_CPU(unsigned long, cpu_dr7); static inline void hw_breakpoint_disable(void) { @@ -91,7 +91,7 @@ static inline void hw_breakpoint_disable(void) static inline int hw_breakpoint_active(void) { - return __get_cpu_var(dr7) & DR_GLOBAL_ENABLE_MASK; + return __get_cpu_var(cpu_dr7) & DR_GLOBAL_ENABLE_MASK; } extern void aout_dump_debugregs(struct user *dump); diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 4d267fb77828..92ea5aad0b5c 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -46,8 +46,8 @@ #include /* Per cpu debug control register value */ -DEFINE_PER_CPU(unsigned long, dr7); -EXPORT_PER_CPU_SYMBOL(dr7); +DEFINE_PER_CPU(unsigned long, cpu_dr7); +EXPORT_PER_CPU_SYMBOL(cpu_dr7); /* Per cpu debug address registers values */ static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]); @@ -118,7 +118,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) set_debugreg(info->address, i); __get_cpu_var(cpu_debugreg[i]) = info->address; - dr7 = &__get_cpu_var(dr7); + dr7 = &__get_cpu_var(cpu_dr7); *dr7 |= encode_dr7(i, info->len, info->type); set_debugreg(*dr7, 7); @@ -153,7 +153,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) return; - dr7 = &__get_cpu_var(dr7); + dr7 = &__get_cpu_var(cpu_dr7); *dr7 &= ~encode_dr7(i, info->len, info->type); set_debugreg(*dr7, 7); @@ -437,7 +437,7 @@ void hw_breakpoint_restore(void) set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2); set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3); set_debugreg(current->thread.debugreg6, 6); - set_debugreg(__get_cpu_var(dr7), 7); + set_debugreg(__get_cpu_var(cpu_dr7), 7); } EXPORT_SYMBOL_GPL(hw_breakpoint_restore); -- cgit v1.2.3 From b803090615ccec669681ff85ce28671e7bfefa3d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 26 Nov 2009 08:17:31 +0100 Subject: x86: dumpstack: Clean up the x86_stack_ids[][] initalization and other details Make the initialization more readable, plus tidy up a few small visual details as well. No change in functionality. LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack_32.c | 9 ++++----- arch/x86/kernel/dumpstack_64.c | 25 +++++++++++++------------ 2 files changed, 17 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index f7dd2a7c3bf4..e0ed4c7abb62 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -10,9 +10,9 @@ #include #include #include +#include #include #include -#include #include @@ -35,6 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (!stack) { unsigned long dummy; + stack = &dummy; if (task && task != current) stack = (unsigned long *)task->thread.sp; @@ -57,8 +58,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, context = (struct thread_info *) ((unsigned long)stack & (~(THREAD_SIZE - 1))); - bp = print_context_stack(context, stack, bp, ops, - data, NULL, &graph); + bp = print_context_stack(context, stack, bp, ops, data, NULL, &graph); stack = (unsigned long *)context->previous_esp; if (!stack) @@ -72,7 +72,7 @@ EXPORT_SYMBOL(dump_trace); void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl) + unsigned long *sp, unsigned long bp, char *log_lvl) { unsigned long *stack; int i; @@ -156,4 +156,3 @@ int is_valid_bugaddr(unsigned long ip) return ud2 == 0x0b0f; } - diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index a071e6be177e..cfec478a4e42 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -10,26 +10,28 @@ #include #include #include +#include #include #include -#include #include #include "dumpstack.h" +#define N_EXCEPTION_STACKS_END \ + (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2) static char x86_stack_ids[][8] = { - [DEBUG_STACK - 1] = "#DB", - [NMI_STACK - 1] = "NMI", - [DOUBLEFAULT_STACK - 1] = "#DF", - [STACKFAULT_STACK - 1] = "#SS", - [MCE_STACK - 1] = "#MC", + [ DEBUG_STACK-1 ] = "#DB", + [ NMI_STACK-1 ] = "NMI", + [ DOUBLEFAULT_STACK-1 ] = "#DF", + [ STACKFAULT_STACK-1 ] = "#SS", + [ MCE_STACK-1 ] = "#MC", #if DEBUG_STKSZ > EXCEPTION_STKSZ - [N_EXCEPTION_STACKS ... - N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" + [ N_EXCEPTION_STACKS ... + N_EXCEPTION_STACKS_END ] = "#DB[?]" #endif - }; +}; int x86_is_stack_id(int id, char *name) { @@ -37,7 +39,7 @@ int x86_is_stack_id(int id, char *name) } static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, - unsigned *usedp, char **idp) + unsigned *usedp, char **idp) { unsigned k; @@ -202,7 +204,7 @@ EXPORT_SYMBOL(dump_trace); void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl) + unsigned long *sp, unsigned long bp, char *log_lvl) { unsigned long *stack; int i; @@ -303,4 +305,3 @@ int is_valid_bugaddr(unsigned long ip) return ud2 == 0x0b0f; } - -- cgit v1.2.3 From 67f2de0bf9141dd9fe9189d0caaa28d7ad21a523 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 26 Nov 2009 08:29:10 +0100 Subject: x86: dumpstack, 64-bit: Disable preemption when walking the IRQ/exception stacks This warning: [ 847.140022] rb_producer D 0000000000000000 5928 519 2 0x00000000 [ 847.203627] BUG: using smp_processor_id() in preemptible [00000000] code: khungtaskd/517 [ 847.207360] caller is show_stack_log_lvl+0x2e/0x241 [ 847.210364] Pid: 517, comm: khungtaskd Not tainted 2.6.32-rc8-tip+ #13761 [ 847.213395] Call Trace: [ 847.215847] [] debug_smp_processor_id+0x1f0/0x20a [ 847.216809] [] show_stack_log_lvl+0x2e/0x241 [ 847.220027] [] show_stack+0x1c/0x1e [ 847.223365] [] sched_show_task+0xe4/0xe9 [ 847.226694] [] check_hung_task+0x140/0x199 [ 847.230261] [] check_hung_uninterruptible_tasks+0x1b7/0x20f [ 847.233371] [] ? watchdog+0x0/0x50 [ 847.236683] [] watchdog+0x4e/0x50 [ 847.240034] [] kthread+0x97/0x9f [ 847.243372] [] child_rip+0xa/0x20 [ 847.246690] [] ? restore_args+0x0/0x30 [ 847.250019] [] ? _spin_lock+0xe/0x10 [ 847.253351] [] ? kthread+0x0/0x9f [ 847.256833] [] ? child_rip+0x0/0x20 Happens because on preempt-RCU, khungd calls show_stack() with preemption enabled. Make sure we are not preemptible while walking the IRQ and exception stacks on 64-bit. (32-bit stack dumping is preemption safe.) Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack_64.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index cfec478a4e42..8e740934bd1f 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -206,19 +206,22 @@ void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *sp, unsigned long bp, char *log_lvl) { + unsigned long *irq_stack_end; + unsigned long *irq_stack; unsigned long *stack; + int cpu; int i; - const int cpu = smp_processor_id(); - unsigned long *irq_stack_end = - (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); - unsigned long *irq_stack = - (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); + + preempt_disable(); + cpu = smp_processor_id(); + + irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); + irq_stack = (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); /* - * debugging aid: "show_stack(NULL, NULL);" prints the - * back trace for this cpu. + * Debugging aid: "show_stack(NULL, NULL);" prints the + * back trace for this cpu: */ - if (sp == NULL) { if (task) sp = (unsigned long *)task->thread.sp; @@ -242,6 +245,8 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, printk(" %016lx", *stack++); touch_nmi_watchdog(); } + preempt_enable(); + printk("\n"); show_trace_log_lvl(task, regs, sp, bp, log_lvl); } -- cgit v1.2.3 From 2d4dc890b5c8fabd818a8586607e6843c4375e62 Mon Sep 17 00:00:00 2001 From: Ilya Loginov Date: Thu, 26 Nov 2009 09:16:19 +0100 Subject: block: add helpers to run flush_dcache_page() against a bio and a request's pages Mtdblock driver doesn't call flush_dcache_page for pages in request. So, this causes problems on architectures where the icache doesn't fill from the dcache or with dcache aliases. The patch fixes this. The ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE symbol was introduced to avoid pointless empty cache-thrashing loops on architectures for which flush_dcache_page() is a no-op. Every architecture was provided with this flush pages on architectires where ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE is equal 1 or do nothing otherwise. See "fix mtd_blkdevs problem with caches on some architectures" discussion on LKML for more information. Signed-off-by: Ilya Loginov Cc: Ingo Molnar Cc: David Woodhouse Cc: Peter Horton Cc: "Ed L. Cashin" Signed-off-by: Jens Axboe --- arch/alpha/include/asm/cacheflush.h | 1 + arch/arm/include/asm/cacheflush.h | 1 + arch/avr32/include/asm/cacheflush.h | 1 + arch/blackfin/include/asm/cacheflush.h | 2 ++ arch/cris/include/asm/cacheflush.h | 1 + arch/frv/include/asm/cacheflush.h | 1 + arch/h8300/include/asm/cacheflush.h | 1 + arch/ia64/include/asm/cacheflush.h | 1 + arch/m32r/include/asm/cacheflush.h | 3 +++ arch/m68k/include/asm/cacheflush_mm.h | 1 + arch/m68k/include/asm/cacheflush_no.h | 1 + arch/microblaze/include/asm/cacheflush.h | 1 + arch/mips/include/asm/cacheflush.h | 1 + arch/mn10300/include/asm/cacheflush.h | 1 + arch/parisc/include/asm/cacheflush.h | 1 + arch/powerpc/include/asm/cacheflush.h | 1 + arch/s390/include/asm/cacheflush.h | 1 + arch/score/include/asm/cacheflush.h | 1 + arch/sh/include/asm/cacheflush.h | 1 + arch/sparc/include/asm/cacheflush_32.h | 1 + arch/sparc/include/asm/cacheflush_64.h | 1 + arch/x86/include/asm/cacheflush.h | 1 + arch/xtensa/include/asm/cacheflush.h | 1 + block/blk-core.c | 19 +++++++++++++++++++ drivers/mtd/mtd_blkdevs.c | 2 ++ fs/bio.c | 12 ++++++++++++ include/asm-generic/cacheflush.h | 1 + include/linux/bio.h | 12 ++++++++++++ include/linux/blkdev.h | 11 +++++++++++ 29 files changed, 83 insertions(+) (limited to 'arch/x86') diff --git a/arch/alpha/include/asm/cacheflush.h b/arch/alpha/include/asm/cacheflush.h index b686cc7fc44e..01d71e1c8a9e 100644 --- a/arch/alpha/include/asm/cacheflush.h +++ b/arch/alpha/include/asm/cacheflush.h @@ -9,6 +9,7 @@ #define flush_cache_dup_mm(mm) do { } while (0) #define flush_cache_range(vma, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 #define flush_dcache_page(page) do { } while (0) #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index fd03fb63a332..247b7b0adc2a 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -408,6 +408,7 @@ extern void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, * about to change to user space. This is the same method as used on SPARC64. * See update_mmu_cache for the user space part. */ +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *); extern void __flush_dcache_page(struct address_space *mapping, struct page *page); diff --git a/arch/avr32/include/asm/cacheflush.h b/arch/avr32/include/asm/cacheflush.h index 670674749b20..96e53820bbbd 100644 --- a/arch/avr32/include/asm/cacheflush.h +++ b/arch/avr32/include/asm/cacheflush.h @@ -107,6 +107,7 @@ extern void flush_icache_page(struct vm_area_struct *vma, struct page *page); * do something here, but only for certain configurations. No such * configurations exist at this time. */ +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 #define flush_dcache_page(page) do { } while (0) #define flush_dcache_mmap_lock(page) do { } while (0) #define flush_dcache_mmap_unlock(page) do { } while (0) diff --git a/arch/blackfin/include/asm/cacheflush.h b/arch/blackfin/include/asm/cacheflush.h index af03a36c7a4e..417eaac7fe99 100644 --- a/arch/blackfin/include/asm/cacheflush.h +++ b/arch/blackfin/include/asm/cacheflush.h @@ -68,9 +68,11 @@ do { memcpy(dst, src, len); \ #endif #if defined(CONFIG_BFIN_EXTMEM_WRITEBACK) || defined(CONFIG_BFIN_L2_WRITEBACK) # define flush_dcache_range(start,end) blackfin_dcache_flush_range((start), (end)) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 # define flush_dcache_page(page) blackfin_dflush_page(page_address(page)) #else # define flush_dcache_range(start,end) do { } while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 # define flush_dcache_page(page) do { } while (0) #endif diff --git a/arch/cris/include/asm/cacheflush.h b/arch/cris/include/asm/cacheflush.h index cf60e3f69f8d..36795bca605e 100644 --- a/arch/cris/include/asm/cacheflush.h +++ b/arch/cris/include/asm/cacheflush.h @@ -12,6 +12,7 @@ #define flush_cache_dup_mm(mm) do { } while (0) #define flush_cache_range(vma, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 #define flush_dcache_page(page) do { } while (0) #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) diff --git a/arch/frv/include/asm/cacheflush.h b/arch/frv/include/asm/cacheflush.h index 432a69e7f3d4..edbac54ae015 100644 --- a/arch/frv/include/asm/cacheflush.h +++ b/arch/frv/include/asm/cacheflush.h @@ -47,6 +47,7 @@ static inline void __flush_cache_all(void) } /* dcache/icache coherency... */ +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 #ifdef CONFIG_MMU extern void flush_dcache_page(struct page *page); #else diff --git a/arch/h8300/include/asm/cacheflush.h b/arch/h8300/include/asm/cacheflush.h index 5ffdca217b95..4cf2df20c1ce 100644 --- a/arch/h8300/include/asm/cacheflush.h +++ b/arch/h8300/include/asm/cacheflush.h @@ -15,6 +15,7 @@ #define flush_cache_dup_mm(mm) do { } while (0) #define flush_cache_range(vma,a,b) #define flush_cache_page(vma,p,pfn) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 #define flush_dcache_page(page) #define flush_dcache_mmap_lock(mapping) #define flush_dcache_mmap_unlock(mapping) diff --git a/arch/ia64/include/asm/cacheflush.h b/arch/ia64/include/asm/cacheflush.h index c8ce2719fee8..429eefc93ee7 100644 --- a/arch/ia64/include/asm/cacheflush.h +++ b/arch/ia64/include/asm/cacheflush.h @@ -25,6 +25,7 @@ #define flush_cache_vmap(start, end) do { } while (0) #define flush_cache_vunmap(start, end) do { } while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 #define flush_dcache_page(page) \ do { \ clear_bit(PG_arch_1, &(page)->flags); \ diff --git a/arch/m32r/include/asm/cacheflush.h b/arch/m32r/include/asm/cacheflush.h index 78587c958146..8e8e04516c39 100644 --- a/arch/m32r/include/asm/cacheflush.h +++ b/arch/m32r/include/asm/cacheflush.h @@ -12,6 +12,7 @@ extern void _flush_cache_copyback_all(void); #define flush_cache_dup_mm(mm) do { } while (0) #define flush_cache_range(vma, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 #define flush_dcache_page(page) do { } while (0) #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) @@ -33,6 +34,7 @@ extern void smp_flush_cache_all(void); #define flush_cache_dup_mm(mm) do { } while (0) #define flush_cache_range(vma, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 #define flush_dcache_page(page) do { } while (0) #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) @@ -46,6 +48,7 @@ extern void smp_flush_cache_all(void); #define flush_cache_dup_mm(mm) do { } while (0) #define flush_cache_range(vma, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 #define flush_dcache_page(page) do { } while (0) #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) diff --git a/arch/m68k/include/asm/cacheflush_mm.h b/arch/m68k/include/asm/cacheflush_mm.h index 16bf375fdbe1..73de7c89d8e0 100644 --- a/arch/m68k/include/asm/cacheflush_mm.h +++ b/arch/m68k/include/asm/cacheflush_mm.h @@ -128,6 +128,7 @@ static inline void __flush_page_to_ram(void *vaddr) } } +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 #define flush_dcache_page(page) __flush_page_to_ram(page_address(page)) #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) diff --git a/arch/m68k/include/asm/cacheflush_no.h b/arch/m68k/include/asm/cacheflush_no.h index c65f00a94553..89f195656be7 100644 --- a/arch/m68k/include/asm/cacheflush_no.h +++ b/arch/m68k/include/asm/cacheflush_no.h @@ -12,6 +12,7 @@ #define flush_cache_range(vma, start, end) __flush_cache_all() #define flush_cache_page(vma, vmaddr) do { } while (0) #define flush_dcache_range(start,len) __flush_cache_all() +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 #define flush_dcache_page(page) do { } while (0) #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) diff --git a/arch/microblaze/include/asm/cacheflush.h b/arch/microblaze/include/asm/cacheflush.h index f989d6aad648..088076e657b3 100644 --- a/arch/microblaze/include/asm/cacheflush.h +++ b/arch/microblaze/include/asm/cacheflush.h @@ -37,6 +37,7 @@ #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) #define flush_dcache_range(start, end) __invalidate_dcache_range(start, end) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 #define flush_dcache_page(page) do { } while (0) #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) diff --git a/arch/mips/include/asm/cacheflush.h b/arch/mips/include/asm/cacheflush.h index 03b1d69b142f..40bb9fde205f 100644 --- a/arch/mips/include/asm/cacheflush.h +++ b/arch/mips/include/asm/cacheflush.h @@ -38,6 +38,7 @@ extern void (*flush_cache_range)(struct vm_area_struct *vma, extern void (*flush_cache_page)(struct vm_area_struct *vma, unsigned long page, unsigned long pfn); extern void __flush_dcache_page(struct page *page); +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 static inline void flush_dcache_page(struct page *page) { if (cpu_has_dc_aliases || !cpu_has_ic_fills_f_dc) diff --git a/arch/mn10300/include/asm/cacheflush.h b/arch/mn10300/include/asm/cacheflush.h index 1a55d61f0d06..29e692f7f030 100644 --- a/arch/mn10300/include/asm/cacheflush.h +++ b/arch/mn10300/include/asm/cacheflush.h @@ -26,6 +26,7 @@ #define flush_cache_page(vma, vmaddr, pfn) do {} while (0) #define flush_cache_vmap(start, end) do {} while (0) #define flush_cache_vunmap(start, end) do {} while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 #define flush_dcache_page(page) do {} while (0) #define flush_dcache_mmap_lock(mapping) do {} while (0) #define flush_dcache_mmap_unlock(mapping) do {} while (0) diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index 724395143f26..7a73b615c23d 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -42,6 +42,7 @@ void flush_cache_mm(struct mm_struct *mm); #define flush_cache_vmap(start, end) flush_cache_all() #define flush_cache_vunmap(start, end) flush_cache_all() +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *page); #define flush_dcache_mmap_lock(mapping) \ diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index ba667a383b8c..ab9e402518e8 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -25,6 +25,7 @@ #define flush_cache_vmap(start, end) do { } while (0) #define flush_cache_vunmap(start, end) do { } while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *page); #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) diff --git a/arch/s390/include/asm/cacheflush.h b/arch/s390/include/asm/cacheflush.h index 49d5af916d01..405cc97c6249 100644 --- a/arch/s390/include/asm/cacheflush.h +++ b/arch/s390/include/asm/cacheflush.h @@ -10,6 +10,7 @@ #define flush_cache_dup_mm(mm) do { } while (0) #define flush_cache_range(vma, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 #define flush_dcache_page(page) do { } while (0) #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) diff --git a/arch/score/include/asm/cacheflush.h b/arch/score/include/asm/cacheflush.h index 07cc8fc457cd..caaba24036e3 100644 --- a/arch/score/include/asm/cacheflush.h +++ b/arch/score/include/asm/cacheflush.h @@ -16,6 +16,7 @@ extern void flush_icache_range(unsigned long start, unsigned long end); extern void flush_dcache_range(unsigned long start, unsigned long end); #define flush_cache_dup_mm(mm) do {} while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 #define flush_dcache_page(page) do {} while (0) #define flush_dcache_mmap_lock(mapping) do {} while (0) #define flush_dcache_mmap_unlock(mapping) do {} while (0) diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h index c29918f3c819..dda96eb3e7c0 100644 --- a/arch/sh/include/asm/cacheflush.h +++ b/arch/sh/include/asm/cacheflush.h @@ -42,6 +42,7 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn); extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *page); extern void flush_icache_range(unsigned long start, unsigned long end); extern void flush_icache_page(struct vm_area_struct *vma, diff --git a/arch/sparc/include/asm/cacheflush_32.h b/arch/sparc/include/asm/cacheflush_32.h index 68ac10910271..2e468773f250 100644 --- a/arch/sparc/include/asm/cacheflush_32.h +++ b/arch/sparc/include/asm/cacheflush_32.h @@ -75,6 +75,7 @@ BTFIXUPDEF_CALL(void, flush_sig_insns, struct mm_struct *, unsigned long) extern void sparc_flush_page_to_ram(struct page *page); +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 #define flush_dcache_page(page) sparc_flush_page_to_ram(page) #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) diff --git a/arch/sparc/include/asm/cacheflush_64.h b/arch/sparc/include/asm/cacheflush_64.h index c43321729b3b..b95384033e89 100644 --- a/arch/sparc/include/asm/cacheflush_64.h +++ b/arch/sparc/include/asm/cacheflush_64.h @@ -37,6 +37,7 @@ extern void flush_dcache_page_all(struct mm_struct *mm, struct page *page); #endif extern void __flush_dcache_range(unsigned long start, unsigned long end); +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *page); #define flush_icache_page(vma, pg) do { } while(0) diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index b54f6afe7ec4..9076add593a8 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -12,6 +12,7 @@ static inline void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { } static inline void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn) { } +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 static inline void flush_dcache_page(struct page *page) { } static inline void flush_dcache_mmap_lock(struct address_space *mapping) { } static inline void flush_dcache_mmap_unlock(struct address_space *mapping) { } diff --git a/arch/xtensa/include/asm/cacheflush.h b/arch/xtensa/include/asm/cacheflush.h index b7b8fbe47c77..a508f2f73bd7 100644 --- a/arch/xtensa/include/asm/cacheflush.h +++ b/arch/xtensa/include/asm/cacheflush.h @@ -101,6 +101,7 @@ static inline void __invalidate_icache_page_alias(unsigned long virt, #define flush_cache_vmap(start,end) flush_cache_all() #define flush_cache_vunmap(start,end) flush_cache_all() +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page*); extern void flush_cache_range(struct vm_area_struct*, ulong, ulong); extern void flush_cache_page(struct vm_area_struct*, unsigned long, unsigned long); diff --git a/block/blk-core.c b/block/blk-core.c index 71da5111120c..718897e6d37f 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2358,6 +2358,25 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, rq->rq_disk = bio->bi_bdev->bd_disk; } +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE +/** + * rq_flush_dcache_pages - Helper function to flush all pages in a request + * @rq: the request to be flushed + * + * Description: + * Flush all pages in @rq. + */ +void rq_flush_dcache_pages(struct request *rq) +{ + struct req_iterator iter; + struct bio_vec *bvec; + + rq_for_each_segment(bvec, rq, iter) + flush_dcache_page(bvec->bv_page); +} +EXPORT_SYMBOL_GPL(rq_flush_dcache_pages); +#endif + /** * blk_lld_busy - Check if underlying low-level drivers of a device are busy * @q : the queue of the device being checked diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 8ca17a3e96ea..64e2b379a350 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -59,12 +59,14 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr, for (; nsect > 0; nsect--, block++, buf += tr->blksize) if (tr->readsect(dev, block, buf)) return -EIO; + rq_flush_dcache_pages(req); return 0; case WRITE: if (!tr->writesect) return -EIO; + rq_flush_dcache_pages(req); for (; nsect > 0; nsect--, block++, buf += tr->blksize) if (tr->writesect(dev, block, buf)) return -EIO; diff --git a/fs/bio.c b/fs/bio.c index 12da5db8682c..e23a63f4f7de 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1393,6 +1393,18 @@ void bio_check_pages_dirty(struct bio *bio) } } +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE +void bio_flush_dcache_pages(struct bio *bi) +{ + int i; + struct bio_vec *bvec; + + bio_for_each_segment(bvec, bi, i) + flush_dcache_page(bvec->bv_page); +} +EXPORT_SYMBOL(bio_flush_dcache_pages); +#endif + /** * bio_endio - end I/O on a bio * @bio: bio diff --git a/include/asm-generic/cacheflush.h b/include/asm-generic/cacheflush.h index ba4ec39a1131..57b5c3c82e86 100644 --- a/include/asm-generic/cacheflush.h +++ b/include/asm-generic/cacheflush.h @@ -13,6 +13,7 @@ #define flush_cache_dup_mm(mm) do { } while (0) #define flush_cache_range(vma, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 #define flush_dcache_page(page) do { } while (0) #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) diff --git a/include/linux/bio.h b/include/linux/bio.h index 474792b825d0..7fc5606e6ea5 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -391,6 +391,18 @@ extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int, gfp_t, int); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); + +#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE +# error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform" +#endif +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE +extern void bio_flush_dcache_pages(struct bio *bi); +#else +static inline void bio_flush_dcache_pages(struct bio *bi) +{ +} +#endif + extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *, unsigned long, unsigned int, int, gfp_t); extern struct bio *bio_copy_user_iov(struct request_queue *, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1cc02972fbe2..e727f6c44c44 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -752,6 +752,17 @@ struct req_iterator { #define rq_iter_last(rq, _iter) \ (_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1) +#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE +# error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform" +#endif +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE +extern void rq_flush_dcache_pages(struct request *rq); +#else +static inline void rq_flush_dcache_pages(struct request *rq) +{ +} +#endif + extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); extern void register_disk(struct gendisk *dev); -- cgit v1.2.3 From 605bfaee9078cd0b01d83402315389839ee4bb5c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 26 Nov 2009 05:35:42 +0100 Subject: hw-breakpoints: Simplify error handling in breakpoint creation requests This simplifies the error handling when we create a breakpoint. We don't need to check the NULL return value corner case anymore since we have improved perf_event_create_kernel_counter() to always return an error code in the failure case. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Steven Rostedt Cc: Prasad LKML-Reference: <1259210142-5714-3-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/ptrace.c | 8 +------- kernel/hw_breakpoint.c | 4 ++-- kernel/trace/trace_ksym.c | 16 ++++------------ samples/hw_breakpoint/data_breakpoint.c | 3 --- 4 files changed, 7 insertions(+), 24 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index b25f8947ed7a..75e0cd847bd6 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -657,10 +657,7 @@ restore: tsk, true); thread->ptrace_bps[i] = NULL; - if (!bp) { /* incorrect bp, or we have a bug in bp API */ - rc = -EINVAL; - break; - } + /* Incorrect bp, or we have a bug in bp API */ if (IS_ERR(bp)) { rc = PTR_ERR(bp); bp = NULL; @@ -729,9 +726,6 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, tsk, bp->attr.disabled); } - - if (!bp) - return -EIO; /* * CHECKME: the previous code returned -EIO if the addr wasn't a * valid task virtual addr. The new one will return -EINVAL in this diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 06d372fc026d..dd3fb4a999d3 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -442,7 +442,7 @@ register_wide_hw_breakpoint(unsigned long addr, *pevent = bp; - if (IS_ERR(bp) || !bp) { + if (IS_ERR(bp)) { err = PTR_ERR(bp); goto fail; } @@ -453,7 +453,7 @@ register_wide_hw_breakpoint(unsigned long addr, fail: for_each_possible_cpu(cpu) { pevent = per_cpu_ptr(cpu_events, cpu); - if (IS_ERR(*pevent) || !*pevent) + if (IS_ERR(*pevent)) break; unregister_hw_breakpoint(*pevent); } diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 9f040e42f516..c538b15b95d6 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -200,12 +200,9 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) entry->ksym_hbp = register_wide_hw_breakpoint(entry->ksym_addr, entry->len, entry->type, ksym_hbp_handler, true); + if (IS_ERR(entry->ksym_hbp)) { - entry->ksym_hbp = NULL; ret = PTR_ERR(entry->ksym_hbp); - } - - if (!entry->ksym_hbp) { printk(KERN_INFO "ksym_tracer request failed. Try again" " later!!\n"); goto err; @@ -332,21 +329,16 @@ static ssize_t ksym_trace_filter_write(struct file *file, if (changed) { unregister_wide_hw_breakpoint(entry->ksym_hbp); entry->type = op; + ret = 0; if (op > 0) { entry->ksym_hbp = register_wide_hw_breakpoint(entry->ksym_addr, entry->len, entry->type, ksym_hbp_handler, true); if (IS_ERR(entry->ksym_hbp)) - entry->ksym_hbp = NULL; - - /* modified without problem */ - if (entry->ksym_hbp) { - ret = 0; + ret = PTR_ERR(entry->ksym_hbp); + else goto out; - } - } else { - ret = 0; } /* Error or "symbol:---" case: drop it */ ksym_filter_entry_count--; diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c index 95063818bcf4..ee7f9fbaffbd 100644 --- a/samples/hw_breakpoint/data_breakpoint.c +++ b/samples/hw_breakpoint/data_breakpoint.c @@ -61,9 +61,6 @@ static int __init hw_break_module_init(void) if (IS_ERR(sample_hbp)) { ret = PTR_ERR(sample_hbp); goto fail; - } else if (!sample_hbp) { - ret = -EINVAL; - goto fail; } printk(KERN_INFO "HW Breakpoint for %s write installed\n", ksym_name); -- cgit v1.2.3 From 2c31b7958fd21df9fa04e5c36cda0f063ac70b27 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 26 Nov 2009 06:04:38 +0100 Subject: x86/hw-breakpoints: Don't lose GE flag while disabling a breakpoint When we schedule out a breakpoint from the cpu, we also incidentally remove the "Global exact breakpoint" flag from the breakpoint control register. It makes us losing the fine grained precision about the origin of the instructions that may trigger breakpoint exceptions for the other breakpoints running in this cpu. Reported-by: Prasad Signed-off-by: Frederic Weisbecker LKML-Reference: <1259211878-6013-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/hw_breakpoint.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 92ea5aad0b5c..d42f65ac4927 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -59,21 +59,27 @@ static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]); static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]); -/* - * Encode the length, type, Exact, and Enable bits for a particular breakpoint - * as stored in debug register 7. - */ -unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) +static inline unsigned long +__encode_dr7(int drnum, unsigned int len, unsigned int type) { unsigned long bp_info; bp_info = (len | type) & 0xf; bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE); - bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)) | - DR_GLOBAL_SLOWDOWN; + bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)); + return bp_info; } +/* + * Encode the length, type, Exact, and Enable bits for a particular breakpoint + * as stored in debug register 7. + */ +unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) +{ + return __encode_dr7(drnum, len, type) | DR_GLOBAL_SLOWDOWN; +} + /* * Decode the length and type bits for a particular breakpoint as * stored in debug register 7. Return the "enabled" status. @@ -154,7 +160,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) return; dr7 = &__get_cpu_var(cpu_dr7); - *dr7 &= ~encode_dr7(i, info->len, info->type); + *dr7 &= ~__encode_dr7(i, info->len, info->type); set_debugreg(*dr7, 7); } -- cgit v1.2.3 From 9b3660a55a9052518c91cc7c62d89e22f3f6f490 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 17 Nov 2009 18:22:16 -0600 Subject: x86: Limit number of per cpu TSC sync messages Limit the number of per cpu TSC sync messages by only printing to the console if an error occurs, otherwise print as a DEBUG message. The info message "Skipping synchronization ..." is only printed after the last cpu has booted. Signed-off-by: Mike Travis Cc: Heiko Carstens Cc: Roland Dreier Cc: Randy Dunlap Cc: Tejun Heo Cc: Andi Kleen Cc: Greg Kroah-Hartman Cc: Yinghai Lu Cc: David Rientjes Cc: Steven Rostedt Cc: Rusty Russell Cc: Hidetoshi Seto Cc: Jack Steiner Cc: Frederic Weisbecker LKML-Reference: <20091118002222.181053000@alcatraz.americas.sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc_sync.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index f37930954d15..eed156851f5d 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -114,13 +114,12 @@ void __cpuinit check_tsc_sync_source(int cpu) return; if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { - printk_once(KERN_INFO "Skipping synchronization checks as TSC is reliable.\n"); + if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING) + pr_info( + "Skipped synchronization checks as TSC is reliable.\n"); return; } - pr_info("checking TSC synchronization [CPU#%d -> CPU#%d]:", - smp_processor_id(), cpu); - /* * Reset it - in case this is a second bootup: */ @@ -142,12 +141,14 @@ void __cpuinit check_tsc_sync_source(int cpu) cpu_relax(); if (nr_warps) { - printk("\n"); + pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n", + smp_processor_id(), cpu); pr_warning("Measured %Ld cycles TSC warp between CPUs, " "turning off TSC clock.\n", max_warp); mark_tsc_unstable("check_tsc_sync_source failed"); } else { - printk(" passed.\n"); + pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n", + smp_processor_id(), cpu); } /* -- cgit v1.2.3 From 767df1bdd8cbff2c8c40c9ac8295bbdaa5fb24c4 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 26 Nov 2009 17:29:02 +0900 Subject: x86, mce: Add __cpuinit to hotplug callback functions The mce_disable_cpu() and mce_reenable_cpu() are called only from mce_cpu_callback() which is marked as __cpuinit. So these functions can be __cpuinit too. Signed-off-by: Hidetoshi Seto Cc: Andi Kleen LKML-Reference: <4B0E3C4E.4090809@jp.fujitsu.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 5f277cad2ed7..0bcaa3875863 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1953,13 +1953,14 @@ static __cpuinit void mce_remove_device(unsigned int cpu) } /* Make sure there are no machine checks on offlined CPUs. */ -static void mce_disable_cpu(void *h) +static void __cpuinit mce_disable_cpu(void *h) { unsigned long action = *(unsigned long *)h; int i; if (!mce_available(¤t_cpu_data)) return; + if (!(action & CPU_TASKS_FROZEN)) cmci_clear(); for (i = 0; i < banks; i++) { @@ -1970,7 +1971,7 @@ static void mce_disable_cpu(void *h) } } -static void mce_reenable_cpu(void *h) +static void __cpuinit mce_reenable_cpu(void *h) { unsigned long action = *(unsigned long *)h; int i; -- cgit v1.2.3 From 79b0379cee09b00ef309384aff652e328e438c79 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 25 Nov 2009 14:18:26 -0500 Subject: x86: Optimize loadsegment() Zero the input register in the exception handler instead of using an extra register to pass in a zero value. Signed-off-by: Brian Gerst LKML-Reference: <1259176706-5908-1-git-send-email-brgerst@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/system.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 1a953e26401c..537395a2877a 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -156,18 +156,19 @@ extern void native_load_gs_index(unsigned); * segment if something goes wrong.. */ #define loadsegment(seg, value) \ +do { \ + unsigned short __val = value; \ asm volatile("\n" \ "1:\t" \ "movl %k0,%%" #seg "\n" \ - "2:\n" \ ".section .fixup,\"ax\"\n" \ - "3:\t" \ - "movl %k1, %%" #seg "\n\t" \ - "jmp 2b\n" \ + "2:\t" \ + "xorl %k0,%k0\n\t" \ + "jmp 1b\n" \ ".previous\n" \ - _ASM_EXTABLE(1b,3b) \ - : :"r" (value), "r" (0) : "memory") - + _ASM_EXTABLE(1b, 2b) \ + : "+r" (__val) : : "memory"); \ +} while (0) /* * Save a segment register away -- cgit v1.2.3 From 64b028b22616946a05bf9580f7f7f7ee2ac070b4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 26 Nov 2009 10:37:55 +0100 Subject: x86: Clean up the loadsegment() macro Make it readable in the source too, not just in the assembly output. No change in functionality. Cc: Brian Gerst LKML-Reference: <1259176706-5908-1-git-send-email-brgerst@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/system.h | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 537395a2877a..022a84386de8 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -155,19 +155,21 @@ extern void native_load_gs_index(unsigned); * Load a segment. Fall back on loading the zero * segment if something goes wrong.. */ -#define loadsegment(seg, value) \ -do { \ - unsigned short __val = value; \ - asm volatile("\n" \ - "1:\t" \ - "movl %k0,%%" #seg "\n" \ - ".section .fixup,\"ax\"\n" \ - "2:\t" \ - "xorl %k0,%k0\n\t" \ - "jmp 1b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b, 2b) \ - : "+r" (__val) : : "memory"); \ +#define loadsegment(seg, value) \ +do { \ + unsigned short __val = (value); \ + \ + asm volatile(" \n" \ + "1: movl %k0,%%" #seg " \n" \ + \ + ".section .fixup,\"ax\" \n" \ + "2: xorl %k0,%k0 \n" \ + " jmp 1b \n" \ + ".previous \n" \ + \ + _ASM_EXTABLE(1b, 2b) \ + \ + : "+r" (__val) : : "memory"); \ } while (0) /* -- cgit v1.2.3 From 8ec6993d9f7d961014af970ded57542961fe9ad9 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 25 Nov 2009 11:17:36 -0500 Subject: x86, 64-bit: Set data segments to null after switching to 64-bit mode This prevents kernel threads from inheriting non-null segment selectors, and causing optimizations in __switch_to() to be ineffective. Signed-off-by: Brian Gerst Cc: Tim Blechmann Cc: Linus Torvalds Cc: H. Peter Anvin Cc: Jeremy Fitzhardinge Cc: Jan Beulich LKML-Reference: <1259165856-3512-1-git-send-email-brgerst@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 26406601031c..17ba9ecf27c5 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -212,8 +212,8 @@ ENTRY(secondary_startup_64) */ lgdt early_gdt_descr(%rip) - /* set up data segments. actually 0 would do too */ - movl $__KERNEL_DS,%eax + /* set up data segments */ + xorl %eax,%eax movl %eax,%ds movl %eax,%ss movl %eax,%es -- cgit v1.2.3 From 918bc960dc630b1a79c0d2991a81985812ff69f5 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Wed, 25 Nov 2009 10:20:19 -0600 Subject: x86: SGI UV: Map low MMR ranges Explicitly mmap the UV chipset MMR address ranges used to access blade-local registers. Although these same MMRs are also mmaped at higher addresses, the low range is more convenient when accessing blade-local registers. The low range addresses always alias to the local blade regardless of the blade id. Signed-off-by: Jack Steiner LKML-Reference: <20091125162018.GA25445@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index f5f5886a6b53..6d425490fb1f 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -409,6 +409,12 @@ static __init void map_mmioh_high(int max_pnode) map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc); } +static __init void map_low_mmrs(void) +{ + init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE); + init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE); +} + static __init void uv_rtc_init(void) { long status; @@ -550,6 +556,8 @@ void __init uv_system_init(void) unsigned long mmr_base, present, paddr; unsigned short pnode_mask; + map_low_mmrs(); + m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); m_val = m_n_config.s.m_skt; n_val = m_n_config.s.n_skt; -- cgit v1.2.3 From 5fa10b28e57f94a90535cfeafe89dcee9f47d540 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Nov 2009 04:55:53 +0100 Subject: hw-breakpoints: Use struct perf_event_attr to define user breakpoints In-kernel user breakpoints are created using functions in which we pass breakpoint parameters as individual variables: address, length and type. Although it fits well for x86, this just does not scale across archictectures that may support this api later as these may have more or different needs. Pass in a perf_event_attr structure instead because it is meant to evolve as much as possible into a generic hardware breakpoint parameter structure. Reported-by: K.Prasad Signed-off-by: Frederic Weisbecker LKML-Reference: <1259294154-5197-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/ptrace.c | 74 ++++++++++++++++++++---------------- include/linux/hw_breakpoint.h | 36 ++++++++---------- kernel/hw_breakpoint.c | 87 +++++++++---------------------------------- 3 files changed, 75 insertions(+), 122 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 75e0cd847bd6..2941b32ea666 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -593,6 +593,34 @@ static unsigned long ptrace_get_dr7(struct perf_event *bp[]) return dr7; } +static struct perf_event * +ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, + struct task_struct *tsk) +{ + int err; + int gen_len, gen_type; + DEFINE_BREAKPOINT_ATTR(attr); + + /* + * We shoud have at least an inactive breakpoint at this + * slot. It means the user is writing dr7 without having + * written the address register first + */ + if (!bp) + return ERR_PTR(-EINVAL); + + err = arch_bp_generic_fields(len, type, &gen_len, &gen_type); + if (err) + return ERR_PTR(err); + + attr = bp->attr; + attr.bp_len = gen_len; + attr.bp_type = gen_type; + attr.disabled = 0; + + return modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); +} + /* * Handle ptrace writes to debug register 7. */ @@ -603,7 +631,6 @@ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) int i, orig_ret = 0, rc = 0; int enabled, second_pass = 0; unsigned len, type; - int gen_len, gen_type; struct perf_event *bp; data &= ~DR_CONTROL_RESERVED; @@ -634,33 +661,12 @@ restore: continue; } - /* - * We shoud have at least an inactive breakpoint at this - * slot. It means the user is writing dr7 without having - * written the address register first - */ - if (!bp) { - rc = -EINVAL; - break; - } - - rc = arch_bp_generic_fields(len, type, &gen_len, &gen_type); - if (rc) - break; - - /* - * This is a temporary thing as bp is unregistered/registered - * to simulate modification - */ - bp = modify_user_hw_breakpoint(bp, bp->attr.bp_addr, gen_len, - gen_type, bp->callback, - tsk, true); - thread->ptrace_bps[i] = NULL; + bp = ptrace_modify_breakpoint(bp, len, type, tsk); /* Incorrect bp, or we have a bug in bp API */ if (IS_ERR(bp)) { rc = PTR_ERR(bp); - bp = NULL; + thread->ptrace_bps[i] = NULL; break; } thread->ptrace_bps[i] = bp; @@ -707,24 +713,26 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, { struct perf_event *bp; struct thread_struct *t = &tsk->thread; + DEFINE_BREAKPOINT_ATTR(attr); if (!t->ptrace_bps[nr]) { /* * Put stub len and type to register (reserve) an inactive but * correct bp */ - bp = register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_1, - HW_BREAKPOINT_W, - ptrace_triggered, tsk, - false); + attr.bp_addr = addr; + attr.bp_len = HW_BREAKPOINT_LEN_1; + attr.bp_type = HW_BREAKPOINT_W; + attr.disabled = 1; + + bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk); } else { bp = t->ptrace_bps[nr]; t->ptrace_bps[nr] = NULL; - bp = modify_user_hw_breakpoint(bp, addr, bp->attr.bp_len, - bp->attr.bp_type, - bp->callback, - tsk, - bp->attr.disabled); + + attr = bp->attr; + attr.bp_addr = addr; + bp = modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); } /* * CHECKME: the previous code returned -EIO if the addr wasn't a diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index c9f7f7c7b0e0..5da472e434b7 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -20,6 +20,14 @@ enum { #ifdef CONFIG_HAVE_HW_BREAKPOINT +/* As it's for in-kernel or ptrace use, we want it to be pinned */ +#define DEFINE_BREAKPOINT_ATTR(name) \ +struct perf_event_attr name = { \ + .type = PERF_TYPE_BREAKPOINT, \ + .size = sizeof(name), \ + .pinned = 1, \ +}; + static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) { return bp->attr.bp_addr; @@ -36,22 +44,16 @@ static inline int hw_breakpoint_len(struct perf_event *bp) } extern struct perf_event * -register_user_hw_breakpoint(unsigned long addr, - int len, - int type, +register_user_hw_breakpoint(struct perf_event_attr *attr, perf_callback_t triggered, - struct task_struct *tsk, - bool active); + struct task_struct *tsk); /* FIXME: only change from the attr, and don't unregister */ extern struct perf_event * modify_user_hw_breakpoint(struct perf_event *bp, - unsigned long addr, - int len, - int type, + struct perf_event_attr *attr, perf_callback_t triggered, - struct task_struct *tsk, - bool active); + struct task_struct *tsk); /* * Kernel breakpoints are not associated with any particular thread. @@ -89,20 +91,14 @@ static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) #else /* !CONFIG_HAVE_HW_BREAKPOINT */ static inline struct perf_event * -register_user_hw_breakpoint(unsigned long addr, - int len, - int type, +register_user_hw_breakpoint(struct perf_event_attr *attr, perf_callback_t triggered, - struct task_struct *tsk, - bool active) { return NULL; } + struct task_struct *tsk) { return NULL; } static inline struct perf_event * modify_user_hw_breakpoint(struct perf_event *bp, - unsigned long addr, - int len, - int type, + struct perf_event_attr *attr, perf_callback_t triggered, - struct task_struct *tsk, - bool active) { return NULL; } + struct task_struct *tsk) { return NULL; } static inline struct perf_event * register_wide_hw_breakpoint_cpu(unsigned long addr, int len, diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 32e1018191be..2a47514f12fd 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -289,90 +289,32 @@ int register_perf_hw_breakpoint(struct perf_event *bp) return __register_perf_hw_breakpoint(bp); } -/* - * Register a breakpoint bound to a task and a given cpu. - * If cpu is -1, the breakpoint is active for the task in every cpu - * If the task is -1, the breakpoint is active for every tasks in the given - * cpu. - */ -static struct perf_event * -register_user_hw_breakpoint_cpu(unsigned long addr, - int len, - int type, - perf_callback_t triggered, - pid_t pid, - int cpu, - bool active) -{ - struct perf_event_attr *attr; - struct perf_event *bp; - - attr = kzalloc(sizeof(*attr), GFP_KERNEL); - if (!attr) - return ERR_PTR(-ENOMEM); - - attr->type = PERF_TYPE_BREAKPOINT; - attr->size = sizeof(*attr); - attr->bp_addr = addr; - attr->bp_len = len; - attr->bp_type = type; - /* - * Such breakpoints are used by debuggers to trigger signals when - * we hit the excepted memory op. We can't miss such events, they - * must be pinned. - */ - attr->pinned = 1; - - if (!active) - attr->disabled = 1; - - bp = perf_event_create_kernel_counter(attr, cpu, pid, triggered); - kfree(attr); - - return bp; -} - /** * register_user_hw_breakpoint - register a hardware breakpoint for user space - * @addr: is the memory address that triggers the breakpoint - * @len: the length of the access to the memory (1 byte, 2 bytes etc...) - * @type: the type of the access to the memory (read/write/exec) + * @attr: breakpoint attributes * @triggered: callback to trigger when we hit the breakpoint * @tsk: pointer to 'task_struct' of the process to which the address belongs - * @active: should we activate it while registering it - * */ struct perf_event * -register_user_hw_breakpoint(unsigned long addr, - int len, - int type, +register_user_hw_breakpoint(struct perf_event_attr *attr, perf_callback_t triggered, - struct task_struct *tsk, - bool active) + struct task_struct *tsk) { - return register_user_hw_breakpoint_cpu(addr, len, type, triggered, - tsk->pid, -1, active); + return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered); } EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); /** * modify_user_hw_breakpoint - modify a user-space hardware breakpoint * @bp: the breakpoint structure to modify - * @addr: is the memory address that triggers the breakpoint - * @len: the length of the access to the memory (1 byte, 2 bytes etc...) - * @type: the type of the access to the memory (read/write/exec) + * @attr: new breakpoint attributes * @triggered: callback to trigger when we hit the breakpoint * @tsk: pointer to 'task_struct' of the process to which the address belongs - * @active: should we activate it while registering it */ struct perf_event * -modify_user_hw_breakpoint(struct perf_event *bp, - unsigned long addr, - int len, - int type, +modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr, perf_callback_t triggered, - struct task_struct *tsk, - bool active) + struct task_struct *tsk) { /* * FIXME: do it without unregistering @@ -381,8 +323,7 @@ modify_user_hw_breakpoint(struct perf_event *bp, */ unregister_hw_breakpoint(bp); - return register_user_hw_breakpoint(addr, len, type, triggered, - tsk, active); + return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered); } EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); @@ -406,8 +347,16 @@ register_kernel_hw_breakpoint_cpu(unsigned long addr, int cpu, bool active) { - return register_user_hw_breakpoint_cpu(addr, len, type, triggered, - -1, cpu, active); + DEFINE_BREAKPOINT_ATTR(attr); + + attr.bp_addr = addr; + attr.bp_len = len; + attr.bp_type = type; + + if (!active) + attr.disabled = 1; + + return perf_event_create_kernel_counter(&attr, cpu, -1, triggered); } /** -- cgit v1.2.3 From 6a9401a7ac13e62ef2baf4d46e022d303edc3050 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 20 Nov 2009 13:22:21 +0100 Subject: x86/amd-iommu: Separate internal interface definitions This patch moves all function declarations which are only used inside the driver code to a seperate header file. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu.h | 10 ++++----- arch/x86/include/asm/amd_iommu_proto.h | 38 ++++++++++++++++++++++++++++++++++ arch/x86/include/asm/amd_iommu_types.h | 5 ----- arch/x86/kernel/amd_iommu.c | 1 + arch/x86/kernel/amd_iommu_init.c | 1 + 5 files changed, 44 insertions(+), 11 deletions(-) create mode 100644 arch/x86/include/asm/amd_iommu_proto.h (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index b8ef2ee93643..089133899b3c 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -23,15 +23,13 @@ #include #ifdef CONFIG_AMD_IOMMU -extern int amd_iommu_init_dma_ops(void); -extern int amd_iommu_init_passthrough(void); + extern void amd_iommu_detect(void); -extern irqreturn_t amd_iommu_int_handler(int irq, void *data); -extern void amd_iommu_flush_all_domains(void); -extern void amd_iommu_flush_all_devices(void); -extern void amd_iommu_apply_erratum_63(u16 devid); + #else + static inline void amd_iommu_detect(void) { } + #endif #endif /* _ASM_X86_AMD_IOMMU_H */ diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h new file mode 100644 index 000000000000..84786fb9a23b --- /dev/null +++ b/arch/x86/include/asm/amd_iommu_proto.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2009 Advanced Micro Devices, Inc. + * Author: Joerg Roedel + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_X86_AMD_IOMMU_PROTO_H +#define _ASM_X86_AMD_IOMMU_PROTO_H + +struct amd_iommu; + +extern int amd_iommu_init_dma_ops(void); +extern int amd_iommu_init_passthrough(void); +extern irqreturn_t amd_iommu_int_handler(int irq, void *data); +extern void amd_iommu_flush_all_domains(void); +extern void amd_iommu_flush_all_devices(void); +extern void amd_iommu_apply_erratum_63(u16 devid); +extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); + +#ifndef CONFIG_AMD_IOMMU_STATS + +static inline void amd_iommu_stats_init(void) { } + +#endif /* !CONFIG_AMD_IOMMU_STATS */ + +#endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 2a2cc7a78a81..27db7f9c7aeb 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -462,11 +462,6 @@ struct __iommu_counter { #define ADD_STATS_COUNTER(name, x) #define SUB_STATS_COUNTER(name, x) -static inline void amd_iommu_stats_init(void) { } - #endif /* CONFIG_AMD_IOMMU_STATS */ -/* some function prototypes */ -extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); - #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index b74b21247584..50d2b05a458b 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 72bdbdac9b48..db30cfe86fce 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From bf3118c1276d27fe9e84aa42382da25ee0750777 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 20 Nov 2009 13:39:19 +0100 Subject: x86/amd-iommu: Update copyright headers This patch updates the copyright headers in the relevant AMD IOMMU driver files to match the date of the latest changes. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu.h | 2 +- arch/x86/include/asm/amd_iommu_types.h | 2 +- arch/x86/kernel/amd_iommu.c | 2 +- arch/x86/kernel/amd_iommu_init.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index 089133899b3c..5af2982133b5 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. * Author: Joerg Roedel * Leo Duran * diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 27db7f9c7aeb..df5e9c8a856a 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. * Author: Joerg Roedel * Leo Duran * diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 50d2b05a458b..7fe28be3b548 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. * Author: Joerg Roedel * Leo Duran * diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index db30cfe86fce..cee11424d412 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. * Author: Joerg Roedel * Leo Duran * -- cgit v1.2.3 From bb52777ec4d736c2d7c4f037b32d4eeeb172ed89 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 20 Nov 2009 14:31:51 +0100 Subject: x86/amd-iommu: Add an index field to struct amd_iommu This patch adds an index field to struct amd_iommu which can be used to lookup it up in an array. This index will be used in struct protection_domain to keep track which protection domain has devices behind which IOMMU. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 17 +++++++++++++++++ arch/x86/kernel/amd_iommu_init.c | 15 +++++++++++++++ 2 files changed, 32 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index df5e9c8a856a..ab3e7bf1af71 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -24,6 +24,11 @@ #include #include +/* + * Maximum number of IOMMUs supported + */ +#define MAX_IOMMUS 32 + /* * some size calculation constants */ @@ -291,6 +296,9 @@ struct dma_ops_domain { struct amd_iommu { struct list_head list; + /* Index within the IOMMU array */ + int index; + /* locks the accesses to the hardware */ spinlock_t lock; @@ -356,6 +364,15 @@ struct amd_iommu { */ extern struct list_head amd_iommu_list; +/* + * Array with pointers to each IOMMU struct + * The indices are referenced in the protection domains + */ +extern struct amd_iommu *amd_iommus[MAX_IOMMUS]; + +/* Number of IOMMUs present in the system */ +extern int amd_iommus_present; + /* * Structure defining one entry in the device table */ diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index cee11424d412..8567d1698027 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -137,6 +137,10 @@ bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the system */ +/* Array to assign indices to IOMMUs*/ +struct amd_iommu *amd_iommus[MAX_IOMMUS]; +int amd_iommus_present; + /* * Pointer to the device table which is shared by all AMD IOMMUs * it is indexed by the PCI device id or the HT unit id and contains @@ -840,7 +844,18 @@ static void __init free_iommu_all(void) static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) { spin_lock_init(&iommu->lock); + + /* Add IOMMU to internal data structures */ list_add_tail(&iommu->list, &amd_iommu_list); + iommu->index = amd_iommus_present++; + + if (unlikely(iommu->index >= MAX_IOMMUS)) { + WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n"); + return -ENOSYS; + } + + /* Index is fine - add IOMMU to the array */ + amd_iommus[iommu->index] = iommu; /* * Copy data from ACPI table entry to the iommu struct -- cgit v1.2.3 From c459611424d8b8396060eb766e23bd0c70c993bc Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 20 Nov 2009 14:57:32 +0100 Subject: x86/amd-iommu: Add per IOMMU reference counting This patch adds reference counting for protection domains per IOMMU. This allows a smarter TLB flushing strategy. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 2 ++ arch/x86/kernel/amd_iommu.c | 12 +++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index ab3e7bf1af71..e68b14811380 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -238,7 +238,9 @@ struct protection_domain { unsigned long flags; /* flags to find out type of domain */ bool updated; /* complete domain flush required */ unsigned dev_cnt; /* devices assigned to this domain */ + unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ void *priv; /* private data */ + }; /* diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 7fe28be3b548..8c38f0085403 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1175,7 +1175,9 @@ static void __attach_device(struct amd_iommu *iommu, /* update DTE entry */ set_dte_entry(devid, domain); - domain->dev_cnt += 1; + /* Do reference counting */ + domain->dev_iommu[iommu->index] += 1; + domain->dev_cnt += 1; /* ready */ spin_unlock(&domain->lock); @@ -1209,6 +1211,9 @@ static void attach_device(struct amd_iommu *iommu, */ static void __detach_device(struct protection_domain *domain, u16 devid) { + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + + BUG_ON(!iommu); /* lock domain */ spin_lock(&domain->lock); @@ -1223,8 +1228,9 @@ static void __detach_device(struct protection_domain *domain, u16 devid) amd_iommu_apply_erratum_63(devid); - /* decrease reference counter */ - domain->dev_cnt -= 1; + /* decrease reference counters */ + domain->dev_iommu[iommu->index] -= 1; + domain->dev_cnt -= 1; /* ready */ spin_unlock(&domain->lock); -- cgit v1.2.3 From 0518a3a4585cb3eeeaf14ca57131f11d252130c6 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 20 Nov 2009 16:00:05 +0100 Subject: x86/amd-iommu: Add function to complete a tlb flush This patch adds a function to the AMD IOMMU driver which completes all queued commands an all IOMMUs a specific domain has devices attached on. This is required in a later patch when per-domain flushing is implemented. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 8c38f0085403..8fa5cc3e02d2 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -376,6 +376,22 @@ out: return 0; } +static void iommu_flush_complete(struct protection_domain *domain) +{ + int i; + + for (i = 0; i < amd_iommus_present; ++i) { + if (!domain->dev_iommu[i]) + continue; + + /* + * Devices of this domain are behind this IOMMU + * We need to wait for completion of all commands. + */ + iommu_completion_wait(amd_iommus[i]); + } +} + /* * Command send function for invalidating a device table entry */ @@ -1758,7 +1774,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page, if (addr == DMA_ERROR_CODE) goto out; - iommu_completion_wait(iommu); + iommu_flush_complete(domain); out: spin_unlock_irqrestore(&domain->lock, flags); @@ -1791,7 +1807,7 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, __unmap_single(iommu, domain->priv, dma_addr, size, dir); - iommu_completion_wait(iommu); + iommu_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); } @@ -1863,7 +1879,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, goto unmap; } - iommu_completion_wait(iommu); + iommu_flush_complete(domain); out: spin_unlock_irqrestore(&domain->lock, flags); @@ -1914,7 +1930,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, s->dma_address = s->dma_length = 0; } - iommu_completion_wait(iommu); + iommu_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); } @@ -1969,7 +1985,7 @@ static void *alloc_coherent(struct device *dev, size_t size, goto out_free; } - iommu_completion_wait(iommu); + iommu_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); @@ -2010,7 +2026,7 @@ static void free_coherent(struct device *dev, size_t size, __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); - iommu_completion_wait(iommu); + iommu_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); -- cgit v1.2.3 From 6de8ad9b9ee0ec5b52ec8ec41401833e5e89186f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 23 Nov 2009 18:30:32 +0100 Subject: x86/amd-iommu: Make iommu_flush_pages aware of multiple IOMMUs This patch extends the iommu_flush_pages function to flush the TLB entries on all IOMMUs the domain has devices on. This basically gives up the former assumption that dma_ops domains are only bound to one IOMMU in the system. For dma_ops domains this is still true but not for IOMMU-API managed domains. Giving this assumption up for dma_ops domains too allows code simplification. Further it splits out the main logic into a generic function which can be used by iommu_flush_tlb too. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 8fa5cc3e02d2..7c06e574008f 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -447,10 +447,10 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, * It invalidates a single PTE if the range to flush is within a single * page. Otherwise it flushes the whole TLB of the IOMMU. */ -static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, - u64 address, size_t size) +static void __iommu_flush_pages(struct protection_domain *domain, + u64 address, size_t size, int pde) { - int s = 0; + int s = 0, i; unsigned pages = iommu_num_pages(address, size, PAGE_SIZE); address &= PAGE_MASK; @@ -464,9 +464,26 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, s = 1; } - iommu_queue_inv_iommu_pages(iommu, address, domid, 0, s); - return 0; + for (i = 0; i < amd_iommus_present; ++i) { + if (!domain->dev_iommu[i]) + continue; + + /* + * Devices of this domain are behind this IOMMU + * We need a TLB flush + */ + iommu_queue_inv_iommu_pages(amd_iommus[i], address, + domain->id, pde, s); + } + + return; +} + +static void iommu_flush_pages(struct protection_domain *domain, + u64 address, size_t size) +{ + __iommu_flush_pages(domain, address, size, 0); } /* Flush the whole IO/TLB for a given protection domain */ @@ -1683,7 +1700,7 @@ retry: iommu_flush_tlb(iommu, dma_dom->domain.id); dma_dom->need_flush = false; } else if (unlikely(iommu_has_npcache(iommu))) - iommu_flush_pages(iommu, dma_dom->domain.id, address, size); + iommu_flush_pages(&dma_dom->domain, address, size); out: return address; @@ -1731,7 +1748,7 @@ static void __unmap_single(struct amd_iommu *iommu, dma_ops_free_addresses(dma_dom, dma_addr, pages); if (amd_iommu_unmap_flush || dma_dom->need_flush) { - iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); + iommu_flush_pages(&dma_dom->domain, dma_addr, size); dma_dom->need_flush = false; } } -- cgit v1.2.3 From dcd1e92e405449ecc5e8bd8fcfebf3b2a13d3d37 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 20 Nov 2009 15:30:58 +0100 Subject: x86/amd-iommu: Use __iommu_flush_pages for tlb flushes This patch re-implements iommu_flush_tlb functions to use the __iommu_flush_pages logic. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 7c06e574008f..c55aa079ded3 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -451,7 +451,7 @@ static void __iommu_flush_pages(struct protection_domain *domain, u64 address, size_t size, int pde) { int s = 0, i; - unsigned pages = iommu_num_pages(address, size, PAGE_SIZE); + unsigned long pages = iommu_num_pages(address, size, PAGE_SIZE); address &= PAGE_MASK; @@ -487,23 +487,15 @@ static void iommu_flush_pages(struct protection_domain *domain, } /* Flush the whole IO/TLB for a given protection domain */ -static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) +static void iommu_flush_tlb(struct protection_domain *domain) { - u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; - - INC_STATS_COUNTER(domain_flush_single); - - iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); + __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0); } /* Flush the whole IO/TLB for a given protection domain - including PDE */ -static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid) +static void iommu_flush_tlb_pde(struct protection_domain *domain) { - u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; - - INC_STATS_COUNTER(domain_flush_single); - - iommu_queue_inv_iommu_pages(iommu, address, domid, 1, 1); + __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); } /* @@ -1236,7 +1228,7 @@ static void attach_device(struct amd_iommu *iommu, * here to evict all dirty stuff. */ iommu_queue_inv_dev_entry(iommu, devid); - iommu_flush_tlb_pde(iommu, domain->id); + iommu_flush_tlb_pde(domain); } /* @@ -1697,7 +1689,7 @@ retry: ADD_STATS_COUNTER(alloced_io_mem, size); if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { - iommu_flush_tlb(iommu, dma_dom->domain.id); + iommu_flush_tlb(&dma_dom->domain); dma_dom->need_flush = false; } else if (unlikely(iommu_has_npcache(iommu))) iommu_flush_pages(&dma_dom->domain, address, size); -- cgit v1.2.3 From 601367d76bd19b7eea2286ae99e5b1cb5d74f38d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 20 Nov 2009 16:08:55 +0100 Subject: x86/amd-iommu: Remove iommu_flush_domain function This iommu_flush_tlb_pde function does essentially the same. So the iommu_flush_domain function is redundant and can be removed. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index c55aa079ded3..b2c19f41f238 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -528,20 +528,6 @@ static void flush_all_domains_on_iommu(struct amd_iommu *iommu) } -/* - * This function is used to flush the IO/TLB for a given protection domain - * on every IOMMU in the system - */ -static void iommu_flush_domain(u16 domid) -{ - struct amd_iommu *iommu; - - INC_STATS_COUNTER(domain_flush_all); - - for_each_iommu(iommu) - flush_domain_on_iommu(iommu, domid); -} - void amd_iommu_flush_all_domains(void) { struct amd_iommu *iommu; @@ -1464,7 +1450,7 @@ static void update_domain(struct protection_domain *domain) update_device_table(domain); flush_devices_by_domain(domain); - iommu_flush_domain(domain->id); + iommu_flush_tlb_pde(domain); domain->updated = false; } @@ -2377,7 +2363,7 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom, iova += PAGE_SIZE; } - iommu_flush_domain(domain->id); + iommu_flush_tlb_pde(domain); } static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, -- cgit v1.2.3 From aeb26f55337d4310840c8adc3ec7d6aebb714472 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 20 Nov 2009 16:44:01 +0100 Subject: x86/amd-iommu: Implement protection domain list This patch adds code to keep a global list of all protection domains. This allows to simplify the resume code. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 7 +++++++ arch/x86/kernel/amd_iommu.c | 33 +++++++++++++++++++++++++++++++++ arch/x86/kernel/amd_iommu_init.c | 8 ++++++++ 3 files changed, 48 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index e68b14811380..b332b7f7d8d6 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -231,6 +231,7 @@ extern bool amd_iommu_dump; * independent of their use. */ struct protection_domain { + struct list_head list; /* for list of all protection domains */ spinlock_t lock; /* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ int mode; /* paging mode (0-6 levels) */ @@ -375,6 +376,12 @@ extern struct amd_iommu *amd_iommus[MAX_IOMMUS]; /* Number of IOMMUs present in the system */ extern int amd_iommus_present; +/* + * Declarations for the global list of all protection domains + */ +extern spinlock_t amd_iommu_pd_lock; +extern struct list_head amd_iommu_pd_list; + /* * Structure defining one entry in the device table */ diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index b2c19f41f238..0c4319b13014 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -985,6 +985,31 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, * ****************************************************************************/ +/* + * This function adds a protection domain to the global protection domain list + */ +static void add_domain_to_list(struct protection_domain *domain) +{ + unsigned long flags; + + spin_lock_irqsave(&amd_iommu_pd_lock, flags); + list_add(&domain->list, &amd_iommu_pd_list); + spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); +} + +/* + * This function removes a protection domain to the global + * protection domain list + */ +static void del_domain_from_list(struct protection_domain *domain) +{ + unsigned long flags; + + spin_lock_irqsave(&amd_iommu_pd_lock, flags); + list_del(&domain->list); + spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); +} + static u16 domain_id_alloc(void) { unsigned long flags; @@ -1073,6 +1098,8 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) if (!dom) return; + del_domain_from_list(&dom->domain); + free_pagetable(&dom->domain); for (i = 0; i < APERTURE_MAX_RANGES; ++i) { @@ -1113,6 +1140,8 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) dma_dom->need_flush = false; dma_dom->target_dev = 0xffff; + add_domain_to_list(&dma_dom->domain); + if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL)) goto free_dma_dom; @@ -2188,6 +2217,8 @@ static void protection_domain_free(struct protection_domain *domain) if (!domain) return; + del_domain_from_list(domain); + if (domain->id) domain_id_free(domain->id); @@ -2207,6 +2238,8 @@ static struct protection_domain *protection_domain_alloc(void) if (!domain->id) goto out_err; + add_domain_to_list(domain); + return domain; out_err: diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 8567d1698027..73d5173765d2 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -141,6 +141,12 @@ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the struct amd_iommu *amd_iommus[MAX_IOMMUS]; int amd_iommus_present; +/* + * List of protection domains - used during resume + */ +LIST_HEAD(amd_iommu_pd_list); +spinlock_t amd_iommu_pd_lock; + /* * Pointer to the device table which is shared by all AMD IOMMUs * it is indexed by the PCI device id or the HT unit id and contains @@ -1263,6 +1269,8 @@ static int __init amd_iommu_init(void) */ amd_iommu_pd_alloc_bitmap[0] = 1; + spin_lock_init(&amd_iommu_pd_lock); + /* * now the data structures are allocated and basically initialized * start the real acpi table scan -- cgit v1.2.3 From e3306664eb307ae4cc93211cd9f12d0dbd49de65 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 20 Nov 2009 16:48:58 +0100 Subject: x86/amd-iommu: Reimplement amd_iommu_flush_all_domains() This patch reimplementes the amd_iommu_flush_all_domains function to use the global protection domain list instead of flushing every domain on every IOMMU. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0c4319b13014..5141f5608c5c 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -530,10 +530,12 @@ static void flush_all_domains_on_iommu(struct amd_iommu *iommu) void amd_iommu_flush_all_domains(void) { - struct amd_iommu *iommu; + struct protection_domain *domain; - for_each_iommu(iommu) - flush_all_domains_on_iommu(iommu); + list_for_each_entry(domain, &amd_iommu_pd_list, list) { + iommu_flush_tlb_pde(domain); + iommu_flush_complete(domain); + } } static void flush_all_devices_for_iommu(struct amd_iommu *iommu) -- cgit v1.2.3 From 09b4280439ef6fdc55f1353a9135034336eb5d26 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 20 Nov 2009 17:02:44 +0100 Subject: x86/amd-iommu: Reimplement flush_all_domains_on_iommu() This patch reimplements the function flush_all_domains_on_iommu to use the global protection domain list. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 5141f5608c5c..a1bd99d390ab 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -499,43 +499,48 @@ static void iommu_flush_tlb_pde(struct protection_domain *domain) } /* - * This function flushes one domain on one IOMMU + * This function flushes all domains that have devices on the given IOMMU */ -static void flush_domain_on_iommu(struct amd_iommu *iommu, u16 domid) +static void flush_all_domains_on_iommu(struct amd_iommu *iommu) { - struct iommu_cmd cmd; + u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; + struct protection_domain *domain; unsigned long flags; - __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, - domid, 1, 1); - - spin_lock_irqsave(&iommu->lock, flags); - __iommu_queue_command(iommu, &cmd); - __iommu_completion_wait(iommu); - __iommu_wait_for_completion(iommu); - spin_unlock_irqrestore(&iommu->lock, flags); -} - -static void flush_all_domains_on_iommu(struct amd_iommu *iommu) -{ - int i; + spin_lock_irqsave(&amd_iommu_pd_lock, flags); - for (i = 1; i < MAX_DOMAIN_ID; ++i) { - if (!test_bit(i, amd_iommu_pd_alloc_bitmap)) + list_for_each_entry(domain, &amd_iommu_pd_list, list) { + if (domain->dev_iommu[iommu->index] == 0) continue; - flush_domain_on_iommu(iommu, i); + + spin_lock(&domain->lock); + iommu_queue_inv_iommu_pages(iommu, address, domain->id, 1, 1); + iommu_flush_complete(domain); + spin_unlock(&domain->lock); } + spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); } +/* + * This function uses heavy locking and may disable irqs for some time. But + * this is no issue because it is only called during resume. + */ void amd_iommu_flush_all_domains(void) { struct protection_domain *domain; + unsigned long flags; + + spin_lock_irqsave(&amd_iommu_pd_lock, flags); list_for_each_entry(domain, &amd_iommu_pd_list, list) { + spin_lock(&domain->lock); iommu_flush_tlb_pde(domain); iommu_flush_complete(domain); + spin_unlock(&domain->lock); } + + spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); } static void flush_all_devices_for_iommu(struct amd_iommu *iommu) -- cgit v1.2.3 From 318afd41d2eca3224de3fd85a3b9a27a3010a98d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 23 Nov 2009 18:32:38 +0100 Subject: x86/amd-iommu: Make np-cache a global flag The non-present cache flag was IOMMU local until now which doesn't make sense. Make this a global flag so we can remove the lase user of 'struct iommu' in the map/unmap path. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 3 +++ arch/x86/kernel/amd_iommu.c | 8 +------- arch/x86/kernel/amd_iommu_init.c | 6 ++++++ 3 files changed, 10 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index b332b7f7d8d6..4899f783df68 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -211,6 +211,9 @@ extern bool amd_iommu_dump; printk(KERN_INFO "AMD-Vi: " format, ## arg); \ } while(0); +/* global flag if IOMMUs cache non-present entries */ +extern bool amd_iommu_np_cache; + /* * Make iterating over all IOMMUs easier */ diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a1bd99d390ab..5ebd24e4fc57 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -131,12 +131,6 @@ static void amd_iommu_stats_init(void) #endif -/* returns !0 if the IOMMU is caching non-present entries in its TLB */ -static int iommu_has_npcache(struct amd_iommu *iommu) -{ - return iommu->cap & (1UL << IOMMU_CAP_NPCACHE); -} - /**************************************************************************** * * Interrupt handling functions @@ -1713,7 +1707,7 @@ retry: if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { iommu_flush_tlb(&dma_dom->domain); dma_dom->need_flush = false; - } else if (unlikely(iommu_has_npcache(iommu))) + } else if (unlikely(amd_iommu_np_cache)) iommu_flush_pages(&dma_dom->domain, address, size); out: diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 73d5173765d2..fbe4c3c02a91 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -141,6 +141,9 @@ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the struct amd_iommu *amd_iommus[MAX_IOMMUS]; int amd_iommus_present; +/* IOMMUs have a non-present cache? */ +bool amd_iommu_np_cache __read_mostly; + /* * List of protection domains - used during resume */ @@ -891,6 +894,9 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) init_iommu_from_acpi(iommu, h); init_iommu_devices(iommu); + if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) + amd_iommu_np_cache = true; + return pci_enable_device(iommu->dev); } -- cgit v1.2.3 From 420aef8a3acfc3e75427107e23d5a9bafd17c477 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 23 Nov 2009 16:14:57 +0100 Subject: x86/amd-iommu: Use check_device for amd_iommu_dma_supported The check_device logic needs to include the dma_supported checks to be really sure. Merge the dma_supported logic into check_device and use it to implement dma_supported. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 5ebd24e4fc57..ac27b1d6bd12 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1375,9 +1375,27 @@ static struct notifier_block device_nb = { */ static bool check_device(struct device *dev) { + u16 bdf; + struct pci_dev *pcidev; + if (!dev || !dev->dma_mask) return false; + /* No device or no PCI device */ + if (!dev || dev->bus != &pci_bus_type) + return false; + + pcidev = to_pci_dev(dev); + + bdf = calc_devid(pcidev->bus->number, pcidev->devfn); + + /* Out of our scope? */ + if (bdf > amd_iommu_last_bdf) + return false; + + if (amd_iommu_rlookup_table[bdf] == NULL) + return false; + return true; } @@ -2065,22 +2083,7 @@ free_mem: */ static int amd_iommu_dma_supported(struct device *dev, u64 mask) { - u16 bdf; - struct pci_dev *pcidev; - - /* No device or no PCI device */ - if (!dev || dev->bus != &pci_bus_type) - return 0; - - pcidev = to_pci_dev(dev); - - bdf = calc_devid(pcidev->bus->number, pcidev->devfn); - - /* Out of our scope? */ - if (bdf > amd_iommu_last_bdf) - return 0; - - return 1; + return check_device(dev); } /* -- cgit v1.2.3 From f99c0f1c75f75924a6f19cb40a21ccefc6e8754d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 23 Nov 2009 16:52:56 +0100 Subject: x86/amd-iommu: Use check_device in get_device_resources Every call-place of get_device_resources calls check_device before it. So call it from get_device_resources directly and simplify the code. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 86 +++++++++++++++------------------------------ 1 file changed, 28 insertions(+), 58 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index ac27b1d6bd12..c5102ebdcbd9 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1432,35 +1432,24 @@ static struct dma_ops_domain *find_protection_domain(u16 devid) * If the device is not yet associated with a domain this is also done * in this function. */ -static int get_device_resources(struct device *dev, - struct amd_iommu **iommu, - struct protection_domain **domain, - u16 *bdf) +static bool get_device_resources(struct device *dev, + struct amd_iommu **iommu, + struct protection_domain **domain, + u16 *bdf) { struct dma_ops_domain *dma_dom; struct pci_dev *pcidev; u16 _bdf; - *iommu = NULL; - *domain = NULL; - *bdf = 0xffff; - - if (dev->bus != &pci_bus_type) - return 0; - - pcidev = to_pci_dev(dev); - _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); - - /* device not translated by any IOMMU in the system? */ - if (_bdf > amd_iommu_last_bdf) - return 0; - - *bdf = amd_iommu_alias_table[_bdf]; + if (!check_device(dev)) + return false; - *iommu = amd_iommu_rlookup_table[*bdf]; - if (*iommu == NULL) - return 0; + pcidev = to_pci_dev(dev); + _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); + *bdf = amd_iommu_alias_table[_bdf]; + *iommu = amd_iommu_rlookup_table[*bdf]; *domain = domain_for_device(*bdf); + if (*domain == NULL) { dma_dom = find_protection_domain(*bdf); if (!dma_dom) @@ -1474,7 +1463,7 @@ static int get_device_resources(struct device *dev, if (domain_for_device(_bdf) == NULL) attach_device(*iommu, *domain, _bdf); - return 1; + return true; } static void update_device_table(struct protection_domain *domain) @@ -1797,17 +1786,12 @@ static dma_addr_t map_page(struct device *dev, struct page *page, INC_STATS_COUNTER(cnt_map_single); - if (!check_device(dev)) - return DMA_ERROR_CODE; - - dma_mask = *dev->dma_mask; - - get_device_resources(dev, &iommu, &domain, &devid); - - if (iommu == NULL || domain == NULL) + if (!get_device_resources(dev, &iommu, &domain, &devid)) /* device not handled by any AMD IOMMU */ return (dma_addr_t)paddr; + dma_mask = *dev->dma_mask; + if (!dma_ops_domain(domain)) return DMA_ERROR_CODE; @@ -1838,8 +1822,7 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, INC_STATS_COUNTER(cnt_unmap_single); - if (!check_device(dev) || - !get_device_resources(dev, &iommu, &domain, &devid)) + if (!get_device_resources(dev, &iommu, &domain, &devid)) /* device not handled by any AMD IOMMU */ return; @@ -1893,16 +1876,11 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, INC_STATS_COUNTER(cnt_map_sg); - if (!check_device(dev)) - return 0; + if (!get_device_resources(dev, &iommu, &domain, &devid)) + return map_sg_no_iommu(dev, sglist, nelems, dir); dma_mask = *dev->dma_mask; - get_device_resources(dev, &iommu, &domain, &devid); - - if (!iommu || !domain) - return map_sg_no_iommu(dev, sglist, nelems, dir); - if (!dma_ops_domain(domain)) return 0; @@ -1958,8 +1936,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, INC_STATS_COUNTER(cnt_unmap_sg); - if (!check_device(dev) || - !get_device_resources(dev, &iommu, &domain, &devid)) + if (!get_device_resources(dev, &iommu, &domain, &devid)) return; if (!dma_ops_domain(domain)) @@ -1994,24 +1971,22 @@ static void *alloc_coherent(struct device *dev, size_t size, INC_STATS_COUNTER(cnt_alloc_coherent); - if (!check_device(dev)) - return NULL; + if (!get_device_resources(dev, &iommu, &domain, &devid)) { + virt_addr = (void *)__get_free_pages(flag, get_order(size)); + *dma_addr = __pa(virt_addr); + return virt_addr; + } - if (!get_device_resources(dev, &iommu, &domain, &devid)) - flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + dma_mask = dev->coherent_dma_mask; + flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + flag |= __GFP_ZERO; - flag |= __GFP_ZERO; virt_addr = (void *)__get_free_pages(flag, get_order(size)); if (!virt_addr) return NULL; paddr = virt_to_phys(virt_addr); - if (!iommu || !domain) { - *dma_addr = (dma_addr_t)paddr; - return virt_addr; - } - if (!dma_ops_domain(domain)) goto out_free; @@ -2054,12 +2029,7 @@ static void free_coherent(struct device *dev, size_t size, INC_STATS_COUNTER(cnt_free_coherent); - if (!check_device(dev)) - return; - - get_device_resources(dev, &iommu, &domain, &devid); - - if (!iommu || !domain) + if (!get_device_resources(dev, &iommu, &domain, &devid)) goto free_mem; if (!dma_ops_domain(domain)) -- cgit v1.2.3 From 680525e06ddccda8c51bdddf532cd5b7d950c411 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 23 Nov 2009 18:44:42 +0100 Subject: x86/amd-iommu: Remove iommu parameter from dma_ops_domain_(un)map The parameter is unused in these function so remove it from the parameter list. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index c5102ebdcbd9..da3f9d8ee395 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1585,8 +1585,7 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom, * This is the generic map function. It maps one 4kb page at paddr to * the given address in the DMA address space for the domain. */ -static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, - struct dma_ops_domain *dom, +static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom, unsigned long address, phys_addr_t paddr, int direction) @@ -1620,8 +1619,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, /* * The generic unmapping function for on page in the DMA address space. */ -static void dma_ops_domain_unmap(struct amd_iommu *iommu, - struct dma_ops_domain *dom, +static void dma_ops_domain_unmap(struct dma_ops_domain *dom, unsigned long address) { struct aperture_range *aperture; @@ -1700,7 +1698,7 @@ retry: start = address; for (i = 0; i < pages; ++i) { - ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir); + ret = dma_ops_domain_map(dma_dom, start, paddr, dir); if (ret == DMA_ERROR_CODE) goto out_unmap; @@ -1724,7 +1722,7 @@ out_unmap: for (--i; i >= 0; --i) { start -= PAGE_SIZE; - dma_ops_domain_unmap(iommu, dma_dom, start); + dma_ops_domain_unmap(dma_dom, start); } dma_ops_free_addresses(dma_dom, address, pages); @@ -1754,7 +1752,7 @@ static void __unmap_single(struct amd_iommu *iommu, start = dma_addr; for (i = 0; i < pages; ++i) { - dma_ops_domain_unmap(iommu, dma_dom, start); + dma_ops_domain_unmap(dma_dom, start); start += PAGE_SIZE; } -- cgit v1.2.3 From 576175c2503ae9b0f930ee9a6a0abaf7ef8956ad Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 23 Nov 2009 19:08:46 +0100 Subject: x86/amd-iommu: Make alloc_new_range aware of multiple IOMMUs Since the assumption that an dma_ops domain is only bound to one IOMMU was given up we need to make alloc_new_range aware of it. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index da3f9d8ee395..687f617b95d7 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -788,11 +788,11 @@ static u64 *fetch_pte(struct protection_domain *domain, * aperture in case of dma_ops domain allocation or address allocation * failure. */ -static int alloc_new_range(struct amd_iommu *iommu, - struct dma_ops_domain *dma_dom, +static int alloc_new_range(struct dma_ops_domain *dma_dom, bool populate, gfp_t gfp) { int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; + struct amd_iommu *iommu; int i; #ifdef CONFIG_IOMMU_STRESS @@ -832,14 +832,17 @@ static int alloc_new_range(struct amd_iommu *iommu, dma_dom->aperture_size += APERTURE_RANGE_SIZE; /* Intialize the exclusion range if necessary */ - if (iommu->exclusion_start && - iommu->exclusion_start >= dma_dom->aperture[index]->offset && - iommu->exclusion_start < dma_dom->aperture_size) { - unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; - int pages = iommu_num_pages(iommu->exclusion_start, - iommu->exclusion_length, - PAGE_SIZE); - dma_ops_reserve_addresses(dma_dom, startpage, pages); + for_each_iommu(iommu) { + if (iommu->exclusion_start && + iommu->exclusion_start >= dma_dom->aperture[index]->offset + && iommu->exclusion_start < dma_dom->aperture_size) { + unsigned long startpage; + int pages = iommu_num_pages(iommu->exclusion_start, + iommu->exclusion_length, + PAGE_SIZE); + startpage = iommu->exclusion_start >> PAGE_SHIFT; + dma_ops_reserve_addresses(dma_dom, startpage, pages); + } } /* @@ -1143,7 +1146,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) add_domain_to_list(&dma_dom->domain); - if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL)) + if (alloc_new_range(dma_dom, true, GFP_KERNEL)) goto free_dma_dom; /* @@ -1686,7 +1689,7 @@ retry: */ dma_dom->next_address = dma_dom->aperture_size; - if (alloc_new_range(iommu, dma_dom, false, GFP_ATOMIC)) + if (alloc_new_range(dma_dom, false, GFP_ATOMIC)) goto out; /* -- cgit v1.2.3 From cd8c82e875c27ee0d8b59fb76bc12aa9db6a70c2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 23 Nov 2009 19:33:56 +0100 Subject: x86/amd-iommu: Remove iommu parameter from __(un)map_single With the prior changes this parameter is not longer required. This patch removes it from the function and all callers. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 687f617b95d7..c04dcb7f40b2 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1653,7 +1653,6 @@ static void dma_ops_domain_unmap(struct dma_ops_domain *dom, * Must be called with the domain lock held. */ static dma_addr_t __map_single(struct device *dev, - struct amd_iommu *iommu, struct dma_ops_domain *dma_dom, phys_addr_t paddr, size_t size, @@ -1737,8 +1736,7 @@ out_unmap: * Does the reverse of the __map_single function. Must be called with * the domain lock held too */ -static void __unmap_single(struct amd_iommu *iommu, - struct dma_ops_domain *dma_dom, +static void __unmap_single(struct dma_ops_domain *dma_dom, dma_addr_t dma_addr, size_t size, int dir) @@ -1797,7 +1795,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page, return DMA_ERROR_CODE; spin_lock_irqsave(&domain->lock, flags); - addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, + addr = __map_single(dev, domain->priv, paddr, size, dir, false, dma_mask); if (addr == DMA_ERROR_CODE) goto out; @@ -1832,7 +1830,7 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, spin_lock_irqsave(&domain->lock, flags); - __unmap_single(iommu, domain->priv, dma_addr, size, dir); + __unmap_single(domain->priv, dma_addr, size, dir); iommu_flush_complete(domain); @@ -1890,7 +1888,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, for_each_sg(sglist, s, nelems, i) { paddr = sg_phys(s); - s->dma_address = __map_single(dev, iommu, domain->priv, + s->dma_address = __map_single(dev, domain->priv, paddr, s->length, dir, false, dma_mask); @@ -1910,7 +1908,7 @@ out: unmap: for_each_sg(sglist, s, mapped_elems, i) { if (s->dma_address) - __unmap_single(iommu, domain->priv, s->dma_address, + __unmap_single(domain->priv, s->dma_address, s->dma_length, dir); s->dma_address = s->dma_length = 0; } @@ -1946,7 +1944,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, spin_lock_irqsave(&domain->lock, flags); for_each_sg(sglist, s, nelems, i) { - __unmap_single(iommu, domain->priv, s->dma_address, + __unmap_single(domain->priv, s->dma_address, s->dma_length, dir); s->dma_address = s->dma_length = 0; } @@ -1996,7 +1994,7 @@ static void *alloc_coherent(struct device *dev, size_t size, spin_lock_irqsave(&domain->lock, flags); - *dma_addr = __map_single(dev, iommu, domain->priv, paddr, + *dma_addr = __map_single(dev, domain->priv, paddr, size, DMA_BIDIRECTIONAL, true, dma_mask); if (*dma_addr == DMA_ERROR_CODE) { @@ -2038,7 +2036,7 @@ static void free_coherent(struct device *dev, size_t size, spin_lock_irqsave(&domain->lock, flags); - __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); + __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); iommu_flush_complete(domain); -- cgit v1.2.3 From f3be07da531ceef1b51295e5becc9bc07670b671 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 23 Nov 2009 19:43:14 +0100 Subject: x86/amd-iommu: Remove iommu specific handling from dma_ops path This patch finishes the removal of all iommu specific handling code in the dma_ops path. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index c04dcb7f40b2..2cd5800e6888 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1436,11 +1436,11 @@ static struct dma_ops_domain *find_protection_domain(u16 devid) * in this function. */ static bool get_device_resources(struct device *dev, - struct amd_iommu **iommu, struct protection_domain **domain, u16 *bdf) { struct dma_ops_domain *dma_dom; + struct amd_iommu *iommu; struct pci_dev *pcidev; u16 _bdf; @@ -1450,21 +1450,21 @@ static bool get_device_resources(struct device *dev, pcidev = to_pci_dev(dev); _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); *bdf = amd_iommu_alias_table[_bdf]; - *iommu = amd_iommu_rlookup_table[*bdf]; + iommu = amd_iommu_rlookup_table[*bdf]; *domain = domain_for_device(*bdf); if (*domain == NULL) { dma_dom = find_protection_domain(*bdf); if (!dma_dom) - dma_dom = (*iommu)->default_dom; + dma_dom = iommu->default_dom; *domain = &dma_dom->domain; - attach_device(*iommu, *domain, *bdf); + attach_device(iommu, *domain, *bdf); DUMP_printk("Using protection domain %d for device %s\n", (*domain)->id, dev_name(dev)); } if (domain_for_device(_bdf) == NULL) - attach_device(*iommu, *domain, _bdf); + attach_device(iommu, *domain, _bdf); return true; } @@ -1776,7 +1776,6 @@ static dma_addr_t map_page(struct device *dev, struct page *page, struct dma_attrs *attrs) { unsigned long flags; - struct amd_iommu *iommu; struct protection_domain *domain; u16 devid; dma_addr_t addr; @@ -1785,7 +1784,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page, INC_STATS_COUNTER(cnt_map_single); - if (!get_device_resources(dev, &iommu, &domain, &devid)) + if (!get_device_resources(dev, &domain, &devid)) /* device not handled by any AMD IOMMU */ return (dma_addr_t)paddr; @@ -1815,13 +1814,12 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { unsigned long flags; - struct amd_iommu *iommu; struct protection_domain *domain; u16 devid; INC_STATS_COUNTER(cnt_unmap_single); - if (!get_device_resources(dev, &iommu, &domain, &devid)) + if (!get_device_resources(dev, &domain, &devid)) /* device not handled by any AMD IOMMU */ return; @@ -1864,7 +1862,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, struct dma_attrs *attrs) { unsigned long flags; - struct amd_iommu *iommu; struct protection_domain *domain; u16 devid; int i; @@ -1875,7 +1872,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, INC_STATS_COUNTER(cnt_map_sg); - if (!get_device_resources(dev, &iommu, &domain, &devid)) + if (!get_device_resources(dev, &domain, &devid)) return map_sg_no_iommu(dev, sglist, nelems, dir); dma_mask = *dev->dma_mask; @@ -1927,7 +1924,6 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, struct dma_attrs *attrs) { unsigned long flags; - struct amd_iommu *iommu; struct protection_domain *domain; struct scatterlist *s; u16 devid; @@ -1935,7 +1931,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, INC_STATS_COUNTER(cnt_unmap_sg); - if (!get_device_resources(dev, &iommu, &domain, &devid)) + if (!get_device_resources(dev, &domain, &devid)) return; if (!dma_ops_domain(domain)) @@ -1962,7 +1958,6 @@ static void *alloc_coherent(struct device *dev, size_t size, { unsigned long flags; void *virt_addr; - struct amd_iommu *iommu; struct protection_domain *domain; u16 devid; phys_addr_t paddr; @@ -1970,7 +1965,7 @@ static void *alloc_coherent(struct device *dev, size_t size, INC_STATS_COUNTER(cnt_alloc_coherent); - if (!get_device_resources(dev, &iommu, &domain, &devid)) { + if (!get_device_resources(dev, &domain, &devid)) { virt_addr = (void *)__get_free_pages(flag, get_order(size)); *dma_addr = __pa(virt_addr); return virt_addr; @@ -2022,13 +2017,12 @@ static void free_coherent(struct device *dev, size_t size, void *virt_addr, dma_addr_t dma_addr) { unsigned long flags; - struct amd_iommu *iommu; struct protection_domain *domain; u16 devid; INC_STATS_COUNTER(cnt_free_coherent); - if (!get_device_resources(dev, &iommu, &domain, &devid)) + if (!get_device_resources(dev, &domain, &devid)) goto free_mem; if (!dma_ops_domain(domain)) -- cgit v1.2.3 From 15898bbcb48fc86c2baff156163df0941ecb6a15 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 24 Nov 2009 15:39:42 +0100 Subject: x86/amd-iommu: Let domain_for_device handle aliases If there is no domain associated to a device yet and the device has an alias device which already has a domain, the original device needs to have the same domain as the alias device. This patch changes domain_for_device to handle this situation and directly assigns the alias device domain to the device in this situation. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 227 ++++++++++++++++++++++++++------------------ 1 file changed, 135 insertions(+), 92 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 2cd5800e6888..75470ffee358 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -71,6 +71,19 @@ static u64 *fetch_pte(struct protection_domain *domain, unsigned long address, int map_size); static void update_domain(struct protection_domain *domain); +/**************************************************************************** + * + * Helper functions + * + ****************************************************************************/ + +static inline u16 get_device_id(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + return calc_devid(pdev->bus->number, pdev->devfn); +} + #ifdef CONFIG_AMD_IOMMU_STATS /* @@ -1174,26 +1187,13 @@ static bool dma_ops_domain(struct protection_domain *domain) return domain->flags & PD_DMA_OPS_MASK; } -/* - * Find out the protection domain structure for a given PCI device. This - * will give us the pointer to the page table root for example. - */ -static struct protection_domain *domain_for_device(u16 devid) -{ - struct protection_domain *dom; - unsigned long flags; - - read_lock_irqsave(&amd_iommu_devtable_lock, flags); - dom = amd_iommu_pd_table[devid]; - read_unlock_irqrestore(&amd_iommu_devtable_lock, flags); - - return dom; -} - static void set_dte_entry(u16 devid, struct protection_domain *domain) { + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; u64 pte_root = virt_to_phys(domain->pt_root); + BUG_ON(amd_iommu_pd_table[devid] != NULL); + pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; @@ -1203,42 +1203,87 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain) amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); amd_iommu_pd_table[devid] = domain; + + /* Do reference counting */ + domain->dev_iommu[iommu->index] += 1; + domain->dev_cnt += 1; + + /* Flush the changes DTE entry */ + iommu_queue_inv_dev_entry(iommu, devid); +} + +static void clear_dte_entry(u16 devid) +{ + struct protection_domain *domain = amd_iommu_pd_table[devid]; + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + + BUG_ON(domain == NULL); + + /* remove domain from the lookup table */ + amd_iommu_pd_table[devid] = NULL; + + /* remove entry from the device table seen by the hardware */ + amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; + amd_iommu_dev_table[devid].data[1] = 0; + amd_iommu_dev_table[devid].data[2] = 0; + + amd_iommu_apply_erratum_63(devid); + + /* decrease reference counters */ + domain->dev_iommu[iommu->index] -= 1; + domain->dev_cnt -= 1; + + iommu_queue_inv_dev_entry(iommu, devid); } /* * If a device is not yet associated with a domain, this function does * assigns it visible for the hardware */ -static void __attach_device(struct amd_iommu *iommu, - struct protection_domain *domain, - u16 devid) +static int __attach_device(struct device *dev, + struct protection_domain *domain) { + u16 devid = get_device_id(dev); + u16 alias = amd_iommu_alias_table[devid]; + /* lock domain */ spin_lock(&domain->lock); - /* update DTE entry */ - set_dte_entry(devid, domain); + /* Some sanity checks */ + if (amd_iommu_pd_table[alias] != NULL && + amd_iommu_pd_table[alias] != domain) + return -EBUSY; - /* Do reference counting */ - domain->dev_iommu[iommu->index] += 1; - domain->dev_cnt += 1; + if (amd_iommu_pd_table[devid] != NULL && + amd_iommu_pd_table[devid] != domain) + return -EBUSY; + + /* Do real assignment */ + if (alias != devid && + amd_iommu_pd_table[alias] == NULL) + set_dte_entry(alias, domain); + + if (amd_iommu_pd_table[devid] == NULL) + set_dte_entry(devid, domain); /* ready */ spin_unlock(&domain->lock); + + return 0; } /* * If a device is not yet associated with a domain, this function does * assigns it visible for the hardware */ -static void attach_device(struct amd_iommu *iommu, - struct protection_domain *domain, - u16 devid) +static int attach_device(struct device *dev, + struct protection_domain *domain) { unsigned long flags; + int ret; write_lock_irqsave(&amd_iommu_devtable_lock, flags); - __attach_device(iommu, domain, devid); + ret = __attach_device(dev, domain); write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); /* @@ -1246,62 +1291,70 @@ static void attach_device(struct amd_iommu *iommu, * left the caches in the IOMMU dirty. So we have to flush * here to evict all dirty stuff. */ - iommu_queue_inv_dev_entry(iommu, devid); iommu_flush_tlb_pde(domain); + + return ret; } /* * Removes a device from a protection domain (unlocked) */ -static void __detach_device(struct protection_domain *domain, u16 devid) +static void __detach_device(struct device *dev) { + u16 devid = get_device_id(dev); struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; BUG_ON(!iommu); - /* lock domain */ - spin_lock(&domain->lock); - - /* remove domain from the lookup table */ - amd_iommu_pd_table[devid] = NULL; - - /* remove entry from the device table seen by the hardware */ - amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; - amd_iommu_dev_table[devid].data[1] = 0; - amd_iommu_dev_table[devid].data[2] = 0; - - amd_iommu_apply_erratum_63(devid); - - /* decrease reference counters */ - domain->dev_iommu[iommu->index] -= 1; - domain->dev_cnt -= 1; - - /* ready */ - spin_unlock(&domain->lock); + clear_dte_entry(devid); /* * If we run in passthrough mode the device must be assigned to the * passthrough domain if it is detached from any other domain */ - if (iommu_pass_through) { - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; - __attach_device(iommu, pt_domain, devid); - } + if (iommu_pass_through) + __attach_device(dev, pt_domain); } /* * Removes a device from a protection domain (with devtable_lock held) */ -static void detach_device(struct protection_domain *domain, u16 devid) +static void detach_device(struct device *dev) { unsigned long flags; /* lock device table */ write_lock_irqsave(&amd_iommu_devtable_lock, flags); - __detach_device(domain, devid); + __detach_device(dev); write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); } +/* + * Find out the protection domain structure for a given PCI device. This + * will give us the pointer to the page table root for example. + */ +static struct protection_domain *domain_for_device(struct device *dev) +{ + struct protection_domain *dom; + unsigned long flags; + u16 devid, alias; + + devid = get_device_id(dev); + alias = amd_iommu_alias_table[devid]; + + read_lock_irqsave(&amd_iommu_devtable_lock, flags); + dom = amd_iommu_pd_table[devid]; + if (dom == NULL && + amd_iommu_pd_table[alias] != NULL) { + __attach_device(dev, amd_iommu_pd_table[alias]); + dom = amd_iommu_pd_table[devid]; + } + + read_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + return dom; +} + static int device_change_notifier(struct notifier_block *nb, unsigned long action, void *data) { @@ -1322,7 +1375,7 @@ static int device_change_notifier(struct notifier_block *nb, if (iommu == NULL) goto out; - domain = domain_for_device(devid); + domain = domain_for_device(dev); if (domain && !dma_ops_domain(domain)) WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound " @@ -1334,7 +1387,7 @@ static int device_change_notifier(struct notifier_block *nb, goto out; if (iommu_pass_through) break; - detach_device(domain, devid); + detach_device(dev); break; case BUS_NOTIFY_ADD_DEVICE: /* allocate a protection domain if a device is added */ @@ -1441,30 +1494,25 @@ static bool get_device_resources(struct device *dev, { struct dma_ops_domain *dma_dom; struct amd_iommu *iommu; - struct pci_dev *pcidev; - u16 _bdf; if (!check_device(dev)) return false; - pcidev = to_pci_dev(dev); - _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); - *bdf = amd_iommu_alias_table[_bdf]; + *bdf = get_device_id(dev); + *domain = domain_for_device(dev); iommu = amd_iommu_rlookup_table[*bdf]; - *domain = domain_for_device(*bdf); - if (*domain == NULL) { - dma_dom = find_protection_domain(*bdf); - if (!dma_dom) - dma_dom = iommu->default_dom; - *domain = &dma_dom->domain; - attach_device(iommu, *domain, *bdf); - DUMP_printk("Using protection domain %d for device %s\n", - (*domain)->id, dev_name(dev)); - } + if (*domain != NULL) + return true; - if (domain_for_device(_bdf) == NULL) - attach_device(iommu, *domain, _bdf); + /* Device not bount yet - bind it */ + dma_dom = find_protection_domain(*bdf); + if (!dma_dom) + dma_dom = iommu->default_dom; + *domain = &dma_dom->domain; + attach_device(dev, *domain); + DUMP_printk("Using protection domain %d for device %s\n", + (*domain)->id, dev_name(dev)); return true; } @@ -2068,7 +2116,7 @@ static void prealloc_protection_domains(void) if (devid > amd_iommu_last_bdf) continue; devid = amd_iommu_alias_table[devid]; - if (domain_for_device(devid)) + if (domain_for_device(&dev->dev)) continue; iommu = amd_iommu_rlookup_table[devid]; if (!iommu) @@ -2079,9 +2127,7 @@ static void prealloc_protection_domains(void) init_unity_mappings_for_device(dma_dom, devid); dma_dom->target_dev = devid; - attach_device(iommu, &dma_dom->domain, devid); - if (__devid != devid) - attach_device(iommu, &dma_dom->domain, __devid); + attach_device(&dev->dev, &dma_dom->domain); list_add_tail(&dma_dom->list, &iommu_pd_list); } @@ -2174,7 +2220,7 @@ static void cleanup_domain(struct protection_domain *domain) for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) if (amd_iommu_pd_table[devid] == domain) - __detach_device(domain, devid); + clear_dte_entry(devid); write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); } @@ -2262,7 +2308,6 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom) static void amd_iommu_detach_device(struct iommu_domain *dom, struct device *dev) { - struct protection_domain *domain = dom->priv; struct amd_iommu *iommu; struct pci_dev *pdev; u16 devid; @@ -2275,7 +2320,7 @@ static void amd_iommu_detach_device(struct iommu_domain *dom, devid = calc_devid(pdev->bus->number, pdev->devfn); if (devid > 0) - detach_device(domain, devid); + detach_device(dev); iommu = amd_iommu_rlookup_table[devid]; if (!iommu) @@ -2292,6 +2337,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, struct protection_domain *old_domain; struct amd_iommu *iommu; struct pci_dev *pdev; + int ret; u16 devid; if (dev->bus != &pci_bus_type) @@ -2309,15 +2355,15 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, if (!iommu) return -EINVAL; - old_domain = domain_for_device(devid); + old_domain = amd_iommu_pd_table[devid]; if (old_domain) - detach_device(old_domain, devid); + detach_device(dev); - attach_device(iommu, domain, devid); + ret = attach_device(dev, domain); iommu_completion_wait(iommu); - return 0; + return ret; } static int amd_iommu_map_range(struct iommu_domain *dom, @@ -2414,8 +2460,9 @@ static struct iommu_ops amd_iommu_ops = { int __init amd_iommu_init_passthrough(void) { + struct amd_iommu *iommu; struct pci_dev *dev = NULL; - u16 devid, devid2; + u16 devid; /* allocate passthroug domain */ pt_domain = protection_domain_alloc(); @@ -2425,20 +2472,16 @@ int __init amd_iommu_init_passthrough(void) pt_domain->mode |= PAGE_MODE_NONE; while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - struct amd_iommu *iommu; devid = calc_devid(dev->bus->number, dev->devfn); if (devid > amd_iommu_last_bdf) continue; - devid2 = amd_iommu_alias_table[devid]; - - iommu = amd_iommu_rlookup_table[devid2]; + iommu = amd_iommu_rlookup_table[devid]; if (!iommu) continue; - __attach_device(iommu, pt_domain, devid); - __attach_device(iommu, pt_domain, devid2); + attach_device(&dev->dev, pt_domain); } pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); -- cgit v1.2.3 From 94f6d190eeed91cb2bb901aa7816edd1e2405347 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 24 Nov 2009 16:40:02 +0100 Subject: x86/amd-iommu: Simplify get_device_resources() With the previous changes the get_device_resources function can be simplified even more. The only important information for the callers is the protection domain. This patch renames the function to get_domain() and let it only return the protection domain for a device. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 86 +++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 50 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 75470ffee358..e5bbe9a0c192 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1463,6 +1463,7 @@ static struct dma_ops_domain *find_protection_domain(u16 devid) { struct dma_ops_domain *entry, *ret = NULL; unsigned long flags; + u16 alias = amd_iommu_alias_table[devid]; if (list_empty(&iommu_pd_list)) return NULL; @@ -1470,7 +1471,8 @@ static struct dma_ops_domain *find_protection_domain(u16 devid) spin_lock_irqsave(&iommu_pd_list_lock, flags); list_for_each_entry(entry, &iommu_pd_list, list) { - if (entry->target_dev == devid) { + if (entry->target_dev == devid || + entry->target_dev == alias) { ret = entry; break; } @@ -1488,33 +1490,31 @@ static struct dma_ops_domain *find_protection_domain(u16 devid) * If the device is not yet associated with a domain this is also done * in this function. */ -static bool get_device_resources(struct device *dev, - struct protection_domain **domain, - u16 *bdf) +static struct protection_domain *get_domain(struct device *dev) { + struct protection_domain *domain; struct dma_ops_domain *dma_dom; - struct amd_iommu *iommu; + u16 devid = get_device_id(dev); if (!check_device(dev)) - return false; + return ERR_PTR(-EINVAL); - *bdf = get_device_id(dev); - *domain = domain_for_device(dev); - iommu = amd_iommu_rlookup_table[*bdf]; + domain = domain_for_device(dev); + if (domain != NULL && !dma_ops_domain(domain)) + return ERR_PTR(-EBUSY); - if (*domain != NULL) - return true; + if (domain != NULL) + return domain; /* Device not bount yet - bind it */ - dma_dom = find_protection_domain(*bdf); + dma_dom = find_protection_domain(devid); if (!dma_dom) - dma_dom = iommu->default_dom; - *domain = &dma_dom->domain; - attach_device(dev, *domain); + dma_dom = amd_iommu_rlookup_table[devid]->default_dom; + attach_device(dev, &dma_dom->domain); DUMP_printk("Using protection domain %d for device %s\n", - (*domain)->id, dev_name(dev)); + dma_dom->domain.id, dev_name(dev)); - return true; + return &dma_dom->domain; } static void update_device_table(struct protection_domain *domain) @@ -1825,23 +1825,22 @@ static dma_addr_t map_page(struct device *dev, struct page *page, { unsigned long flags; struct protection_domain *domain; - u16 devid; dma_addr_t addr; u64 dma_mask; phys_addr_t paddr = page_to_phys(page) + offset; INC_STATS_COUNTER(cnt_map_single); - if (!get_device_resources(dev, &domain, &devid)) - /* device not handled by any AMD IOMMU */ + domain = get_domain(dev); + if (PTR_ERR(domain) == -EINVAL) return (dma_addr_t)paddr; + else if (IS_ERR(domain)) + return DMA_ERROR_CODE; dma_mask = *dev->dma_mask; - if (!dma_ops_domain(domain)) - return DMA_ERROR_CODE; - spin_lock_irqsave(&domain->lock, flags); + addr = __map_single(dev, domain->priv, paddr, size, dir, false, dma_mask); if (addr == DMA_ERROR_CODE) @@ -1863,15 +1862,11 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, { unsigned long flags; struct protection_domain *domain; - u16 devid; INC_STATS_COUNTER(cnt_unmap_single); - if (!get_device_resources(dev, &domain, &devid)) - /* device not handled by any AMD IOMMU */ - return; - - if (!dma_ops_domain(domain)) + domain = get_domain(dev); + if (IS_ERR(domain)) return; spin_lock_irqsave(&domain->lock, flags); @@ -1911,7 +1906,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, { unsigned long flags; struct protection_domain *domain; - u16 devid; int i; struct scatterlist *s; phys_addr_t paddr; @@ -1920,14 +1914,14 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, INC_STATS_COUNTER(cnt_map_sg); - if (!get_device_resources(dev, &domain, &devid)) + domain = get_domain(dev); + if (PTR_ERR(domain) == -EINVAL) return map_sg_no_iommu(dev, sglist, nelems, dir); + else if (IS_ERR(domain)) + return 0; dma_mask = *dev->dma_mask; - if (!dma_ops_domain(domain)) - return 0; - spin_lock_irqsave(&domain->lock, flags); for_each_sg(sglist, s, nelems, i) { @@ -1974,15 +1968,12 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, unsigned long flags; struct protection_domain *domain; struct scatterlist *s; - u16 devid; int i; INC_STATS_COUNTER(cnt_unmap_sg); - if (!get_device_resources(dev, &domain, &devid)) - return; - - if (!dma_ops_domain(domain)) + domain = get_domain(dev); + if (IS_ERR(domain)) return; spin_lock_irqsave(&domain->lock, flags); @@ -2007,17 +1998,18 @@ static void *alloc_coherent(struct device *dev, size_t size, unsigned long flags; void *virt_addr; struct protection_domain *domain; - u16 devid; phys_addr_t paddr; u64 dma_mask = dev->coherent_dma_mask; INC_STATS_COUNTER(cnt_alloc_coherent); - if (!get_device_resources(dev, &domain, &devid)) { + domain = get_domain(dev); + if (PTR_ERR(domain) == -EINVAL) { virt_addr = (void *)__get_free_pages(flag, get_order(size)); *dma_addr = __pa(virt_addr); return virt_addr; - } + } else if (IS_ERR(domain)) + return NULL; dma_mask = dev->coherent_dma_mask; flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); @@ -2029,9 +2021,6 @@ static void *alloc_coherent(struct device *dev, size_t size, paddr = virt_to_phys(virt_addr); - if (!dma_ops_domain(domain)) - goto out_free; - if (!dma_mask) dma_mask = *dev->dma_mask; @@ -2066,14 +2055,11 @@ static void free_coherent(struct device *dev, size_t size, { unsigned long flags; struct protection_domain *domain; - u16 devid; INC_STATS_COUNTER(cnt_free_coherent); - if (!get_device_resources(dev, &domain, &devid)) - goto free_mem; - - if (!dma_ops_domain(domain)) + domain = get_domain(dev); + if (IS_ERR(domain)) goto free_mem; spin_lock_irqsave(&domain->lock, flags); -- cgit v1.2.3 From 71c70984e5afc20d304fbb523f1c8bb42c4ceb36 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 24 Nov 2009 16:43:06 +0100 Subject: x86/amd-iommu: Move find_protection_domain to helper functions This is a helper function and when its placed in the helper function section we can remove its forward declaration. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 57 ++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 29 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index e5bbe9a0c192..405f8dad7c77 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -59,7 +59,6 @@ struct iommu_cmd { static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, struct unity_map_entry *e); -static struct dma_ops_domain *find_protection_domain(u16 devid); static u64 *alloc_pte(struct protection_domain *domain, unsigned long address, int end_lvl, u64 **pte_page, gfp_t gfp); @@ -84,6 +83,34 @@ static inline u16 get_device_id(struct device *dev) return calc_devid(pdev->bus->number, pdev->devfn); } +/* + * In this function the list of preallocated protection domains is traversed to + * find the domain for a specific device + */ +static struct dma_ops_domain *find_protection_domain(u16 devid) +{ + struct dma_ops_domain *entry, *ret = NULL; + unsigned long flags; + u16 alias = amd_iommu_alias_table[devid]; + + if (list_empty(&iommu_pd_list)) + return NULL; + + spin_lock_irqsave(&iommu_pd_list_lock, flags); + + list_for_each_entry(entry, &iommu_pd_list, list) { + if (entry->target_dev == devid || + entry->target_dev == alias) { + ret = entry; + break; + } + } + + spin_unlock_irqrestore(&iommu_pd_list_lock, flags); + + return ret; +} + #ifdef CONFIG_AMD_IOMMU_STATS /* @@ -1455,34 +1482,6 @@ static bool check_device(struct device *dev) return true; } -/* - * In this function the list of preallocated protection domains is traversed to - * find the domain for a specific device - */ -static struct dma_ops_domain *find_protection_domain(u16 devid) -{ - struct dma_ops_domain *entry, *ret = NULL; - unsigned long flags; - u16 alias = amd_iommu_alias_table[devid]; - - if (list_empty(&iommu_pd_list)) - return NULL; - - spin_lock_irqsave(&iommu_pd_list_lock, flags); - - list_for_each_entry(entry, &iommu_pd_list, list) { - if (entry->target_dev == devid || - entry->target_dev == alias) { - ret = entry; - break; - } - } - - spin_unlock_irqrestore(&iommu_pd_list_lock, flags); - - return ret; -} - /* * In the dma_ops path we only have the struct device. This function * finds the corresponding IOMMU, the protection domain and the -- cgit v1.2.3 From 98fc5a693bbdda498a556654c70d1e31a186c988 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 24 Nov 2009 17:19:23 +0100 Subject: x86/amd-iommu: Use get_device_id and check_device where appropriate The logic of these two functions is reimplemented (at least in parts) in places in the code. This patch removes these code duplications and uses the functions instead. As a side effect it moves check_device() to the helper function code section. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 110 ++++++++++++++++++++------------------------ 1 file changed, 49 insertions(+), 61 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 405f8dad7c77..d10195b685a7 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -111,6 +111,33 @@ static struct dma_ops_domain *find_protection_domain(u16 devid) return ret; } +/* + * This function checks if the driver got a valid device from the caller to + * avoid dereferencing invalid pointers. + */ +static bool check_device(struct device *dev) +{ + u16 devid; + + if (!dev || !dev->dma_mask) + return false; + + /* No device or no PCI device */ + if (!dev || dev->bus != &pci_bus_type) + return false; + + devid = get_device_id(dev); + + /* Out of our scope? */ + if (devid > amd_iommu_last_bdf) + return false; + + if (amd_iommu_rlookup_table[devid] == NULL) + return false; + + return true; +} + #ifdef CONFIG_AMD_IOMMU_STATS /* @@ -1386,22 +1413,17 @@ static int device_change_notifier(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; - struct pci_dev *pdev = to_pci_dev(dev); - u16 devid = calc_devid(pdev->bus->number, pdev->devfn); + u16 devid; struct protection_domain *domain; struct dma_ops_domain *dma_domain; struct amd_iommu *iommu; unsigned long flags; - if (devid > amd_iommu_last_bdf) - goto out; - - devid = amd_iommu_alias_table[devid]; - - iommu = amd_iommu_rlookup_table[devid]; - if (iommu == NULL) - goto out; + if (!check_device(dev)) + return 0; + devid = get_device_id(dev); + iommu = amd_iommu_rlookup_table[devid]; domain = domain_for_device(dev); if (domain && !dma_ops_domain(domain)) @@ -1452,36 +1474,6 @@ static struct notifier_block device_nb = { * *****************************************************************************/ -/* - * This function checks if the driver got a valid device from the caller to - * avoid dereferencing invalid pointers. - */ -static bool check_device(struct device *dev) -{ - u16 bdf; - struct pci_dev *pcidev; - - if (!dev || !dev->dma_mask) - return false; - - /* No device or no PCI device */ - if (!dev || dev->bus != &pci_bus_type) - return false; - - pcidev = to_pci_dev(dev); - - bdf = calc_devid(pcidev->bus->number, pcidev->devfn); - - /* Out of our scope? */ - if (bdf > amd_iommu_last_bdf) - return false; - - if (amd_iommu_rlookup_table[bdf] == NULL) - return false; - - return true; -} - /* * In the dma_ops path we only have the struct device. This function * finds the corresponding IOMMU, the protection domain and the @@ -2094,15 +2086,20 @@ static void prealloc_protection_domains(void) struct pci_dev *dev = NULL; struct dma_ops_domain *dma_dom; struct amd_iommu *iommu; - u16 devid, __devid; + u16 devid; while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - __devid = devid = calc_devid(dev->bus->number, dev->devfn); - if (devid > amd_iommu_last_bdf) + + /* Do we handle this device? */ + if (!check_device(&dev->dev)) continue; - devid = amd_iommu_alias_table[devid]; + + /* Is there already any domain for it? */ if (domain_for_device(&dev->dev)) continue; + + devid = get_device_id(&dev->dev); + iommu = amd_iommu_rlookup_table[devid]; if (!iommu) continue; @@ -2294,17 +2291,14 @@ static void amd_iommu_detach_device(struct iommu_domain *dom, struct device *dev) { struct amd_iommu *iommu; - struct pci_dev *pdev; u16 devid; - if (dev->bus != &pci_bus_type) + if (!check_device(dev)) return; - pdev = to_pci_dev(dev); - - devid = calc_devid(pdev->bus->number, pdev->devfn); + devid = get_device_id(dev); - if (devid > 0) + if (amd_iommu_pd_table[devid] != NULL) detach_device(dev); iommu = amd_iommu_rlookup_table[devid]; @@ -2321,20 +2315,13 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, struct protection_domain *domain = dom->priv; struct protection_domain *old_domain; struct amd_iommu *iommu; - struct pci_dev *pdev; int ret; u16 devid; - if (dev->bus != &pci_bus_type) + if (!check_device(dev)) return -EINVAL; - pdev = to_pci_dev(dev); - - devid = calc_devid(pdev->bus->number, pdev->devfn); - - if (devid >= amd_iommu_last_bdf || - devid != amd_iommu_alias_table[devid]) - return -EINVAL; + devid = get_device_id(dev); iommu = amd_iommu_rlookup_table[devid]; if (!iommu) @@ -2458,10 +2445,11 @@ int __init amd_iommu_init_passthrough(void) while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - devid = calc_devid(dev->bus->number, dev->devfn); - if (devid > amd_iommu_last_bdf) + if (!check_device(&dev->dev)) continue; + devid = get_device_id(&dev->dev); + iommu = amd_iommu_rlookup_table[devid]; if (!iommu) continue; -- cgit v1.2.3 From 87a64d523825351a23743e69949c2a8c2077cecf Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 24 Nov 2009 17:26:43 +0100 Subject: x86/amd-iommu: Remove iommu parameter from dma_ops_domain_alloc This function doesn't use the parameter anymore so it can be removed. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index d10195b685a7..17e83ecb8b22 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1188,7 +1188,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) * It also intializes the page table and the address allocator data * structures required for the dma_ops interface */ -static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) +static struct dma_ops_domain *dma_ops_domain_alloc(void) { struct dma_ops_domain *dma_dom; @@ -1443,7 +1443,7 @@ static int device_change_notifier(struct notifier_block *nb, dma_domain = find_protection_domain(devid); if (dma_domain) goto out; - dma_domain = dma_ops_domain_alloc(iommu); + dma_domain = dma_ops_domain_alloc(); if (!dma_domain) goto out; dma_domain->target_dev = devid; @@ -2085,7 +2085,6 @@ static void prealloc_protection_domains(void) { struct pci_dev *dev = NULL; struct dma_ops_domain *dma_dom; - struct amd_iommu *iommu; u16 devid; while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { @@ -2100,10 +2099,7 @@ static void prealloc_protection_domains(void) devid = get_device_id(&dev->dev); - iommu = amd_iommu_rlookup_table[devid]; - if (!iommu) - continue; - dma_dom = dma_ops_domain_alloc(iommu); + dma_dom = dma_ops_domain_alloc(); if (!dma_dom) continue; init_unity_mappings_for_device(dma_dom, devid); @@ -2139,7 +2135,7 @@ int __init amd_iommu_init_dma_ops(void) * protection domain will be assigned to the default one. */ for_each_iommu(iommu) { - iommu->default_dom = dma_ops_domain_alloc(iommu); + iommu->default_dom = dma_ops_domain_alloc(); if (iommu->default_dom == NULL) return -ENOMEM; iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; -- cgit v1.2.3 From 308973d3b958b9328a1051642c81ee6dbc5021a4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 24 Nov 2009 17:43:32 +0100 Subject: x86/amd-iommu: Move some pte allocation functions in the right section This patch moves alloc_pte() and fetch_pte() into the page table handling code section so that the forward declarations for them could be removed. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 193 +++++++++++++++++++++----------------------- 1 file changed, 94 insertions(+), 99 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 17e83ecb8b22..90b365024c24 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -59,15 +59,10 @@ struct iommu_cmd { static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, struct unity_map_entry *e); -static u64 *alloc_pte(struct protection_domain *domain, - unsigned long address, int end_lvl, - u64 **pte_page, gfp_t gfp); static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, unsigned long start_page, unsigned int pages); static void reset_iommu_command_buffer(struct amd_iommu *iommu); -static u64 *fetch_pte(struct protection_domain *domain, - unsigned long address, int map_size); static void update_domain(struct protection_domain *domain); /**************************************************************************** @@ -664,6 +659,100 @@ void amd_iommu_flush_all_devices(void) * ****************************************************************************/ +/* + * This function is used to add another level to an IO page table. Adding + * another level increases the size of the address space by 9 bits to a size up + * to 64 bits. + */ +static bool increase_address_space(struct protection_domain *domain, + gfp_t gfp) +{ + u64 *pte; + + if (domain->mode == PAGE_MODE_6_LEVEL) + /* address space already 64 bit large */ + return false; + + pte = (void *)get_zeroed_page(gfp); + if (!pte) + return false; + + *pte = PM_LEVEL_PDE(domain->mode, + virt_to_phys(domain->pt_root)); + domain->pt_root = pte; + domain->mode += 1; + domain->updated = true; + + return true; +} + +static u64 *alloc_pte(struct protection_domain *domain, + unsigned long address, + int end_lvl, + u64 **pte_page, + gfp_t gfp) +{ + u64 *pte, *page; + int level; + + while (address > PM_LEVEL_SIZE(domain->mode)) + increase_address_space(domain, gfp); + + level = domain->mode - 1; + pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; + + while (level > end_lvl) { + if (!IOMMU_PTE_PRESENT(*pte)) { + page = (u64 *)get_zeroed_page(gfp); + if (!page) + return NULL; + *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); + } + + level -= 1; + + pte = IOMMU_PTE_PAGE(*pte); + + if (pte_page && level == end_lvl) + *pte_page = pte; + + pte = &pte[PM_LEVEL_INDEX(level, address)]; + } + + return pte; +} + +/* + * This function checks if there is a PTE for a given dma address. If + * there is one, it returns the pointer to it. + */ +static u64 *fetch_pte(struct protection_domain *domain, + unsigned long address, int map_size) +{ + int level; + u64 *pte; + + level = domain->mode - 1; + pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; + + while (level > map_size) { + if (!IOMMU_PTE_PRESENT(*pte)) + return NULL; + + level -= 1; + + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[PM_LEVEL_INDEX(level, address)]; + + if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) { + pte = NULL; + break; + } + } + + return pte; +} + /* * Generic mapping functions. It maps a physical address into a DMA * address space. It allocates the page table pages if necessary. @@ -819,37 +908,6 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, * called with domain->lock held */ -/* - * This function checks if there is a PTE for a given dma address. If - * there is one, it returns the pointer to it. - */ -static u64 *fetch_pte(struct protection_domain *domain, - unsigned long address, int map_size) -{ - int level; - u64 *pte; - - level = domain->mode - 1; - pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; - - while (level > map_size) { - if (!IOMMU_PTE_PRESENT(*pte)) - return NULL; - - level -= 1; - - pte = IOMMU_PTE_PAGE(*pte); - pte = &pte[PM_LEVEL_INDEX(level, address)]; - - if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) { - pte = NULL; - break; - } - } - - return pte; -} - /* * This function is used to add a new aperture range to an existing * aperture in case of dma_ops domain allocation or address allocation @@ -1534,69 +1592,6 @@ static void update_domain(struct protection_domain *domain) domain->updated = false; } -/* - * This function is used to add another level to an IO page table. Adding - * another level increases the size of the address space by 9 bits to a size up - * to 64 bits. - */ -static bool increase_address_space(struct protection_domain *domain, - gfp_t gfp) -{ - u64 *pte; - - if (domain->mode == PAGE_MODE_6_LEVEL) - /* address space already 64 bit large */ - return false; - - pte = (void *)get_zeroed_page(gfp); - if (!pte) - return false; - - *pte = PM_LEVEL_PDE(domain->mode, - virt_to_phys(domain->pt_root)); - domain->pt_root = pte; - domain->mode += 1; - domain->updated = true; - - return true; -} - -static u64 *alloc_pte(struct protection_domain *domain, - unsigned long address, - int end_lvl, - u64 **pte_page, - gfp_t gfp) -{ - u64 *pte, *page; - int level; - - while (address > PM_LEVEL_SIZE(domain->mode)) - increase_address_space(domain, gfp); - - level = domain->mode - 1; - pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; - - while (level > end_lvl) { - if (!IOMMU_PTE_PRESENT(*pte)) { - page = (u64 *)get_zeroed_page(gfp); - if (!page) - return NULL; - *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); - } - - level -= 1; - - pte = IOMMU_PTE_PAGE(*pte); - - if (pte_page && level == end_lvl) - *pte_page = pte; - - pte = &pte[PM_LEVEL_INDEX(level, address)]; - } - - return pte; -} - /* * This function fetches the PTE for a given address in the aperture */ -- cgit v1.2.3 From 171e7b3739e175eea7b32eca9dbe189589e14a28 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 24 Nov 2009 17:47:56 +0100 Subject: x86/amd-iommu: Rearrange dma_ops related functions This patch rearranges two dma_ops related functions so that their forward declarations are not longer necessary. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 89 +++++++++++++++++++++------------------------ 1 file changed, 42 insertions(+), 47 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 90b365024c24..14b60c0cdc70 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -57,11 +57,6 @@ struct iommu_cmd { u32 data[4]; }; -static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, - struct unity_map_entry *e); -static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, - unsigned long start_page, - unsigned int pages); static void reset_iommu_command_buffer(struct amd_iommu *iommu); static void update_domain(struct protection_domain *domain); @@ -822,28 +817,6 @@ static int iommu_for_unity_map(struct amd_iommu *iommu, return 0; } -/* - * Init the unity mappings for a specific IOMMU in the system - * - * Basically iterates over all unity mapping entries and applies them to - * the default domain DMA of that IOMMU if necessary. - */ -static int iommu_init_unity_mappings(struct amd_iommu *iommu) -{ - struct unity_map_entry *entry; - int ret; - - list_for_each_entry(entry, &amd_iommu_unity_map, list) { - if (!iommu_for_unity_map(iommu, entry)) - continue; - ret = dma_ops_unity_map(iommu->default_dom, entry); - if (ret) - return ret; - } - - return 0; -} - /* * This function actually applies the mapping to the page table of the * dma_ops domain. @@ -872,6 +845,28 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, return 0; } +/* + * Init the unity mappings for a specific IOMMU in the system + * + * Basically iterates over all unity mapping entries and applies them to + * the default domain DMA of that IOMMU if necessary. + */ +static int iommu_init_unity_mappings(struct amd_iommu *iommu) +{ + struct unity_map_entry *entry; + int ret; + + list_for_each_entry(entry, &amd_iommu_unity_map, list) { + if (!iommu_for_unity_map(iommu, entry)) + continue; + ret = dma_ops_unity_map(iommu->default_dom, entry); + if (ret) + return ret; + } + + return 0; +} + /* * Inits the unity mappings required for a specific device */ @@ -908,6 +903,26 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, * called with domain->lock held */ +/* + * Used to reserve address ranges in the aperture (e.g. for exclusion + * ranges. + */ +static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, + unsigned long start_page, + unsigned int pages) +{ + unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT; + + if (start_page + pages > last_page) + pages = last_page - start_page; + + for (i = start_page; i < start_page + pages; ++i) { + int index = i / APERTURE_RANGE_PAGES; + int page = i % APERTURE_RANGE_PAGES; + __set_bit(page, dom->aperture[index]->bitmap); + } +} + /* * This function is used to add a new aperture range to an existing * aperture in case of dma_ops domain allocation or address allocation @@ -1166,26 +1181,6 @@ static void domain_id_free(int id) write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); } -/* - * Used to reserve address ranges in the aperture (e.g. for exclusion - * ranges. - */ -static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, - unsigned long start_page, - unsigned int pages) -{ - unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT; - - if (start_page + pages > last_page) - pages = last_page - start_page; - - for (i = start_page; i < start_page + pages; ++i) { - int index = i / APERTURE_RANGE_PAGES; - int page = i % APERTURE_RANGE_PAGES; - __set_bit(page, dom->aperture[index]->bitmap); - } -} - static void free_pagetable(struct protection_domain *domain) { int i, j; -- cgit v1.2.3 From 8793abeb783c12cc37f92f6133fd6468152b98df Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 27 Nov 2009 11:40:33 +0100 Subject: x86/amd-iommu: Remove support for domain sharing This patch makes device isolation mandatory and removes support for the amd_iommu=share option. This simplifies the code in several places. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 3 --- arch/x86/kernel/amd_iommu.c | 10 ++-------- arch/x86/kernel/amd_iommu_init.c | 17 ----------------- 3 files changed, 2 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 4899f783df68..02b6a0fd863c 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -451,9 +451,6 @@ extern struct protection_domain **amd_iommu_pd_table; /* allocation bitmap for domain ids */ extern unsigned long *amd_iommu_pd_alloc_bitmap; -/* will be 1 if device isolation is enabled */ -extern bool amd_iommu_isolate; - /* * If true, the addresses will be flushed on unmap time, not when * they are reused diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 14b60c0cdc70..ed58a1688391 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -148,7 +148,6 @@ DECLARE_STATS_COUNTER(alloced_io_mem); DECLARE_STATS_COUNTER(total_map_requests); static struct dentry *stats_dir; -static struct dentry *de_isolate; static struct dentry *de_fflush; static void amd_iommu_stats_add(struct __iommu_counter *cnt) @@ -166,9 +165,6 @@ static void amd_iommu_stats_init(void) if (stats_dir == NULL) return; - de_isolate = debugfs_create_bool("isolation", 0444, stats_dir, - (u32 *)&amd_iommu_isolate); - de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir, (u32 *)&amd_iommu_unmap_flush); @@ -2135,11 +2131,9 @@ int __init amd_iommu_init_dma_ops(void) } /* - * If device isolation is enabled, pre-allocate the protection - * domains for each device. + * Pre-allocate the protection domains for each device. */ - if (amd_iommu_isolate) - prealloc_protection_domains(); + prealloc_protection_domains(); iommu_detected = 1; swiotlb = 0; diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index fbe4c3c02a91..fe1686f6f91b 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -125,13 +125,6 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have to handle */ LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings we find in ACPI */ -#ifdef CONFIG_IOMMU_STRESS -bool amd_iommu_isolate = false; -#else -bool amd_iommu_isolate = true; /* if true, device isolation is - enabled */ -#endif - bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the @@ -1308,12 +1301,6 @@ static int __init amd_iommu_init(void) if (iommu_pass_through) goto out; - printk(KERN_INFO "AMD-Vi: device isolation "); - if (amd_iommu_isolate) - printk("enabled\n"); - else - printk("disabled\n"); - if (amd_iommu_unmap_flush) printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n"); else @@ -1387,10 +1374,6 @@ static int __init parse_amd_iommu_dump(char *str) static int __init parse_amd_iommu_options(char *str) { for (; *str; ++str) { - if (strncmp(str, "isolate", 7) == 0) - amd_iommu_isolate = true; - if (strncmp(str, "share", 5) == 0) - amd_iommu_isolate = false; if (strncmp(str, "fullflush", 9) == 0) amd_iommu_unmap_flush = true; } -- cgit v1.2.3 From 657cbb6b6cba0f9c98c5299e0c803b2c0e67ea0a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 23 Nov 2009 15:26:46 +0100 Subject: x86/amd-iommu: Use dev->arch->iommu to store iommu related information This patch changes IOMMU code to use dev->archdata->iommu to store information about the alias device and the domain the device is attached to. This allows the driver to get rid of the amd_iommu_pd_table in the future. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 8 +++ arch/x86/include/asm/device.h | 2 +- arch/x86/kernel/amd_iommu.c | 109 ++++++++++++++++++++++++++------- 3 files changed, 95 insertions(+), 24 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 02b6a0fd863c..9eaa27b46860 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -247,6 +247,14 @@ struct protection_domain { }; +/* + * This struct contains device specific data for the IOMMU + */ +struct iommu_dev_data { + struct device *alias; /* The Alias Device */ + struct protection_domain *domain; /* Domain the device is bound to */ +}; + /* * For dynamic growth the aperture size is split into ranges of 128MB of * DMA address space each. This struct represents one such range. diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h index cee34e9ca45b..029f230ab637 100644 --- a/arch/x86/include/asm/device.h +++ b/arch/x86/include/asm/device.h @@ -8,7 +8,7 @@ struct dev_archdata { #ifdef CONFIG_X86_64 struct dma_map_ops *dma_ops; #endif -#ifdef CONFIG_DMAR +#if defined(CONFIG_DMAR) || defined(CONFIG_AMD_IOMMU) void *iommu; /* hook for IOMMU specific extension */ #endif }; diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index ed58a1688391..3214e8806f95 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -73,6 +73,11 @@ static inline u16 get_device_id(struct device *dev) return calc_devid(pdev->bus->number, pdev->devfn); } +static struct iommu_dev_data *get_dev_data(struct device *dev) +{ + return dev->archdata.iommu; +} + /* * In this function the list of preallocated protection domains is traversed to * find the domain for a specific device @@ -128,6 +133,35 @@ static bool check_device(struct device *dev) return true; } +static int iommu_init_device(struct device *dev) +{ + struct iommu_dev_data *dev_data; + struct pci_dev *pdev; + u16 devid, alias; + + if (dev->archdata.iommu) + return 0; + + dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL); + if (!dev_data) + return -ENOMEM; + + devid = get_device_id(dev); + alias = amd_iommu_alias_table[devid]; + pdev = pci_get_bus_and_slot(PCI_BUS(alias), alias & 0xff); + if (pdev) + dev_data->alias = &pdev->dev; + + dev->archdata.iommu = dev_data; + + + return 0; +} + +static void iommu_uninit_device(struct device *dev) +{ + kfree(dev->archdata.iommu); +} #ifdef CONFIG_AMD_IOMMU_STATS /* @@ -1346,28 +1380,39 @@ static void clear_dte_entry(u16 devid) static int __attach_device(struct device *dev, struct protection_domain *domain) { - u16 devid = get_device_id(dev); - u16 alias = amd_iommu_alias_table[devid]; + struct iommu_dev_data *dev_data, *alias_data; + u16 devid, alias; + + devid = get_device_id(dev); + alias = amd_iommu_alias_table[devid]; + dev_data = get_dev_data(dev); + alias_data = get_dev_data(dev_data->alias); + if (!alias_data) + return -EINVAL; /* lock domain */ spin_lock(&domain->lock); /* Some sanity checks */ - if (amd_iommu_pd_table[alias] != NULL && - amd_iommu_pd_table[alias] != domain) + if (alias_data->domain != NULL && + alias_data->domain != domain) return -EBUSY; - if (amd_iommu_pd_table[devid] != NULL && - amd_iommu_pd_table[devid] != domain) + if (dev_data->domain != NULL && + dev_data->domain != domain) return -EBUSY; /* Do real assignment */ if (alias != devid && - amd_iommu_pd_table[alias] == NULL) + alias_data->domain == NULL) { + alias_data->domain = domain; set_dte_entry(alias, domain); + } - if (amd_iommu_pd_table[devid] == NULL) + if (dev_data->domain == NULL) { + dev_data->domain = domain; set_dte_entry(devid, domain); + } /* ready */ spin_unlock(&domain->lock); @@ -1406,10 +1451,12 @@ static void __detach_device(struct device *dev) { u16 devid = get_device_id(dev); struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + struct iommu_dev_data *dev_data = get_dev_data(dev); BUG_ON(!iommu); clear_dte_entry(devid); + dev_data->domain = NULL; /* * If we run in passthrough mode the device must be assigned to the @@ -1439,18 +1486,23 @@ static void detach_device(struct device *dev) static struct protection_domain *domain_for_device(struct device *dev) { struct protection_domain *dom; + struct iommu_dev_data *dev_data, *alias_data; unsigned long flags; u16 devid, alias; - devid = get_device_id(dev); - alias = amd_iommu_alias_table[devid]; + devid = get_device_id(dev); + alias = amd_iommu_alias_table[devid]; + dev_data = get_dev_data(dev); + alias_data = get_dev_data(dev_data->alias); + if (!alias_data) + return NULL; read_lock_irqsave(&amd_iommu_devtable_lock, flags); - dom = amd_iommu_pd_table[devid]; + dom = dev_data->domain; if (dom == NULL && - amd_iommu_pd_table[alias] != NULL) { - __attach_device(dev, amd_iommu_pd_table[alias]); - dom = amd_iommu_pd_table[devid]; + alias_data->domain != NULL) { + __attach_device(dev, alias_data->domain); + dom = alias_data->domain; } read_unlock_irqrestore(&amd_iommu_devtable_lock, flags); @@ -1473,14 +1525,12 @@ static int device_change_notifier(struct notifier_block *nb, devid = get_device_id(dev); iommu = amd_iommu_rlookup_table[devid]; - domain = domain_for_device(dev); - - if (domain && !dma_ops_domain(domain)) - WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound " - "to a non-dma-ops domain\n", dev_name(dev)); switch (action) { case BUS_NOTIFY_UNBOUND_DRIVER: + + domain = domain_for_device(dev); + if (!domain) goto out; if (iommu_pass_through) @@ -1488,6 +1538,11 @@ static int device_change_notifier(struct notifier_block *nb, detach_device(dev); break; case BUS_NOTIFY_ADD_DEVICE: + + iommu_init_device(dev); + + domain = domain_for_device(dev); + /* allocate a protection domain if a device is added */ dma_domain = find_protection_domain(devid); if (dma_domain) @@ -1502,6 +1557,10 @@ static int device_change_notifier(struct notifier_block *nb, spin_unlock_irqrestore(&iommu_pd_list_lock, flags); break; + case BUS_NOTIFY_DEL_DEVICE: + + iommu_uninit_device(dev); + default: goto out; } @@ -2079,6 +2138,8 @@ static void prealloc_protection_domains(void) if (!check_device(&dev->dev)) continue; + iommu_init_device(&dev->dev); + /* Is there already any domain for it? */ if (domain_for_device(&dev->dev)) continue; @@ -2270,6 +2331,7 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom) static void amd_iommu_detach_device(struct iommu_domain *dom, struct device *dev) { + struct iommu_dev_data *dev_data = dev->archdata.iommu; struct amd_iommu *iommu; u16 devid; @@ -2278,7 +2340,7 @@ static void amd_iommu_detach_device(struct iommu_domain *dom, devid = get_device_id(dev); - if (amd_iommu_pd_table[devid] != NULL) + if (dev_data->domain != NULL) detach_device(dev); iommu = amd_iommu_rlookup_table[devid]; @@ -2293,7 +2355,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, struct device *dev) { struct protection_domain *domain = dom->priv; - struct protection_domain *old_domain; + struct iommu_dev_data *dev_data; struct amd_iommu *iommu; int ret; u16 devid; @@ -2301,14 +2363,15 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, if (!check_device(dev)) return -EINVAL; + dev_data = dev->archdata.iommu; + devid = get_device_id(dev); iommu = amd_iommu_rlookup_table[devid]; if (!iommu) return -EINVAL; - old_domain = amd_iommu_pd_table[devid]; - if (old_domain) + if (dev_data->domain) detach_device(dev); ret = attach_device(dev, domain); -- cgit v1.2.3 From 241000556f751dacd332df6ab2e903a23746e51e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 25 Nov 2009 15:59:57 +0100 Subject: x86/amd-iommu: Add device bind reference counting This patch adds a reference count to each device to count how often the device was bound to that domain. This is important for single devices that act as an alias for a number of others. These devices must stay bound to their domains until all devices that alias to it are unbound from the same domain. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 1 + arch/x86/kernel/amd_iommu.c | 37 ++++++++++++++++++++++++++-------- 2 files changed, 30 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 9eaa27b46860..434e90ed89c5 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -253,6 +253,7 @@ struct protection_domain { struct iommu_dev_data { struct device *alias; /* The Alias Device */ struct protection_domain *domain; /* Domain the device is bound to */ + atomic_t bind; /* Domain attach reverent count */ }; /* diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 3214e8806f95..f5db7d5e444e 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -152,6 +152,8 @@ static int iommu_init_device(struct device *dev) if (pdev) dev_data->alias = &pdev->dev; + atomic_set(&dev_data->bind, 0); + dev->archdata.iommu = dev_data; @@ -1403,10 +1405,13 @@ static int __attach_device(struct device *dev, return -EBUSY; /* Do real assignment */ - if (alias != devid && - alias_data->domain == NULL) { - alias_data->domain = domain; - set_dte_entry(alias, domain); + if (alias != devid) { + if (alias_data->domain == NULL) { + alias_data->domain = domain; + set_dte_entry(alias, domain); + } + + atomic_inc(&alias_data->bind); } if (dev_data->domain == NULL) { @@ -1414,6 +1419,8 @@ static int __attach_device(struct device *dev, set_dte_entry(devid, domain); } + atomic_inc(&dev_data->bind); + /* ready */ spin_unlock(&domain->lock); @@ -1449,20 +1456,34 @@ static int attach_device(struct device *dev, */ static void __detach_device(struct device *dev) { - u16 devid = get_device_id(dev); + u16 devid = get_device_id(dev), alias; struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; struct iommu_dev_data *dev_data = get_dev_data(dev); + struct iommu_dev_data *alias_data; BUG_ON(!iommu); - clear_dte_entry(devid); - dev_data->domain = NULL; + devid = get_device_id(dev); + alias = get_device_id(dev_data->alias); + + if (devid != alias) { + alias_data = get_dev_data(dev_data->alias); + if (atomic_dec_and_test(&alias_data->bind)) { + clear_dte_entry(alias); + alias_data->domain = NULL; + } + } + + if (atomic_dec_and_test(&dev_data->bind)) { + clear_dte_entry(devid); + dev_data->domain = NULL; + } /* * If we run in passthrough mode the device must be assigned to the * passthrough domain if it is detached from any other domain */ - if (iommu_pass_through) + if (iommu_pass_through && dev_data->domain == NULL) __attach_device(dev, pt_domain); } -- cgit v1.2.3 From 7c392cbe984d904f7c89a6a75b2ac245254e8da5 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Nov 2009 11:13:32 +0100 Subject: x86/amd-iommu: Keep devices per domain in a list This patch introduces a list to each protection domain which keeps all devices associated with the domain. This can be used later to optimize certain functions and to completly remove the amd_iommu_pd_table. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 2 ++ arch/x86/kernel/amd_iommu.c | 11 +++++++++++ 2 files changed, 13 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 434e90ed89c5..93953d1922c4 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -235,6 +235,7 @@ extern bool amd_iommu_np_cache; */ struct protection_domain { struct list_head list; /* for list of all protection domains */ + struct list_head dev_list; /* List of all devices in this domain */ spinlock_t lock; /* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ int mode; /* paging mode (0-6 levels) */ @@ -251,6 +252,7 @@ struct protection_domain { * This struct contains device specific data for the IOMMU */ struct iommu_dev_data { + struct list_head list; /* For domain->dev_list */ struct device *alias; /* The Alias Device */ struct protection_domain *domain; /* Domain the device is bound to */ atomic_t bind; /* Domain attach reverent count */ diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index f5db7d5e444e..530d6080940f 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1286,6 +1286,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) dma_dom->domain.id = domain_id_alloc(); if (dma_dom->domain.id == 0) goto free_dma_dom; + INIT_LIST_HEAD(&dma_dom->domain.dev_list); dma_dom->domain.mode = PAGE_MODE_2_LEVEL; dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); dma_dom->domain.flags = PD_DMA_OPS_MASK; @@ -1408,6 +1409,7 @@ static int __attach_device(struct device *dev, if (alias != devid) { if (alias_data->domain == NULL) { alias_data->domain = domain; + list_add(&alias_data->list, &domain->dev_list); set_dte_entry(alias, domain); } @@ -1416,6 +1418,7 @@ static int __attach_device(struct device *dev, if (dev_data->domain == NULL) { dev_data->domain = domain; + list_add(&dev_data->list, &domain->dev_list); set_dte_entry(devid, domain); } @@ -1460,6 +1463,7 @@ static void __detach_device(struct device *dev) struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; struct iommu_dev_data *dev_data = get_dev_data(dev); struct iommu_dev_data *alias_data; + unsigned long flags; BUG_ON(!iommu); @@ -1469,13 +1473,19 @@ static void __detach_device(struct device *dev) if (devid != alias) { alias_data = get_dev_data(dev_data->alias); if (atomic_dec_and_test(&alias_data->bind)) { + spin_lock_irqsave(&alias_data->domain->lock, flags); clear_dte_entry(alias); + list_del(&alias_data->list); + spin_unlock_irqrestore(&alias_data->domain->lock, flags); alias_data->domain = NULL; } } if (atomic_dec_and_test(&dev_data->bind)) { + spin_lock_irqsave(&dev_data->domain->lock, flags); clear_dte_entry(devid); + list_del(&dev_data->list); + spin_unlock_irqrestore(&dev_data->domain->lock, flags); dev_data->domain = NULL; } @@ -2294,6 +2304,7 @@ static struct protection_domain *protection_domain_alloc(void) domain->id = domain_id_alloc(); if (!domain->id) goto out_err; + INIT_LIST_HEAD(&domain->dev_list); add_domain_to_list(domain); -- cgit v1.2.3 From 7f760ddd702d162d693bc79f62c3bdd7fe55bd9d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Nov 2009 14:49:59 +0100 Subject: x86/amd-iommu: Cleanup attach/detach_device code This patch cleans up the attach_device and detach_device paths and fixes reference counting while at it. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 102 +++++++++++++++++++++++++------------------- 1 file changed, 58 insertions(+), 44 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 530d6080940f..e3363fd5eef5 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1329,7 +1329,6 @@ static bool dma_ops_domain(struct protection_domain *domain) static void set_dte_entry(u16 devid, struct protection_domain *domain) { - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; u64 pte_root = virt_to_phys(domain->pt_root); BUG_ON(amd_iommu_pd_table[devid] != NULL); @@ -1344,18 +1343,11 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain) amd_iommu_pd_table[devid] = domain; - /* Do reference counting */ - domain->dev_iommu[iommu->index] += 1; - domain->dev_cnt += 1; - - /* Flush the changes DTE entry */ - iommu_queue_inv_dev_entry(iommu, devid); } static void clear_dte_entry(u16 devid) { struct protection_domain *domain = amd_iommu_pd_table[devid]; - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; BUG_ON(domain == NULL); @@ -1368,11 +1360,51 @@ static void clear_dte_entry(u16 devid) amd_iommu_dev_table[devid].data[2] = 0; amd_iommu_apply_erratum_63(devid); +} + +static void do_attach(struct device *dev, struct protection_domain *domain) +{ + struct iommu_dev_data *dev_data; + struct amd_iommu *iommu; + u16 devid; + + devid = get_device_id(dev); + iommu = amd_iommu_rlookup_table[devid]; + dev_data = get_dev_data(dev); + + /* Update data structures */ + dev_data->domain = domain; + list_add(&dev_data->list, &domain->dev_list); + set_dte_entry(devid, domain); + + /* Do reference counting */ + domain->dev_iommu[iommu->index] += 1; + domain->dev_cnt += 1; + + /* Flush the DTE entry */ + iommu_queue_inv_dev_entry(iommu, devid); +} + +static void do_detach(struct device *dev) +{ + struct iommu_dev_data *dev_data; + struct amd_iommu *iommu; + u16 devid; + + devid = get_device_id(dev); + iommu = amd_iommu_rlookup_table[devid]; + dev_data = get_dev_data(dev); /* decrease reference counters */ - domain->dev_iommu[iommu->index] -= 1; - domain->dev_cnt -= 1; + dev_data->domain->dev_iommu[iommu->index] -= 1; + dev_data->domain->dev_cnt -= 1; + + /* Update data structures */ + dev_data->domain = NULL; + list_del(&dev_data->list); + clear_dte_entry(devid); + /* Flush the DTE entry */ iommu_queue_inv_dev_entry(iommu, devid); } @@ -1384,12 +1416,10 @@ static int __attach_device(struct device *dev, struct protection_domain *domain) { struct iommu_dev_data *dev_data, *alias_data; - u16 devid, alias; - devid = get_device_id(dev); - alias = amd_iommu_alias_table[devid]; dev_data = get_dev_data(dev); alias_data = get_dev_data(dev_data->alias); + if (!alias_data) return -EINVAL; @@ -1406,21 +1436,16 @@ static int __attach_device(struct device *dev, return -EBUSY; /* Do real assignment */ - if (alias != devid) { - if (alias_data->domain == NULL) { - alias_data->domain = domain; - list_add(&alias_data->list, &domain->dev_list); - set_dte_entry(alias, domain); - } + if (dev_data->alias != dev) { + alias_data = get_dev_data(dev_data->alias); + if (alias_data->domain == NULL) + do_attach(dev_data->alias, domain); atomic_inc(&alias_data->bind); } - if (dev_data->domain == NULL) { - dev_data->domain = domain; - list_add(&dev_data->list, &domain->dev_list); - set_dte_entry(devid, domain); - } + if (dev_data->domain == NULL) + do_attach(dev, domain); atomic_inc(&dev_data->bind); @@ -1459,35 +1484,24 @@ static int attach_device(struct device *dev, */ static void __detach_device(struct device *dev) { - u16 devid = get_device_id(dev), alias; - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; struct iommu_dev_data *dev_data = get_dev_data(dev); struct iommu_dev_data *alias_data; unsigned long flags; - BUG_ON(!iommu); + BUG_ON(!dev_data->domain); - devid = get_device_id(dev); - alias = get_device_id(dev_data->alias); + spin_lock_irqsave(&dev_data->domain->lock, flags); - if (devid != alias) { + if (dev_data->alias != dev) { alias_data = get_dev_data(dev_data->alias); - if (atomic_dec_and_test(&alias_data->bind)) { - spin_lock_irqsave(&alias_data->domain->lock, flags); - clear_dte_entry(alias); - list_del(&alias_data->list); - spin_unlock_irqrestore(&alias_data->domain->lock, flags); - alias_data->domain = NULL; - } + if (atomic_dec_and_test(&alias_data->bind)) + do_detach(dev_data->alias); } - if (atomic_dec_and_test(&dev_data->bind)) { - spin_lock_irqsave(&dev_data->domain->lock, flags); - clear_dte_entry(devid); - list_del(&dev_data->list); - spin_unlock_irqrestore(&dev_data->domain->lock, flags); - dev_data->domain = NULL; - } + if (atomic_dec_and_test(&dev_data->bind)) + do_detach(dev); + + spin_unlock_irqrestore(&dev_data->domain->lock, flags); /* * If we run in passthrough mode the device must be assigned to the -- cgit v1.2.3 From 3fa43655d81d471d47c44b0db4e2be1f8af32207 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Nov 2009 15:04:38 +0100 Subject: x86/amd-iommu: Introduce iommu_flush_device() function This patch adds a function to flush a DTE entry for a given struct device and replaces iommu_queue_inv_dev_entry calls with this function where appropriate. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index e3363fd5eef5..41c4ebecced4 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -494,6 +494,17 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) return ret; } +static int iommu_flush_device(struct device *dev) +{ + struct amd_iommu *iommu; + u16 devid; + + devid = get_device_id(dev); + iommu = amd_iommu_rlookup_table[devid]; + + return iommu_queue_inv_dev_entry(iommu, devid); +} + static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, u16 domid, int pde, int s) { @@ -1382,7 +1393,7 @@ static void do_attach(struct device *dev, struct protection_domain *domain) domain->dev_cnt += 1; /* Flush the DTE entry */ - iommu_queue_inv_dev_entry(iommu, devid); + iommu_flush_device(dev); } static void do_detach(struct device *dev) @@ -1405,7 +1416,7 @@ static void do_detach(struct device *dev) clear_dte_entry(devid); /* Flush the DTE entry */ - iommu_queue_inv_dev_entry(iommu, devid); + iommu_flush_device(dev); } /* @@ -1610,7 +1621,7 @@ static int device_change_notifier(struct notifier_block *nb, goto out; } - iommu_queue_inv_dev_entry(iommu, devid); + iommu_flush_device(dev); iommu_completion_wait(iommu); out: @@ -2393,7 +2404,7 @@ static void amd_iommu_detach_device(struct iommu_domain *dom, if (!iommu) return; - iommu_queue_inv_dev_entry(iommu, devid); + iommu_flush_device(dev); iommu_completion_wait(iommu); } -- cgit v1.2.3 From b00d3bcff4d996f65e337d404b0df5dc201a01ab Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Nov 2009 15:35:33 +0100 Subject: x86/amd-iommu: Cleanup DTE flushing code This patch cleans up the code to flush device table entries in the IOMMU. With this chance the driver can get rid of the iommu_queue_inv_dev_entry() function. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 1 + arch/x86/kernel/amd_iommu.c | 100 +++++++++++---------------------- 2 files changed, 35 insertions(+), 66 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 93953d1922c4..f92d1b37b877 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -253,6 +253,7 @@ struct protection_domain { */ struct iommu_dev_data { struct list_head list; /* For domain->dev_list */ + struct device *dev; /* Device this data belong to */ struct device *alias; /* The Alias Device */ struct protection_domain *domain; /* Domain the device is bound to */ atomic_t bind; /* Domain attach reverent count */ diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 41c4ebecced4..0eafca58926f 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -146,6 +146,8 @@ static int iommu_init_device(struct device *dev) if (!dev_data) return -ENOMEM; + dev_data->dev = dev; + devid = get_device_id(dev); alias = amd_iommu_alias_table[devid]; pdev = pci_get_bus_and_slot(PCI_BUS(alias), alias & 0xff); @@ -478,31 +480,21 @@ static void iommu_flush_complete(struct protection_domain *domain) /* * Command send function for invalidating a device table entry */ -static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) -{ - struct iommu_cmd cmd; - int ret; - - BUG_ON(iommu == NULL); - - memset(&cmd, 0, sizeof(cmd)); - CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); - cmd.data[0] = devid; - - ret = iommu_queue_command(iommu, &cmd); - - return ret; -} - static int iommu_flush_device(struct device *dev) { struct amd_iommu *iommu; + struct iommu_cmd cmd; u16 devid; devid = get_device_id(dev); iommu = amd_iommu_rlookup_table[devid]; - return iommu_queue_inv_dev_entry(iommu, devid); + /* Build command */ + memset(&cmd, 0, sizeof(cmd)); + CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); + cmd.data[0] = devid; + + return iommu_queue_command(iommu, &cmd); } static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, @@ -592,30 +584,43 @@ static void iommu_flush_tlb_pde(struct protection_domain *domain) __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); } + /* - * This function flushes all domains that have devices on the given IOMMU + * This function flushes the DTEs for all devices in domain */ -static void flush_all_domains_on_iommu(struct amd_iommu *iommu) +static void iommu_flush_domain_devices(struct protection_domain *domain) +{ + struct iommu_dev_data *dev_data; + unsigned long flags; + + spin_lock_irqsave(&domain->lock, flags); + + list_for_each_entry(dev_data, &domain->dev_list, list) + iommu_flush_device(dev_data->dev); + + spin_unlock_irqrestore(&domain->lock, flags); +} + +static void iommu_flush_all_domain_devices(void) { - u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; struct protection_domain *domain; unsigned long flags; spin_lock_irqsave(&amd_iommu_pd_lock, flags); list_for_each_entry(domain, &amd_iommu_pd_list, list) { - if (domain->dev_iommu[iommu->index] == 0) - continue; - - spin_lock(&domain->lock); - iommu_queue_inv_iommu_pages(iommu, address, domain->id, 1, 1); + iommu_flush_domain_devices(domain); iommu_flush_complete(domain); - spin_unlock(&domain->lock); } spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); } +void amd_iommu_flush_all_devices(void) +{ + iommu_flush_all_domain_devices(); +} + /* * This function uses heavy locking and may disable irqs for some time. But * this is no issue because it is only called during resume. @@ -637,38 +642,6 @@ void amd_iommu_flush_all_domains(void) spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); } -static void flush_all_devices_for_iommu(struct amd_iommu *iommu) -{ - int i; - - for (i = 0; i <= amd_iommu_last_bdf; ++i) { - if (iommu != amd_iommu_rlookup_table[i]) - continue; - - iommu_queue_inv_dev_entry(iommu, i); - iommu_completion_wait(iommu); - } -} - -static void flush_devices_by_domain(struct protection_domain *domain) -{ - struct amd_iommu *iommu; - int i; - - for (i = 0; i <= amd_iommu_last_bdf; ++i) { - if ((domain == NULL && amd_iommu_pd_table[i] == NULL) || - (amd_iommu_pd_table[i] != domain)) - continue; - - iommu = amd_iommu_rlookup_table[i]; - if (!iommu) - continue; - - iommu_queue_inv_dev_entry(iommu, i); - iommu_completion_wait(iommu); - } -} - static void reset_iommu_command_buffer(struct amd_iommu *iommu) { pr_err("AMD-Vi: Resetting IOMMU command buffer\n"); @@ -679,17 +652,12 @@ static void reset_iommu_command_buffer(struct amd_iommu *iommu) iommu->reset_in_progress = true; amd_iommu_reset_cmd_buffer(iommu); - flush_all_devices_for_iommu(iommu); - flush_all_domains_on_iommu(iommu); + amd_iommu_flush_all_devices(); + amd_iommu_flush_all_domains(); iommu->reset_in_progress = false; } -void amd_iommu_flush_all_devices(void) -{ - flush_devices_by_domain(NULL); -} - /**************************************************************************** * * The functions below are used the create the page table mappings for @@ -1692,7 +1660,7 @@ static void update_domain(struct protection_domain *domain) return; update_device_table(domain); - flush_devices_by_domain(domain); + iommu_flush_domain_devices(domain); iommu_flush_tlb_pde(domain); domain->updated = false; -- cgit v1.2.3 From 8eed9833346781dd15e3bef35a91b0a40787ea3c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Nov 2009 15:45:41 +0100 Subject: x86/amd-iommu: Move reset_iommu_command_buffer out of locked code This patch removes the ugly contruct where the iommu->lock must be released while before calling the reset_iommu_command_buffer function. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0eafca58926f..b75fcd9b6a0f 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -285,6 +285,7 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt) break; case EVENT_TYPE_ILL_CMD: printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); + iommu->reset_in_progress = true; reset_iommu_command_buffer(iommu); dump_command(address); break; @@ -407,11 +408,8 @@ static void __iommu_wait_for_completion(struct amd_iommu *iommu) status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); - if (unlikely(i == EXIT_LOOP_COUNT)) { - spin_unlock(&iommu->lock); - reset_iommu_command_buffer(iommu); - spin_lock(&iommu->lock); - } + if (unlikely(i == EXIT_LOOP_COUNT)) + iommu->reset_in_progress = true; } /* @@ -458,6 +456,9 @@ static int iommu_completion_wait(struct amd_iommu *iommu) out: spin_unlock_irqrestore(&iommu->lock, flags); + if (iommu->reset_in_progress) + reset_iommu_command_buffer(iommu); + return 0; } @@ -649,8 +650,6 @@ static void reset_iommu_command_buffer(struct amd_iommu *iommu) if (iommu->reset_in_progress) panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n"); - iommu->reset_in_progress = true; - amd_iommu_reset_cmd_buffer(iommu); amd_iommu_flush_all_devices(); amd_iommu_flush_all_domains(); -- cgit v1.2.3 From 492667dacc0ac9763969155482b1261b34ccf450 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 27 Nov 2009 13:25:47 +0100 Subject: x86/amd-iommu: Remove amd_iommu_pd_table The data that was stored in this table is now available in dev->archdata.iommu. So this table is not longer necessary. This patch removes the remaining uses of that variable and removes it from the code. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 3 --- arch/x86/kernel/amd_iommu.c | 35 +++++++++++----------------------- arch/x86/kernel/amd_iommu_init.c | 18 ----------------- 3 files changed, 11 insertions(+), 45 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index f92d1b37b877..ba19ad4c47d0 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -457,9 +457,6 @@ extern unsigned amd_iommu_aperture_order; /* largest PCI device id we expect translation requests for */ extern u16 amd_iommu_last_bdf; -/* data structures for protection domain handling */ -extern struct protection_domain **amd_iommu_pd_table; - /* allocation bitmap for domain ids */ extern unsigned long *amd_iommu_pd_alloc_bitmap; diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index b75fcd9b6a0f..32fb09102a13 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1309,8 +1309,6 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain) { u64 pte_root = virt_to_phys(domain->pt_root); - BUG_ON(amd_iommu_pd_table[devid] != NULL); - pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; @@ -1318,20 +1316,10 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain) amd_iommu_dev_table[devid].data[2] = domain->id; amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); - - amd_iommu_pd_table[devid] = domain; - } static void clear_dte_entry(u16 devid) { - struct protection_domain *domain = amd_iommu_pd_table[devid]; - - BUG_ON(domain == NULL); - - /* remove domain from the lookup table */ - amd_iommu_pd_table[devid] = NULL; - /* remove entry from the device table seen by the hardware */ amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; amd_iommu_dev_table[devid].data[1] = 0; @@ -1641,15 +1629,11 @@ static struct protection_domain *get_domain(struct device *dev) static void update_device_table(struct protection_domain *domain) { - unsigned long flags; - int i; + struct iommu_dev_data *dev_data; - for (i = 0; i <= amd_iommu_last_bdf; ++i) { - if (amd_iommu_pd_table[i] != domain) - continue; - write_lock_irqsave(&amd_iommu_devtable_lock, flags); - set_dte_entry(i, domain); - write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + list_for_each_entry(dev_data, &domain->dev_list, list) { + u16 devid = get_device_id(dev_data->dev); + set_dte_entry(devid, domain); } } @@ -2259,14 +2243,17 @@ free_domains: static void cleanup_domain(struct protection_domain *domain) { + struct iommu_dev_data *dev_data, *next; unsigned long flags; - u16 devid; write_lock_irqsave(&amd_iommu_devtable_lock, flags); - for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) - if (amd_iommu_pd_table[devid] == domain) - clear_dte_entry(devid); + list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) { + struct device *dev = dev_data->dev; + + do_detach(dev); + atomic_set(&dev_data->bind, 0); + } write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); } diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index fe1686f6f91b..7ffc39965233 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -164,12 +164,6 @@ u16 *amd_iommu_alias_table; */ struct amd_iommu **amd_iommu_rlookup_table; -/* - * The pd table (protection domain table) is used to find the protection domain - * data structure a device belongs to. Indexed with the PCI device id too. - */ -struct protection_domain **amd_iommu_pd_table; - /* * AMD IOMMU allows up to 2^16 differend protection domains. This is a bitmap * to know which ones are already in use. @@ -1238,15 +1232,6 @@ static int __init amd_iommu_init(void) if (amd_iommu_rlookup_table == NULL) goto free; - /* - * Protection Domain table - maps devices to protection domains - * This table has the same size as the rlookup_table - */ - amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(rlookup_table_size)); - if (amd_iommu_pd_table == NULL) - goto free; - amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( GFP_KERNEL | __GFP_ZERO, get_order(MAX_DOMAIN_ID/8)); @@ -1314,9 +1299,6 @@ free: free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, get_order(MAX_DOMAIN_ID/8)); - free_pages((unsigned long)amd_iommu_pd_table, - get_order(rlookup_table_size)); - free_pages((unsigned long)amd_iommu_rlookup_table, get_order(rlookup_table_size)); -- cgit v1.2.3 From 18ed61da985c57eea3fe8038b13fa2837c9b3c3f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 27 Nov 2009 15:24:44 +0100 Subject: x86: hpet: Make WARN_ON understandable Andrew complained rightly that the WARN_ON in hpet_next_event() is confusing and the code comment not really helpful. Change it to WARN_ONCE and print the reason in clear text. Change the comment to explain what kind of hardware wreckage we deal with. Pointed-out-by: Andrew Morton Signed-off-by: Thomas Gleixner Cc: Venki Pallipadi --- arch/x86/kernel/hpet.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 7f024ff47d1d..ba6e65884603 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -384,11 +384,22 @@ static int hpet_next_event(unsigned long delta, hpet_writel(cnt, HPET_Tn_CMP(timer)); /* - * We need to read back the CMP register to make sure that - * what we wrote hit the chip before we compare it to the - * counter. + * We need to read back the CMP register on certain HPET + * implementations (ATI chipsets) which seem to delay the + * transfer of the compare register into the internal compare + * logic. With small deltas this might actually be too late as + * the counter could already be higher than the compare value + * at that point and we would wait for the next hpet interrupt + * forever. We found out that reading the CMP register back + * forces the transfer so we can rely on the comparison with + * the counter register below. If the read back from the + * compare register does not match the value we programmed + * then we might have a real hardware problem. We can not do + * much about it here, but at least alert the user/admin with + * a prominent warning. */ - WARN_ON_ONCE(hpet_readl(HPET_Tn_CMP(timer)) != cnt); + WARN_ONCE(hpet_readl(HPET_Tn_CMP(timer)) != cnt, + KERN_WARNING "hpet: compare register read back failed.\n"); return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; } -- cgit v1.2.3 From b8b7d791a8ff01d2380089279a69afa99115fb23 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 28 Nov 2009 15:03:03 +0100 Subject: x86: Use -maccumulate-outgoing-args for sane mcount prologues commit 746357d (x86: Prevent GCC 4.4.x (pentium-mmx et al) function prologue wreckage) uses -mtune=generic to work around the function prologue problem with mcount on -march=pentium-mmx and others. Jakub pointed out that we can use -maccumulate-outgoing-args instead which is selected by -mtune=generic and prevents the problem without losing the -march specific optimizations. Pointed-out-by: Jakub Jelinek Signed-off-by: Thomas Gleixner Cc: Linus Torvalds Cc: stable@kernel.org --- arch/x86/Makefile_32.cpu | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index df7fdf811997..1937226fd502 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu @@ -49,8 +49,9 @@ cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686)) # Work around the pentium-mmx code generator madness of gcc4.4.x which # does stack alignment by generating horrible code _before_ the mcount # prologue (push %ebp, mov %esp, %ebp) which breaks the function graph -# tracer assumptions -cflags-$(CONFIG_FUNCTION_GRAPH_TRACER) += $(call cc-option,-mtune=generic) +# tracer assumptions. For i686, generic, core2 this is set by the +# compiler anyway +cflags-$(CONFIG_FUNCTION_GRAPH_TRACER) += $(call cc-option,-maccumulate-outgoing-args) # Bug fix for binutils: this option is required in order to keep # binutils from generating NOPL instructions against our will. -- cgit v1.2.3 From 9eaa192d8988d621217a9e6071cd403fd6010496 Mon Sep 17 00:00:00 2001 From: "Helight.Xu" Date: Mon, 30 Nov 2009 18:33:51 +0800 Subject: x86: Fix a section mismatch in arch/x86/kernel/setup.c copy_edd() should be __init. warning msg: WARNING: vmlinux.o(.text+0x7759): Section mismatch in reference from the function copy_edd() to the variable .init.data:boot_params The function copy_edd() references the variable __initdata boot_params. This is often because copy_edd lacks a __initdata annotation or the annotation of boot_params is wrong. Signed-off-by: ZhenwenXu LKML-Reference: <4B139F8F.4000907@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index e09f0e2c14b5..e020b2d03b4f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -247,7 +247,7 @@ EXPORT_SYMBOL(edd); * from boot_params into a safe place. * */ -static inline void copy_edd(void) +static inline void __init copy_edd(void) { memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer, sizeof(edd.mbr_signature)); @@ -256,7 +256,7 @@ static inline void copy_edd(void) edd.edd_info_nr = boot_params.eddbuf_entries; } #else -static inline void copy_edd(void) +static inline void __init copy_edd(void) { } #endif -- cgit v1.2.3 From 1cedae72904b85462082dbcfd5190309ba37f8bd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 2 Dec 2009 07:32:16 +0100 Subject: hw-breakpoints: Keep track of user disabled breakpoints When we disable a breakpoint through dr7, we unregister it right away, making us lose track of its corresponding address register value. It means that the following sequence would be unsupported: - set address in dr0 - enable it through dr7 - disable it through dr7 - enable it through dr7 because we lost the address register value when we disabled the breakpoint. Don't unregister the disabled breakpoints but rather disable them. Reported-by: "K.Prasad" Signed-off-by: Frederic Weisbecker LKML-Reference: <1259735536-9236-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/ptrace.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 2941b32ea666..04d182a7cfdb 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -595,7 +595,7 @@ static unsigned long ptrace_get_dr7(struct perf_event *bp[]) static struct perf_event * ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, - struct task_struct *tsk) + struct task_struct *tsk, int disabled) { int err; int gen_len, gen_type; @@ -616,7 +616,7 @@ ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, attr = bp->attr; attr.bp_len = gen_len; attr.bp_type = gen_type; - attr.disabled = 0; + attr.disabled = disabled; return modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); } @@ -655,13 +655,21 @@ restore: */ if (!second_pass) continue; + thread->ptrace_bps[i] = NULL; - unregister_hw_breakpoint(bp); + bp = ptrace_modify_breakpoint(bp, len, type, + tsk, 1); + if (IS_ERR(bp)) { + rc = PTR_ERR(bp); + thread->ptrace_bps[i] = NULL; + break; + } + thread->ptrace_bps[i] = bp; } continue; } - bp = ptrace_modify_breakpoint(bp, len, type, tsk); + bp = ptrace_modify_breakpoint(bp, len, type, tsk, 0); /* Incorrect bp, or we have a bug in bp API */ if (IS_ERR(bp)) { -- cgit v1.2.3 From ca64c47cecd0321b2e0dcbd7aaff44b68ce20654 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Tue, 1 Dec 2009 15:31:15 -0800 Subject: x86, io-apic: Move the effort of clearing remoteIRR explicitly before migrating the irq When the level-triggered interrupt is seen as an edge interrupt, we try to clear the remoteIRR explicitly (using either an io-apic eoi register when present or through the idea of changing trigger mode of the io-apic RTE to edge and then back to level). But this explicit try also needs to happen before we try to migrate the irq. Otherwise irq migration attempt will fail anyhow, as it postpones the irq migration to a later attempt when it sees the remoteIRR in the io-apic RTE still set. Signed-off-by: "Maciej W. Rozycki" Reviewed-by: Suresh Siddha Cc: ebiederm@xmission.com Cc: garyhade@us.ibm.com LKML-Reference: <20091201233334.975416130@sbs-t61.sc.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 085e60e303cf..b377b973899e 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2583,6 +2583,20 @@ static void ack_apic_level(unsigned int irq) */ ack_APIC_irq(); + /* Tail end of version 0x11 I/O APIC bug workaround */ + if (!(v & (1 << (i & 0x1f)))) { + atomic_inc(&irq_mis_count); + + if (use_eoi_reg) + eoi_ioapic_irq(desc); + else { + spin_lock(&ioapic_lock); + __mask_and_edge_IO_APIC_irq(cfg); + __unmask_and_level_IO_APIC_irq(cfg); + spin_unlock(&ioapic_lock); + } + } + /* Now we can move and renable the irq */ if (unlikely(do_unmask_irq)) { /* Only migrate the irq if the ack has been received. @@ -2616,20 +2630,6 @@ static void ack_apic_level(unsigned int irq) move_masked_irq(irq); unmask_IO_APIC_irq_desc(desc); } - - /* Tail end of version 0x11 I/O APIC bug workaround */ - if (!(v & (1 << (i & 0x1f)))) { - atomic_inc(&irq_mis_count); - - if (use_eoi_reg) - eoi_ioapic_irq(desc); - else { - spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(cfg); - __unmask_and_level_IO_APIC_irq(cfg); - spin_unlock(&ioapic_lock); - } - } } #ifdef CONFIG_INTR_REMAP -- cgit v1.2.3 From c29d9db338db606c3335a03f337e1d4b7f6bb727 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 1 Dec 2009 15:31:16 -0800 Subject: x86, ioapic: Fix the EOI register detection mechanism Maciej W. Rozycki reported: > 82093AA I/O APIC has its version set to 0x11 and it > does not support the EOI register. Similarly I/O APICs > integrated into the 82379AB south bridge and the 82374EB/SB > EISA component. IO-APIC versions below 0x20 don't support EOI register. Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic version as 0x2. This is an error with documentation and these ICH chips use io-apic's of version 0x20 and indeed has a working EOI register for the io-apic. Fix the EOI register detection mechanism to check for version 0x20 and beyond. And also, a platform can potentially have io-apic's with different versions. Make the EOI register check per io-apic. Reported-by: Maciej W. Rozycki Signed-off-by: Suresh Siddha Cc: ebiederm@xmission.com Cc: garyhade@us.ibm.com LKML-Reference: <20091201233335.065361533@sbs-t61.sc.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 115 ++++++++++++++++++++++------------------- 1 file changed, 61 insertions(+), 54 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index b377b973899e..78960a3b0ed0 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -539,23 +539,41 @@ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, add_pin_to_irq_node(cfg, node, newapic, newpin); } +static void __io_apic_modify_irq(struct irq_pin_list *entry, + int mask_and, int mask_or, + void (*final)(struct irq_pin_list *entry)) +{ + unsigned int reg, pin; + + pin = entry->pin; + reg = io_apic_read(entry->apic, 0x10 + pin * 2); + reg &= mask_and; + reg |= mask_or; + io_apic_modify(entry->apic, 0x10 + pin * 2, reg); + if (final) + final(entry); +} + static void io_apic_modify_irq(struct irq_cfg *cfg, int mask_and, int mask_or, void (*final)(struct irq_pin_list *entry)) { - int pin; struct irq_pin_list *entry; - for_each_irq_pin(entry, cfg->irq_2_pin) { - unsigned int reg; - pin = entry->pin; - reg = io_apic_read(entry->apic, 0x10 + pin * 2); - reg &= mask_and; - reg |= mask_or; - io_apic_modify(entry->apic, 0x10 + pin * 2, reg); - if (final) - final(entry); - } + for_each_irq_pin(entry, cfg->irq_2_pin) + __io_apic_modify_irq(entry, mask_and, mask_or, final); +} + +static void __mask_and_edge_IO_APIC_irq(struct irq_pin_list *entry) +{ + __io_apic_modify_irq(entry, ~IO_APIC_REDIR_LEVEL_TRIGGER, + IO_APIC_REDIR_MASKED, NULL); +} + +static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry) +{ + __io_apic_modify_irq(entry, ~IO_APIC_REDIR_MASKED, + IO_APIC_REDIR_LEVEL_TRIGGER, NULL); } static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) @@ -579,18 +597,6 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg) io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); } -static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) -{ - io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER, - IO_APIC_REDIR_MASKED, NULL); -} - -static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg) -{ - io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, - IO_APIC_REDIR_LEVEL_TRIGGER, NULL); -} - static void mask_IO_APIC_irq_desc(struct irq_desc *desc) { struct irq_cfg *cfg = desc->chip_data; @@ -2492,17 +2498,42 @@ static void ack_apic_edge(unsigned int irq) atomic_t irq_mis_count; -static int use_eoi_reg __read_mostly; - +/* + * IO-APIC versions below 0x20 don't support EOI register. + * For the record, here is the information about various versions: + * 0Xh 82489DX + * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant + * 2Xh I/O(x)APIC which is PCI 2.2 Compliant + * 30h-FFh Reserved + * + * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic + * version as 0x2. This is an error with documentation and these ICH chips + * use io-apic's of version 0x20. + * + * For IO-APIC's with EOI register, we use that to do an explicit EOI. + * Otherwise, we simulate the EOI message manually by changing the trigger + * mode to edge and then back to level, with RTE being masked during this. +*/ static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) { struct irq_pin_list *entry; for_each_irq_pin(entry, cfg->irq_2_pin) { - if (irq_remapped(irq)) - io_apic_eoi(entry->apic, entry->pin); - else - io_apic_eoi(entry->apic, cfg->vector); + if (mp_ioapics[entry->apic].apicver >= 0x20) { + /* + * Intr-remapping uses pin number as the virtual vector + * in the RTE. Actual vector is programmed in + * intr-remapping table entry. Hence for the io-apic + * EOI we use the pin number. + */ + if (irq_remapped(irq)) + io_apic_eoi(entry->apic, entry->pin); + else + io_apic_eoi(entry->apic, cfg->vector); + } else { + __mask_and_edge_IO_APIC_irq(entry); + __unmask_and_level_IO_APIC_irq(entry); + } } } @@ -2520,23 +2551,6 @@ static void eoi_ioapic_irq(struct irq_desc *desc) spin_unlock_irqrestore(&ioapic_lock, flags); } -static int ioapic_supports_eoi(void) -{ - struct pci_dev *root; - - root = pci_get_bus_and_slot(0, PCI_DEVFN(0, 0)); - if (root && root->vendor == PCI_VENDOR_ID_INTEL && - mp_ioapics[0].apicver >= 0x2) { - use_eoi_reg = 1; - printk(KERN_INFO "IO-APIC supports EOI register\n"); - } else - printk(KERN_INFO "IO-APIC doesn't support EOI\n"); - - return 0; -} - -fs_initcall(ioapic_supports_eoi); - static void ack_apic_level(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); @@ -2587,14 +2601,7 @@ static void ack_apic_level(unsigned int irq) if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); - if (use_eoi_reg) - eoi_ioapic_irq(desc); - else { - spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(cfg); - __unmask_and_level_IO_APIC_irq(cfg); - spin_unlock(&ioapic_lock); - } + eoi_ioapic_irq(desc); } /* Now we can move and renable the irq */ -- cgit v1.2.3 From 1c83995b6c7c6bb795bce80f75fbffb15f78db2d Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 1 Dec 2009 15:31:17 -0800 Subject: x86, ioapic: Document another case when level irq is seen as an edge In the case when cpu goes offline, fixup_irqs() will forward any unhandled interrupt on the offlined cpu to the new cpu destination that is handling the corresponding interrupt. This interrupt forwarding is done via IPI's. Hence, in this case also level-triggered io-apic interrupt will be seen as an edge interrupt in the cpu's APIC IRR. Document this scenario in the code which handles this case by doing an explicit EOI to the io-apic to clear remote IRR of the io-apic RTE. Requested-by: Maciej W. Rozycki Signed-off-by: Suresh Siddha Cc: Maciej W. Rozycki Cc: ebiederm@xmission.com Cc: garyhade@us.ibm.com LKML-Reference: <20091201233335.143970505@sbs-t61.sc.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 78960a3b0ed0..c0b4468683f9 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2586,6 +2586,19 @@ static void ack_apic_level(unsigned int irq) * level-triggered interrupt. We mask the source for the time of the * operation to prevent an edge-triggered interrupt escaping meanwhile. * The idea is from Manfred Spraul. --macro + * + * Also in the case when cpu goes offline, fixup_irqs() will forward + * any unhandled interrupt on the offlined cpu to the new cpu + * destination that is handling the corresponding interrupt. This + * interrupt forwarding is done via IPI's. Hence, in this case also + * level-triggered io-apic interrupt will be seen as an edge + * interrupt in the IRR. And we can't rely on the cpu's EOI + * to be broadcasted to the IO-APIC's which will clear the remoteIRR + * corresponding to the level-triggered interrupt. Hence on IO-APIC's + * supporting EOI register, we do an explicit EOI to clear the + * remote IRR and on IO-APIC's which don't have an EOI register, + * we use the above logic (mask+edge followed by unmask+level) from + * Manfred Spraul to clear the remote IRR. */ cfg = desc->chip_data; i = cfg->vector; @@ -2597,7 +2610,13 @@ static void ack_apic_level(unsigned int irq) */ ack_APIC_irq(); - /* Tail end of version 0x11 I/O APIC bug workaround */ + /* + * Tail end of clearing remote IRR bit (either by delivering the EOI + * message via io-apic EOI register write or simulating it using + * mask+edge followed by unnask+level logic) manually when the + * level triggered interrupt is seen as the edge triggered interrupt + * at the cpu. + */ if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); -- cgit v1.2.3 From 6d20792e85187b27ae3d1b76678a2dd7025e8bc2 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 1 Dec 2009 15:31:18 -0800 Subject: x86: Remove unnecessary mdelay() from cpu_disable_common() fixup_irqs() already has a mdelay(). Remove the extra and unnecessary mdelay() from cpu_disable_common(). Signed-off-by: Suresh Siddha Cc: Maciej W. Rozycki Cc: ebiederm@xmission.com Cc: garyhade@us.ibm.com LKML-Reference: <20091201233335.232177348@sbs-t61.sc.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 565ebc65920e..324f2a44c221 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1250,16 +1250,7 @@ static void __ref remove_cpu_from_maps(int cpu) void cpu_disable_common(void) { int cpu = smp_processor_id(); - /* - * HACK: - * Allow any queued timer interrupts to get serviced - * This is only a temporary solution until we cleanup - * fixup_irqs as we do for IA64. - */ - local_irq_enable(); - mdelay(1); - local_irq_disable(); remove_siblinginfo(cpu); /* It's now safe to remove this processor from the online map */ -- cgit v1.2.3 From e859cf8656043f158b4004ccc8cbbf1ba4f97177 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 30 Nov 2009 19:02:22 -0500 Subject: x86: Fix comments of register/stack access functions Fix typos and some redundant comments of register/stack access functions in asm/ptrace.h. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Frederic Weisbecker Cc: Roland McGrath Cc: Oleg Nesterov Cc: Wenji Huang Cc: Mahesh J Salgaonkar LKML-Reference: <20091201000222.7669.7477.stgit@harusame> Signed-off-by: Ingo Molnar Suggested-by: Wenji Huang --- arch/x86/include/asm/ptrace.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index a3d49dd7d26e..3d11fd0f44c5 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -227,8 +227,8 @@ extern const char *regs_query_register_name(unsigned int offset); * @regs: pt_regs from which register value is gotten. * @offset: offset number of the register. * - * regs_get_register returns the value of a register whose offset from @regs - * is @offset. The @offset is the offset of the register in struct pt_regs. + * regs_get_register returns the value of a register. The @offset is the + * offset of the register in struct pt_regs address which specified by @regs. * If @offset is bigger than MAX_REG_OFFSET, this returns 0. */ static inline unsigned long regs_get_register(struct pt_regs *regs, @@ -244,7 +244,7 @@ static inline unsigned long regs_get_register(struct pt_regs *regs, * @regs: pt_regs which contains kernel stack pointer. * @addr: address which is checked. * - * regs_within_kenel_stack() checks @addr is within the kernel stack page(s). + * regs_within_kernel_stack() checks @addr is within the kernel stack page(s). * If @addr is within the kernel stack, it returns true. If not, returns false. */ static inline int regs_within_kernel_stack(struct pt_regs *regs, @@ -260,7 +260,7 @@ static inline int regs_within_kernel_stack(struct pt_regs *regs, * @n: stack entry number. * * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which - * is specifined by @regs. If the @n th entry is NOT in the kernel stack, + * is specified by @regs. If the @n th entry is NOT in the kernel stack, * this returns 0. */ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, -- cgit v1.2.3 From 01be50a308be466e122c3a8b3d535f1b673ecbd2 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 27 Nov 2009 15:04:58 +0000 Subject: x86/alternatives: Check replacementlen <= instrlen at build time Having run into the run-(boot-)time check a couple of times lately, I finally took time to find a build-time check so that one doesn't need to analyze the register/stack dump and resolve this (through manual lookup in vmlinux) to the offending construct. The assembler will emit a message like "Error: value of too large for field of 1 bytes at ", which while not pointing out the source location still makes analysis quite a bit easier. Signed-off-by: Jan Beulich LKML-Reference: <4B0FF8AA0200007800022703@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/alternative.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index c240efc74e00..69b74a7b877f 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -84,6 +84,7 @@ static inline void alternatives_smp_switch(int smp) {} " .byte " __stringify(feature) "\n" /* feature bit */ \ " .byte 662b-661b\n" /* sourcelen */ \ " .byte 664f-663f\n" /* replacementlen */ \ + " .byte 0xff + (664f-663f) - (662b-661b)\n" /* rlen <= slen */ \ ".previous\n" \ ".section .altinstr_replacement, \"ax\"\n" \ "663:\n\t" newinstr "\n664:\n" /* replacement */ \ -- cgit v1.2.3 From 99063c0bcebcc913165a5d168050326eba3e0996 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 27 Nov 2009 15:06:16 +0000 Subject: x86/alternatives: No need for alternatives-asm.h to re-invent stuff already in asm.h This at once also gets the alignment specification right for x86-64. Signed-off-by: Jan Beulich LKML-Reference: <4B0FF8F80200007800022708@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/alternative-asm.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index e2077d343c33..b97f786a48d5 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -1,17 +1,13 @@ #ifdef __ASSEMBLY__ -#ifdef CONFIG_X86_32 -# define X86_ALIGN .long -#else -# define X86_ALIGN .quad -#endif +#include #ifdef CONFIG_SMP .macro LOCK_PREFIX 1: lock .section .smp_locks,"a" - .align 4 - X86_ALIGN 1b + _ASM_ALIGN + _ASM_PTR 1b .previous .endm #else -- cgit v1.2.3 From fe5ed91ddce85a0ed0e4f92c10b099873ef62167 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 3 Dec 2009 11:33:08 +0900 Subject: x86, mce: don't restart timer if disabled Even it is in error path unlikely taken, add_timer_on() at CPU_DOWN_FAILED* needs to be skipped if mce_timer is disabled. Signed-off-by: Hidetoshi Seto Cc: Andi Kleen Cc: Huang Ying Cc: Jan Beulich Cc: Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 721a77ca8115..4825a3d6eb40 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1991,9 +1991,11 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) break; case CPU_DOWN_FAILED: case CPU_DOWN_FAILED_FROZEN: - t->expires = round_jiffies(jiffies + + if (!mce_ignore_ce && check_interval) { + t->expires = round_jiffies(jiffies + __get_cpu_var(mce_next_interval)); - add_timer_on(t, cpu); + add_timer_on(t, cpu); + } smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); break; case CPU_POST_DEAD: -- cgit v1.2.3 From 0934ac9d135021bec7f877340a039104af233bf3 Mon Sep 17 00:00:00 2001 From: Mohammed Gamal Date: Sun, 23 Aug 2009 14:24:24 +0300 Subject: KVM: x86 emulator: Add 'push/pop sreg' instructions [avi: avoid buffer overflow] Signed-off-by: Mohammed Gamal Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 107 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 101 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 1be5cd640e93..1cdfec5231d0 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -92,19 +92,22 @@ static u32 opcode_table[256] = { /* 0x00 - 0x07 */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, - ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0, + ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, + ImplicitOps | Stack, ImplicitOps | Stack, /* 0x08 - 0x0F */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, - 0, 0, 0, 0, + 0, 0, ImplicitOps | Stack, 0, /* 0x10 - 0x17 */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, - ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0, + ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, + ImplicitOps | Stack, ImplicitOps | Stack, /* 0x18 - 0x1F */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, - ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0, + ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, + ImplicitOps | Stack, ImplicitOps | Stack, /* 0x20 - 0x27 */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, @@ -244,11 +247,13 @@ static u32 twobyte_table[256] = { /* 0x90 - 0x9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xA0 - 0xA7 */ - 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, + ImplicitOps | Stack, ImplicitOps | Stack, + 0, DstMem | SrcReg | ModRM | BitOp, DstMem | SrcReg | Src2ImmByte | ModRM, DstMem | SrcReg | Src2CL | ModRM, 0, 0, /* 0xA8 - 0xAF */ - 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, + ImplicitOps | Stack, ImplicitOps | Stack, + 0, DstMem | SrcReg | ModRM | BitOp, DstMem | SrcReg | Src2ImmByte | ModRM, DstMem | SrcReg | Src2CL | ModRM, ModRM, 0, @@ -1186,6 +1191,32 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, return rc; } +static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg) +{ + struct decode_cache *c = &ctxt->decode; + struct kvm_segment segment; + + kvm_x86_ops->get_segment(ctxt->vcpu, &segment, seg); + + c->src.val = segment.selector; + emulate_push(ctxt); +} + +static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops, int seg) +{ + struct decode_cache *c = &ctxt->decode; + unsigned long selector; + int rc; + + rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); + if (rc != 0) + return rc; + + rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, 1, seg); + return rc; +} + static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { @@ -1707,18 +1738,66 @@ special_insn: add: /* add */ emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); break; + case 0x06: /* push es */ + if (ctxt->mode == X86EMUL_MODE_PROT64) + goto cannot_emulate; + + emulate_push_sreg(ctxt, VCPU_SREG_ES); + break; + case 0x07: /* pop es */ + if (ctxt->mode == X86EMUL_MODE_PROT64) + goto cannot_emulate; + + rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); + if (rc != 0) + goto done; + break; case 0x08 ... 0x0d: or: /* or */ emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); break; + case 0x0e: /* push cs */ + if (ctxt->mode == X86EMUL_MODE_PROT64) + goto cannot_emulate; + + emulate_push_sreg(ctxt, VCPU_SREG_CS); + break; case 0x10 ... 0x15: adc: /* adc */ emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); break; + case 0x16: /* push ss */ + if (ctxt->mode == X86EMUL_MODE_PROT64) + goto cannot_emulate; + + emulate_push_sreg(ctxt, VCPU_SREG_SS); + break; + case 0x17: /* pop ss */ + if (ctxt->mode == X86EMUL_MODE_PROT64) + goto cannot_emulate; + + rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); + if (rc != 0) + goto done; + break; case 0x18 ... 0x1d: sbb: /* sbb */ emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); break; + case 0x1e: /* push ds */ + if (ctxt->mode == X86EMUL_MODE_PROT64) + goto cannot_emulate; + + emulate_push_sreg(ctxt, VCPU_SREG_DS); + break; + case 0x1f: /* pop ds */ + if (ctxt->mode == X86EMUL_MODE_PROT64) + goto cannot_emulate; + + rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); + if (rc != 0) + goto done; + break; case 0x20 ... 0x25: and: /* and */ emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); @@ -2297,6 +2376,14 @@ twobyte_insn: jmp_rel(c, c->src.val); c->dst.type = OP_NONE; break; + case 0xa0: /* push fs */ + emulate_push_sreg(ctxt, VCPU_SREG_FS); + break; + case 0xa1: /* pop fs */ + rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); + if (rc != 0) + goto done; + break; case 0xa3: bt: /* bt */ c->dst.type = OP_NONE; @@ -2308,6 +2395,14 @@ twobyte_insn: case 0xa5: /* shld cl, r, r/m */ emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); break; + case 0xa8: /* push gs */ + emulate_push_sreg(ctxt, VCPU_SREG_GS); + break; + case 0xa9: /* pop gs */ + rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); + if (rc != 0) + goto done; + break; case 0xab: bts: /* bts */ /* only subword offset */ -- cgit v1.2.3 From d8769fedd4e8323d8afea9a1b2bdebff4f1d2d37 Mon Sep 17 00:00:00 2001 From: Mohammed Gamal Date: Sun, 23 Aug 2009 14:24:25 +0300 Subject: KVM: x86 emulator: Introduce No64 decode option Introduces a new decode option "No64", which is used for instructions that are invalid in long mode. Signed-off-by: Mohammed Gamal Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 42 ++++++++++++++---------------------------- 1 file changed, 14 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 1cdfec5231d0..1f0ff4afa73e 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -75,6 +75,8 @@ #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ #define GroupMask 0xff /* Group number stored in bits 0:7 */ +/* Misc flags */ +#define No64 (1<<28) /* Source 2 operand type */ #define Src2None (0<<29) #define Src2CL (1<<29) @@ -93,21 +95,21 @@ static u32 opcode_table[256] = { ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, - ImplicitOps | Stack, ImplicitOps | Stack, + ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, /* 0x08 - 0x0F */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, - 0, 0, ImplicitOps | Stack, 0, + 0, 0, ImplicitOps | Stack | No64, 0, /* 0x10 - 0x17 */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, - ImplicitOps | Stack, ImplicitOps | Stack, + ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, /* 0x18 - 0x1F */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, - ImplicitOps | Stack, ImplicitOps | Stack, + ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, /* 0x20 - 0x27 */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, @@ -161,7 +163,7 @@ static u32 opcode_table[256] = { /* 0x90 - 0x97 */ DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, /* 0x98 - 0x9F */ - 0, 0, SrcImm | Src2Imm16, 0, + 0, 0, SrcImm | Src2Imm16 | No64, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, /* 0xA0 - 0xA7 */ ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, @@ -188,7 +190,7 @@ static u32 opcode_table[256] = { ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov, /* 0xC8 - 0xCF */ 0, 0, 0, ImplicitOps | Stack, - ImplicitOps, SrcImmByte, ImplicitOps, ImplicitOps, + ImplicitOps, SrcImmByte, ImplicitOps | No64, ImplicitOps, /* 0xD0 - 0xD7 */ ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, @@ -201,7 +203,7 @@ static u32 opcode_table[256] = { ByteOp | SrcImmUByte, SrcImmUByte, /* 0xE8 - 0xEF */ SrcImm | Stack, SrcImm | ImplicitOps, - SrcImmU | Src2Imm16, SrcImmByte | ImplicitOps, + SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps, SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* 0xF0 - 0xF7 */ @@ -967,6 +969,11 @@ done_prefixes: } } + if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { + kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction");; + return -1; + } + if (c->d & Group) { group = c->d & GroupMask; c->modrm = insn_fetch(u8, 1, c->eip); @@ -1739,15 +1746,9 @@ special_insn: emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); break; case 0x06: /* push es */ - if (ctxt->mode == X86EMUL_MODE_PROT64) - goto cannot_emulate; - emulate_push_sreg(ctxt, VCPU_SREG_ES); break; case 0x07: /* pop es */ - if (ctxt->mode == X86EMUL_MODE_PROT64) - goto cannot_emulate; - rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); if (rc != 0) goto done; @@ -1757,9 +1758,6 @@ special_insn: emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); break; case 0x0e: /* push cs */ - if (ctxt->mode == X86EMUL_MODE_PROT64) - goto cannot_emulate; - emulate_push_sreg(ctxt, VCPU_SREG_CS); break; case 0x10 ... 0x15: @@ -1767,15 +1765,9 @@ special_insn: emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); break; case 0x16: /* push ss */ - if (ctxt->mode == X86EMUL_MODE_PROT64) - goto cannot_emulate; - emulate_push_sreg(ctxt, VCPU_SREG_SS); break; case 0x17: /* pop ss */ - if (ctxt->mode == X86EMUL_MODE_PROT64) - goto cannot_emulate; - rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); if (rc != 0) goto done; @@ -1785,15 +1777,9 @@ special_insn: emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); break; case 0x1e: /* push ds */ - if (ctxt->mode == X86EMUL_MODE_PROT64) - goto cannot_emulate; - emulate_push_sreg(ctxt, VCPU_SREG_DS); break; case 0x1f: /* pop ds */ - if (ctxt->mode == X86EMUL_MODE_PROT64) - goto cannot_emulate; - rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); if (rc != 0) goto done; -- cgit v1.2.3 From 851ba6922ac575b749f63dee0ae072808163ba6a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 24 Aug 2009 11:10:17 +0300 Subject: KVM: Don't pass kvm_run arguments They're just copies of vcpu->run, which is readily accessible. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 10 ++-- arch/x86/kvm/emulate.c | 6 +-- arch/x86/kvm/mmu.c | 2 +- arch/x86/kvm/svm.c | 102 +++++++++++++++++++----------------- arch/x86/kvm/vmx.c | 113 +++++++++++++++++++--------------------- arch/x86/kvm/x86.c | 50 +++++++++--------- 6 files changed, 141 insertions(+), 142 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d83892226f73..0b113f2b58cf 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -506,8 +506,8 @@ struct kvm_x86_ops { void (*tlb_flush)(struct kvm_vcpu *vcpu); - void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); - int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu); + void (*run)(struct kvm_vcpu *vcpu); + int (*handle_exit)(struct kvm_vcpu *vcpu); void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); @@ -568,7 +568,7 @@ enum emulation_result { #define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) -int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run, +int emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, u16 error_code, int emulation_type); void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); @@ -585,9 +585,9 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); struct x86_emulate_ctxt; -int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, +int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port); -int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, +int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, int size, unsigned long count, int down, gva_t address, int rep, unsigned port); void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 1f0ff4afa73e..0644d3df621a 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1826,7 +1826,7 @@ special_insn: break; case 0x6c: /* insb */ case 0x6d: /* insw/insd */ - if (kvm_emulate_pio_string(ctxt->vcpu, NULL, + if (kvm_emulate_pio_string(ctxt->vcpu, 1, (c->d & ByteOp) ? 1 : c->op_bytes, c->rep_prefix ? @@ -1842,7 +1842,7 @@ special_insn: return 0; case 0x6e: /* outsb */ case 0x6f: /* outsw/outsd */ - if (kvm_emulate_pio_string(ctxt->vcpu, NULL, + if (kvm_emulate_pio_string(ctxt->vcpu, 0, (c->d & ByteOp) ? 1 : c->op_bytes, c->rep_prefix ? @@ -2135,7 +2135,7 @@ special_insn: case 0xef: /* out (e/r)ax,dx */ port = c->regs[VCPU_REGS_RDX]; io_dir_in = 0; - do_io: if (kvm_emulate_pio(ctxt->vcpu, NULL, io_dir_in, + do_io: if (kvm_emulate_pio(ctxt->vcpu, io_dir_in, (c->d & ByteOp) ? 1 : c->op_bytes, port) != 0) { c->eip = saved_eip; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 818b92ad82cf..a9024797b21f 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2789,7 +2789,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) if (r) goto out; - er = emulate_instruction(vcpu, vcpu->run, cr2, error_code, 0); + er = emulate_instruction(vcpu, cr2, error_code, 0); switch (er) { case EMULATE_DONE: diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c17404add91f..92048a626d4e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -286,7 +286,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); if (!svm->next_rip) { - if (emulate_instruction(vcpu, vcpu->run, 0, 0, EMULTYPE_SKIP) != + if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) != EMULATE_DONE) printk(KERN_DEBUG "%s: NOP\n", __func__); return; @@ -1180,7 +1180,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, } } -static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int pf_interception(struct vcpu_svm *svm) { u64 fault_address; u32 error_code; @@ -1194,8 +1194,10 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); } -static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int db_interception(struct vcpu_svm *svm) { + struct kvm_run *kvm_run = svm->vcpu.run; + if (!(svm->vcpu.guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && !svm->vcpu.arch.singlestep) { @@ -1223,25 +1225,27 @@ static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int bp_interception(struct vcpu_svm *svm) { + struct kvm_run *kvm_run = svm->vcpu.run; + kvm_run->exit_reason = KVM_EXIT_DEBUG; kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; kvm_run->debug.arch.exception = BP_VECTOR; return 0; } -static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int ud_interception(struct vcpu_svm *svm) { int er; - er = emulate_instruction(&svm->vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD); + er = emulate_instruction(&svm->vcpu, 0, 0, EMULTYPE_TRAP_UD); if (er != EMULATE_DONE) kvm_queue_exception(&svm->vcpu, UD_VECTOR); return 1; } -static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int nm_interception(struct vcpu_svm *svm) { svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); if (!(svm->vcpu.arch.cr0 & X86_CR0_TS)) @@ -1251,7 +1255,7 @@ static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int mc_interception(struct vcpu_svm *svm) { /* * On an #MC intercept the MCE handler is not called automatically in @@ -1264,8 +1268,10 @@ static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int shutdown_interception(struct vcpu_svm *svm) { + struct kvm_run *kvm_run = svm->vcpu.run; + /* * VMCB is undefined after a SHUTDOWN intercept * so reinitialize it. @@ -1277,7 +1283,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 0; } -static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int io_interception(struct vcpu_svm *svm) { u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ int size, in, string; @@ -1291,7 +1297,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) if (string) { if (emulate_instruction(&svm->vcpu, - kvm_run, 0, 0, 0) == EMULATE_DO_MMIO) + 0, 0, 0) == EMULATE_DO_MMIO) return 0; return 1; } @@ -1301,33 +1307,33 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; skip_emulated_instruction(&svm->vcpu); - return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); + return kvm_emulate_pio(&svm->vcpu, in, size, port); } -static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int nmi_interception(struct vcpu_svm *svm) { return 1; } -static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int intr_interception(struct vcpu_svm *svm) { ++svm->vcpu.stat.irq_exits; return 1; } -static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int nop_on_interception(struct vcpu_svm *svm) { return 1; } -static int halt_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int halt_interception(struct vcpu_svm *svm) { svm->next_rip = kvm_rip_read(&svm->vcpu) + 1; skip_emulated_instruction(&svm->vcpu); return kvm_emulate_halt(&svm->vcpu); } -static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int vmmcall_interception(struct vcpu_svm *svm) { svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; skip_emulated_instruction(&svm->vcpu); @@ -1837,7 +1843,7 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip; } -static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int vmload_interception(struct vcpu_svm *svm) { struct vmcb *nested_vmcb; @@ -1857,7 +1863,7 @@ static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int vmsave_interception(struct vcpu_svm *svm) { struct vmcb *nested_vmcb; @@ -1877,7 +1883,7 @@ static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int vmrun_interception(struct vcpu_svm *svm) { nsvm_printk("VMrun\n"); @@ -1907,7 +1913,7 @@ failed: return 1; } -static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int stgi_interception(struct vcpu_svm *svm) { if (nested_svm_check_permissions(svm)) return 1; @@ -1920,7 +1926,7 @@ static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int clgi_interception(struct vcpu_svm *svm) { if (nested_svm_check_permissions(svm)) return 1; @@ -1937,7 +1943,7 @@ static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int invlpga_interception(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; nsvm_printk("INVLPGA\n"); @@ -1950,15 +1956,13 @@ static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int invalid_op_interception(struct vcpu_svm *svm, - struct kvm_run *kvm_run) +static int invalid_op_interception(struct vcpu_svm *svm) { kvm_queue_exception(&svm->vcpu, UD_VECTOR); return 1; } -static int task_switch_interception(struct vcpu_svm *svm, - struct kvm_run *kvm_run) +static int task_switch_interception(struct vcpu_svm *svm) { u16 tss_selector; int reason; @@ -2008,14 +2012,14 @@ static int task_switch_interception(struct vcpu_svm *svm, return kvm_task_switch(&svm->vcpu, tss_selector, reason); } -static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int cpuid_interception(struct vcpu_svm *svm) { svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; kvm_emulate_cpuid(&svm->vcpu); return 1; } -static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int iret_interception(struct vcpu_svm *svm) { ++svm->vcpu.stat.nmi_window_exits; svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); @@ -2023,26 +2027,27 @@ static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int invlpg_interception(struct vcpu_svm *svm) { - if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE) + if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE) pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); return 1; } -static int emulate_on_interception(struct vcpu_svm *svm, - struct kvm_run *kvm_run) +static int emulate_on_interception(struct vcpu_svm *svm) { - if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE) + if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE) pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); return 1; } -static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int cr8_write_interception(struct vcpu_svm *svm) { + struct kvm_run *kvm_run = svm->vcpu.run; + u8 cr8_prev = kvm_get_cr8(&svm->vcpu); /* instruction emulation calls kvm_set_cr8() */ - emulate_instruction(&svm->vcpu, NULL, 0, 0, 0); + emulate_instruction(&svm->vcpu, 0, 0, 0); if (irqchip_in_kernel(svm->vcpu.kvm)) { svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; return 1; @@ -2128,7 +2133,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) return 0; } -static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int rdmsr_interception(struct vcpu_svm *svm) { u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; u64 data; @@ -2221,7 +2226,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) return 0; } -static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int wrmsr_interception(struct vcpu_svm *svm) { u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) @@ -2237,17 +2242,18 @@ static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } -static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int msr_interception(struct vcpu_svm *svm) { if (svm->vmcb->control.exit_info_1) - return wrmsr_interception(svm, kvm_run); + return wrmsr_interception(svm); else - return rdmsr_interception(svm, kvm_run); + return rdmsr_interception(svm); } -static int interrupt_window_interception(struct vcpu_svm *svm, - struct kvm_run *kvm_run) +static int interrupt_window_interception(struct vcpu_svm *svm) { + struct kvm_run *kvm_run = svm->vcpu.run; + svm_clear_vintr(svm); svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; /* @@ -2265,8 +2271,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm, return 1; } -static int (*svm_exit_handlers[])(struct vcpu_svm *svm, - struct kvm_run *kvm_run) = { +static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_READ_CR0] = emulate_on_interception, [SVM_EXIT_READ_CR3] = emulate_on_interception, [SVM_EXIT_READ_CR4] = emulate_on_interception, @@ -2321,9 +2326,10 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, [SVM_EXIT_NPF] = pf_interception, }; -static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +static int handle_exit(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + struct kvm_run *kvm_run = vcpu->run; u32 exit_code = svm->vmcb->control.exit_code; trace_kvm_exit(exit_code, svm->vmcb->save.rip); @@ -2383,7 +2389,7 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) return 0; } - return svm_exit_handlers[exit_code](svm, kvm_run); + return svm_exit_handlers[exit_code](svm); } static void reload_tss(struct kvm_vcpu *vcpu) @@ -2588,7 +2594,7 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) #define R "e" #endif -static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static void svm_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); u16 fs_selector; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ed53b42caba1..4635298d000a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2659,7 +2659,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, * Cause the #SS fault with 0 error code in VM86 mode. */ if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) - if (emulate_instruction(vcpu, NULL, 0, 0, 0) == EMULATE_DONE) + if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE) return 1; /* * Forward all other exceptions that are valid in real mode. @@ -2710,15 +2710,16 @@ static void kvm_machine_check(void) #endif } -static int handle_machine_check(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int handle_machine_check(struct kvm_vcpu *vcpu) { /* already handled by vcpu_run */ return 1; } -static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int handle_exception(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + struct kvm_run *kvm_run = vcpu->run; u32 intr_info, ex_no, error_code; unsigned long cr2, rip, dr6; u32 vect_info; @@ -2728,7 +2729,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) intr_info = vmcs_read32(VM_EXIT_INTR_INFO); if (is_machine_check(intr_info)) - return handle_machine_check(vcpu, kvm_run); + return handle_machine_check(vcpu); if ((vect_info & VECTORING_INFO_VALID_MASK) && !is_page_fault(intr_info)) @@ -2744,7 +2745,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } if (is_invalid_opcode(intr_info)) { - er = emulate_instruction(vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD); + er = emulate_instruction(vcpu, 0, 0, EMULTYPE_TRAP_UD); if (er != EMULATE_DONE) kvm_queue_exception(vcpu, UD_VECTOR); return 1; @@ -2803,20 +2804,19 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 0; } -static int handle_external_interrupt(struct kvm_vcpu *vcpu, - struct kvm_run *kvm_run) +static int handle_external_interrupt(struct kvm_vcpu *vcpu) { ++vcpu->stat.irq_exits; return 1; } -static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int handle_triple_fault(struct kvm_vcpu *vcpu) { - kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; + vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; return 0; } -static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int handle_io(struct kvm_vcpu *vcpu) { unsigned long exit_qualification; int size, in, string; @@ -2827,8 +2827,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) string = (exit_qualification & 16) != 0; if (string) { - if (emulate_instruction(vcpu, - kvm_run, 0, 0, 0) == EMULATE_DO_MMIO) + if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO) return 0; return 1; } @@ -2838,7 +2837,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) port = exit_qualification >> 16; skip_emulated_instruction(vcpu); - return kvm_emulate_pio(vcpu, kvm_run, in, size, port); + return kvm_emulate_pio(vcpu, in, size, port); } static void @@ -2852,7 +2851,7 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) hypercall[2] = 0xc1; } -static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int handle_cr(struct kvm_vcpu *vcpu) { unsigned long exit_qualification, val; int cr; @@ -2887,7 +2886,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; if (cr8_prev <= cr8) return 1; - kvm_run->exit_reason = KVM_EXIT_SET_TPR; + vcpu->run->exit_reason = KVM_EXIT_SET_TPR; return 0; } }; @@ -2922,13 +2921,13 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) default: break; } - kvm_run->exit_reason = 0; + vcpu->run->exit_reason = 0; pr_unimpl(vcpu, "unhandled control register: op %d cr %d\n", (int)(exit_qualification >> 4) & 3, cr); return 0; } -static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int handle_dr(struct kvm_vcpu *vcpu) { unsigned long exit_qualification; unsigned long val; @@ -2944,13 +2943,13 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) * guest debugging itself. */ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { - kvm_run->debug.arch.dr6 = vcpu->arch.dr6; - kvm_run->debug.arch.dr7 = dr; - kvm_run->debug.arch.pc = + vcpu->run->debug.arch.dr6 = vcpu->arch.dr6; + vcpu->run->debug.arch.dr7 = dr; + vcpu->run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + vmcs_readl(GUEST_RIP); - kvm_run->debug.arch.exception = DB_VECTOR; - kvm_run->exit_reason = KVM_EXIT_DEBUG; + vcpu->run->debug.arch.exception = DB_VECTOR; + vcpu->run->exit_reason = KVM_EXIT_DEBUG; return 0; } else { vcpu->arch.dr7 &= ~DR7_GD; @@ -3016,13 +3015,13 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; } -static int handle_cpuid(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int handle_cpuid(struct kvm_vcpu *vcpu) { kvm_emulate_cpuid(vcpu); return 1; } -static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int handle_rdmsr(struct kvm_vcpu *vcpu) { u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; u64 data; @@ -3041,7 +3040,7 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; } -static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int handle_wrmsr(struct kvm_vcpu *vcpu) { u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) @@ -3058,14 +3057,12 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; } -static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu, - struct kvm_run *kvm_run) +static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) { return 1; } -static int handle_interrupt_window(struct kvm_vcpu *vcpu, - struct kvm_run *kvm_run) +static int handle_interrupt_window(struct kvm_vcpu *vcpu) { u32 cpu_based_vm_exec_control; @@ -3081,34 +3078,34 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, * possible */ if (!irqchip_in_kernel(vcpu->kvm) && - kvm_run->request_interrupt_window && + vcpu->run->request_interrupt_window && !kvm_cpu_has_interrupt(vcpu)) { - kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; + vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; return 0; } return 1; } -static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int handle_halt(struct kvm_vcpu *vcpu) { skip_emulated_instruction(vcpu); return kvm_emulate_halt(vcpu); } -static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int handle_vmcall(struct kvm_vcpu *vcpu) { skip_emulated_instruction(vcpu); kvm_emulate_hypercall(vcpu); return 1; } -static int handle_vmx_insn(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int handle_vmx_insn(struct kvm_vcpu *vcpu) { kvm_queue_exception(vcpu, UD_VECTOR); return 1; } -static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int handle_invlpg(struct kvm_vcpu *vcpu) { unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); @@ -3117,14 +3114,14 @@ static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; } -static int handle_wbinvd(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int handle_wbinvd(struct kvm_vcpu *vcpu) { skip_emulated_instruction(vcpu); /* TODO: Add support for VT-d/pass-through device */ return 1; } -static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int handle_apic_access(struct kvm_vcpu *vcpu) { unsigned long exit_qualification; enum emulation_result er; @@ -3133,7 +3130,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) exit_qualification = vmcs_readl(EXIT_QUALIFICATION); offset = exit_qualification & 0xffful; - er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); + er = emulate_instruction(vcpu, 0, 0, 0); if (er != EMULATE_DONE) { printk(KERN_ERR @@ -3144,7 +3141,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; } -static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int handle_task_switch(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long exit_qualification; @@ -3198,7 +3195,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; } -static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int handle_ept_violation(struct kvm_vcpu *vcpu) { unsigned long exit_qualification; gpa_t gpa; @@ -3219,8 +3216,8 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vmcs_readl(GUEST_LINEAR_ADDRESS)); printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", (long unsigned int)exit_qualification); - kvm_run->exit_reason = KVM_EXIT_UNKNOWN; - kvm_run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION; + vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; + vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION; return 0; } @@ -3290,7 +3287,7 @@ static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte, } } -static int handle_ept_misconfig(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int handle_ept_misconfig(struct kvm_vcpu *vcpu) { u64 sptes[4]; int nr_sptes, i; @@ -3306,13 +3303,13 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) for (i = PT64_ROOT_LEVEL; i > PT64_ROOT_LEVEL - nr_sptes; --i) ept_misconfig_inspect_spte(vcpu, sptes[i-1], i); - kvm_run->exit_reason = KVM_EXIT_UNKNOWN; - kvm_run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG; + vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; + vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG; return 0; } -static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int handle_nmi_window(struct kvm_vcpu *vcpu) { u32 cpu_based_vm_exec_control; @@ -3325,8 +3322,7 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; } -static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, - struct kvm_run *kvm_run) +static void handle_invalid_guest_state(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); enum emulation_result err = EMULATE_DONE; @@ -3335,7 +3331,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, preempt_enable(); while (!guest_state_valid(vcpu)) { - err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); + err = emulate_instruction(vcpu, 0, 0, 0); if (err == EMULATE_DO_MMIO) break; @@ -3362,8 +3358,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, * may resume. Otherwise they set the kvm_run parameter to indicate what needs * to be done to userspace and return 0. */ -static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, - struct kvm_run *kvm_run) = { +static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_EXCEPTION_NMI] = handle_exception, [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, @@ -3403,7 +3398,7 @@ static const int kvm_vmx_max_exit_handlers = * The guest has exited. See if we can fix it or if we need userspace * assistance. */ -static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +static int vmx_handle_exit(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); u32 exit_reason = vmx->exit_reason; @@ -3425,8 +3420,8 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); if (unlikely(vmx->fail)) { - kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; - kvm_run->fail_entry.hardware_entry_failure_reason + vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; + vcpu->run->fail_entry.hardware_entry_failure_reason = vmcs_read32(VM_INSTRUCTION_ERROR); return 0; } @@ -3459,10 +3454,10 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if (exit_reason < kvm_vmx_max_exit_handlers && kvm_vmx_exit_handlers[exit_reason]) - return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); + return kvm_vmx_exit_handlers[exit_reason](vcpu); else { - kvm_run->exit_reason = KVM_EXIT_UNKNOWN; - kvm_run->hw.hardware_exit_reason = exit_reason; + vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; + vcpu->run->hw.hardware_exit_reason = exit_reason; } return 0; } @@ -3600,7 +3595,7 @@ static void fixup_rmode_irq(struct vcpu_vmx *vmx) #define Q "l" #endif -static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static void vmx_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -3614,7 +3609,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) /* Handle invalid guest state instead of entering VMX */ if (vmx->emulation_required && emulate_invalid_guest_state) { - handle_invalid_guest_state(vcpu, kvm_run); + handle_invalid_guest_state(vcpu); return; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ae07d261527c..1687d12b122a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2757,13 +2757,13 @@ static void cache_all_regs(struct kvm_vcpu *vcpu) } int emulate_instruction(struct kvm_vcpu *vcpu, - struct kvm_run *run, unsigned long cr2, u16 error_code, int emulation_type) { int r, shadow_mask; struct decode_cache *c; + struct kvm_run *run = vcpu->run; kvm_clear_exception_queue(vcpu); vcpu->arch.mmio_fault_cr2 = cr2; @@ -2969,8 +2969,7 @@ static int pio_string_write(struct kvm_vcpu *vcpu) return r; } -int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, - int size, unsigned port) +int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port) { unsigned long val; @@ -2999,7 +2998,7 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, } EXPORT_SYMBOL_GPL(kvm_emulate_pio); -int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, +int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, int size, unsigned long count, int down, gva_t address, int rep, unsigned port) { @@ -3453,17 +3452,17 @@ EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); * * No need to exit to userspace if we already have an interrupt queued. */ -static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, - struct kvm_run *kvm_run) +static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu) { return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) && - kvm_run->request_interrupt_window && + vcpu->run->request_interrupt_window && kvm_arch_interrupt_allowed(vcpu)); } -static void post_kvm_run_save(struct kvm_vcpu *vcpu, - struct kvm_run *kvm_run) +static void post_kvm_run_save(struct kvm_vcpu *vcpu) { + struct kvm_run *kvm_run = vcpu->run; + kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0; kvm_run->cr8 = kvm_get_cr8(vcpu); kvm_run->apic_base = kvm_get_apic_base(vcpu); @@ -3525,7 +3524,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); } -static void inject_pending_event(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static void inject_pending_event(struct kvm_vcpu *vcpu) { /* try to reinject previous events if any */ if (vcpu->arch.exception.pending) { @@ -3561,11 +3560,11 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } } -static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int vcpu_enter_guest(struct kvm_vcpu *vcpu) { int r; bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && - kvm_run->request_interrupt_window; + vcpu->run->request_interrupt_window; if (vcpu->requests) if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) @@ -3586,12 +3585,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_x86_ops->tlb_flush(vcpu); if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, &vcpu->requests)) { - kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS; + vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; r = 0; goto out; } if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) { - kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; + vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; r = 0; goto out; } @@ -3615,7 +3614,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) goto out; } - inject_pending_event(vcpu, kvm_run); + inject_pending_event(vcpu); /* enable NMI/IRQ window open exits if needed */ if (vcpu->arch.nmi_pending) @@ -3641,7 +3640,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } trace_kvm_entry(vcpu->vcpu_id); - kvm_x86_ops->run(vcpu, kvm_run); + kvm_x86_ops->run(vcpu); if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) { set_debugreg(current->thread.debugreg0, 0); @@ -3682,13 +3681,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_lapic_sync_from_vapic(vcpu); - r = kvm_x86_ops->handle_exit(kvm_run, vcpu); + r = kvm_x86_ops->handle_exit(vcpu); out: return r; } -static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int __vcpu_run(struct kvm_vcpu *vcpu) { int r; @@ -3708,7 +3707,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) r = 1; while (r > 0) { if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) - r = vcpu_enter_guest(vcpu, kvm_run); + r = vcpu_enter_guest(vcpu); else { up_read(&vcpu->kvm->slots_lock); kvm_vcpu_block(vcpu); @@ -3736,14 +3735,14 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (kvm_cpu_has_pending_timer(vcpu)) kvm_inject_pending_timer_irqs(vcpu); - if (dm_request_for_irq_injection(vcpu, kvm_run)) { + if (dm_request_for_irq_injection(vcpu)) { r = -EINTR; - kvm_run->exit_reason = KVM_EXIT_INTR; + vcpu->run->exit_reason = KVM_EXIT_INTR; ++vcpu->stat.request_irq_exits; } if (signal_pending(current)) { r = -EINTR; - kvm_run->exit_reason = KVM_EXIT_INTR; + vcpu->run->exit_reason = KVM_EXIT_INTR; ++vcpu->stat.signal_exits; } if (need_resched()) { @@ -3754,7 +3753,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } up_read(&vcpu->kvm->slots_lock); - post_kvm_run_save(vcpu, kvm_run); + post_kvm_run_save(vcpu); vapic_exit(vcpu); @@ -3794,8 +3793,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu->mmio_needed = 0; down_read(&vcpu->kvm->slots_lock); - r = emulate_instruction(vcpu, kvm_run, - vcpu->arch.mmio_fault_cr2, 0, + r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, EMULTYPE_NO_DECODE); up_read(&vcpu->kvm->slots_lock); if (r == EMULATE_DO_MMIO) { @@ -3811,7 +3809,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_register_write(vcpu, VCPU_REGS_RAX, kvm_run->hypercall.ret); - r = __vcpu_run(vcpu, kvm_run); + r = __vcpu_run(vcpu); out: if (vcpu->sigset_active) -- cgit v1.2.3 From 79c727d4371aa9af47b0cdbcad53742b5a7919ea Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 24 Aug 2009 11:54:18 +0300 Subject: KVM: Call pic_clear_isr() on pic reset to reuse logic there Also move call of ack notifiers after pic state change. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/i8259.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 01f151682802..ccc941af4eaf 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -225,22 +225,11 @@ int kvm_pic_read_irq(struct kvm *kvm) void kvm_pic_reset(struct kvm_kpic_state *s) { - int irq, irqbase, n; + int irq; struct kvm *kvm = s->pics_state->irq_request_opaque; struct kvm_vcpu *vcpu0 = kvm->bsp_vcpu; + u8 irr = s->irr, isr = s->imr; - if (s == &s->pics_state->pics[0]) - irqbase = 0; - else - irqbase = 8; - - for (irq = 0; irq < PIC_NUM_PINS/2; irq++) { - if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0)) - if (s->irr & (1 << irq) || s->isr & (1 << irq)) { - n = irq + irqbase; - kvm_notify_acked_irq(kvm, SELECT_PIC(n), n); - } - } s->last_irr = 0; s->irr = 0; s->imr = 0; @@ -256,6 +245,13 @@ void kvm_pic_reset(struct kvm_kpic_state *s) s->rotate_on_auto_eoi = 0; s->special_fully_nested_mode = 0; s->init4 = 0; + + for (irq = 0; irq < PIC_NUM_PINS/2; irq++) { + if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0)) + if (irr & (1 << irq) || isr & (1 << irq)) { + pic_clear_isr(s, irq); + } + } } static void pic_ioport_write(void *opaque, u32 addr, u32 val) -- cgit v1.2.3 From 1a6e4a8c276e122dbeb6f9c610f29735e4236bfd Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 24 Aug 2009 11:54:19 +0300 Subject: KVM: Move irq sharing information to irqchip level This removes assumptions that max GSIs is smaller than number of pins. Sharing is tracked on pin level not GSI level. [avi: no PIC on ia64] Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/irq.h | 1 + include/linux/kvm_host.h | 2 +- virt/kvm/ioapic.h | 1 + virt/kvm/irq_comm.c | 59 +++++++++++++++++++++++++---------------- 5 files changed, 39 insertions(+), 25 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0b113f2b58cf..35d3236c9de4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -410,7 +410,6 @@ struct kvm_arch{ gpa_t ept_identity_map_addr; unsigned long irq_sources_bitmap; - unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; u64 vm_init_tsc; }; diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 7d6058a2fd38..c025a2362aae 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -71,6 +71,7 @@ struct kvm_pic { int output; /* intr from master PIC */ struct kvm_io_device dev; void (*ack_notifier)(void *opaque, int irq); + unsigned long irq_states[16]; }; struct kvm_pic *kvm_create_pic(struct kvm *kvm); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b7bbb5ddd7ae..1c7f8c49e4e8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -120,7 +120,7 @@ struct kvm_kernel_irq_routing_entry { u32 gsi; u32 type; int (*set)(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int level); + struct kvm *kvm, int irq_source_id, int level); union { struct { unsigned irqchip; diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 7080b713c160..6e461ade6365 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -41,6 +41,7 @@ struct kvm_ioapic { u32 irr; u32 pad; union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS]; + unsigned long irq_states[IOAPIC_NUM_PINS]; struct kvm_io_device dev; struct kvm *kvm; void (*ack_notifier)(void *opaque, int irq); diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 001663ff401a..9783f5c43dae 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -31,20 +31,39 @@ #include "ioapic.h" +static inline int kvm_irq_line_state(unsigned long *irq_state, + int irq_source_id, int level) +{ + /* Logical OR for level trig interrupt */ + if (level) + set_bit(irq_source_id, irq_state); + else + clear_bit(irq_source_id, irq_state); + + return !!(*irq_state); +} + static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int level) + struct kvm *kvm, int irq_source_id, int level) { #ifdef CONFIG_X86 - return kvm_pic_set_irq(pic_irqchip(kvm), e->irqchip.pin, level); + struct kvm_pic *pic = pic_irqchip(kvm); + level = kvm_irq_line_state(&pic->irq_states[e->irqchip.pin], + irq_source_id, level); + return kvm_pic_set_irq(pic, e->irqchip.pin, level); #else return -1; #endif } static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int level) + struct kvm *kvm, int irq_source_id, int level) { - return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level); + struct kvm_ioapic *ioapic = kvm->arch.vioapic; + level = kvm_irq_line_state(&ioapic->irq_states[e->irqchip.pin], + irq_source_id, level); + + return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, level); } inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) @@ -96,10 +115,13 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, } static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int level) + struct kvm *kvm, int irq_source_id, int level) { struct kvm_lapic_irq irq; + if (!level) + return -1; + trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data); irq.dest_id = (e->msi.address_lo & @@ -125,34 +147,19 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) { struct kvm_kernel_irq_routing_entry *e; - unsigned long *irq_state, sig_level; int ret = -1; trace_kvm_set_irq(irq, level, irq_source_id); WARN_ON(!mutex_is_locked(&kvm->irq_lock)); - if (irq < KVM_IOAPIC_NUM_PINS) { - irq_state = (unsigned long *)&kvm->arch.irq_states[irq]; - - /* Logical OR for level trig interrupt */ - if (level) - set_bit(irq_source_id, irq_state); - else - clear_bit(irq_source_id, irq_state); - sig_level = !!(*irq_state); - } else if (!level) - return ret; - else /* Deal with MSI/MSI-X */ - sig_level = 1; - /* Not possible to detect if the guest uses the PIC or the * IOAPIC. So set the bit in both. The guest will ignore * writes to the unused one. */ list_for_each_entry(e, &kvm->irq_routing, link) if (e->gsi == irq) { - int r = e->set(e, kvm, sig_level); + int r = e->set(e, kvm, irq_source_id, level); if (r < 0) continue; @@ -232,8 +239,14 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) printk(KERN_ERR "kvm: IRQ source ID out of range!\n"); return; } - for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) - clear_bit(irq_source_id, &kvm->arch.irq_states[i]); + for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) { + clear_bit(irq_source_id, &kvm->arch.vioapic->irq_states[i]); + if (i >= 16) + continue; +#ifdef CONFIG_X86 + clear_bit(irq_source_id, &pic_irqchip(kvm)->irq_states[i]); +#endif + } clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); mutex_unlock(&kvm->irq_lock); } -- cgit v1.2.3 From 3e71f88bc90792a187703860cf22fbed7c12cbd9 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 24 Aug 2009 11:54:21 +0300 Subject: KVM: Maintain back mapping from irqchip/pin to gsi Maintain back mapping from irqchip/pin to gsi to speedup interrupt acknowledgment notifications. [avi: build fix on non-x86/ia64] Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/ia64/include/asm/kvm.h | 1 + arch/x86/include/asm/kvm.h | 1 + include/linux/kvm_host.h | 9 +++++++++ virt/kvm/irq_comm.c | 31 ++++++++++++++----------------- 4 files changed, 25 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h index 18a7e49abbc5..bc90c75adf67 100644 --- a/arch/ia64/include/asm/kvm.h +++ b/arch/ia64/include/asm/kvm.h @@ -60,6 +60,7 @@ struct kvm_ioapic_state { #define KVM_IRQCHIP_PIC_MASTER 0 #define KVM_IRQCHIP_PIC_SLAVE 1 #define KVM_IRQCHIP_IOAPIC 2 +#define KVM_NR_IRQCHIPS 3 #define KVM_CONTEXT_SIZE 8*1024 diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 4a5fe914dc59..f02e87a5206f 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -79,6 +79,7 @@ struct kvm_ioapic_state { #define KVM_IRQCHIP_PIC_MASTER 0 #define KVM_IRQCHIP_PIC_SLAVE 1 #define KVM_IRQCHIP_IOAPIC 2 +#define KVM_NR_IRQCHIPS 3 /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f403e66557fb..cc2d7493598b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -131,7 +131,10 @@ struct kvm_kernel_irq_routing_entry { struct hlist_node link; }; +#ifdef __KVM_HAVE_IOAPIC + struct kvm_irq_routing_table { + int chip[KVM_NR_IRQCHIPS][KVM_IOAPIC_NUM_PINS]; struct kvm_kernel_irq_routing_entry *rt_entries; u32 nr_rt_entries; /* @@ -141,6 +144,12 @@ struct kvm_irq_routing_table { struct hlist_head map[0]; }; +#else + +struct kvm_irq_routing_table {}; + +#endif + struct kvm { spinlock_t mmu_lock; spinlock_t requests_lock; diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 81950f6f6fd9..59cf8dae0062 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -175,25 +175,16 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { struct kvm_irq_ack_notifier *kian; struct hlist_node *n; - unsigned gsi = pin; - int i; + int gsi; trace_kvm_ack_irq(irqchip, pin); - for (i = 0; i < kvm->irq_routing->nr_rt_entries; i++) { - struct kvm_kernel_irq_routing_entry *e; - e = &kvm->irq_routing->rt_entries[i]; - if (e->type == KVM_IRQ_ROUTING_IRQCHIP && - e->irqchip.irqchip == irqchip && - e->irqchip.pin == pin) { - gsi = e->gsi; - break; - } - } - - hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link) - if (kian->gsi == gsi) - kian->irq_acked(kian); + gsi = kvm->irq_routing->chip[irqchip][pin]; + if (gsi != -1) + hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, + link) + if (kian->gsi == gsi) + kian->irq_acked(kian); } void kvm_register_irq_ack_notifier(struct kvm *kvm, @@ -332,6 +323,9 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, } e->irqchip.irqchip = ue->u.irqchip.irqchip; e->irqchip.pin = ue->u.irqchip.pin + delta; + if (e->irqchip.pin >= KVM_IOAPIC_NUM_PINS) + goto out; + rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi; break; case KVM_IRQ_ROUTING_MSI: e->set = kvm_set_msi; @@ -356,7 +350,7 @@ int kvm_set_irq_routing(struct kvm *kvm, unsigned flags) { struct kvm_irq_routing_table *new, *old; - u32 i, nr_rt_entries = 0; + u32 i, j, nr_rt_entries = 0; int r; for (i = 0; i < nr; ++i) { @@ -377,6 +371,9 @@ int kvm_set_irq_routing(struct kvm *kvm, new->rt_entries = (void *)&new->map[nr_rt_entries]; new->nr_rt_entries = nr_rt_entries; + for (i = 0; i < 3; i++) + for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++) + new->chip[i][j] = -1; for (i = 0; i < nr; ++i) { r = -EINVAL; -- cgit v1.2.3 From 136bdfeee7b5bc986fc94af3a40d7d13ea37bb95 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 24 Aug 2009 11:54:23 +0300 Subject: KVM: Move irq ack notifier list to arch independent code Mask irq notifier list is already there. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/ia64/include/asm/kvm_host.h | 1 - arch/x86/include/asm/kvm_host.h | 1 - include/linux/kvm_host.h | 1 + virt/kvm/irq_comm.c | 5 ++--- virt/kvm/kvm_main.c | 1 + 5 files changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index d9b6325a9328..a362e67e0ca6 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -475,7 +475,6 @@ struct kvm_arch { struct list_head assigned_dev_head; struct iommu_domain *iommu_domain; int iommu_flags; - struct hlist_head irq_ack_notifier_list; unsigned long irq_sources_bitmap; unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 35d3236c9de4..a46e2dd9aca8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -397,7 +397,6 @@ struct kvm_arch{ struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; - struct hlist_head irq_ack_notifier_list; int vapics_in_nmi_mode; unsigned int tss_addr; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index cc2d7493598b..4aa5e1d9a797 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -187,6 +187,7 @@ struct kvm { #ifdef CONFIG_HAVE_KVM_IRQCHIP struct kvm_irq_routing_table *irq_routing; struct hlist_head mask_notifier_list; + struct hlist_head irq_ack_notifier_list; #endif #ifdef KVM_ARCH_WANT_MMU_NOTIFIER diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index fb861dd956fc..f01972595938 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -186,8 +186,7 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) rcu_read_unlock(); if (gsi != -1) - hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, - link) + hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link) if (kian->gsi == gsi) kian->irq_acked(kian); } @@ -196,7 +195,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian) { mutex_lock(&kvm->irq_lock); - hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list); + hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list); mutex_unlock(&kvm->irq_lock); } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3bee94892774..6eca153e1a02 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -958,6 +958,7 @@ static struct kvm *kvm_create_vm(void) goto out; #ifdef CONFIG_HAVE_KVM_IRQCHIP INIT_HLIST_HEAD(&kvm->mask_notifier_list); + INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); #endif #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET -- cgit v1.2.3 From eba0226bdfffe262e72b8360e4d0d12070e9a0f0 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 24 Aug 2009 11:54:25 +0300 Subject: KVM: Move IO APIC to its own lock The allows removal of irq_lock from the injection path. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 7 ++--- arch/x86/kvm/i8259.c | 22 +++++++++---- arch/x86/kvm/lapic.c | 5 +-- arch/x86/kvm/x86.c | 10 ++---- virt/kvm/ioapic.c | 80 ++++++++++++++++++++++++++++++++++++------------ virt/kvm/ioapic.h | 4 +++ virt/kvm/irq_comm.c | 23 ++++++++------ 7 files changed, 100 insertions(+), 51 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 0ad09f05efa9..4a983147f6eb 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -851,8 +851,7 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, r = 0; switch (chip->chip_id) { case KVM_IRQCHIP_IOAPIC: - memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm), - sizeof(struct kvm_ioapic_state)); + r = kvm_get_ioapic(kvm, &chip->chip.ioapic); break; default: r = -EINVAL; @@ -868,9 +867,7 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) r = 0; switch (chip->chip_id) { case KVM_IRQCHIP_IOAPIC: - memcpy(ioapic_irqchip(kvm), - &chip->chip.ioapic, - sizeof(struct kvm_ioapic_state)); + r = kvm_set_ioapic(kvm, &chip->chip.ioapic); break; default: r = -EINVAL; diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index ccc941af4eaf..d057c0cbd245 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -38,7 +38,15 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq) s->isr_ack |= (1 << irq); if (s != &s->pics_state->pics[0]) irq += 8; + /* + * We are dropping lock while calling ack notifiers since ack + * notifier callbacks for assigned devices call into PIC recursively. + * Other interrupt may be delivered to PIC while lock is dropped but + * it should be safe since PIC state is already updated at this stage. + */ + spin_unlock(&s->pics_state->lock); kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); + spin_lock(&s->pics_state->lock); } void kvm_pic_clear_isr_ack(struct kvm *kvm) @@ -176,16 +184,18 @@ int kvm_pic_set_irq(void *opaque, int irq, int level) static inline void pic_intack(struct kvm_kpic_state *s, int irq) { s->isr |= 1 << irq; - if (s->auto_eoi) { - if (s->rotate_on_auto_eoi) - s->priority_add = (irq + 1) & 7; - pic_clear_isr(s, irq); - } /* * We don't clear a level sensitive interrupt here */ if (!(s->elcr & (1 << irq))) s->irr &= ~(1 << irq); + + if (s->auto_eoi) { + if (s->rotate_on_auto_eoi) + s->priority_add = (irq + 1) & 7; + pic_clear_isr(s, irq); + } + } int kvm_pic_read_irq(struct kvm *kvm) @@ -294,9 +304,9 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val) priority = get_priority(s, s->isr); if (priority != 8) { irq = (priority + s->priority_add) & 7; - pic_clear_isr(s, irq); if (cmd == 5) s->priority_add = (irq + 1) & 7; + pic_clear_isr(s, irq); pic_update_irq(s->pics_state); } break; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 23c217692ea9..df8bcb0f66d8 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -471,11 +471,8 @@ static void apic_set_eoi(struct kvm_lapic *apic) trigger_mode = IOAPIC_LEVEL_TRIG; else trigger_mode = IOAPIC_EDGE_TRIG; - if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) { - mutex_lock(&apic->vcpu->kvm->irq_lock); + if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); - mutex_unlock(&apic->vcpu->kvm->irq_lock); - } } static void apic_send_ipi(struct kvm_lapic *apic) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1687d12b122a..fdf989f17a61 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2038,9 +2038,7 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) sizeof(struct kvm_pic_state)); break; case KVM_IRQCHIP_IOAPIC: - memcpy(&chip->chip.ioapic, - ioapic_irqchip(kvm), - sizeof(struct kvm_ioapic_state)); + r = kvm_get_ioapic(kvm, &chip->chip.ioapic); break; default: r = -EINVAL; @@ -2070,11 +2068,7 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) spin_unlock(&pic_irqchip(kvm)->lock); break; case KVM_IRQCHIP_IOAPIC: - mutex_lock(&kvm->irq_lock); - memcpy(ioapic_irqchip(kvm), - &chip->chip.ioapic, - sizeof(struct kvm_ioapic_state)); - mutex_unlock(&kvm->irq_lock); + r = kvm_set_ioapic(kvm, &chip->chip.ioapic); break; default: r = -EINVAL; diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 9fe140bb38ec..38a2d20b89de 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -182,6 +182,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) union kvm_ioapic_redirect_entry entry; int ret = 1; + mutex_lock(&ioapic->lock); if (irq >= 0 && irq < IOAPIC_NUM_PINS) { entry = ioapic->redirtbl[irq]; level ^= entry.fields.polarity; @@ -198,34 +199,51 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) } trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); } + mutex_unlock(&ioapic->lock); + return ret; } -static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin, - int trigger_mode) +static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector, + int trigger_mode) { - union kvm_ioapic_redirect_entry *ent; + int i; + + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; - ent = &ioapic->redirtbl[pin]; + if (ent->fields.vector != vector) + continue; - kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin); + /* + * We are dropping lock while calling ack notifiers because ack + * notifier callbacks for assigned devices call into IOAPIC + * recursively. Since remote_irr is cleared only after call + * to notifiers if the same vector will be delivered while lock + * is dropped it will be put into irr and will be delivered + * after ack notifier returns. + */ + mutex_unlock(&ioapic->lock); + kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i); + mutex_lock(&ioapic->lock); + + if (trigger_mode != IOAPIC_LEVEL_TRIG) + continue; - if (trigger_mode == IOAPIC_LEVEL_TRIG) { ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); ent->fields.remote_irr = 0; - if (!ent->fields.mask && (ioapic->irr & (1 << pin))) - ioapic_service(ioapic, pin); + if (!ent->fields.mask && (ioapic->irr & (1 << i))) + ioapic_service(ioapic, i); } } void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode) { struct kvm_ioapic *ioapic = kvm->arch.vioapic; - int i; - for (i = 0; i < IOAPIC_NUM_PINS; i++) - if (ioapic->redirtbl[i].fields.vector == vector) - __kvm_ioapic_update_eoi(ioapic, i, trigger_mode); + mutex_lock(&ioapic->lock); + __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode); + mutex_unlock(&ioapic->lock); } static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev) @@ -250,8 +268,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, ioapic_debug("addr %lx\n", (unsigned long)addr); ASSERT(!(addr & 0xf)); /* check alignment */ - mutex_lock(&ioapic->kvm->irq_lock); addr &= 0xff; + mutex_lock(&ioapic->lock); switch (addr) { case IOAPIC_REG_SELECT: result = ioapic->ioregsel; @@ -265,6 +283,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, result = 0; break; } + mutex_unlock(&ioapic->lock); + switch (len) { case 8: *(u64 *) val = result; @@ -277,7 +297,6 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, default: printk(KERN_WARNING "ioapic: wrong length %d\n", len); } - mutex_unlock(&ioapic->kvm->irq_lock); return 0; } @@ -293,15 +312,15 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, (void*)addr, len, val); ASSERT(!(addr & 0xf)); /* check alignment */ - mutex_lock(&ioapic->kvm->irq_lock); if (len == 4 || len == 8) data = *(u32 *) val; else { printk(KERN_WARNING "ioapic: Unsupported size %d\n", len); - goto unlock; + return 0; } addr &= 0xff; + mutex_lock(&ioapic->lock); switch (addr) { case IOAPIC_REG_SELECT: ioapic->ioregsel = data; @@ -312,15 +331,14 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, break; #ifdef CONFIG_IA64 case IOAPIC_REG_EOI: - kvm_ioapic_update_eoi(ioapic->kvm, data, IOAPIC_LEVEL_TRIG); + __kvm_ioapic_update_eoi(ioapic, data, IOAPIC_LEVEL_TRIG); break; #endif default: break; } -unlock: - mutex_unlock(&ioapic->kvm->irq_lock); + mutex_unlock(&ioapic->lock); return 0; } @@ -349,6 +367,7 @@ int kvm_ioapic_init(struct kvm *kvm) ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL); if (!ioapic) return -ENOMEM; + mutex_init(&ioapic->lock); kvm->arch.vioapic = ioapic; kvm_ioapic_reset(ioapic); kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); @@ -360,3 +379,26 @@ int kvm_ioapic_init(struct kvm *kvm) return ret; } +int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) +{ + struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); + if (!ioapic) + return -EINVAL; + + mutex_lock(&ioapic->lock); + memcpy(state, ioapic, sizeof(struct kvm_ioapic_state)); + mutex_unlock(&ioapic->lock); + return 0; +} + +int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) +{ + struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); + if (!ioapic) + return -EINVAL; + + mutex_lock(&ioapic->lock); + memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); + mutex_unlock(&ioapic->lock); + return 0; +} diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 6e461ade6365..419c43b667ab 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -45,6 +45,7 @@ struct kvm_ioapic { struct kvm_io_device dev; struct kvm *kvm; void (*ack_notifier)(void *opaque, int irq); + struct mutex lock; }; #ifdef DEBUG @@ -74,4 +75,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); void kvm_ioapic_reset(struct kvm_ioapic *ioapic); int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq); +int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); +int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); + #endif diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 6c946141dbcc..fadf4408a820 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -146,8 +146,8 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, */ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) { - struct kvm_kernel_irq_routing_entry *e; - int ret = -1; + struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; + int ret = -1, i = 0; struct kvm_irq_routing_table *irq_rt; struct hlist_node *n; @@ -162,14 +162,19 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) rcu_read_lock(); irq_rt = rcu_dereference(kvm->irq_routing); if (irq < irq_rt->nr_rt_entries) - hlist_for_each_entry(e, n, &irq_rt->map[irq], link) { - int r = e->set(e, kvm, irq_source_id, level); - if (r < 0) - continue; - - ret = r + ((ret < 0) ? 0 : ret); - } + hlist_for_each_entry(e, n, &irq_rt->map[irq], link) + irq_set[i++] = *e; rcu_read_unlock(); + + while(i--) { + int r; + r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level); + if (r < 0) + continue; + + ret = r + ((ret < 0) ? 0 : ret); + } + return ret; } -- cgit v1.2.3 From 680b3648ba89c44ac8d0316f78a0d6e147b88809 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 24 Aug 2009 11:54:26 +0300 Subject: KVM: Drop kvm->irq_lock lock from irq injection path The only thing it protects now is interrupt injection into lapic and this can work lockless. Even now with kvm->irq_lock in place access to lapic is not entirely serialized since vcpu access doesn't take kvm->irq_lock. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 2 -- arch/x86/kvm/i8254.c | 2 -- arch/x86/kvm/lapic.c | 2 -- arch/x86/kvm/x86.c | 2 -- virt/kvm/eventfd.c | 2 -- virt/kvm/irq_comm.c | 6 +----- virt/kvm/kvm_main.c | 2 -- 7 files changed, 1 insertion(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 4a983147f6eb..f534e0f6bb0d 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -982,10 +982,8 @@ long kvm_arch_vm_ioctl(struct file *filp, goto out; if (irqchip_in_kernel(kvm)) { __s32 status; - mutex_lock(&kvm->irq_lock); status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irq_event.irq, irq_event.level); - mutex_unlock(&kvm->irq_lock); if (ioctl == KVM_IRQ_LINE_STATUS) { irq_event.status = status; if (copy_to_user(argp, &irq_event, diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 144e7f60b5e2..fab7440c9bb2 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -688,10 +688,8 @@ static void __inject_pit_timer_intr(struct kvm *kvm) struct kvm_vcpu *vcpu; int i; - mutex_lock(&kvm->irq_lock); kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); - mutex_unlock(&kvm->irq_lock); /* * Provides NMI watchdog support via Virtual Wire mode. diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index df8bcb0f66d8..8787637b1a9c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -501,9 +501,7 @@ static void apic_send_ipi(struct kvm_lapic *apic) irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, irq.vector); - mutex_lock(&apic->vcpu->kvm->irq_lock); kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq); - mutex_unlock(&apic->vcpu->kvm->irq_lock); } static u32 apic_get_tmcct(struct kvm_lapic *apic) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fdf989f17a61..5beb4c16caab 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2286,10 +2286,8 @@ long kvm_arch_vm_ioctl(struct file *filp, goto out; if (irqchip_in_kernel(kvm)) { __s32 status; - mutex_lock(&kvm->irq_lock); status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irq_event.irq, irq_event.level); - mutex_unlock(&kvm->irq_lock); if (ioctl == KVM_IRQ_LINE_STATUS) { irq_event.status = status; if (copy_to_user(argp, &irq_event, diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index bb4ebd89b9ff..30f70fd511c4 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -61,10 +61,8 @@ irqfd_inject(struct work_struct *work) struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); struct kvm *kvm = irqfd->kvm; - mutex_lock(&kvm->irq_lock); kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); - mutex_unlock(&kvm->irq_lock); } /* diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index fadf4408a820..15a83b93566d 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -82,8 +82,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, int i, r = -1; struct kvm_vcpu *vcpu, *lowest = NULL; - WARN_ON(!mutex_is_locked(&kvm->irq_lock)); - if (irq->dest_mode == 0 && irq->dest_id == 0xff && kvm_is_dm_lowest_prio(irq)) printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); @@ -138,7 +136,7 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, return kvm_irq_delivery_to_apic(kvm, NULL, &irq); } -/* This should be called with the kvm->irq_lock mutex held +/* * Return value: * < 0 Interrupt was ignored (masked or not delivered for other reasons) * = 0 Interrupt was coalesced (previous irq is still pending) @@ -153,8 +151,6 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) trace_kvm_set_irq(irq, level, irq_source_id); - WARN_ON(!mutex_is_locked(&kvm->irq_lock)); - /* Not possible to detect if the guest uses the PIC or the * IOAPIC. So set the bit in both. The guest will ignore * writes to the unused one. diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6eca153e1a02..c12c95b1b641 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -137,7 +137,6 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) interrupt_work); kvm = assigned_dev->kvm; - mutex_lock(&kvm->irq_lock); spin_lock_irq(&assigned_dev->assigned_dev_lock); if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { struct kvm_guest_msix_entry *guest_entries = @@ -156,7 +155,6 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) assigned_dev->guest_irq, 1); spin_unlock_irq(&assigned_dev->assigned_dev_lock); - mutex_unlock(&assigned_dev->kvm->irq_lock); } static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) -- cgit v1.2.3 From 367e1319b229110a27c53221c2fa32a6aa86d4a9 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 26 Aug 2009 14:57:07 +0300 Subject: KVM: Return -ENOTTY on unrecognized ioctls Not the incorrect -EINVAL. Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 2 +- arch/powerpc/kvm/powerpc.c | 2 +- arch/s390/kvm/kvm-s390.c | 2 +- arch/x86/kvm/x86.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index f534e0f6bb0d..f6471c882667 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -941,7 +941,7 @@ long kvm_arch_vm_ioctl(struct file *filp, { struct kvm *kvm = filp->private_data; void __user *argp = (void __user *)arg; - int r = -EINVAL; + int r = -ENOTTY; switch (ioctl) { case KVM_SET_MEMORY_REGION: { diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 2a4551f78f60..95af62217b6b 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -421,7 +421,7 @@ long kvm_arch_vm_ioctl(struct file *filp, switch (ioctl) { default: - r = -EINVAL; + r = -ENOTTY; } return r; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 07ced89740d7..00e2ce8e91f5 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -150,7 +150,7 @@ long kvm_arch_vm_ioctl(struct file *filp, break; } default: - r = -EINVAL; + r = -ENOTTY; } return r; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5beb4c16caab..829e3063e2ab 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2176,7 +2176,7 @@ long kvm_arch_vm_ioctl(struct file *filp, { struct kvm *kvm = filp->private_data; void __user *argp = (void __user *)arg; - int r = -EINVAL; + int r = -ENOTTY; /* * This union makes it completely explicit to gcc-3.x * that these two variables' stack usage should be -- cgit v1.2.3 From bfd99ff5d483b11c32bca49fbff7a5ac59038b0a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 26 Aug 2009 14:57:50 +0300 Subject: KVM: Move assigned device code to own file Signed-off-by: Avi Kivity --- arch/ia64/kvm/Makefile | 2 +- arch/x86/kvm/Makefile | 3 +- include/linux/kvm_host.h | 17 + virt/kvm/assigned-dev.c | 818 +++++++++++++++++++++++++++++++++++++++++++++++ virt/kvm/kvm_main.c | 798 +-------------------------------------------- 5 files changed, 840 insertions(+), 798 deletions(-) create mode 100644 virt/kvm/assigned-dev.c (limited to 'arch/x86') diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile index 0bb99b732908..1089b3e918ac 100644 --- a/arch/ia64/kvm/Makefile +++ b/arch/ia64/kvm/Makefile @@ -49,7 +49,7 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ - coalesced_mmio.o irq_comm.o) + coalesced_mmio.o irq_comm.o assigned-dev.o) ifeq ($(CONFIG_IOMMU_API),y) common-objs += $(addprefix ../../../virt/kvm/, iommu.o) diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 0e7fe78d0f74..31a7035c4bd9 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -6,7 +6,8 @@ CFLAGS_svm.o := -I. CFLAGS_vmx.o := -I. kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ - coalesced_mmio.o irq_comm.o eventfd.o) + coalesced_mmio.o irq_comm.o eventfd.o \ + assigned-dev.o) kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o) kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4aa5e1d9a797..c0a1cc35f080 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -577,4 +577,21 @@ static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id; } #endif + +#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT + +long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, + unsigned long arg); + +#else + +static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, + unsigned long arg) +{ + return -ENOTTY; +} + #endif + +#endif + diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c new file mode 100644 index 000000000000..fd9c097b760a --- /dev/null +++ b/virt/kvm/assigned-dev.c @@ -0,0 +1,818 @@ +/* + * Kernel-based Virtual Machine - device assignment support + * + * Copyright (C) 2006-9 Red Hat, Inc + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "irq.h" + +static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, + int assigned_dev_id) +{ + struct list_head *ptr; + struct kvm_assigned_dev_kernel *match; + + list_for_each(ptr, head) { + match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); + if (match->assigned_dev_id == assigned_dev_id) + return match; + } + return NULL; +} + +static int find_index_from_host_irq(struct kvm_assigned_dev_kernel + *assigned_dev, int irq) +{ + int i, index; + struct msix_entry *host_msix_entries; + + host_msix_entries = assigned_dev->host_msix_entries; + + index = -1; + for (i = 0; i < assigned_dev->entries_nr; i++) + if (irq == host_msix_entries[i].vector) { + index = i; + break; + } + if (index < 0) { + printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n"); + return 0; + } + + return index; +} + +static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) +{ + struct kvm_assigned_dev_kernel *assigned_dev; + struct kvm *kvm; + int i; + + assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, + interrupt_work); + kvm = assigned_dev->kvm; + + spin_lock_irq(&assigned_dev->assigned_dev_lock); + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { + struct kvm_guest_msix_entry *guest_entries = + assigned_dev->guest_msix_entries; + for (i = 0; i < assigned_dev->entries_nr; i++) { + if (!(guest_entries[i].flags & + KVM_ASSIGNED_MSIX_PENDING)) + continue; + guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING; + kvm_set_irq(assigned_dev->kvm, + assigned_dev->irq_source_id, + guest_entries[i].vector, 1); + } + } else + kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, + assigned_dev->guest_irq, 1); + + spin_unlock_irq(&assigned_dev->assigned_dev_lock); +} + +static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) +{ + unsigned long flags; + struct kvm_assigned_dev_kernel *assigned_dev = + (struct kvm_assigned_dev_kernel *) dev_id; + + spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags); + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { + int index = find_index_from_host_irq(assigned_dev, irq); + if (index < 0) + goto out; + assigned_dev->guest_msix_entries[index].flags |= + KVM_ASSIGNED_MSIX_PENDING; + } + + schedule_work(&assigned_dev->interrupt_work); + + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { + disable_irq_nosync(irq); + assigned_dev->host_irq_disabled = true; + } + +out: + spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags); + return IRQ_HANDLED; +} + +/* Ack the irq line for an assigned device */ +static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) +{ + struct kvm_assigned_dev_kernel *dev; + unsigned long flags; + + if (kian->gsi == -1) + return; + + dev = container_of(kian, struct kvm_assigned_dev_kernel, + ack_notifier); + + kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); + + /* The guest irq may be shared so this ack may be + * from another device. + */ + spin_lock_irqsave(&dev->assigned_dev_lock, flags); + if (dev->host_irq_disabled) { + enable_irq(dev->host_irq); + dev->host_irq_disabled = false; + } + spin_unlock_irqrestore(&dev->assigned_dev_lock, flags); +} + +static void deassign_guest_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ + kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); + assigned_dev->ack_notifier.gsi = -1; + + if (assigned_dev->irq_source_id != -1) + kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); + assigned_dev->irq_source_id = -1; + assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK); +} + +/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ +static void deassign_host_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ + /* + * In kvm_free_device_irq, cancel_work_sync return true if: + * 1. work is scheduled, and then cancelled. + * 2. work callback is executed. + * + * The first one ensured that the irq is disabled and no more events + * would happen. But for the second one, the irq may be enabled (e.g. + * for MSI). So we disable irq here to prevent further events. + * + * Notice this maybe result in nested disable if the interrupt type is + * INTx, but it's OK for we are going to free it. + * + * If this function is a part of VM destroy, please ensure that till + * now, the kvm state is still legal for probably we also have to wait + * interrupt_work done. + */ + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { + int i; + for (i = 0; i < assigned_dev->entries_nr; i++) + disable_irq_nosync(assigned_dev-> + host_msix_entries[i].vector); + + cancel_work_sync(&assigned_dev->interrupt_work); + + for (i = 0; i < assigned_dev->entries_nr; i++) + free_irq(assigned_dev->host_msix_entries[i].vector, + (void *)assigned_dev); + + assigned_dev->entries_nr = 0; + kfree(assigned_dev->host_msix_entries); + kfree(assigned_dev->guest_msix_entries); + pci_disable_msix(assigned_dev->dev); + } else { + /* Deal with MSI and INTx */ + disable_irq_nosync(assigned_dev->host_irq); + cancel_work_sync(&assigned_dev->interrupt_work); + + free_irq(assigned_dev->host_irq, (void *)assigned_dev); + + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) + pci_disable_msi(assigned_dev->dev); + } + + assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK); +} + +static int kvm_deassign_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev, + unsigned long irq_requested_type) +{ + unsigned long guest_irq_type, host_irq_type; + + if (!irqchip_in_kernel(kvm)) + return -EINVAL; + /* no irq assignment to deassign */ + if (!assigned_dev->irq_requested_type) + return -ENXIO; + + host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK; + guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK; + + if (host_irq_type) + deassign_host_irq(kvm, assigned_dev); + if (guest_irq_type) + deassign_guest_irq(kvm, assigned_dev); + + return 0; +} + +static void kvm_free_assigned_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ + kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type); +} + +static void kvm_free_assigned_device(struct kvm *kvm, + struct kvm_assigned_dev_kernel + *assigned_dev) +{ + kvm_free_assigned_irq(kvm, assigned_dev); + + pci_reset_function(assigned_dev->dev); + + pci_release_regions(assigned_dev->dev); + pci_disable_device(assigned_dev->dev); + pci_dev_put(assigned_dev->dev); + + list_del(&assigned_dev->list); + kfree(assigned_dev); +} + +void kvm_free_all_assigned_devices(struct kvm *kvm) +{ + struct list_head *ptr, *ptr2; + struct kvm_assigned_dev_kernel *assigned_dev; + + list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { + assigned_dev = list_entry(ptr, + struct kvm_assigned_dev_kernel, + list); + + kvm_free_assigned_device(kvm, assigned_dev); + } +} + +static int assigned_device_enable_host_intx(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev) +{ + dev->host_irq = dev->dev->irq; + /* Even though this is PCI, we don't want to use shared + * interrupts. Sharing host devices with guest-assigned devices + * on the same interrupt line is not a happy situation: there + * are going to be long delays in accepting, acking, etc. + */ + if (request_irq(dev->host_irq, kvm_assigned_dev_intr, + 0, "kvm_assigned_intx_device", (void *)dev)) + return -EIO; + return 0; +} + +#ifdef __KVM_HAVE_MSI +static int assigned_device_enable_host_msi(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev) +{ + int r; + + if (!dev->dev->msi_enabled) { + r = pci_enable_msi(dev->dev); + if (r) + return r; + } + + dev->host_irq = dev->dev->irq; + if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0, + "kvm_assigned_msi_device", (void *)dev)) { + pci_disable_msi(dev->dev); + return -EIO; + } + + return 0; +} +#endif + +#ifdef __KVM_HAVE_MSIX +static int assigned_device_enable_host_msix(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev) +{ + int i, r = -EINVAL; + + /* host_msix_entries and guest_msix_entries should have been + * initialized */ + if (dev->entries_nr == 0) + return r; + + r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr); + if (r) + return r; + + for (i = 0; i < dev->entries_nr; i++) { + r = request_irq(dev->host_msix_entries[i].vector, + kvm_assigned_dev_intr, 0, + "kvm_assigned_msix_device", + (void *)dev); + /* FIXME: free requested_irq's on failure */ + if (r) + return r; + } + + return 0; +} + +#endif + +static int assigned_device_enable_guest_intx(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + struct kvm_assigned_irq *irq) +{ + dev->guest_irq = irq->guest_irq; + dev->ack_notifier.gsi = irq->guest_irq; + return 0; +} + +#ifdef __KVM_HAVE_MSI +static int assigned_device_enable_guest_msi(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + struct kvm_assigned_irq *irq) +{ + dev->guest_irq = irq->guest_irq; + dev->ack_notifier.gsi = -1; + dev->host_irq_disabled = false; + return 0; +} +#endif + +#ifdef __KVM_HAVE_MSIX +static int assigned_device_enable_guest_msix(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + struct kvm_assigned_irq *irq) +{ + dev->guest_irq = irq->guest_irq; + dev->ack_notifier.gsi = -1; + dev->host_irq_disabled = false; + return 0; +} +#endif + +static int assign_host_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + __u32 host_irq_type) +{ + int r = -EEXIST; + + if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) + return r; + + switch (host_irq_type) { + case KVM_DEV_IRQ_HOST_INTX: + r = assigned_device_enable_host_intx(kvm, dev); + break; +#ifdef __KVM_HAVE_MSI + case KVM_DEV_IRQ_HOST_MSI: + r = assigned_device_enable_host_msi(kvm, dev); + break; +#endif +#ifdef __KVM_HAVE_MSIX + case KVM_DEV_IRQ_HOST_MSIX: + r = assigned_device_enable_host_msix(kvm, dev); + break; +#endif + default: + r = -EINVAL; + } + + if (!r) + dev->irq_requested_type |= host_irq_type; + + return r; +} + +static int assign_guest_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + struct kvm_assigned_irq *irq, + unsigned long guest_irq_type) +{ + int id; + int r = -EEXIST; + + if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK) + return r; + + id = kvm_request_irq_source_id(kvm); + if (id < 0) + return id; + + dev->irq_source_id = id; + + switch (guest_irq_type) { + case KVM_DEV_IRQ_GUEST_INTX: + r = assigned_device_enable_guest_intx(kvm, dev, irq); + break; +#ifdef __KVM_HAVE_MSI + case KVM_DEV_IRQ_GUEST_MSI: + r = assigned_device_enable_guest_msi(kvm, dev, irq); + break; +#endif +#ifdef __KVM_HAVE_MSIX + case KVM_DEV_IRQ_GUEST_MSIX: + r = assigned_device_enable_guest_msix(kvm, dev, irq); + break; +#endif + default: + r = -EINVAL; + } + + if (!r) { + dev->irq_requested_type |= guest_irq_type; + kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); + } else + kvm_free_irq_source_id(kvm, dev->irq_source_id); + + return r; +} + +/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ +static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, + struct kvm_assigned_irq *assigned_irq) +{ + int r = -EINVAL; + struct kvm_assigned_dev_kernel *match; + unsigned long host_irq_type, guest_irq_type; + + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + + if (!irqchip_in_kernel(kvm)) + return r; + + mutex_lock(&kvm->lock); + r = -ENODEV; + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_irq->assigned_dev_id); + if (!match) + goto out; + + host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK); + guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK); + + r = -EINVAL; + /* can only assign one type at a time */ + if (hweight_long(host_irq_type) > 1) + goto out; + if (hweight_long(guest_irq_type) > 1) + goto out; + if (host_irq_type == 0 && guest_irq_type == 0) + goto out; + + r = 0; + if (host_irq_type) + r = assign_host_irq(kvm, match, host_irq_type); + if (r) + goto out; + + if (guest_irq_type) + r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type); +out: + mutex_unlock(&kvm->lock); + return r; +} + +static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, + struct kvm_assigned_irq + *assigned_irq) +{ + int r = -ENODEV; + struct kvm_assigned_dev_kernel *match; + + mutex_lock(&kvm->lock); + + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_irq->assigned_dev_id); + if (!match) + goto out; + + r = kvm_deassign_irq(kvm, match, assigned_irq->flags); +out: + mutex_unlock(&kvm->lock); + return r; +} + +static int kvm_vm_ioctl_assign_device(struct kvm *kvm, + struct kvm_assigned_pci_dev *assigned_dev) +{ + int r = 0; + struct kvm_assigned_dev_kernel *match; + struct pci_dev *dev; + + down_read(&kvm->slots_lock); + mutex_lock(&kvm->lock); + + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_dev->assigned_dev_id); + if (match) { + /* device already assigned */ + r = -EEXIST; + goto out; + } + + match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); + if (match == NULL) { + printk(KERN_INFO "%s: Couldn't allocate memory\n", + __func__); + r = -ENOMEM; + goto out; + } + dev = pci_get_bus_and_slot(assigned_dev->busnr, + assigned_dev->devfn); + if (!dev) { + printk(KERN_INFO "%s: host device not found\n", __func__); + r = -EINVAL; + goto out_free; + } + if (pci_enable_device(dev)) { + printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); + r = -EBUSY; + goto out_put; + } + r = pci_request_regions(dev, "kvm_assigned_device"); + if (r) { + printk(KERN_INFO "%s: Could not get access to device regions\n", + __func__); + goto out_disable; + } + + pci_reset_function(dev); + + match->assigned_dev_id = assigned_dev->assigned_dev_id; + match->host_busnr = assigned_dev->busnr; + match->host_devfn = assigned_dev->devfn; + match->flags = assigned_dev->flags; + match->dev = dev; + spin_lock_init(&match->assigned_dev_lock); + match->irq_source_id = -1; + match->kvm = kvm; + match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; + INIT_WORK(&match->interrupt_work, + kvm_assigned_dev_interrupt_work_handler); + + list_add(&match->list, &kvm->arch.assigned_dev_head); + + if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { + if (!kvm->arch.iommu_domain) { + r = kvm_iommu_map_guest(kvm); + if (r) + goto out_list_del; + } + r = kvm_assign_device(kvm, match); + if (r) + goto out_list_del; + } + +out: + mutex_unlock(&kvm->lock); + up_read(&kvm->slots_lock); + return r; +out_list_del: + list_del(&match->list); + pci_release_regions(dev); +out_disable: + pci_disable_device(dev); +out_put: + pci_dev_put(dev); +out_free: + kfree(match); + mutex_unlock(&kvm->lock); + up_read(&kvm->slots_lock); + return r; +} + +static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, + struct kvm_assigned_pci_dev *assigned_dev) +{ + int r = 0; + struct kvm_assigned_dev_kernel *match; + + mutex_lock(&kvm->lock); + + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_dev->assigned_dev_id); + if (!match) { + printk(KERN_INFO "%s: device hasn't been assigned before, " + "so cannot be deassigned\n", __func__); + r = -EINVAL; + goto out; + } + + if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) + kvm_deassign_device(kvm, match); + + kvm_free_assigned_device(kvm, match); + +out: + mutex_unlock(&kvm->lock); + return r; +} + + +#ifdef __KVM_HAVE_MSIX +static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, + struct kvm_assigned_msix_nr *entry_nr) +{ + int r = 0; + struct kvm_assigned_dev_kernel *adev; + + mutex_lock(&kvm->lock); + + adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + entry_nr->assigned_dev_id); + if (!adev) { + r = -EINVAL; + goto msix_nr_out; + } + + if (adev->entries_nr == 0) { + adev->entries_nr = entry_nr->entry_nr; + if (adev->entries_nr == 0 || + adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) { + r = -EINVAL; + goto msix_nr_out; + } + + adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) * + entry_nr->entry_nr, + GFP_KERNEL); + if (!adev->host_msix_entries) { + r = -ENOMEM; + goto msix_nr_out; + } + adev->guest_msix_entries = kzalloc( + sizeof(struct kvm_guest_msix_entry) * + entry_nr->entry_nr, GFP_KERNEL); + if (!adev->guest_msix_entries) { + kfree(adev->host_msix_entries); + r = -ENOMEM; + goto msix_nr_out; + } + } else /* Not allowed set MSI-X number twice */ + r = -EINVAL; +msix_nr_out: + mutex_unlock(&kvm->lock); + return r; +} + +static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm, + struct kvm_assigned_msix_entry *entry) +{ + int r = 0, i; + struct kvm_assigned_dev_kernel *adev; + + mutex_lock(&kvm->lock); + + adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + entry->assigned_dev_id); + + if (!adev) { + r = -EINVAL; + goto msix_entry_out; + } + + for (i = 0; i < adev->entries_nr; i++) + if (adev->guest_msix_entries[i].vector == 0 || + adev->guest_msix_entries[i].entry == entry->entry) { + adev->guest_msix_entries[i].entry = entry->entry; + adev->guest_msix_entries[i].vector = entry->gsi; + adev->host_msix_entries[i].entry = entry->entry; + break; + } + if (i == adev->entries_nr) { + r = -ENOSPC; + goto msix_entry_out; + } + +msix_entry_out: + mutex_unlock(&kvm->lock); + + return r; +} +#endif + +long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, + unsigned long arg) +{ + void __user *argp = (void __user *)arg; + int r = -ENOTTY; + + switch (ioctl) { + case KVM_ASSIGN_PCI_DEVICE: { + struct kvm_assigned_pci_dev assigned_dev; + + r = -EFAULT; + if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) + goto out; + r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); + if (r) + goto out; + break; + } + case KVM_ASSIGN_IRQ: { + r = -EOPNOTSUPP; + break; + } +#ifdef KVM_CAP_ASSIGN_DEV_IRQ + case KVM_ASSIGN_DEV_IRQ: { + struct kvm_assigned_irq assigned_irq; + + r = -EFAULT; + if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) + goto out; + r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); + if (r) + goto out; + break; + } + case KVM_DEASSIGN_DEV_IRQ: { + struct kvm_assigned_irq assigned_irq; + + r = -EFAULT; + if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) + goto out; + r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq); + if (r) + goto out; + break; + } +#endif +#ifdef KVM_CAP_DEVICE_DEASSIGNMENT + case KVM_DEASSIGN_PCI_DEVICE: { + struct kvm_assigned_pci_dev assigned_dev; + + r = -EFAULT; + if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) + goto out; + r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev); + if (r) + goto out; + break; + } +#endif +#ifdef KVM_CAP_IRQ_ROUTING + case KVM_SET_GSI_ROUTING: { + struct kvm_irq_routing routing; + struct kvm_irq_routing __user *urouting; + struct kvm_irq_routing_entry *entries; + + r = -EFAULT; + if (copy_from_user(&routing, argp, sizeof(routing))) + goto out; + r = -EINVAL; + if (routing.nr >= KVM_MAX_IRQ_ROUTES) + goto out; + if (routing.flags) + goto out; + r = -ENOMEM; + entries = vmalloc(routing.nr * sizeof(*entries)); + if (!entries) + goto out; + r = -EFAULT; + urouting = argp; + if (copy_from_user(entries, urouting->entries, + routing.nr * sizeof(*entries))) + goto out_free_irq_routing; + r = kvm_set_irq_routing(kvm, entries, routing.nr, + routing.flags); + out_free_irq_routing: + vfree(entries); + break; + } +#endif /* KVM_CAP_IRQ_ROUTING */ +#ifdef __KVM_HAVE_MSIX + case KVM_ASSIGN_SET_MSIX_NR: { + struct kvm_assigned_msix_nr entry_nr; + r = -EFAULT; + if (copy_from_user(&entry_nr, argp, sizeof entry_nr)) + goto out; + r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr); + if (r) + goto out; + break; + } + case KVM_ASSIGN_SET_MSIX_ENTRY: { + struct kvm_assigned_msix_entry entry; + r = -EFAULT; + if (copy_from_user(&entry, argp, sizeof entry)) + goto out; + r = kvm_vm_ioctl_set_msix_entry(kvm, &entry); + if (r) + goto out; + break; + } +#endif + } +out: + return r; +} + diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c12c95b1b641..38e4d2c34ac1 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -53,12 +53,6 @@ #include "coalesced_mmio.h" #endif -#ifdef KVM_CAP_DEVICE_ASSIGNMENT -#include -#include -#include "irq.h" -#endif - #define CREATE_TRACE_POINTS #include @@ -90,608 +84,6 @@ static bool kvm_rebooting; static bool largepages_enabled = true; -#ifdef KVM_CAP_DEVICE_ASSIGNMENT -static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, - int assigned_dev_id) -{ - struct list_head *ptr; - struct kvm_assigned_dev_kernel *match; - - list_for_each(ptr, head) { - match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); - if (match->assigned_dev_id == assigned_dev_id) - return match; - } - return NULL; -} - -static int find_index_from_host_irq(struct kvm_assigned_dev_kernel - *assigned_dev, int irq) -{ - int i, index; - struct msix_entry *host_msix_entries; - - host_msix_entries = assigned_dev->host_msix_entries; - - index = -1; - for (i = 0; i < assigned_dev->entries_nr; i++) - if (irq == host_msix_entries[i].vector) { - index = i; - break; - } - if (index < 0) { - printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n"); - return 0; - } - - return index; -} - -static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) -{ - struct kvm_assigned_dev_kernel *assigned_dev; - struct kvm *kvm; - int i; - - assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, - interrupt_work); - kvm = assigned_dev->kvm; - - spin_lock_irq(&assigned_dev->assigned_dev_lock); - if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { - struct kvm_guest_msix_entry *guest_entries = - assigned_dev->guest_msix_entries; - for (i = 0; i < assigned_dev->entries_nr; i++) { - if (!(guest_entries[i].flags & - KVM_ASSIGNED_MSIX_PENDING)) - continue; - guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING; - kvm_set_irq(assigned_dev->kvm, - assigned_dev->irq_source_id, - guest_entries[i].vector, 1); - } - } else - kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, - assigned_dev->guest_irq, 1); - - spin_unlock_irq(&assigned_dev->assigned_dev_lock); -} - -static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) -{ - unsigned long flags; - struct kvm_assigned_dev_kernel *assigned_dev = - (struct kvm_assigned_dev_kernel *) dev_id; - - spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags); - if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { - int index = find_index_from_host_irq(assigned_dev, irq); - if (index < 0) - goto out; - assigned_dev->guest_msix_entries[index].flags |= - KVM_ASSIGNED_MSIX_PENDING; - } - - schedule_work(&assigned_dev->interrupt_work); - - if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { - disable_irq_nosync(irq); - assigned_dev->host_irq_disabled = true; - } - -out: - spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags); - return IRQ_HANDLED; -} - -/* Ack the irq line for an assigned device */ -static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) -{ - struct kvm_assigned_dev_kernel *dev; - unsigned long flags; - - if (kian->gsi == -1) - return; - - dev = container_of(kian, struct kvm_assigned_dev_kernel, - ack_notifier); - - kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); - - /* The guest irq may be shared so this ack may be - * from another device. - */ - spin_lock_irqsave(&dev->assigned_dev_lock, flags); - if (dev->host_irq_disabled) { - enable_irq(dev->host_irq); - dev->host_irq_disabled = false; - } - spin_unlock_irqrestore(&dev->assigned_dev_lock, flags); -} - -static void deassign_guest_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) -{ - kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); - assigned_dev->ack_notifier.gsi = -1; - - if (assigned_dev->irq_source_id != -1) - kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); - assigned_dev->irq_source_id = -1; - assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK); -} - -/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ -static void deassign_host_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) -{ - /* - * In kvm_free_device_irq, cancel_work_sync return true if: - * 1. work is scheduled, and then cancelled. - * 2. work callback is executed. - * - * The first one ensured that the irq is disabled and no more events - * would happen. But for the second one, the irq may be enabled (e.g. - * for MSI). So we disable irq here to prevent further events. - * - * Notice this maybe result in nested disable if the interrupt type is - * INTx, but it's OK for we are going to free it. - * - * If this function is a part of VM destroy, please ensure that till - * now, the kvm state is still legal for probably we also have to wait - * interrupt_work done. - */ - if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { - int i; - for (i = 0; i < assigned_dev->entries_nr; i++) - disable_irq_nosync(assigned_dev-> - host_msix_entries[i].vector); - - cancel_work_sync(&assigned_dev->interrupt_work); - - for (i = 0; i < assigned_dev->entries_nr; i++) - free_irq(assigned_dev->host_msix_entries[i].vector, - (void *)assigned_dev); - - assigned_dev->entries_nr = 0; - kfree(assigned_dev->host_msix_entries); - kfree(assigned_dev->guest_msix_entries); - pci_disable_msix(assigned_dev->dev); - } else { - /* Deal with MSI and INTx */ - disable_irq_nosync(assigned_dev->host_irq); - cancel_work_sync(&assigned_dev->interrupt_work); - - free_irq(assigned_dev->host_irq, (void *)assigned_dev); - - if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) - pci_disable_msi(assigned_dev->dev); - } - - assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK); -} - -static int kvm_deassign_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev, - unsigned long irq_requested_type) -{ - unsigned long guest_irq_type, host_irq_type; - - if (!irqchip_in_kernel(kvm)) - return -EINVAL; - /* no irq assignment to deassign */ - if (!assigned_dev->irq_requested_type) - return -ENXIO; - - host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK; - guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK; - - if (host_irq_type) - deassign_host_irq(kvm, assigned_dev); - if (guest_irq_type) - deassign_guest_irq(kvm, assigned_dev); - - return 0; -} - -static void kvm_free_assigned_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) -{ - kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type); -} - -static void kvm_free_assigned_device(struct kvm *kvm, - struct kvm_assigned_dev_kernel - *assigned_dev) -{ - kvm_free_assigned_irq(kvm, assigned_dev); - - pci_reset_function(assigned_dev->dev); - - pci_release_regions(assigned_dev->dev); - pci_disable_device(assigned_dev->dev); - pci_dev_put(assigned_dev->dev); - - list_del(&assigned_dev->list); - kfree(assigned_dev); -} - -void kvm_free_all_assigned_devices(struct kvm *kvm) -{ - struct list_head *ptr, *ptr2; - struct kvm_assigned_dev_kernel *assigned_dev; - - list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { - assigned_dev = list_entry(ptr, - struct kvm_assigned_dev_kernel, - list); - - kvm_free_assigned_device(kvm, assigned_dev); - } -} - -static int assigned_device_enable_host_intx(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev) -{ - dev->host_irq = dev->dev->irq; - /* Even though this is PCI, we don't want to use shared - * interrupts. Sharing host devices with guest-assigned devices - * on the same interrupt line is not a happy situation: there - * are going to be long delays in accepting, acking, etc. - */ - if (request_irq(dev->host_irq, kvm_assigned_dev_intr, - 0, "kvm_assigned_intx_device", (void *)dev)) - return -EIO; - return 0; -} - -#ifdef __KVM_HAVE_MSI -static int assigned_device_enable_host_msi(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev) -{ - int r; - - if (!dev->dev->msi_enabled) { - r = pci_enable_msi(dev->dev); - if (r) - return r; - } - - dev->host_irq = dev->dev->irq; - if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0, - "kvm_assigned_msi_device", (void *)dev)) { - pci_disable_msi(dev->dev); - return -EIO; - } - - return 0; -} -#endif - -#ifdef __KVM_HAVE_MSIX -static int assigned_device_enable_host_msix(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev) -{ - int i, r = -EINVAL; - - /* host_msix_entries and guest_msix_entries should have been - * initialized */ - if (dev->entries_nr == 0) - return r; - - r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr); - if (r) - return r; - - for (i = 0; i < dev->entries_nr; i++) { - r = request_irq(dev->host_msix_entries[i].vector, - kvm_assigned_dev_intr, 0, - "kvm_assigned_msix_device", - (void *)dev); - /* FIXME: free requested_irq's on failure */ - if (r) - return r; - } - - return 0; -} - -#endif - -static int assigned_device_enable_guest_intx(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev, - struct kvm_assigned_irq *irq) -{ - dev->guest_irq = irq->guest_irq; - dev->ack_notifier.gsi = irq->guest_irq; - return 0; -} - -#ifdef __KVM_HAVE_MSI -static int assigned_device_enable_guest_msi(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev, - struct kvm_assigned_irq *irq) -{ - dev->guest_irq = irq->guest_irq; - dev->ack_notifier.gsi = -1; - dev->host_irq_disabled = false; - return 0; -} -#endif -#ifdef __KVM_HAVE_MSIX -static int assigned_device_enable_guest_msix(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev, - struct kvm_assigned_irq *irq) -{ - dev->guest_irq = irq->guest_irq; - dev->ack_notifier.gsi = -1; - dev->host_irq_disabled = false; - return 0; -} -#endif - -static int assign_host_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev, - __u32 host_irq_type) -{ - int r = -EEXIST; - - if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) - return r; - - switch (host_irq_type) { - case KVM_DEV_IRQ_HOST_INTX: - r = assigned_device_enable_host_intx(kvm, dev); - break; -#ifdef __KVM_HAVE_MSI - case KVM_DEV_IRQ_HOST_MSI: - r = assigned_device_enable_host_msi(kvm, dev); - break; -#endif -#ifdef __KVM_HAVE_MSIX - case KVM_DEV_IRQ_HOST_MSIX: - r = assigned_device_enable_host_msix(kvm, dev); - break; -#endif - default: - r = -EINVAL; - } - - if (!r) - dev->irq_requested_type |= host_irq_type; - - return r; -} - -static int assign_guest_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev, - struct kvm_assigned_irq *irq, - unsigned long guest_irq_type) -{ - int id; - int r = -EEXIST; - - if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK) - return r; - - id = kvm_request_irq_source_id(kvm); - if (id < 0) - return id; - - dev->irq_source_id = id; - - switch (guest_irq_type) { - case KVM_DEV_IRQ_GUEST_INTX: - r = assigned_device_enable_guest_intx(kvm, dev, irq); - break; -#ifdef __KVM_HAVE_MSI - case KVM_DEV_IRQ_GUEST_MSI: - r = assigned_device_enable_guest_msi(kvm, dev, irq); - break; -#endif -#ifdef __KVM_HAVE_MSIX - case KVM_DEV_IRQ_GUEST_MSIX: - r = assigned_device_enable_guest_msix(kvm, dev, irq); - break; -#endif - default: - r = -EINVAL; - } - - if (!r) { - dev->irq_requested_type |= guest_irq_type; - kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); - } else - kvm_free_irq_source_id(kvm, dev->irq_source_id); - - return r; -} - -/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ -static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, - struct kvm_assigned_irq *assigned_irq) -{ - int r = -EINVAL; - struct kvm_assigned_dev_kernel *match; - unsigned long host_irq_type, guest_irq_type; - - if (!capable(CAP_SYS_RAWIO)) - return -EPERM; - - if (!irqchip_in_kernel(kvm)) - return r; - - mutex_lock(&kvm->lock); - r = -ENODEV; - match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - assigned_irq->assigned_dev_id); - if (!match) - goto out; - - host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK); - guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK); - - r = -EINVAL; - /* can only assign one type at a time */ - if (hweight_long(host_irq_type) > 1) - goto out; - if (hweight_long(guest_irq_type) > 1) - goto out; - if (host_irq_type == 0 && guest_irq_type == 0) - goto out; - - r = 0; - if (host_irq_type) - r = assign_host_irq(kvm, match, host_irq_type); - if (r) - goto out; - - if (guest_irq_type) - r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type); -out: - mutex_unlock(&kvm->lock); - return r; -} - -static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, - struct kvm_assigned_irq - *assigned_irq) -{ - int r = -ENODEV; - struct kvm_assigned_dev_kernel *match; - - mutex_lock(&kvm->lock); - - match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - assigned_irq->assigned_dev_id); - if (!match) - goto out; - - r = kvm_deassign_irq(kvm, match, assigned_irq->flags); -out: - mutex_unlock(&kvm->lock); - return r; -} - -static int kvm_vm_ioctl_assign_device(struct kvm *kvm, - struct kvm_assigned_pci_dev *assigned_dev) -{ - int r = 0; - struct kvm_assigned_dev_kernel *match; - struct pci_dev *dev; - - down_read(&kvm->slots_lock); - mutex_lock(&kvm->lock); - - match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - assigned_dev->assigned_dev_id); - if (match) { - /* device already assigned */ - r = -EEXIST; - goto out; - } - - match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); - if (match == NULL) { - printk(KERN_INFO "%s: Couldn't allocate memory\n", - __func__); - r = -ENOMEM; - goto out; - } - dev = pci_get_bus_and_slot(assigned_dev->busnr, - assigned_dev->devfn); - if (!dev) { - printk(KERN_INFO "%s: host device not found\n", __func__); - r = -EINVAL; - goto out_free; - } - if (pci_enable_device(dev)) { - printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); - r = -EBUSY; - goto out_put; - } - r = pci_request_regions(dev, "kvm_assigned_device"); - if (r) { - printk(KERN_INFO "%s: Could not get access to device regions\n", - __func__); - goto out_disable; - } - - pci_reset_function(dev); - - match->assigned_dev_id = assigned_dev->assigned_dev_id; - match->host_busnr = assigned_dev->busnr; - match->host_devfn = assigned_dev->devfn; - match->flags = assigned_dev->flags; - match->dev = dev; - spin_lock_init(&match->assigned_dev_lock); - match->irq_source_id = -1; - match->kvm = kvm; - match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; - INIT_WORK(&match->interrupt_work, - kvm_assigned_dev_interrupt_work_handler); - - list_add(&match->list, &kvm->arch.assigned_dev_head); - - if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { - if (!kvm->arch.iommu_domain) { - r = kvm_iommu_map_guest(kvm); - if (r) - goto out_list_del; - } - r = kvm_assign_device(kvm, match); - if (r) - goto out_list_del; - } - -out: - mutex_unlock(&kvm->lock); - up_read(&kvm->slots_lock); - return r; -out_list_del: - list_del(&match->list); - pci_release_regions(dev); -out_disable: - pci_disable_device(dev); -out_put: - pci_dev_put(dev); -out_free: - kfree(match); - mutex_unlock(&kvm->lock); - up_read(&kvm->slots_lock); - return r; -} -#endif - -#ifdef KVM_CAP_DEVICE_DEASSIGNMENT -static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, - struct kvm_assigned_pci_dev *assigned_dev) -{ - int r = 0; - struct kvm_assigned_dev_kernel *match; - - mutex_lock(&kvm->lock); - - match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - assigned_dev->assigned_dev_id); - if (!match) { - printk(KERN_INFO "%s: device hasn't been assigned before, " - "so cannot be deassigned\n", __func__); - r = -EINVAL; - goto out; - } - - if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) - kvm_deassign_device(kvm, match); - - kvm_free_assigned_device(kvm, match); - -out: - mutex_unlock(&kvm->lock); - return r; -} -#endif - inline int kvm_is_mmio_pfn(pfn_t pfn) { if (pfn_valid(pfn)) { @@ -1824,88 +1216,6 @@ static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) return 0; } -#ifdef __KVM_HAVE_MSIX -static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, - struct kvm_assigned_msix_nr *entry_nr) -{ - int r = 0; - struct kvm_assigned_dev_kernel *adev; - - mutex_lock(&kvm->lock); - - adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - entry_nr->assigned_dev_id); - if (!adev) { - r = -EINVAL; - goto msix_nr_out; - } - - if (adev->entries_nr == 0) { - adev->entries_nr = entry_nr->entry_nr; - if (adev->entries_nr == 0 || - adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) { - r = -EINVAL; - goto msix_nr_out; - } - - adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) * - entry_nr->entry_nr, - GFP_KERNEL); - if (!adev->host_msix_entries) { - r = -ENOMEM; - goto msix_nr_out; - } - adev->guest_msix_entries = kzalloc( - sizeof(struct kvm_guest_msix_entry) * - entry_nr->entry_nr, GFP_KERNEL); - if (!adev->guest_msix_entries) { - kfree(adev->host_msix_entries); - r = -ENOMEM; - goto msix_nr_out; - } - } else /* Not allowed set MSI-X number twice */ - r = -EINVAL; -msix_nr_out: - mutex_unlock(&kvm->lock); - return r; -} - -static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm, - struct kvm_assigned_msix_entry *entry) -{ - int r = 0, i; - struct kvm_assigned_dev_kernel *adev; - - mutex_lock(&kvm->lock); - - adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - entry->assigned_dev_id); - - if (!adev) { - r = -EINVAL; - goto msix_entry_out; - } - - for (i = 0; i < adev->entries_nr; i++) - if (adev->guest_msix_entries[i].vector == 0 || - adev->guest_msix_entries[i].entry == entry->entry) { - adev->guest_msix_entries[i].entry = entry->entry; - adev->guest_msix_entries[i].vector = entry->gsi; - adev->host_msix_entries[i].entry = entry->entry; - break; - } - if (i == adev->entries_nr) { - r = -ENOSPC; - goto msix_entry_out; - } - -msix_entry_out: - mutex_unlock(&kvm->lock); - - return r; -} -#endif - static long kvm_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2163,112 +1473,6 @@ static long kvm_vm_ioctl(struct file *filp, r = 0; break; } -#endif -#ifdef KVM_CAP_DEVICE_ASSIGNMENT - case KVM_ASSIGN_PCI_DEVICE: { - struct kvm_assigned_pci_dev assigned_dev; - - r = -EFAULT; - if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) - goto out; - r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); - if (r) - goto out; - break; - } - case KVM_ASSIGN_IRQ: { - r = -EOPNOTSUPP; - break; - } -#ifdef KVM_CAP_ASSIGN_DEV_IRQ - case KVM_ASSIGN_DEV_IRQ: { - struct kvm_assigned_irq assigned_irq; - - r = -EFAULT; - if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) - goto out; - r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); - if (r) - goto out; - break; - } - case KVM_DEASSIGN_DEV_IRQ: { - struct kvm_assigned_irq assigned_irq; - - r = -EFAULT; - if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) - goto out; - r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq); - if (r) - goto out; - break; - } -#endif -#endif -#ifdef KVM_CAP_DEVICE_DEASSIGNMENT - case KVM_DEASSIGN_PCI_DEVICE: { - struct kvm_assigned_pci_dev assigned_dev; - - r = -EFAULT; - if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) - goto out; - r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev); - if (r) - goto out; - break; - } -#endif -#ifdef KVM_CAP_IRQ_ROUTING - case KVM_SET_GSI_ROUTING: { - struct kvm_irq_routing routing; - struct kvm_irq_routing __user *urouting; - struct kvm_irq_routing_entry *entries; - - r = -EFAULT; - if (copy_from_user(&routing, argp, sizeof(routing))) - goto out; - r = -EINVAL; - if (routing.nr >= KVM_MAX_IRQ_ROUTES) - goto out; - if (routing.flags) - goto out; - r = -ENOMEM; - entries = vmalloc(routing.nr * sizeof(*entries)); - if (!entries) - goto out; - r = -EFAULT; - urouting = argp; - if (copy_from_user(entries, urouting->entries, - routing.nr * sizeof(*entries))) - goto out_free_irq_routing; - r = kvm_set_irq_routing(kvm, entries, routing.nr, - routing.flags); - out_free_irq_routing: - vfree(entries); - break; - } -#endif /* KVM_CAP_IRQ_ROUTING */ -#ifdef __KVM_HAVE_MSIX - case KVM_ASSIGN_SET_MSIX_NR: { - struct kvm_assigned_msix_nr entry_nr; - r = -EFAULT; - if (copy_from_user(&entry_nr, argp, sizeof entry_nr)) - goto out; - r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr); - if (r) - goto out; - break; - } - case KVM_ASSIGN_SET_MSIX_ENTRY: { - struct kvm_assigned_msix_entry entry; - r = -EFAULT; - if (copy_from_user(&entry, argp, sizeof entry)) - goto out; - r = kvm_vm_ioctl_set_msix_entry(kvm, &entry); - if (r) - goto out; - break; - } #endif case KVM_IRQFD: { struct kvm_irqfd data; @@ -2301,6 +1505,8 @@ static long kvm_vm_ioctl(struct file *filp, #endif default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); + if (r == -ENOTTY) + r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); } out: return r; -- cgit v1.2.3 From 94677e61fdcf4cdae11a1b7c8974d7034ef9bd1b Mon Sep 17 00:00:00 2001 From: Mohammed Gamal Date: Fri, 28 Aug 2009 16:41:44 +0200 Subject: KVM: x86 emulator: Add missing decoder flags for 'or' instructions Add missing decoder flags for or instructions (0xc-0xd). Signed-off-by: Mohammed Gamal Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0644d3df621a..db0820dfbffe 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -99,7 +99,8 @@ static u32 opcode_table[256] = { /* 0x08 - 0x0F */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, - 0, 0, ImplicitOps | Stack | No64, 0, + ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, + ImplicitOps | Stack | No64, 0, /* 0x10 - 0x17 */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, -- cgit v1.2.3 From abcf14b560a4ba62c659e6f5aafc8f9934d8c130 Mon Sep 17 00:00:00 2001 From: Mohammed Gamal Date: Tue, 1 Sep 2009 15:28:11 +0200 Subject: KVM: x86 emulator: Add pusha and popa instructions This adds pusha and popa instructions (opcodes 0x60-0x61), this enables booting MINIX with invalid guest state emulation on. [marcelo: remove unused variable] Signed-off-by: Mohammed Gamal Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/emulate.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index db0820dfbffe..d226dff47d77 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -139,7 +139,8 @@ static u32 opcode_table[256] = { DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack, /* 0x60 - 0x67 */ - 0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ , + ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, + 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ , 0, 0, 0, 0, /* 0x68 - 0x6F */ SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0, @@ -1225,6 +1226,43 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, return rc; } +static void emulate_pusha(struct x86_emulate_ctxt *ctxt) +{ + struct decode_cache *c = &ctxt->decode; + unsigned long old_esp = c->regs[VCPU_REGS_RSP]; + int reg = VCPU_REGS_RAX; + + while (reg <= VCPU_REGS_RDI) { + (reg == VCPU_REGS_RSP) ? + (c->src.val = old_esp) : (c->src.val = c->regs[reg]); + + emulate_push(ctxt); + ++reg; + } +} + +static int emulate_popa(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops) +{ + struct decode_cache *c = &ctxt->decode; + int rc = 0; + int reg = VCPU_REGS_RDI; + + while (reg >= VCPU_REGS_RAX) { + if (reg == VCPU_REGS_RSP) { + register_address_increment(c, &c->regs[VCPU_REGS_RSP], + c->op_bytes); + --reg; + } + + rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); + if (rc != 0) + break; + --reg; + } + return rc; +} + static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { @@ -1816,6 +1854,14 @@ special_insn: if (rc != 0) goto done; break; + case 0x60: /* pusha */ + emulate_pusha(ctxt); + break; + case 0x61: /* popa */ + rc = emulate_popa(ctxt, ops); + if (rc != 0) + goto done; + break; case 0x63: /* movsxd */ if (ctxt->mode != X86EMUL_MODE_PROT64) goto cannot_emulate; -- cgit v1.2.3 From 80ced186d1761d2a66163d9eeb468ddb1f7e0697 Mon Sep 17 00:00:00 2001 From: Mohammed Gamal Date: Tue, 1 Sep 2009 12:48:18 +0200 Subject: KVM: VMX: Enhance invalid guest state emulation - Change returned handle_invalid_guest_state() to return relevant exit codes - Move triggering the emulation from vmx_vcpu_run() to vmx_handle_exit() - Return to userspace instead of repeatedly trying to emulate instructions that have already failed Signed-off-by: Mohammed Gamal Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 44 ++++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 24 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4635298d000a..73cb5dd960cf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -107,7 +107,6 @@ struct vcpu_vmx { } rmode; int vpid; bool emulation_required; - enum emulation_result invalid_state_emulation_result; /* Support for vnmi-less CPUs */ int soft_vnmi_blocked; @@ -3322,35 +3321,37 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu) return 1; } -static void handle_invalid_guest_state(struct kvm_vcpu *vcpu) +static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); enum emulation_result err = EMULATE_DONE; - - local_irq_enable(); - preempt_enable(); + int ret = 1; while (!guest_state_valid(vcpu)) { err = emulate_instruction(vcpu, 0, 0, 0); - if (err == EMULATE_DO_MMIO) - break; + if (err == EMULATE_DO_MMIO) { + ret = 0; + goto out; + } if (err != EMULATE_DONE) { kvm_report_emulation_failure(vcpu, "emulation failure"); - break; + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + ret = 0; + goto out; } if (signal_pending(current)) - break; + goto out; if (need_resched()) schedule(); } - preempt_disable(); - local_irq_disable(); - - vmx->invalid_state_emulation_result = err; + vmx->emulation_required = 0; +out: + return ret; } /* @@ -3406,13 +3407,9 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) trace_kvm_exit(exit_reason, kvm_rip_read(vcpu)); - /* If we need to emulate an MMIO from handle_invalid_guest_state - * we just return 0 */ - if (vmx->emulation_required && emulate_invalid_guest_state) { - if (guest_state_valid(vcpu)) - vmx->emulation_required = 0; - return vmx->invalid_state_emulation_result != EMULATE_DO_MMIO; - } + /* If guest state is invalid, start emulating */ + if (vmx->emulation_required && emulate_invalid_guest_state) + return handle_invalid_guest_state(vcpu); /* Access CR3 don't cause VMExit in paging mode, so we need * to sync with guest real CR3. */ @@ -3607,11 +3604,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) vmx->entry_time = ktime_get(); - /* Handle invalid guest state instead of entering VMX */ - if (vmx->emulation_required && emulate_invalid_guest_state) { - handle_invalid_guest_state(vcpu); + /* Don't enter VMX if guest state is invalid, let the exit handler + start emulation until we arrive back to a valid state */ + if (vmx->emulation_required && emulate_invalid_guest_state) return; - } if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); -- cgit v1.2.3 From e8b3433a5c062e94e34cadb6144c10689a497bc3 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 8 Sep 2009 14:47:38 -0300 Subject: KVM: SVM: remove needless mmap_sem acquision from nested_svm_map nested_svm_map unnecessarily takes mmap_sem around gfn_to_page, since gfn_to_page / get_user_pages are responsible for it. Signed-off-by: Marcelo Tosatti Acked-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 92048a626d4e..f54c4f9d2865 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1396,10 +1396,7 @@ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx) { struct page *page; - down_read(¤t->mm->mmap_sem); page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); - up_read(¤t->mm->mmap_sem); - if (is_error_page(page)) goto error; -- cgit v1.2.3 From 10474ae8945ce08622fd1f3464e55bd817bf2376 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 15 Sep 2009 11:37:46 +0200 Subject: KVM: Activate Virtualization On Demand X86 CPUs need to have some magic happening to enable the virtualization extensions on them. This magic can result in unpleasant results for users, like blocking other VMMs from working (vmx) or using invalid TLB entries (svm). Currently KVM activates virtualization when the respective kernel module is loaded. This blocks us from autoloading KVM modules without breaking other VMMs. To circumvent this problem at least a bit, this patch introduces on demand activation of virtualization. This means, that instead virtualization is enabled on creation of the first virtual machine and disabled on destruction of the last one. So using this, KVM can be easily autoloaded, while keeping other hypervisors usable. Signed-off-by: Alexander Graf Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 8 ++-- arch/powerpc/kvm/powerpc.c | 3 +- arch/s390/kvm/kvm-s390.c | 3 +- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/svm.c | 13 ++++-- arch/x86/kvm/vmx.c | 11 +++-- arch/x86/kvm/x86.c | 4 +- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 90 +++++++++++++++++++++++++++++++++++------ 9 files changed, 108 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index f6471c882667..5fdeec5fddcf 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -124,7 +124,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler) static DEFINE_SPINLOCK(vp_lock); -void kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void *garbage) { long status; long tmp_base; @@ -137,7 +137,7 @@ void kvm_arch_hardware_enable(void *garbage) slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); local_irq_restore(saved_psr); if (slot < 0) - return; + return -EINVAL; spin_lock(&vp_lock); status = ia64_pal_vp_init_env(kvm_vsa_base ? @@ -145,7 +145,7 @@ void kvm_arch_hardware_enable(void *garbage) __pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base); if (status != 0) { printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n"); - return ; + return -EINVAL; } if (!kvm_vsa_base) { @@ -154,6 +154,8 @@ void kvm_arch_hardware_enable(void *garbage) } spin_unlock(&vp_lock); ia64_ptr_entry(0x3, slot); + + return 0; } void kvm_arch_hardware_disable(void *garbage) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 95af62217b6b..5902bbc2411e 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -78,8 +78,9 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) return r; } -void kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void *garbage) { + return 0; } void kvm_arch_hardware_disable(void *garbage) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 00e2ce8e91f5..544505893c9f 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -74,9 +74,10 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { static unsigned long long *facilities; /* Section: not file related */ -void kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void *garbage) { /* every s390 is virtualization enabled ;-) */ + return 0; } void kvm_arch_hardware_disable(void *garbage) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a46e2dd9aca8..295c7c4d9c90 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -459,7 +459,7 @@ struct descriptor_table { struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ - void (*hardware_enable)(void *dummy); /* __init */ + int (*hardware_enable)(void *dummy); void (*hardware_disable)(void *dummy); void (*check_processor_compatibility)(void *rtn); int (*hardware_setup)(void); /* __init */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f54c4f9d2865..59fe4d54da11 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -316,7 +316,7 @@ static void svm_hardware_disable(void *garbage) cpu_svm_disable(); } -static void svm_hardware_enable(void *garbage) +static int svm_hardware_enable(void *garbage) { struct svm_cpu_data *svm_data; @@ -325,16 +325,20 @@ static void svm_hardware_enable(void *garbage) struct desc_struct *gdt; int me = raw_smp_processor_id(); + rdmsrl(MSR_EFER, efer); + if (efer & EFER_SVME) + return -EBUSY; + if (!has_svm()) { printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me); - return; + return -EINVAL; } svm_data = per_cpu(svm_data, me); if (!svm_data) { printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n", me); - return; + return -EINVAL; } svm_data->asid_generation = 1; @@ -345,11 +349,12 @@ static void svm_hardware_enable(void *garbage) gdt = (struct desc_struct *)gdt_descr.base; svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); - rdmsrl(MSR_EFER, efer); wrmsrl(MSR_EFER, efer | EFER_SVME); wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(svm_data->save_area) << PAGE_SHIFT); + + return 0; } static void svm_cpu_uninit(int cpu) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 73cb5dd960cf..a187570e4837 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1138,12 +1138,15 @@ static __init int vmx_disabled_by_bios(void) /* locked but not enabled */ } -static void hardware_enable(void *garbage) +static int hardware_enable(void *garbage) { int cpu = raw_smp_processor_id(); u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); u64 old; + if (read_cr4() & X86_CR4_VMXE) + return -EBUSY; + INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); rdmsrl(MSR_IA32_FEATURE_CONTROL, old); if ((old & (FEATURE_CONTROL_LOCKED | @@ -1158,6 +1161,10 @@ static void hardware_enable(void *garbage) asm volatile (ASM_VMX_VMXON_RAX : : "a"(&phys_addr), "m"(phys_addr) : "memory", "cc"); + + ept_sync_global(); + + return 0; } static void vmclear_local_vcpus(void) @@ -4040,8 +4047,6 @@ static int __init vmx_init(void) if (bypass_guest_pf) kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); - ept_sync_global(); - return 0; out3: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 829e3063e2ab..3d83de8bcbf4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4691,9 +4691,9 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) return kvm_x86_ops->vcpu_reset(vcpu); } -void kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void *garbage) { - kvm_x86_ops->hardware_enable(garbage); + return kvm_x86_ops->hardware_enable(garbage); } void kvm_arch_hardware_disable(void *garbage) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c0a1cc35f080..b985a29d8175 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -345,7 +345,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu); -void kvm_arch_hardware_enable(void *garbage); +int kvm_arch_hardware_enable(void *garbage); void kvm_arch_hardware_disable(void *garbage); int kvm_arch_hardware_setup(void); void kvm_arch_hardware_unsetup(void); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 38e4d2c34ac1..70c8cbea0a99 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -69,6 +69,8 @@ DEFINE_SPINLOCK(kvm_lock); LIST_HEAD(vm_list); static cpumask_var_t cpus_hardware_enabled; +static int kvm_usage_count = 0; +static atomic_t hardware_enable_failed; struct kmem_cache *kvm_vcpu_cache; EXPORT_SYMBOL_GPL(kvm_vcpu_cache); @@ -79,6 +81,8 @@ struct dentry *kvm_debugfs_dir; static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); +static int hardware_enable_all(void); +static void hardware_disable_all(void); static bool kvm_rebooting; @@ -339,6 +343,7 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { static struct kvm *kvm_create_vm(void) { + int r = 0; struct kvm *kvm = kvm_arch_create_vm(); #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET struct page *page; @@ -346,6 +351,11 @@ static struct kvm *kvm_create_vm(void) if (IS_ERR(kvm)) goto out; + + r = hardware_enable_all(); + if (r) + goto out_err_nodisable; + #ifdef CONFIG_HAVE_KVM_IRQCHIP INIT_HLIST_HEAD(&kvm->mask_notifier_list); INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); @@ -354,8 +364,8 @@ static struct kvm *kvm_create_vm(void) #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!page) { - kfree(kvm); - return ERR_PTR(-ENOMEM); + r = -ENOMEM; + goto out_err; } kvm->coalesced_mmio_ring = (struct kvm_coalesced_mmio_ring *)page_address(page); @@ -363,15 +373,13 @@ static struct kvm *kvm_create_vm(void) #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) { - int err; kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; - err = mmu_notifier_register(&kvm->mmu_notifier, current->mm); - if (err) { + r = mmu_notifier_register(&kvm->mmu_notifier, current->mm); + if (r) { #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET put_page(page); #endif - kfree(kvm); - return ERR_PTR(err); + goto out_err; } } #endif @@ -395,6 +403,12 @@ static struct kvm *kvm_create_vm(void) #endif out: return kvm; + +out_err: + hardware_disable_all(); +out_err_nodisable: + kfree(kvm); + return ERR_PTR(r); } /* @@ -453,6 +467,7 @@ static void kvm_destroy_vm(struct kvm *kvm) kvm_arch_flush_shadow(kvm); #endif kvm_arch_destroy_vm(kvm); + hardware_disable_all(); mmdrop(mm); } @@ -1644,11 +1659,21 @@ static struct miscdevice kvm_dev = { static void hardware_enable(void *junk) { int cpu = raw_smp_processor_id(); + int r; if (cpumask_test_cpu(cpu, cpus_hardware_enabled)) return; + cpumask_set_cpu(cpu, cpus_hardware_enabled); - kvm_arch_hardware_enable(NULL); + + r = kvm_arch_hardware_enable(NULL); + + if (r) { + cpumask_clear_cpu(cpu, cpus_hardware_enabled); + atomic_inc(&hardware_enable_failed); + printk(KERN_INFO "kvm: enabling virtualization on " + "CPU%d failed\n", cpu); + } } static void hardware_disable(void *junk) @@ -1661,11 +1686,52 @@ static void hardware_disable(void *junk) kvm_arch_hardware_disable(NULL); } +static void hardware_disable_all_nolock(void) +{ + BUG_ON(!kvm_usage_count); + + kvm_usage_count--; + if (!kvm_usage_count) + on_each_cpu(hardware_disable, NULL, 1); +} + +static void hardware_disable_all(void) +{ + spin_lock(&kvm_lock); + hardware_disable_all_nolock(); + spin_unlock(&kvm_lock); +} + +static int hardware_enable_all(void) +{ + int r = 0; + + spin_lock(&kvm_lock); + + kvm_usage_count++; + if (kvm_usage_count == 1) { + atomic_set(&hardware_enable_failed, 0); + on_each_cpu(hardware_enable, NULL, 1); + + if (atomic_read(&hardware_enable_failed)) { + hardware_disable_all_nolock(); + r = -EBUSY; + } + } + + spin_unlock(&kvm_lock); + + return r; +} + static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, void *v) { int cpu = (long)v; + if (!kvm_usage_count) + return NOTIFY_OK; + val &= ~CPU_TASKS_FROZEN; switch (val) { case CPU_DYING: @@ -1868,13 +1934,15 @@ static void kvm_exit_debug(void) static int kvm_suspend(struct sys_device *dev, pm_message_t state) { - hardware_disable(NULL); + if (kvm_usage_count) + hardware_disable(NULL); return 0; } static int kvm_resume(struct sys_device *dev) { - hardware_enable(NULL); + if (kvm_usage_count) + hardware_enable(NULL); return 0; } @@ -1949,7 +2017,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size, goto out_free_1; } - on_each_cpu(hardware_enable, NULL, 1); r = register_cpu_notifier(&kvm_cpu_notifier); if (r) goto out_free_2; @@ -1999,7 +2066,6 @@ out_free_3: unregister_reboot_notifier(&kvm_reboot_notifier); unregister_cpu_notifier(&kvm_cpu_notifier); out_free_2: - on_each_cpu(hardware_disable, NULL, 1); out_free_1: kvm_arch_hardware_unsetup(); out_free_0a: -- cgit v1.2.3 From bfc33beaed3ecf0f92612dc7fb7095029ae7722e Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Wed, 16 Sep 2009 21:09:39 +0800 Subject: KVM: remove duplicated #include Remove duplicated #include('s) in arch/x86/kvm/lapic.c Signed-off-by: Huang Weiyi Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/lapic.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 8787637b1a9c..cd60c0bd1b32 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -32,7 +32,6 @@ #include #include #include -#include #include "kvm_cache_regs.h" #include "irq.h" #include "trace.h" -- cgit v1.2.3 From 7fcdb5103d10d2eb75876637a2efa9679cce14d3 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 16 Sep 2009 15:24:15 +0200 Subject: KVM: SVM: reorganize svm_interrupt_allowed This patch reorganizes the logic in svm_interrupt_allowed to make it better to read. This is important because the logic is a lot more complicated with Nested SVM. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 59fe4d54da11..3f3fe815c21b 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2472,10 +2472,18 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb *vmcb = svm->vmcb; - return (vmcb->save.rflags & X86_EFLAGS_IF) && - !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && - gif_set(svm) && - !(is_nested(svm) && (svm->vcpu.arch.hflags & HF_VINTR_MASK)); + int ret; + + if (!gif_set(svm) || + (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)) + return 0; + + ret = !!(vmcb->save.rflags & X86_EFLAGS_IF); + + if (is_nested(svm)) + return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK); + + return ret; } static void enable_irq_window(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 33527ad7e12a5cb50b39165945464600ab2f7632 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 16 Sep 2009 15:24:16 +0200 Subject: KVM: SVM: don't copy exit_int_info on nested vmrun The exit_int_info field is only written by the hardware and never read. So it does not need to be copied on a vmrun emulation. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 3f3fe815c21b..41c996ab87e9 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1797,8 +1797,6 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) svm->nested.intercept = nested_vmcb->control.intercept; force_new_asid(&svm->vcpu); - svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info; - svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err; svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; if (nested_vmcb->control.int_ctl & V_IRQ_MASK) { nsvm_printk("nSVM Injecting Interrupt: 0x%x\n", -- cgit v1.2.3 From e935d48e1b49451490218e1181d9834176200955 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 16 Sep 2009 15:24:19 +0200 Subject: KVM: SVM: Remove remaining occurences of rdtscll This patch replaces them with native_read_tsc() which can also be used in expressions and saves a variable on the stack in this case. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 41c996ab87e9..9a4dacab6d8a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -763,14 +763,13 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) int i; if (unlikely(cpu != vcpu->cpu)) { - u64 tsc_this, delta; + u64 delta; /* * Make sure that the guest sees a monotonically * increasing TSC. */ - rdtscll(tsc_this); - delta = vcpu->arch.host_tsc - tsc_this; + delta = vcpu->arch.host_tsc - native_read_tsc(); svm->vmcb->control.tsc_offset += delta; if (is_nested(svm)) svm->nested.hsave->control.tsc_offset += delta; @@ -792,7 +791,7 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); - rdtscll(vcpu->arch.host_tsc); + vcpu->arch.host_tsc = native_read_tsc(); } static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From b820cc0ca20fdcf8014d8e57421cf29095e39392 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Tue, 29 Sep 2009 11:38:34 -1000 Subject: KVM: Separate timer intialization into an indepedent function Signed-off-by: Zachary Amsden Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3d83de8bcbf4..6a31dfb8849c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3118,9 +3118,22 @@ static struct notifier_block kvmclock_cpufreq_notifier_block = { .notifier_call = kvmclock_cpufreq_notifier }; +static void kvm_timer_init(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + per_cpu(cpu_tsc_khz, cpu) = tsc_khz; + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { + tsc_khz_ref = tsc_khz; + cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } +} + int kvm_arch_init(void *opaque) { - int r, cpu; + int r; struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; if (kvm_x86_ops) { @@ -3152,13 +3165,7 @@ int kvm_arch_init(void *opaque) kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, PT_DIRTY_MASK, PT64_NX_MASK, 0); - for_each_possible_cpu(cpu) - per_cpu(cpu_tsc_khz, cpu) = tsc_khz; - if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { - tsc_khz_ref = tsc_khz; - cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - } + kvm_timer_init(); return 0; -- cgit v1.2.3 From 0cca790753bf0cab4b070801a46df8e1297c17f6 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Tue, 29 Sep 2009 11:38:35 -1000 Subject: KVM: Kill the confusing tsc_ref_khz and ref_freq variables They are globals, not clearly protected by any ordering or locking, and vulnerable to various startup races. Instead, for variable TSC machines, register the cpufreq notifier and get the TSC frequency directly from the cpufreq machinery. Not only is it always right, it is also perfectly accurate, as no error prone measurement is required. On such machines, when a new CPU online is brought online, it isn't clear what frequency it will start with, and it may not correspond to the reference, thus in hardware_enable we clear the cpu_tsc_khz variable to zero and make sure it is set before running on a VCPU. Signed-off-by: Zachary Amsden Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6a31dfb8849c..6f758567831a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1326,6 +1326,8 @@ out: void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { kvm_x86_ops->vcpu_load(vcpu, cpu); + if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) + per_cpu(cpu_tsc_khz, cpu) = cpufreq_quick_get(cpu); kvm_request_guest_time_update(vcpu); } @@ -3063,9 +3065,6 @@ static void bounce_off(void *info) /* nothing */ } -static unsigned int ref_freq; -static unsigned long tsc_khz_ref; - static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) { @@ -3074,14 +3073,11 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va struct kvm_vcpu *vcpu; int i, send_ipi = 0; - if (!ref_freq) - ref_freq = freq->old; - if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) return 0; if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) return 0; - per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); + per_cpu(cpu_tsc_khz, freq->cpu) = freq->new; spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { @@ -3122,12 +3118,14 @@ static void kvm_timer_init(void) { int cpu; - for_each_possible_cpu(cpu) - per_cpu(cpu_tsc_khz, cpu) = tsc_khz; if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { - tsc_khz_ref = tsc_khz; cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); + for_each_online_cpu(cpu) + per_cpu(cpu_tsc_khz, cpu) = cpufreq_get(cpu); + } else { + for_each_possible_cpu(cpu) + per_cpu(cpu_tsc_khz, cpu) = tsc_khz; } } @@ -4700,6 +4698,14 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) int kvm_arch_hardware_enable(void *garbage) { + /* + * Since this may be called from a hotplug notifcation, + * we can't get the CPU frequency directly. + */ + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { + int cpu = raw_smp_processor_id(); + per_cpu(cpu_tsc_khz, cpu) = 0; + } return kvm_x86_ops->hardware_enable(garbage); } -- cgit v1.2.3 From e6732a5af9dfcc87078706a1598df0efe5010f73 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Tue, 29 Sep 2009 11:38:36 -1000 Subject: KVM: Fix printk name error in svm.c Signed-off-by: Zachary Amsden Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 9a4dacab6d8a..d1036ce8917e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -330,13 +330,14 @@ static int svm_hardware_enable(void *garbage) return -EBUSY; if (!has_svm()) { - printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me); + printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n", + me); return -EINVAL; } svm_data = per_cpu(svm_data, me); if (!svm_data) { - printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n", + printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n", me); return -EINVAL; } -- cgit v1.2.3 From 3230bb4707278dba25e24cd0a11ea7b2337678ee Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Tue, 29 Sep 2009 11:38:37 -1000 Subject: KVM: Fix hotplug of CPUs Both VMX and SVM require per-cpu memory allocation, which is done at module init time, for only online cpus. Backend was not allocating enough structure for all possible CPUs, so new CPUs coming online could not be hardware enabled. Signed-off-by: Zachary Amsden Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 4 ++-- arch/x86/kvm/vmx.c | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d1036ce8917e..02a4269be645 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -482,7 +482,7 @@ static __init int svm_hardware_setup(void) kvm_enable_efer_bits(EFER_SVME); } - for_each_online_cpu(cpu) { + for_each_possible_cpu(cpu) { r = svm_cpu_init(cpu); if (r) goto err; @@ -516,7 +516,7 @@ static __exit void svm_hardware_unsetup(void) { int cpu; - for_each_online_cpu(cpu) + for_each_possible_cpu(cpu) svm_cpu_uninit(cpu); __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a187570e4837..97f4265cda38 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1350,15 +1350,17 @@ static void free_kvm_area(void) { int cpu; - for_each_online_cpu(cpu) + for_each_possible_cpu(cpu) { free_vmcs(per_cpu(vmxarea, cpu)); + per_cpu(vmxarea, cpu) = NULL; + } } static __init int alloc_kvm_area(void) { int cpu; - for_each_online_cpu(cpu) { + for_each_possible_cpu(cpu) { struct vmcs *vmcs; vmcs = alloc_vmcs_cpu(cpu); -- cgit v1.2.3 From 201d945bcfb0d53e67c9c081f7c28532eb4669c7 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Wed, 30 Sep 2009 17:39:07 +0200 Subject: KVM: remove pre_task_link setting in save_state_to_tss16 Now, also remove pre_task_link setting in save_state_to_tss16. commit b237ac37a149e8b56436fabf093532483bff13b0 Author: Gleb Natapov Date: Mon Mar 30 16:03:24 2009 +0300 KVM: Fix task switch back link handling. CC: Gleb Natapov Signed-off-by: Juan Quintela Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6f758567831a..5f44d565cc9b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4203,7 +4203,6 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu, tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR); - tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR); } static int load_state_from_tss16(struct kvm_vcpu *vcpu, -- cgit v1.2.3 From 355be0b9300579e02275d7d19374806a974ce622 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 3 Oct 2009 00:31:21 +0200 Subject: KVM: x86: Refactor guest debug IOCTL handling Much of so far vendor-specific code for setting up guest debug can actually be handled by the generic code. This also fixes a minor deficit in the SVM part /wrt processing KVM_GUESTDBG_ENABLE. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 4 ++-- arch/x86/kvm/svm.c | 14 ++------------ arch/x86/kvm/vmx.c | 18 +----------------- arch/x86/kvm/x86.c | 28 +++++++++++++++++++++------- 4 files changed, 26 insertions(+), 38 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 295c7c4d9c90..e7f870832603 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -475,8 +475,8 @@ struct kvm_x86_ops { void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); void (*vcpu_put)(struct kvm_vcpu *vcpu); - int (*set_guest_debug)(struct kvm_vcpu *vcpu, - struct kvm_guest_debug *dbg); + void (*set_guest_debug)(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg); int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 02a4269be645..279a2ae21b4f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1065,26 +1065,16 @@ static void update_db_intercept(struct kvm_vcpu *vcpu) vcpu->guest_debug = 0; } -static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) +static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - int old_debug = vcpu->guest_debug; struct vcpu_svm *svm = to_svm(vcpu); - vcpu->guest_debug = dbg->control; - - update_db_intercept(vcpu); - if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) svm->vmcb->save.dr7 = dbg->arch.debugreg[7]; else svm->vmcb->save.dr7 = vcpu->arch.dr7; - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) - svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; - else if (old_debug & KVM_GUESTDBG_SINGLESTEP) - svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); - - return 0; + update_db_intercept(vcpu); } static void load_host_msrs(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 97f4265cda38..70020e505c22 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1096,30 +1096,14 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) } } -static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) +static void set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - int old_debug = vcpu->guest_debug; - unsigned long flags; - - vcpu->guest_debug = dbg->control; - if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)) - vcpu->guest_debug = 0; - if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]); else vmcs_writel(GUEST_DR7, vcpu->arch.dr7); - flags = vmcs_readl(GUEST_RFLAGS); - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) - flags |= X86_EFLAGS_TF | X86_EFLAGS_RF; - else if (old_debug & KVM_GUESTDBG_SINGLESTEP) - flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); - vmcs_writel(GUEST_RFLAGS, flags); - update_exception_bitmap(vcpu); - - return 0; } static __init int cpu_has_kvm_support(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5f44d565cc9b..a06f88e66c89 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4472,12 +4472,19 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - int i, r; + unsigned long rflags; + int old_debug; + int i; vcpu_load(vcpu); - if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) == - (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) { + old_debug = vcpu->guest_debug; + + vcpu->guest_debug = dbg->control; + if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)) + vcpu->guest_debug = 0; + + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { for (i = 0; i < KVM_NR_DB_REGS; ++i) vcpu->arch.eff_db[i] = dbg->arch.debugreg[i]; vcpu->arch.switch_db_regs = @@ -4488,16 +4495,23 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); } - r = kvm_x86_ops->set_guest_debug(vcpu, dbg); + rflags = kvm_x86_ops->get_rflags(vcpu); + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) + rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; + else if (old_debug & KVM_GUESTDBG_SINGLESTEP) + rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); + kvm_x86_ops->set_rflags(vcpu, rflags); - if (dbg->control & KVM_GUESTDBG_INJECT_DB) + kvm_x86_ops->set_guest_debug(vcpu, dbg); + + if (vcpu->guest_debug & KVM_GUESTDBG_INJECT_DB) kvm_queue_exception(vcpu, DB_VECTOR); - else if (dbg->control & KVM_GUESTDBG_INJECT_BP) + else if (vcpu->guest_debug & KVM_GUESTDBG_INJECT_BP) kvm_queue_exception(vcpu, BP_VECTOR); vcpu_put(vcpu); - return r; + return 0; } /* -- cgit v1.2.3 From a68a6a7282373bedba8a2ed751b6384edb983a64 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 1 Oct 2009 19:28:39 -0300 Subject: KVM: x86: disable paravirt mmu reporting Disable paravirt MMU capability reporting, so that new (or rebooted) guests switch to native operation. Paravirt MMU is a burden to maintain and does not bring significant advantages compared to shadow anymore. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a06f88e66c89..4693f915f3bd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1238,8 +1238,8 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_NR_MEMSLOTS: r = KVM_MEMORY_SLOTS; break; - case KVM_CAP_PV_MMU: - r = !tdp_enabled; + case KVM_CAP_PV_MMU: /* obsolete */ + r = 0; break; case KVM_CAP_IOMMU: r = iommu_found(); -- cgit v1.2.3 From 91586a3b7d79432772a3cdcb81473cd08a237c79 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 5 Oct 2009 13:07:21 +0200 Subject: KVM: x86: Rework guest single-step flag injection and filtering Push TF and RF injection and filtering on guest single-stepping into the vender get/set_rflags callbacks. This makes the whole mechanism more robust wrt user space IOCTL order and instruction emulations. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 3 ++ arch/x86/kvm/x86.c | 77 ++++++++++++++++++++++++----------------- 2 files changed, 48 insertions(+), 32 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e7f870832603..179a919f53a4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -614,6 +614,9 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); +unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu); +void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); + void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4693f915f3bd..385cd0a1e23d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -235,6 +235,25 @@ bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl) } EXPORT_SYMBOL_GPL(kvm_require_cpl); +unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) +{ + unsigned long rflags; + + rflags = kvm_x86_ops->get_rflags(vcpu); + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) + rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF); + return rflags; +} +EXPORT_SYMBOL_GPL(kvm_get_rflags); + +void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) +{ + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) + rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; + kvm_x86_ops->set_rflags(vcpu, rflags); +} +EXPORT_SYMBOL_GPL(kvm_set_rflags); + /* * Load the pae pdptrs. Return true is they are all valid. */ @@ -2777,7 +2796,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); vcpu->arch.emulate_ctxt.vcpu = vcpu; - vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); + vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); vcpu->arch.emulate_ctxt.mode = (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_REAL : cs_l @@ -2855,7 +2874,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, return EMULATE_DO_MMIO; } - kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); + kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); if (vcpu->mmio_is_write) { vcpu->mmio_needed = 0; @@ -3291,7 +3310,7 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, unsigned long *rflags) { kvm_lmsw(vcpu, msw); - *rflags = kvm_x86_ops->get_rflags(vcpu); + *rflags = kvm_get_rflags(vcpu); } unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) @@ -3329,7 +3348,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, switch (cr) { case 0: kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); - *rflags = kvm_x86_ops->get_rflags(vcpu); + *rflags = kvm_get_rflags(vcpu); break; case 2: vcpu->arch.cr2 = val; @@ -3460,7 +3479,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) { struct kvm_run *kvm_run = vcpu->run; - kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0; + kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0; kvm_run->cr8 = kvm_get_cr8(vcpu); kvm_run->apic_base = kvm_get_apic_base(vcpu); if (irqchip_in_kernel(vcpu->kvm)) @@ -3840,13 +3859,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) #endif regs->rip = kvm_rip_read(vcpu); - regs->rflags = kvm_x86_ops->get_rflags(vcpu); - - /* - * Don't leak debug flags in case they were set for guest debugging - */ - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) - regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); + regs->rflags = kvm_get_rflags(vcpu); vcpu_put(vcpu); @@ -3874,12 +3887,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13); kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14); kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15); - #endif kvm_rip_write(vcpu, regs->rip); - kvm_x86_ops->set_rflags(vcpu, regs->rflags); - + kvm_set_rflags(vcpu, regs->rflags); vcpu->arch.exception.pending = false; @@ -4098,7 +4109,7 @@ static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) { return (seg != VCPU_SREG_LDTR) && (seg != VCPU_SREG_TR) && - (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_VM); + (kvm_get_rflags(vcpu) & X86_EFLAGS_VM); } int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, @@ -4126,7 +4137,7 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu, { tss->cr3 = vcpu->arch.cr3; tss->eip = kvm_rip_read(vcpu); - tss->eflags = kvm_x86_ops->get_rflags(vcpu); + tss->eflags = kvm_get_rflags(vcpu); tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX); tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX); @@ -4150,7 +4161,7 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu, kvm_set_cr3(vcpu, tss->cr3); kvm_rip_write(vcpu, tss->eip); - kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2); + kvm_set_rflags(vcpu, tss->eflags | 2); kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax); kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx); @@ -4188,7 +4199,7 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu, struct tss_segment_16 *tss) { tss->ip = kvm_rip_read(vcpu); - tss->flag = kvm_x86_ops->get_rflags(vcpu); + tss->flag = kvm_get_rflags(vcpu); tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX); tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX); tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX); @@ -4209,7 +4220,7 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu, struct tss_segment_16 *tss) { kvm_rip_write(vcpu, tss->ip); - kvm_x86_ops->set_rflags(vcpu, tss->flag | 2); + kvm_set_rflags(vcpu, tss->flag | 2); kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax); kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx); kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx); @@ -4355,8 +4366,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) } if (reason == TASK_SWITCH_IRET) { - u32 eflags = kvm_x86_ops->get_rflags(vcpu); - kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); + u32 eflags = kvm_get_rflags(vcpu); + kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); } /* set back link to prev task only if NT bit is set in eflags @@ -4377,8 +4388,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) old_tss_base, &nseg_desc); if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { - u32 eflags = kvm_x86_ops->get_rflags(vcpu); - kvm_x86_ops->set_rflags(vcpu, eflags | X86_EFLAGS_NT); + u32 eflags = kvm_get_rflags(vcpu); + kvm_set_rflags(vcpu, eflags | X86_EFLAGS_NT); } if (reason != TASK_SWITCH_IRET) { @@ -4473,12 +4484,15 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { unsigned long rflags; - int old_debug; int i; vcpu_load(vcpu); - old_debug = vcpu->guest_debug; + /* + * Read rflags as long as potentially injected trace flags are still + * filtered out. + */ + rflags = kvm_get_rflags(vcpu); vcpu->guest_debug = dbg->control; if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)) @@ -4495,12 +4509,11 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); } - rflags = kvm_x86_ops->get_rflags(vcpu); - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) - rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; - else if (old_debug & KVM_GUESTDBG_SINGLESTEP) - rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); - kvm_x86_ops->set_rflags(vcpu, rflags); + /* + * Trigger an rflags update that will inject or remove the trace + * flags. + */ + kvm_set_rflags(vcpu, rflags); kvm_x86_ops->set_guest_debug(vcpu, dbg); -- cgit v1.2.3 From e3267cbbbfbcbe9c18833e89b10beabb1117cb55 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 6 Oct 2009 13:24:50 -0400 Subject: KVM: x86: include pvclock MSRs in msrs_to_save For a while now, we are issuing a rdmsr instruction to find out which msrs in our save list are really supported by the underlying machine. However, it fails to account for kvm-specific msrs, such as the pvclock ones. This patch moves then to the beginning of the list, and skip testing them. Cc: stable@kernel.org Signed-off-by: Glauber Costa Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 385cd0a1e23d..4de5bc0a8e86 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -503,16 +503,19 @@ static inline u32 bit(int bitno) * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. * * This list is modified at module load time to reflect the - * capabilities of the host cpu. + * capabilities of the host cpu. This capabilities test skips MSRs that are + * kvm-specific. Those are put in the beginning of the list. */ + +#define KVM_SAVE_MSRS_BEGIN 2 static u32 msrs_to_save[] = { + MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, MSR_K6_STAR, #ifdef CONFIG_X86_64 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif - MSR_IA32_TSC, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, - MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA + MSR_IA32_TSC, MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA }; static unsigned num_msrs_to_save; @@ -2446,7 +2449,8 @@ static void kvm_init_msr_list(void) u32 dummy[2]; unsigned i, j; - for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) { + /* skip the first msrs in the list. KVM-specific */ + for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) { if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) continue; if (j < i) -- cgit v1.2.3 From 8d23c4662427507f432c96ac4fa3b76f0a8360cd Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 9 Oct 2009 16:08:25 +0200 Subject: KVM: SVM: Notify nested hypervisor of lost event injections If event_inj is valid on a #vmexit the host CPU would write the contents to exit_int_info, so the hypervisor knows that the event wasn't injected. We don't do this in nested SVM by now which is a bug and fixed by this patch. Signed-off-by: Alexander Graf Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 279a2ae21b4f..e37285446cb7 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1615,6 +1615,22 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2; nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info; nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err; + + /* + * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have + * to make sure that we do not lose injected events. So check event_inj + * here and copy it to exit_int_info if it is valid. + * Exit_int_info and event_inj can't be both valid because the case + * below only happens on a VMRUN instruction intercept which has + * no valid exit_int_info set. + */ + if (vmcb->control.event_inj & SVM_EVTINJ_VALID) { + struct vmcb_control_area *nc = &nested_vmcb->control; + + nc->exit_int_info = vmcb->control.event_inj; + nc->exit_int_info_err = vmcb->control.event_inj_err; + } + nested_vmcb->control.tlb_ctl = 0; nested_vmcb->control.event_inj = 0; nested_vmcb->control.event_inj_err = 0; -- cgit v1.2.3 From cd3ff653ae0b45bac7a19208e9c75034fcacc85f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Oct 2009 16:08:26 +0200 Subject: KVM: SVM: Move INTR vmexit out of atomic code The nested SVM code emulates a #vmexit caused by a request to open the irq window right in the request function. This is a bug because the request function runs with preemption and interrupts disabled but the #vmexit emulation might sleep. This can cause a schedule()-while-atomic bug and is fixed with this patch. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e37285446cb7..884bffc70c7f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -85,6 +85,9 @@ struct nested_state { /* gpa pointers to the real vectors */ u64 vmcb_msrpm; + /* A VMEXIT is required but not yet emulated */ + bool exit_required; + /* cache for intercepts of the guest */ u16 intercept_cr_read; u16 intercept_cr_write; @@ -1379,7 +1382,14 @@ static inline int nested_svm_intr(struct vcpu_svm *svm) svm->vmcb->control.exit_code = SVM_EXIT_INTR; - if (nested_svm_exit_handled(svm)) { + if (svm->nested.intercept & 1ULL) { + /* + * The #vmexit can't be emulated here directly because this + * code path runs with irqs and preemtion disabled. A + * #vmexit emulation might sleep. Only signal request for + * the #vmexit here. + */ + svm->nested.exit_required = true; nsvm_printk("VMexit -> INTR\n"); return 1; } @@ -2340,6 +2350,13 @@ static int handle_exit(struct kvm_vcpu *vcpu) trace_kvm_exit(exit_code, svm->vmcb->save.rip); + if (unlikely(svm->nested.exit_required)) { + nested_svm_vmexit(svm); + svm->nested.exit_required = false; + + return 1; + } + if (is_nested(svm)) { int vmexit; @@ -2615,6 +2632,13 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) u16 gs_selector; u16 ldt_selector; + /* + * A vmexit emulation is required before the vcpu can be executed + * again. + */ + if (unlikely(svm->nested.exit_required)) + return; + svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; -- cgit v1.2.3 From 0ac406de8f3780c8e0801d5719e1ec531d4a6ec4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Oct 2009 16:08:27 +0200 Subject: KVM: SVM: Add tracepoint for nested vmrun This patch adds a dedicated kvm tracepoint for a nested vmrun. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 6 ++++++ arch/x86/kvm/trace.h | 33 +++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 1 + 3 files changed, 40 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 884bffc70c7f..907af3f3a7af 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1726,6 +1726,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) /* nested_vmcb is our indicator if nested SVM is activated */ svm->nested.vmcb = svm->vmcb->save.rax; + trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb, + nested_vmcb->save.rip, + nested_vmcb->control.int_ctl, + nested_vmcb->control.event_inj, + nested_vmcb->control.nested_ctl); + /* Clear internal status */ kvm_clear_exception_queue(&svm->vcpu); kvm_clear_interrupt_queue(&svm->vcpu); diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 0d480e77eacf..b5798e12182a 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -349,6 +349,39 @@ TRACE_EVENT(kvm_apic_accept_irq, __entry->coalesced ? " (coalesced)" : "") ); +/* + * Tracepoint for nested VMRUN + */ +TRACE_EVENT(kvm_nested_vmrun, + TP_PROTO(__u64 rip, __u64 vmcb, __u64 nested_rip, __u32 int_ctl, + __u32 event_inj, bool npt), + TP_ARGS(rip, vmcb, nested_rip, int_ctl, event_inj, npt), + + TP_STRUCT__entry( + __field( __u64, rip ) + __field( __u64, vmcb ) + __field( __u64, nested_rip ) + __field( __u32, int_ctl ) + __field( __u32, event_inj ) + __field( bool, npt ) + ), + + TP_fast_assign( + __entry->rip = rip; + __entry->vmcb = vmcb; + __entry->nested_rip = nested_rip; + __entry->int_ctl = int_ctl; + __entry->event_inj = event_inj; + __entry->npt = npt; + ), + + TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x " + "event_inj: 0x%08x npt: %s\n", + __entry->rip, __entry->vmcb, __entry->nested_rip, + __entry->int_ctl, __entry->event_inj, + __entry->npt ? "on" : "off") +); + #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4de5bc0a8e86..3ab2f9042dd0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4984,3 +4984,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun); -- cgit v1.2.3 From d8cabddf7e8fbdced2dd668c98d7762c7ef75245 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Oct 2009 16:08:28 +0200 Subject: KVM: SVM: Add tracepoint for nested #vmexit This patch adds a tracepoint for every #vmexit we get from a nested guest. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 6 ++++++ arch/x86/kvm/trace.h | 36 ++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 1 + 3 files changed, 43 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 907af3f3a7af..edf6e8b2b84e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2366,6 +2366,12 @@ static int handle_exit(struct kvm_vcpu *vcpu) if (is_nested(svm)) { int vmexit; + trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code, + svm->vmcb->control.exit_info_1, + svm->vmcb->control.exit_info_2, + svm->vmcb->control.exit_int_info, + svm->vmcb->control.exit_int_info_err); + nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n", exit_code, svm->vmcb->control.exit_info_1, svm->vmcb->control.exit_info_2, svm->vmcb->save.rip); diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index b5798e12182a..a7eb6299a261 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -382,6 +382,42 @@ TRACE_EVENT(kvm_nested_vmrun, __entry->npt ? "on" : "off") ); +/* + * Tracepoint for #VMEXIT while nested + */ +TRACE_EVENT(kvm_nested_vmexit, + TP_PROTO(__u64 rip, __u32 exit_code, + __u64 exit_info1, __u64 exit_info2, + __u32 exit_int_info, __u32 exit_int_info_err), + TP_ARGS(rip, exit_code, exit_info1, exit_info2, + exit_int_info, exit_int_info_err), + + TP_STRUCT__entry( + __field( __u64, rip ) + __field( __u32, exit_code ) + __field( __u64, exit_info1 ) + __field( __u64, exit_info2 ) + __field( __u32, exit_int_info ) + __field( __u32, exit_int_info_err ) + ), + + TP_fast_assign( + __entry->rip = rip; + __entry->exit_code = exit_code; + __entry->exit_info1 = exit_info1; + __entry->exit_info2 = exit_info2; + __entry->exit_int_info = exit_int_info; + __entry->exit_int_info_err = exit_int_info_err; + ), + TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " + "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", + __entry->rip, + ftrace_print_symbols_seq(p, __entry->exit_code, + kvm_x86_ops->exit_reasons_str), + __entry->exit_info1, __entry->exit_info2, + __entry->exit_int_info, __entry->exit_int_info_err) +); + #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3ab2f9042dd0..192d58efc6dc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4985,3 +4985,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit); -- cgit v1.2.3 From 17897f366847a9ef8a13e3671a0eb1c15422abed Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Oct 2009 16:08:29 +0200 Subject: KVM: SVM: Add tracepoint for injected #vmexit This patch adds a tracepoint for a nested #vmexit that gets re-injected to the guest. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 6 ++++++ arch/x86/kvm/trace.h | 33 +++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 1 + 3 files changed, 40 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index edf6e8b2b84e..369eeb86e87c 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1592,6 +1592,12 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) struct vmcb *hsave = svm->nested.hsave; struct vmcb *vmcb = svm->vmcb; + trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, + vmcb->control.exit_info_1, + vmcb->control.exit_info_2, + vmcb->control.exit_int_info, + vmcb->control.exit_int_info_err); + nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0); if (!nested_vmcb) return 1; diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index a7eb6299a261..4d6bb5ee39b5 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -418,6 +418,39 @@ TRACE_EVENT(kvm_nested_vmexit, __entry->exit_int_info, __entry->exit_int_info_err) ); +/* + * Tracepoint for #VMEXIT reinjected to the guest + */ +TRACE_EVENT(kvm_nested_vmexit_inject, + TP_PROTO(__u32 exit_code, + __u64 exit_info1, __u64 exit_info2, + __u32 exit_int_info, __u32 exit_int_info_err), + TP_ARGS(exit_code, exit_info1, exit_info2, + exit_int_info, exit_int_info_err), + + TP_STRUCT__entry( + __field( __u32, exit_code ) + __field( __u64, exit_info1 ) + __field( __u64, exit_info2 ) + __field( __u32, exit_int_info ) + __field( __u32, exit_int_info_err ) + ), + + TP_fast_assign( + __entry->exit_code = exit_code; + __entry->exit_info1 = exit_info1; + __entry->exit_info2 = exit_info2; + __entry->exit_int_info = exit_int_info; + __entry->exit_int_info_err = exit_int_info_err; + ), + + TP_printk("reason: %s ext_inf1: 0x%016llx " + "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", + ftrace_print_symbols_seq(p, __entry->exit_code, + kvm_x86_ops->exit_reasons_str), + __entry->exit_info1, __entry->exit_info2, + __entry->exit_int_info, __entry->exit_int_info_err) +); #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 192d58efc6dc..a522d9ba81b3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4986,3 +4986,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject); -- cgit v1.2.3 From 236649de3360916ef85f95c82723af17a25b9179 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Oct 2009 16:08:30 +0200 Subject: KVM: SVM: Add tracepoint for #vmexit because intr pending This patch adds a special tracepoint for the event that a nested #vmexit is injected because kvm wants to inject an interrupt into the guest. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 2 +- arch/x86/kvm/trace.h | 18 ++++++++++++++++++ arch/x86/kvm/x86.c | 1 + 3 files changed, 20 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 369eeb86e87c..78a391c60a75 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1390,7 +1390,7 @@ static inline int nested_svm_intr(struct vcpu_svm *svm) * the #vmexit here. */ svm->nested.exit_required = true; - nsvm_printk("VMexit -> INTR\n"); + trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); return 1; } diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 4d6bb5ee39b5..3cc8f444be14 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -451,6 +451,24 @@ TRACE_EVENT(kvm_nested_vmexit_inject, __entry->exit_info1, __entry->exit_info2, __entry->exit_int_info, __entry->exit_int_info_err) ); + +/* + * Tracepoint for nested #vmexit because of interrupt pending + */ +TRACE_EVENT(kvm_nested_intr_vmexit, + TP_PROTO(__u64 rip), + TP_ARGS(rip), + + TP_STRUCT__entry( + __field( __u64, rip ) + ), + + TP_fast_assign( + __entry->rip = rip + ), + + TP_printk("rip: 0x%016llx\n", __entry->rip) +); #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a522d9ba81b3..2cf4146b425a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4987,3 +4987,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); -- cgit v1.2.3 From ec1ff79084fccdae0dca9b04b89dcdf3235bbfa1 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Oct 2009 16:08:31 +0200 Subject: KVM: SVM: Add tracepoint for invlpga instruction This patch adds a tracepoint for the event that the guest executed the INVLPGA instruction. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 3 +++ arch/x86/kvm/trace.h | 23 +++++++++++++++++++++++ arch/x86/kvm/x86.c | 1 + 3 files changed, 27 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 78a391c60a75..ba18fb7d3657 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1976,6 +1976,9 @@ static int invlpga_interception(struct vcpu_svm *svm) struct kvm_vcpu *vcpu = &svm->vcpu; nsvm_printk("INVLPGA\n"); + trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX], + vcpu->arch.regs[VCPU_REGS_RAX]); + /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */ kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]); diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 3cc8f444be14..7e1f08e959bc 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -469,6 +469,29 @@ TRACE_EVENT(kvm_nested_intr_vmexit, TP_printk("rip: 0x%016llx\n", __entry->rip) ); + +/* + * Tracepoint for nested #vmexit because of interrupt pending + */ +TRACE_EVENT(kvm_invlpga, + TP_PROTO(__u64 rip, int asid, u64 address), + TP_ARGS(rip, asid, address), + + TP_STRUCT__entry( + __field( __u64, rip ) + __field( int, asid ) + __field( __u64, address ) + ), + + TP_fast_assign( + __entry->rip = rip; + __entry->asid = asid; + __entry->address = address; + ), + + TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx\n", + __entry->rip, __entry->asid, __entry->address) +); #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2cf4146b425a..86596fc7941c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4988,3 +4988,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); -- cgit v1.2.3 From 532a46b98963f110e9425a251e127d6537915dde Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Oct 2009 16:08:32 +0200 Subject: KVM: SVM: Add tracepoint for skinit instruction This patch adds a tracepoint for the event that the guest executed the SKINIT instruction. This information is important because SKINIT is an SVM extenstion not yet implemented by nested SVM and we may need this information for debugging hypervisors that do not yet run on nested SVM. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 10 +++++++++- arch/x86/kvm/trace.h | 22 ++++++++++++++++++++++ arch/x86/kvm/x86.c | 1 + 3 files changed, 32 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ba18fb7d3657..8b9f6fbba48c 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1987,6 +1987,14 @@ static int invlpga_interception(struct vcpu_svm *svm) return 1; } +static int skinit_interception(struct vcpu_svm *svm) +{ + trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]); + + kvm_queue_exception(&svm->vcpu, UD_VECTOR); + return 1; +} + static int invalid_op_interception(struct vcpu_svm *svm) { kvm_queue_exception(&svm->vcpu, UD_VECTOR); @@ -2350,7 +2358,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_VMSAVE] = vmsave_interception, [SVM_EXIT_STGI] = stgi_interception, [SVM_EXIT_CLGI] = clgi_interception, - [SVM_EXIT_SKINIT] = invalid_op_interception, + [SVM_EXIT_SKINIT] = skinit_interception, [SVM_EXIT_WBINVD] = emulate_on_interception, [SVM_EXIT_MONITOR] = invalid_op_interception, [SVM_EXIT_MWAIT] = invalid_op_interception, diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 7e1f08e959bc..816e0449db0b 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -492,6 +492,28 @@ TRACE_EVENT(kvm_invlpga, TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx\n", __entry->rip, __entry->asid, __entry->address) ); + +/* + * Tracepoint for nested #vmexit because of interrupt pending + */ +TRACE_EVENT(kvm_skinit, + TP_PROTO(__u64 rip, __u32 slb), + TP_ARGS(rip, slb), + + TP_STRUCT__entry( + __field( __u64, rip ) + __field( __u32, slb ) + ), + + TP_fast_assign( + __entry->rip = rip; + __entry->slb = slb; + ), + + TP_printk("rip: 0x%016llx slb: 0x%08x\n", + __entry->rip, __entry->slb) +); + #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 86596fc7941c..098e7f886306 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4989,3 +4989,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); -- cgit v1.2.3 From d36f19e9ecd22dc035ef4cc6361b564be650f8e7 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Oct 2009 16:08:33 +0200 Subject: KVM: SVM: Remove nsvm_printk debugging code With all important informations now delivered through tracepoints we can savely remove the nsvm_printk debugging code for nested svm. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 34 ---------------------------------- 1 file changed, 34 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8b9f6fbba48c..69610c5d6dea 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -53,15 +53,6 @@ MODULE_LICENSE("GPL"); #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) -/* Turn on to get debugging output*/ -/* #define NESTED_DEBUG */ - -#ifdef NESTED_DEBUG -#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args) -#else -#define nsvm_printk(fmt, args...) do {} while(0) -#endif - static const u32 host_save_user_msrs[] = { #ifdef CONFIG_X86_64 MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, @@ -1540,14 +1531,12 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm) } default: { u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); - nsvm_printk("exit code: 0x%x\n", exit_code); if (svm->nested.intercept & exit_bits) vmexit = NESTED_EXIT_DONE; } } if (vmexit == NESTED_EXIT_DONE) { - nsvm_printk("#VMEXIT reason=%04x\n", exit_code); nested_svm_vmexit(svm); } @@ -1658,10 +1647,6 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) /* Restore the original control entries */ copy_vmcb_control_area(vmcb, hsave); - /* Kill any pending exceptions */ - if (svm->vcpu.arch.exception.pending == true) - nsvm_printk("WARNING: Pending Exception\n"); - kvm_clear_exception_queue(&svm->vcpu); kvm_clear_interrupt_queue(&svm->vcpu); @@ -1826,25 +1811,14 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) force_new_asid(&svm->vcpu); svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; - if (nested_vmcb->control.int_ctl & V_IRQ_MASK) { - nsvm_printk("nSVM Injecting Interrupt: 0x%x\n", - nested_vmcb->control.int_ctl); - } if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) svm->vcpu.arch.hflags |= HF_VINTR_MASK; else svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; - nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n", - nested_vmcb->control.exit_int_info, - nested_vmcb->control.int_state); - svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; svm->vmcb->control.int_state = nested_vmcb->control.int_state; svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; - if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID) - nsvm_printk("Injecting Event: 0x%x\n", - nested_vmcb->control.event_inj); svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; @@ -1913,8 +1887,6 @@ static int vmsave_interception(struct vcpu_svm *svm) static int vmrun_interception(struct vcpu_svm *svm) { - nsvm_printk("VMrun\n"); - if (nested_svm_check_permissions(svm)) return 1; @@ -1974,7 +1946,6 @@ static int clgi_interception(struct vcpu_svm *svm) static int invlpga_interception(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; - nsvm_printk("INVLPGA\n"); trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX], vcpu->arch.regs[VCPU_REGS_RAX]); @@ -2389,10 +2360,6 @@ static int handle_exit(struct kvm_vcpu *vcpu) svm->vmcb->control.exit_int_info, svm->vmcb->control.exit_int_info_err); - nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n", - exit_code, svm->vmcb->control.exit_info_1, - svm->vmcb->control.exit_info_2, svm->vmcb->save.rip); - vmexit = nested_svm_exit_special(svm); if (vmexit == NESTED_EXIT_CONTINUE) @@ -2539,7 +2506,6 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) static void enable_irq_window(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - nsvm_printk("Trying to open IRQ window\n"); nested_svm_intr(svm); -- cgit v1.2.3 From 4b8d54f9726f1159330201c5ed2ea30bce7e63ea Mon Sep 17 00:00:00 2001 From: "Zhai, Edwin" Date: Fri, 9 Oct 2009 18:03:20 +0800 Subject: KVM: VMX: Add support for Pause-Loop Exiting New NHM processors will support Pause-Loop Exiting by adding 2 VM-execution control fields: PLE_Gap - upper bound on the amount of time between two successive executions of PAUSE in a loop. PLE_Window - upper bound on the amount of time a guest is allowed to execute in a PAUSE loop If the time, between this execution of PAUSE and previous one, exceeds the PLE_Gap, processor consider this PAUSE belongs to a new loop. Otherwise, processor determins the the total execution time of this loop(since 1st PAUSE in this loop), and triggers a VM exit if total time exceeds the PLE_Window. * Refer SDM volume 3b section 21.6.13 & 22.1.3. Pause-Loop Exiting can be used to detect Lock-Holder Preemption, where one VP is sched-out after hold a spinlock, then other VPs for same lock are sched-in to waste the CPU time. Our tests indicate that most spinlocks are held for less than 212 cycles. Performance tests show that with 2X LP over-commitment we can get +2% perf improvement for kernel build(Even more perf gain with more LPs). Signed-off-by: Zhai Edwin Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/vmx.h | 4 ++++ arch/x86/kvm/vmx.c | 51 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 54 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 272514c2d456..2b4945419a84 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -56,6 +56,7 @@ #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 +#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 #define PIN_BASED_EXT_INTR_MASK 0x00000001 @@ -144,6 +145,8 @@ enum vmcs_field { VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, TPR_THRESHOLD = 0x0000401c, SECONDARY_VM_EXEC_CONTROL = 0x0000401e, + PLE_GAP = 0x00004020, + PLE_WINDOW = 0x00004022, VM_INSTRUCTION_ERROR = 0x00004400, VM_EXIT_REASON = 0x00004402, VM_EXIT_INTR_INFO = 0x00004404, @@ -248,6 +251,7 @@ enum vmcs_field { #define EXIT_REASON_MSR_READ 31 #define EXIT_REASON_MSR_WRITE 32 #define EXIT_REASON_MWAIT_INSTRUCTION 36 +#define EXIT_REASON_PAUSE_INSTRUCTION 40 #define EXIT_REASON_MCE_DURING_VMENTRY 41 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 #define EXIT_REASON_APIC_ACCESS 44 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 70020e505c22..a4580d65af59 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -61,6 +61,25 @@ module_param_named(unrestricted_guest, static int __read_mostly emulate_invalid_guest_state = 0; module_param(emulate_invalid_guest_state, bool, S_IRUGO); +/* + * These 2 parameters are used to config the controls for Pause-Loop Exiting: + * ple_gap: upper bound on the amount of time between two successive + * executions of PAUSE in a loop. Also indicate if ple enabled. + * According to test, this time is usually small than 41 cycles. + * ple_window: upper bound on the amount of time a guest is allowed to execute + * in a PAUSE loop. Tests indicate that most spinlocks are held for + * less than 2^12 cycles + * Time is measured based on a counter that runs at the same rate as the TSC, + * refer SDM volume 3b section 21.6.13 & 22.1.3. + */ +#define KVM_VMX_DEFAULT_PLE_GAP 41 +#define KVM_VMX_DEFAULT_PLE_WINDOW 4096 +static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP; +module_param(ple_gap, int, S_IRUGO); + +static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; +module_param(ple_window, int, S_IRUGO); + struct vmcs { u32 revision_id; u32 abort; @@ -319,6 +338,12 @@ static inline int cpu_has_vmx_unrestricted_guest(void) SECONDARY_EXEC_UNRESTRICTED_GUEST; } +static inline int cpu_has_vmx_ple(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_PAUSE_LOOP_EXITING; +} + static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) { return flexpriority_enabled && @@ -1240,7 +1265,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) SECONDARY_EXEC_WBINVD_EXITING | SECONDARY_EXEC_ENABLE_VPID | SECONDARY_EXEC_ENABLE_EPT | - SECONDARY_EXEC_UNRESTRICTED_GUEST; + SECONDARY_EXEC_UNRESTRICTED_GUEST | + SECONDARY_EXEC_PAUSE_LOOP_EXITING; if (adjust_vmx_controls(min2, opt2, MSR_IA32_VMX_PROCBASED_CTLS2, &_cpu_based_2nd_exec_control) < 0) @@ -1386,6 +1412,9 @@ static __init int hardware_setup(void) if (enable_ept && !cpu_has_vmx_ept_2m_page()) kvm_disable_largepages(); + if (!cpu_has_vmx_ple()) + ple_gap = 0; + return alloc_kvm_area(); } @@ -2298,9 +2327,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; if (!enable_unrestricted_guest) exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; + if (!ple_gap) + exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); } + if (ple_gap) { + vmcs_write32(PLE_GAP, ple_gap); + vmcs_write32(PLE_WINDOW, ple_window); + } + vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, !!bypass_guest_pf); vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf); vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ @@ -3347,6 +3383,18 @@ out: return ret; } +/* + * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE + * exiting, so only get here on cpu with PAUSE-Loop-Exiting. + */ +static int handle_pause(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + skip_emulated_instruction(vcpu); + kvm_vcpu_on_spin(vcpu); + + return 1; +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -3383,6 +3431,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, + [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, }; static const int kvm_vmx_max_exit_handlers = -- cgit v1.2.3 From 565d0998ecac8373b9a9ecd5991abe74318cd235 Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Tue, 6 Oct 2009 14:25:02 -0500 Subject: KVM: SVM: Support Pause Filter in AMD processors New AMD processors (Family 0x10 models 8+) support the Pause Filter Feature. This feature creates a new field in the VMCB called Pause Filter Count. If Pause Filter Count is greater than 0 and intercepting PAUSEs is enabled, the processor will increment an internal counter when a PAUSE instruction occurs instead of intercepting. When the internal counter reaches the Pause Filter Count value, a PAUSE intercept will occur. This feature can be used to detect contended spinlocks, especially when the lock holding VCPU is not scheduled. Rescheduling another VCPU prevents the VCPU seeking the lock from wasting its quantum by spinning idly. Experimental results show that most spinlocks are held for less than 1000 PAUSE cycles or more than a few thousand. Default the Pause Filter Counter to 3000 to detect the contended spinlocks. Processor support for this feature is indicated by a CPUID bit. On a 24 core system running 4 guests each with 16 VCPUs, this patch improved overall performance of each guest's 32 job kernbench by approximately 3-5% when combined with a scheduler algorithm thati caused the VCPU to sleep for a brief period. Further performance improvement may be possible with a more sophisticated yield algorithm. Signed-off-by: Mark Langsdorf Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/svm.h | 3 ++- arch/x86/kvm/svm.c | 13 +++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 85574b7c1bc1..1fecb7e61130 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -57,7 +57,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u16 intercept_dr_write; u32 intercept_exceptions; u64 intercept; - u8 reserved_1[44]; + u8 reserved_1[42]; + u16 pause_filter_count; u64 iopm_base_pa; u64 msrpm_base_pa; u64 tsc_offset; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 69610c5d6dea..170b2d9c6909 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -46,6 +46,7 @@ MODULE_LICENSE("GPL"); #define SVM_FEATURE_NPT (1 << 0) #define SVM_FEATURE_LBRV (1 << 1) #define SVM_FEATURE_SVML (1 << 2) +#define SVM_FEATURE_PAUSE_FILTER (1 << 10) #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ @@ -654,6 +655,11 @@ static void init_vmcb(struct vcpu_svm *svm) svm->nested.vmcb = 0; svm->vcpu.arch.hflags = 0; + if (svm_has(SVM_FEATURE_PAUSE_FILTER)) { + control->pause_filter_count = 3000; + control->intercept |= (1ULL << INTERCEPT_PAUSE); + } + enable_gif(svm); } @@ -2281,6 +2287,12 @@ static int interrupt_window_interception(struct vcpu_svm *svm) return 1; } +static int pause_interception(struct vcpu_svm *svm) +{ + kvm_vcpu_on_spin(&(svm->vcpu)); + return 1; +} + static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_READ_CR0] = emulate_on_interception, [SVM_EXIT_READ_CR3] = emulate_on_interception, @@ -2316,6 +2328,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_CPUID] = cpuid_interception, [SVM_EXIT_IRET] = iret_interception, [SVM_EXIT_INVD] = emulate_on_interception, + [SVM_EXIT_PAUSE] = pause_interception, [SVM_EXIT_HLT] = halt_interception, [SVM_EXIT_INVLPG] = invlpg_interception, [SVM_EXIT_INVLPGA] = invlpga_interception, -- cgit v1.2.3 From 6b7d7e762b238f908fe4c3345c2c6eb5c3fdbd59 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Fri, 9 Oct 2009 16:26:08 -1000 Subject: KVM: x86: Harden against cpufreq If cpufreq can't determine the CPU khz, or cpufreq is not compiled in, we should fallback to the measured TSC khz. Signed-off-by: Zachary Amsden Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 098e7f886306..3cffa2cac70b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1348,8 +1348,12 @@ out: void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { kvm_x86_ops->vcpu_load(vcpu, cpu); - if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) - per_cpu(cpu_tsc_khz, cpu) = cpufreq_quick_get(cpu); + if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) { + unsigned long khz = cpufreq_quick_get(cpu); + if (!khz) + khz = tsc_khz; + per_cpu(cpu_tsc_khz, cpu) = khz; + } kvm_request_guest_time_update(vcpu); } @@ -3144,8 +3148,12 @@ static void kvm_timer_init(void) if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); - for_each_online_cpu(cpu) - per_cpu(cpu_tsc_khz, cpu) = cpufreq_get(cpu); + for_each_online_cpu(cpu) { + unsigned long khz = cpufreq_get(cpu); + if (!khz) + khz = tsc_khz; + per_cpu(cpu_tsc_khz, cpu) = khz; + } } else { for_each_possible_cpu(cpu) per_cpu(cpu_tsc_khz, cpu) = tsc_khz; -- cgit v1.2.3 From 9fb41ba8962b18159e16cac81e7d57e897964038 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 12 Oct 2009 19:37:31 -0300 Subject: KVM: VMX: fix handle_pause declaration There's no kvm_run argument anymore. Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a4580d65af59..364263a25ff8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3387,7 +3387,7 @@ out: * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE * exiting, so only get here on cpu with PAUSE-Loop-Exiting. */ -static int handle_pause(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int handle_pause(struct kvm_vcpu *vcpu) { skip_emulated_instruction(vcpu); kvm_vcpu_on_spin(vcpu); -- cgit v1.2.3 From 94c30d9ca6fd00a69e367b91b6e13572c41938c5 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 12 Oct 2009 08:51:40 +0200 Subject: KVM: x86: Drop unneeded CONFIG_HAS_IOMEM check This (broken) check dates back to the days when this code was shared across architectures. x86 has IOMEM, so drop it. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3cffa2cac70b..5d450cc6f841 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3814,7 +3814,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (r) goto out; } -#if CONFIG_HAS_IOMEM if (vcpu->mmio_needed) { memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); vcpu->mmio_read_completed = 1; @@ -3832,7 +3831,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) goto out; } } -#endif if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) kvm_register_write(vcpu, VCPU_REGS_RAX, kvm_run->hypercall.ret); -- cgit v1.2.3 From ffde22ac53b6d6b1d7206f1172176a667eead778 Mon Sep 17 00:00:00 2001 From: Ed Swierk Date: Thu, 15 Oct 2009 15:21:43 -0700 Subject: KVM: Xen PV-on-HVM guest support Support for Xen PV-on-HVM guests can be implemented almost entirely in userspace, except for handling one annoying MSR that maps a Xen hypercall blob into guest address space. A generic mechanism to delegate MSR writes to userspace seems overkill and risks encouraging similar MSR abuse in the future. Thus this patch adds special support for the Xen HVM MSR. I implemented a new ioctl, KVM_XEN_HVM_CONFIG, that lets userspace tell KVM which MSR the guest will write to, as well as the starting address and size of the hypercall blobs (one each for 32-bit and 64-bit) that userspace has loaded from files. When the guest writes to the MSR, KVM copies one page of the blob from userspace to the guest. I've tested this patch with a hacked-up version of Gerd's userspace code, booting a number of guests (CentOS 5.3 i386 and x86_64, and FreeBSD 8.0-RC1 amd64) and exercising PV network and block devices. [jan: fix i386 build warning] [avi: future proof abi with a flags field] Signed-off-by: Ed Swierk Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- Documentation/kvm/api.txt | 24 +++++++++++++++++++++ arch/x86/include/asm/kvm.h | 1 + arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/x86.c | 46 +++++++++++++++++++++++++++++++++++++++++ include/linux/kvm.h | 16 ++++++++++++++ 5 files changed, 89 insertions(+) (limited to 'arch/x86') diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index 5a4bc8cf6d04..3e8684e48506 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -593,6 +593,30 @@ struct kvm_irqchip { } chip; }; +4.27 KVM_XEN_HVM_CONFIG + +Capability: KVM_CAP_XEN_HVM +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_xen_hvm_config (in) +Returns: 0 on success, -1 on error + +Sets the MSR that the Xen HVM guest uses to initialize its hypercall +page, and provides the starting address and size of the hypercall +blobs in userspace. When the guest writes the MSR, kvm copies one +page of a blob (32- or 64-bit, depending on the vcpu mode) to guest +memory. + +struct kvm_xen_hvm_config { + __u32 flags; + __u32 msr; + __u64 blob_addr_32; + __u64 blob_addr_64; + __u8 blob_size_32; + __u8 blob_size_64; + __u8 pad2[30]; +}; + 5. The kvm_run structure Application code obtains a pointer to the kvm_run structure by diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index f02e87a5206f..ef9b4b73cce4 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -19,6 +19,7 @@ #define __KVM_HAVE_MSIX #define __KVM_HAVE_MCE #define __KVM_HAVE_PIT_STATE2 +#define __KVM_HAVE_XEN_HVM /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 179a919f53a4..36f3b53f5c27 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -410,6 +410,8 @@ struct kvm_arch{ unsigned long irq_sources_bitmap; u64 vm_init_tsc; + + struct kvm_xen_hvm_config xen_hvm_config; }; struct kvm_vm_stat { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5d450cc6f841..bb842db3ee7c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -857,6 +857,38 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data) return 0; } +static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data) +{ + struct kvm *kvm = vcpu->kvm; + int lm = is_long_mode(vcpu); + u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64 + : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32; + u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64 + : kvm->arch.xen_hvm_config.blob_size_32; + u32 page_num = data & ~PAGE_MASK; + u64 page_addr = data & PAGE_MASK; + u8 *page; + int r; + + r = -E2BIG; + if (page_num >= blob_size) + goto out; + r = -ENOMEM; + page = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!page) + goto out; + r = -EFAULT; + if (copy_from_user(page, blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE)) + goto out_free; + if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE)) + goto out_free; + r = 0; +out_free: + kfree(page); +out: + return r; +} + int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) { switch (msr) { @@ -972,6 +1004,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) "0x%x data 0x%llx\n", msr, data); break; default: + if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) + return xen_hvm_config(vcpu, data); if (!ignore_msrs) { pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data); @@ -1246,6 +1280,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PIT2: case KVM_CAP_PIT_STATE2: case KVM_CAP_SET_IDENTITY_MAP_ADDR: + case KVM_CAP_XEN_HVM: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -2441,6 +2476,17 @@ long kvm_arch_vm_ioctl(struct file *filp, r = 0; break; } + case KVM_XEN_HVM_CONFIG: { + r = -EFAULT; + if (copy_from_user(&kvm->arch.xen_hvm_config, argp, + sizeof(struct kvm_xen_hvm_config))) + goto out; + r = -EINVAL; + if (kvm->arch.xen_hvm_config.flags) + goto out; + r = 0; + break; + } default: ; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index f8f8900fc5ec..b694c1d2f918 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -436,6 +436,9 @@ struct kvm_ioeventfd { #endif #define KVM_CAP_IOEVENTFD 36 #define KVM_CAP_SET_IDENTITY_MAP_ADDR 37 +#ifdef __KVM_HAVE_XEN_HVM +#define KVM_CAP_XEN_HVM 38 +#endif #ifdef KVM_CAP_IRQ_ROUTING @@ -488,6 +491,18 @@ struct kvm_x86_mce { }; #endif +#ifdef KVM_CAP_XEN_HVM +struct kvm_xen_hvm_config { + __u32 flags; + __u32 msr; + __u64 blob_addr_32; + __u64 blob_addr_64; + __u8 blob_size_32; + __u8 blob_size_64; + __u8 pad2[30]; +}; +#endif + #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) struct kvm_irqfd { @@ -546,6 +561,7 @@ struct kvm_irqfd { #define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config) #define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78) #define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd) +#define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config) /* * ioctls for vcpu fds -- cgit v1.2.3 From 94fe45da48f921d01d8ff02a0ad54ee9c326d7f0 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 18 Oct 2009 13:24:44 +0200 Subject: KVM: x86: Fix guest single-stepping while interruptible Commit 705c5323 opened the doors of hell by unconditionally injecting single-step flags as long as guest_debug signaled this. This doesn't work when the guest branches into some interrupt or exception handler and triggers a vmexit with flag reloading. Fix it by saving cs:rip when user space requests single-stepping and restricting the trace flag injection to this guest code position. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 4 ++++ arch/x86/kvm/x86.c | 47 ++++++++++++++++++++++++----------------- 2 files changed, 32 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 36f3b53f5c27..2536fbd85b3a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -371,6 +371,10 @@ struct kvm_vcpu_arch { u64 mcg_status; u64 mcg_ctl; u64 *mce_banks; + + /* used for guest single stepping over the given code position */ + u16 singlestep_cs; + unsigned long singlestep_rip; }; struct kvm_mem_alias { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bb842db3ee7c..13f30aac460b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -235,25 +235,6 @@ bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl) } EXPORT_SYMBOL_GPL(kvm_require_cpl); -unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) -{ - unsigned long rflags; - - rflags = kvm_x86_ops->get_rflags(vcpu); - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) - rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF); - return rflags; -} -EXPORT_SYMBOL_GPL(kvm_get_rflags); - -void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) -{ - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) - rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; - kvm_x86_ops->set_rflags(vcpu, rflags); -} -EXPORT_SYMBOL_GPL(kvm_set_rflags); - /* * Load the pae pdptrs. Return true is they are all valid. */ @@ -4565,6 +4546,12 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); } + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { + vcpu->arch.singlestep_cs = + get_segment_selector(vcpu, VCPU_SREG_CS); + vcpu->arch.singlestep_rip = kvm_rip_read(vcpu); + } + /* * Trigger an rflags update that will inject or remove the trace * flags. @@ -5031,6 +5018,28 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) return kvm_x86_ops->interrupt_allowed(vcpu); } +unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) +{ + unsigned long rflags; + + rflags = kvm_x86_ops->get_rflags(vcpu); + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) + rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF); + return rflags; +} +EXPORT_SYMBOL_GPL(kvm_get_rflags); + +void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) +{ + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && + vcpu->arch.singlestep_cs == + get_segment_selector(vcpu, VCPU_SREG_CS) && + vcpu->arch.singlestep_rip == kvm_rip_read(vcpu)) + rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; + kvm_x86_ops->set_rflags(vcpu, rflags); +} +EXPORT_SYMBOL_GPL(kvm_set_rflags); + EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); -- cgit v1.2.3 From 6be7d3062b59af891be7e40c6802350de5f78cef Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 18 Oct 2009 13:24:54 +0200 Subject: KVM: SVM: Cleanup NMI singlestep Push the NMI-related singlestep variable into vcpu_svm. It's dealing with an AMD-specific deficit, nothing generic for x86. Acked-by: Gleb Natapov Signed-off-by: Jan Kiszka arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/svm.c | 12 +++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/svm.c | 12 +++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2536fbd85b3a..4d994ad5051a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -354,7 +354,6 @@ struct kvm_vcpu_arch { unsigned int time_offset; struct page *time_page; - bool singlestep; /* guest is single stepped by KVM */ bool nmi_pending; bool nmi_injected; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 170b2d9c6909..ffa6ad216b71 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -107,6 +107,8 @@ struct vcpu_svm { u32 *msrpm; struct nested_state nested; + + bool nmi_singlestep; }; /* enable NPT for AMD64 and X86 with PAE */ @@ -1050,7 +1052,7 @@ static void update_db_intercept(struct kvm_vcpu *vcpu) svm->vmcb->control.intercept_exceptions &= ~((1 << DB_VECTOR) | (1 << BP_VECTOR)); - if (vcpu->arch.singlestep) + if (svm->nmi_singlestep) svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR); if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { @@ -1195,13 +1197,13 @@ static int db_interception(struct vcpu_svm *svm) if (!(svm->vcpu.guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && - !svm->vcpu.arch.singlestep) { + !svm->nmi_singlestep) { kvm_queue_exception(&svm->vcpu, DB_VECTOR); return 1; } - if (svm->vcpu.arch.singlestep) { - svm->vcpu.arch.singlestep = false; + if (svm->nmi_singlestep) { + svm->nmi_singlestep = false; if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); @@ -2543,7 +2545,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) /* Something prevents NMI from been injected. Single step over possible problem (IRET or exception injection or interrupt shadow) */ - vcpu->arch.singlestep = true; + svm->nmi_singlestep = true; svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); update_db_intercept(vcpu); } -- cgit v1.2.3 From afbcf7ab8d1bc8c2d04792f6d9e786e0adeb328d Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 16 Oct 2009 15:28:36 -0400 Subject: KVM: allow userspace to adjust kvmclock offset When we migrate a kvm guest that uses pvclock between two hosts, we may suffer a large skew. This is because there can be significant differences between the monotonic clock of the hosts involved. When a new host with a much larger monotonic time starts running the guest, the view of time will be significantly impacted. Situation is much worse when we do the opposite, and migrate to a host with a smaller monotonic clock. This proposed ioctl will allow userspace to inform us what is the monotonic clock value in the source host, so we can keep the time skew short, and more importantly, never goes backwards. Userspace may also need to trigger the current data, since from the first migration onwards, it won't be reflected by a simple call to clock_gettime() anymore. [marcelo: future-proof abi with a flags field] [jan: fix KVM_GET_CLOCK by clearing flags field instead of checking it] Signed-off-by: Glauber Costa Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- Documentation/kvm/api.txt | 36 +++++++++++++++++++++++++++++++++++ arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 42 ++++++++++++++++++++++++++++++++++++++++- include/linux/kvm.h | 10 ++++++++++ 4 files changed, 88 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index 3e8684e48506..36594ba57723 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -617,6 +617,42 @@ struct kvm_xen_hvm_config { __u8 pad2[30]; }; +4.27 KVM_GET_CLOCK + +Capability: KVM_CAP_ADJUST_CLOCK +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_clock_data (out) +Returns: 0 on success, -1 on error + +Gets the current timestamp of kvmclock as seen by the current guest. In +conjunction with KVM_SET_CLOCK, it is used to ensure monotonicity on scenarios +such as migration. + +struct kvm_clock_data { + __u64 clock; /* kvmclock current value */ + __u32 flags; + __u32 pad[9]; +}; + +4.28 KVM_SET_CLOCK + +Capability: KVM_CAP_ADJUST_CLOCK +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_clock_data (in) +Returns: 0 on success, -1 on error + +Sets the current timestamp of kvmclock to the valued specific in its parameter. +In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity on scenarios +such as migration. + +struct kvm_clock_data { + __u64 clock; /* kvmclock current value */ + __u32 flags; + __u32 pad[9]; +}; + 5. The kvm_run structure Application code obtains a pointer to the kvm_run structure by diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4d994ad5051a..0558ff8c32ae 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -413,6 +413,7 @@ struct kvm_arch{ unsigned long irq_sources_bitmap; u64 vm_init_tsc; + s64 kvmclock_offset; struct kvm_xen_hvm_config xen_hvm_config; }; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 13f30aac460b..e16cdc9ec0c1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -680,7 +680,8 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) /* With all the info we got, fill in the values */ vcpu->hv_clock.system_time = ts.tv_nsec + - (NSEC_PER_SEC * (u64)ts.tv_sec); + (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset; + /* * The interface expects us to write an even number signaling that the * update is finished. Since the guest won't see the intermediate @@ -1262,6 +1263,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PIT_STATE2: case KVM_CAP_SET_IDENTITY_MAP_ADDR: case KVM_CAP_XEN_HVM: + case KVM_CAP_ADJUST_CLOCK: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -2468,6 +2470,44 @@ long kvm_arch_vm_ioctl(struct file *filp, r = 0; break; } + case KVM_SET_CLOCK: { + struct timespec now; + struct kvm_clock_data user_ns; + u64 now_ns; + s64 delta; + + r = -EFAULT; + if (copy_from_user(&user_ns, argp, sizeof(user_ns))) + goto out; + + r = -EINVAL; + if (user_ns.flags) + goto out; + + r = 0; + ktime_get_ts(&now); + now_ns = timespec_to_ns(&now); + delta = user_ns.clock - now_ns; + kvm->arch.kvmclock_offset = delta; + break; + } + case KVM_GET_CLOCK: { + struct timespec now; + struct kvm_clock_data user_ns; + u64 now_ns; + + ktime_get_ts(&now); + now_ns = timespec_to_ns(&now); + user_ns.clock = kvm->arch.kvmclock_offset + now_ns; + user_ns.flags = 0; + + r = -EFAULT; + if (copy_to_user(argp, &user_ns, sizeof(user_ns))) + goto out; + r = 0; + break; + } + default: ; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index b694c1d2f918..6ed1a12ed526 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -439,6 +439,7 @@ struct kvm_ioeventfd { #ifdef __KVM_HAVE_XEN_HVM #define KVM_CAP_XEN_HVM 38 #endif +#define KVM_CAP_ADJUST_CLOCK 39 #ifdef KVM_CAP_IRQ_ROUTING @@ -512,6 +513,12 @@ struct kvm_irqfd { __u8 pad[20]; }; +struct kvm_clock_data { + __u64 clock; + __u32 flags; + __u32 pad[9]; +}; + /* * ioctls for VM fds */ @@ -562,6 +569,9 @@ struct kvm_irqfd { #define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78) #define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd) #define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config) +#define KVM_SET_CLOCK _IOW(KVMIO, 0x7b, struct kvm_clock_data) +#define KVM_GET_CLOCK _IOR(KVMIO, 0x7c, struct kvm_clock_data) + /* * ioctls for vcpu fds -- cgit v1.2.3 From fa40052ca04bdbbeb20b839cc8ffe9fa7beefbe9 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Sat, 24 Oct 2009 02:49:58 -0200 Subject: KVM: VMX: Use macros instead of hex value on cr0 initialization This should have no effect, it is just to make the code clearer. Signed-off-by: Eduardo Habkost Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 364263a25ff8..17730175aa08 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2538,7 +2538,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) if (vmx->vpid != 0) vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); - vmx->vcpu.arch.cr0 = 0x60000010; + vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */ vmx_set_cr4(&vmx->vcpu, 0); vmx_set_efer(&vmx->vcpu, 0); -- cgit v1.2.3 From 18fa000ae453767b59ab97477925895a3f0c46ea Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Sat, 24 Oct 2009 02:49:59 -0200 Subject: KVM: SVM: Reset cr0 properly on vcpu reset svm_vcpu_reset() was not properly resetting the contents of the guest-visible cr0 register, causing the following issue: https://bugzilla.redhat.com/show_bug.cgi?id=525699 Without resetting cr0 properly, the vcpu was running the SIPI bootstrap routine with paging enabled, making the vcpu get a pagefault exception while trying to run it. Instead of setting vmcb->save.cr0 directly, the new code just resets kvm->arch.cr0 and calls kvm_set_cr0(). The bits that were set/cleared on vmcb->save.cr0 (PG, WP, !CD, !NW) will be set properly by svm_set_cr0(). kvm_set_cr0() is used instead of calling svm_set_cr0() directly to make sure kvm_mmu_reset_context() is called to reset the mmu to nonpaging mode. Signed-off-by: Eduardo Habkost Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ffa6ad216b71..c9ef6c0e1e98 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -628,11 +628,12 @@ static void init_vmcb(struct vcpu_svm *svm) save->rip = 0x0000fff0; svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; - /* - * cr0 val on cpu init should be 0x60000010, we enable cpu - * cache by default. the orderly way is to enable cache in bios. + /* This is the guest-visible cr0 value. + * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. */ - save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP; + svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; + kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0); + save->cr4 = X86_CR4_PAE; /* rdx = ?? */ -- cgit v1.2.3 From 3ce672d48400e0112fec7a3cb6bb2120493c6e11 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Sat, 24 Oct 2009 02:50:00 -0200 Subject: KVM: SVM: init_vmcb(): remove redundant save->cr0 initialization The svm_set_cr0() call will initialize save->cr0 properly even when npt is enabled, clearing the NW and CD bits as expected, so we don't need to initialize it manually for npt_enabled anymore. Signed-off-by: Eduardo Habkost Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c9ef6c0e1e98..34b700f9e498 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -648,8 +648,6 @@ static void init_vmcb(struct vcpu_svm *svm) control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK| INTERCEPT_CR3_MASK); save->g_pat = 0x0007040600070406ULL; - /* enable caching because the QEMU Bios doesn't enable it */ - save->cr0 = X86_CR0_ET; save->cr3 = 0; save->cr4 = 0; } -- cgit v1.2.3 From 44ea2b1758d88ad822e65b1c4c21ca6164494e27 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 6 Sep 2009 15:55:37 +0300 Subject: KVM: VMX: Move MSR_KERNEL_GS_BASE out of the vmx autoload msr area Currently MSR_KERNEL_GS_BASE is saved and restored as part of the guest/host msr reloading. Since we wish to lazy-restore all the other msrs, save and reload MSR_KERNEL_GS_BASE explicitly instead of using the common code. Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 17730175aa08..32512519e1ac 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -99,7 +99,8 @@ struct vcpu_vmx { int save_nmsrs; int msr_offset_efer; #ifdef CONFIG_X86_64 - int msr_offset_kernel_gs_base; + u64 msr_host_kernel_gs_base; + u64 msr_guest_kernel_gs_base; #endif struct vmcs *vmcs; struct { @@ -202,7 +203,7 @@ static void ept_save_pdptrs(struct kvm_vcpu *vcpu); */ static const u32 vmx_msr_index[] = { #ifdef CONFIG_X86_64 - MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, MSR_KERNEL_GS_BASE, + MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, #endif MSR_EFER, MSR_K6_STAR, }; @@ -674,10 +675,10 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) #endif #ifdef CONFIG_X86_64 - if (is_long_mode(&vmx->vcpu)) - save_msrs(vmx->host_msrs + - vmx->msr_offset_kernel_gs_base, 1); - + if (is_long_mode(&vmx->vcpu)) { + rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); + wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); + } #endif load_msrs(vmx->guest_msrs, vmx->save_nmsrs); load_transition_efer(vmx); @@ -711,6 +712,12 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) save_msrs(vmx->guest_msrs, vmx->save_nmsrs); load_msrs(vmx->host_msrs, vmx->save_nmsrs); reload_host_efer(vmx); +#ifdef CONFIG_X86_64 + if (is_long_mode(&vmx->vcpu)) { + rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); + wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); + } +#endif } static void vmx_load_host_state(struct vcpu_vmx *vmx) @@ -938,9 +945,6 @@ static void setup_msrs(struct vcpu_vmx *vmx) if (index >= 0) move_msr_up(vmx, index, save_nmsrs++); index = __find_msr_index(vmx, MSR_CSTAR); - if (index >= 0) - move_msr_up(vmx, index, save_nmsrs++); - index = __find_msr_index(vmx, MSR_KERNEL_GS_BASE); if (index >= 0) move_msr_up(vmx, index, save_nmsrs++); /* @@ -954,10 +958,6 @@ static void setup_msrs(struct vcpu_vmx *vmx) #endif vmx->save_nmsrs = save_nmsrs; -#ifdef CONFIG_X86_64 - vmx->msr_offset_kernel_gs_base = - __find_msr_index(vmx, MSR_KERNEL_GS_BASE); -#endif vmx->msr_offset_efer = __find_msr_index(vmx, MSR_EFER); if (cpu_has_vmx_msr_bitmap()) { @@ -1015,6 +1015,10 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) case MSR_GS_BASE: data = vmcs_readl(GUEST_GS_BASE); break; + case MSR_KERNEL_GS_BASE: + vmx_load_host_state(to_vmx(vcpu)); + data = to_vmx(vcpu)->msr_guest_kernel_gs_base; + break; case MSR_EFER: return kvm_get_msr_common(vcpu, msr_index, pdata); #endif @@ -1068,6 +1072,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) case MSR_GS_BASE: vmcs_writel(GUEST_GS_BASE, data); break; + case MSR_KERNEL_GS_BASE: + vmx_load_host_state(vmx); + vmx->msr_guest_kernel_gs_base = data; + break; #endif case MSR_IA32_SYSENTER_CS: vmcs_write32(GUEST_SYSENTER_CS, data); @@ -1559,6 +1567,11 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) struct vcpu_vmx *vmx = to_vmx(vcpu); struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); + /* + * Force kernel_gs_base reloading before EFER changes, as control + * of this msr depends on is_long_mode(). + */ + vmx_load_host_state(to_vmx(vcpu)); vcpu->arch.shadow_efer = efer; if (!msr) return; -- cgit v1.2.3 From 18863bdd60f895f3b3ba16b15e8331aee781e8ec Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 7 Sep 2009 11:12:18 +0300 Subject: KVM: x86 shared msr infrastructure The various syscall-related MSRs are fairly expensive to switch. Currently we switch them on every vcpu preemption, which is far too often: - if we're switching to a kernel thread (idle task, threaded interrupt, kernel-mode virtio server (vhost-net), for example) and back, then there's no need to switch those MSRs since kernel threasd won't be exiting to userspace. - if we're switching to another guest running an identical OS, most likely those MSRs will have the same value, so there's little point in reloading them. - if we're running the same OS on the guest and host, the MSRs will have identical values and reloading is unnecessary. This patch uses the new user return notifiers to implement last-minute switching, and checks the msr values to avoid unnecessary reloading. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 3 ++ arch/x86/kvm/Kconfig | 1 + arch/x86/kvm/x86.c | 81 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 85 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0558ff8c32ae..26a74b7bb6bc 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -809,4 +809,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); +void kvm_define_shared_msr(unsigned index, u32 msr); +void kvm_set_shared_msr(unsigned index, u64 val); + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index b84e571f4175..4cd498332466 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -28,6 +28,7 @@ config KVM select HAVE_KVM_IRQCHIP select HAVE_KVM_EVENTFD select KVM_APIC_ARCHITECTURE + select USER_RETURN_NOTIFIER ---help--- Support hosting fully virtualized guest machines using hardware virtualization extensions. You will need a fairly recent diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e16cdc9ec0c1..58c5cddf363d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #undef TRACE_INCLUDE_FILE #define CREATE_TRACE_POINTS @@ -87,6 +88,25 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops); int ignore_msrs = 0; module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); +#define KVM_NR_SHARED_MSRS 16 + +struct kvm_shared_msrs_global { + int nr; + struct kvm_shared_msr { + u32 msr; + u64 value; + } msrs[KVM_NR_SHARED_MSRS]; +}; + +struct kvm_shared_msrs { + struct user_return_notifier urn; + bool registered; + u64 current_value[KVM_NR_SHARED_MSRS]; +}; + +static struct kvm_shared_msrs_global __read_mostly shared_msrs_global; +static DEFINE_PER_CPU(struct kvm_shared_msrs, shared_msrs); + struct kvm_stats_debugfs_item debugfs_entries[] = { { "pf_fixed", VCPU_STAT(pf_fixed) }, { "pf_guest", VCPU_STAT(pf_guest) }, @@ -123,6 +143,64 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; +static void kvm_on_user_return(struct user_return_notifier *urn) +{ + unsigned slot; + struct kvm_shared_msr *global; + struct kvm_shared_msrs *locals + = container_of(urn, struct kvm_shared_msrs, urn); + + for (slot = 0; slot < shared_msrs_global.nr; ++slot) { + global = &shared_msrs_global.msrs[slot]; + if (global->value != locals->current_value[slot]) { + wrmsrl(global->msr, global->value); + locals->current_value[slot] = global->value; + } + } + locals->registered = false; + user_return_notifier_unregister(urn); +} + +void kvm_define_shared_msr(unsigned slot, u32 msr) +{ + int cpu; + u64 value; + + if (slot >= shared_msrs_global.nr) + shared_msrs_global.nr = slot + 1; + shared_msrs_global.msrs[slot].msr = msr; + rdmsrl_safe(msr, &value); + shared_msrs_global.msrs[slot].value = value; + for_each_online_cpu(cpu) + per_cpu(shared_msrs, cpu).current_value[slot] = value; +} +EXPORT_SYMBOL_GPL(kvm_define_shared_msr); + +static void kvm_shared_msr_cpu_online(void) +{ + unsigned i; + struct kvm_shared_msrs *locals = &__get_cpu_var(shared_msrs); + + for (i = 0; i < shared_msrs_global.nr; ++i) + locals->current_value[i] = shared_msrs_global.msrs[i].value; +} + +void kvm_set_shared_msr(unsigned slot, u64 value) +{ + struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); + + if (value == smsr->current_value[slot]) + return; + smsr->current_value[slot] = value; + wrmsrl(shared_msrs_global.msrs[slot].msr, value); + if (!smsr->registered) { + smsr->urn.on_user_return = kvm_on_user_return; + user_return_notifier_register(&smsr->urn); + smsr->registered = true; + } +} +EXPORT_SYMBOL_GPL(kvm_set_shared_msr); + unsigned long segment_base(u16 selector) { struct descriptor_table gdt; @@ -4815,6 +4893,9 @@ int kvm_arch_hardware_enable(void *garbage) int cpu = raw_smp_processor_id(); per_cpu(cpu_tsc_khz, cpu) = 0; } + + kvm_shared_msr_cpu_online(); + return kvm_x86_ops->hardware_enable(garbage); } -- cgit v1.2.3 From 26bb0981b3ff00b9177d61fe55806db978862b3c Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 7 Sep 2009 11:14:12 +0300 Subject: KVM: VMX: Use shared msr infrastructure Instead of reloading syscall MSRs on every preemption, use the new shared msr infrastructure to reload them at the last possible minute (just before exit to userspace). Improves vcpu/idle/vcpu switches by about 2000 cycles (when EFER needs to be reloaded as well). [jan: fix slot index missing indirection] Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 112 ++++++++++++++++++++--------------------------------- 1 file changed, 42 insertions(+), 70 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 32512519e1ac..bf46253149c3 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -86,6 +86,11 @@ struct vmcs { char data[0]; }; +struct shared_msr_entry { + unsigned index; + u64 data; +}; + struct vcpu_vmx { struct kvm_vcpu vcpu; struct list_head local_vcpus_link; @@ -93,8 +98,7 @@ struct vcpu_vmx { int launched; u8 fail; u32 idt_vectoring_info; - struct kvm_msr_entry *guest_msrs; - struct kvm_msr_entry *host_msrs; + struct shared_msr_entry *guest_msrs; int nmsrs; int save_nmsrs; int msr_offset_efer; @@ -108,7 +112,6 @@ struct vcpu_vmx { u16 fs_sel, gs_sel, ldt_sel; int gs_ldt_reload_needed; int fs_reload_needed; - int guest_efer_loaded; } host_state; struct { int vm86_active; @@ -195,6 +198,8 @@ static struct kvm_vmx_segment_field { VMX_SEGMENT_FIELD(LDTR), }; +static u64 host_efer; + static void ept_save_pdptrs(struct kvm_vcpu *vcpu); /* @@ -209,22 +214,6 @@ static const u32 vmx_msr_index[] = { }; #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) -static void load_msrs(struct kvm_msr_entry *e, int n) -{ - int i; - - for (i = 0; i < n; ++i) - wrmsrl(e[i].index, e[i].data); -} - -static void save_msrs(struct kvm_msr_entry *e, int n) -{ - int i; - - for (i = 0; i < n; ++i) - rdmsrl(e[i].index, e[i].data); -} - static inline int is_page_fault(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | @@ -373,7 +362,7 @@ static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) int i; for (i = 0; i < vmx->nmsrs; ++i) - if (vmx->guest_msrs[i].index == msr) + if (vmx_msr_index[vmx->guest_msrs[i].index] == msr) return i; return -1; } @@ -404,7 +393,7 @@ static inline void __invept(int ext, u64 eptp, gpa_t gpa) : : "a" (&operand), "c" (ext) : "cc", "memory"); } -static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) +static struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) { int i; @@ -595,17 +584,15 @@ static void reload_tss(void) load_TR_desc(); } -static void load_transition_efer(struct vcpu_vmx *vmx) +static bool update_transition_efer(struct vcpu_vmx *vmx) { int efer_offset = vmx->msr_offset_efer; - u64 host_efer; u64 guest_efer; u64 ignore_bits; if (efer_offset < 0) - return; - host_efer = vmx->host_msrs[efer_offset].data; - guest_efer = vmx->guest_msrs[efer_offset].data; + return false; + guest_efer = vmx->vcpu.arch.shadow_efer; /* * NX is emulated; LMA and LME handled by hardware; SCE meaninless @@ -619,26 +606,18 @@ static void load_transition_efer(struct vcpu_vmx *vmx) ignore_bits &= ~(u64)EFER_SCE; #endif if ((guest_efer & ~ignore_bits) == (host_efer & ~ignore_bits)) - return; + return false; - vmx->host_state.guest_efer_loaded = 1; guest_efer &= ~ignore_bits; guest_efer |= host_efer & ignore_bits; - wrmsrl(MSR_EFER, guest_efer); - vmx->vcpu.stat.efer_reload++; -} - -static void reload_host_efer(struct vcpu_vmx *vmx) -{ - if (vmx->host_state.guest_efer_loaded) { - vmx->host_state.guest_efer_loaded = 0; - load_msrs(vmx->host_msrs + vmx->msr_offset_efer, 1); - } + vmx->guest_msrs[efer_offset].data = guest_efer; + return true; } static void vmx_save_host_state(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + int i; if (vmx->host_state.loaded) return; @@ -680,8 +659,9 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); } #endif - load_msrs(vmx->guest_msrs, vmx->save_nmsrs); - load_transition_efer(vmx); + for (i = 0; i < vmx->save_nmsrs; ++i) + kvm_set_shared_msr(vmx->guest_msrs[i].index, + vmx->guest_msrs[i].data); } static void __vmx_load_host_state(struct vcpu_vmx *vmx) @@ -709,9 +689,6 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) local_irq_restore(flags); } reload_tss(); - save_msrs(vmx->guest_msrs, vmx->save_nmsrs); - load_msrs(vmx->host_msrs, vmx->save_nmsrs); - reload_host_efer(vmx); #ifdef CONFIG_X86_64 if (is_long_mode(&vmx->vcpu)) { rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); @@ -908,19 +885,14 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, /* * Swap MSR entry in host/guest MSR entry array. */ -#ifdef CONFIG_X86_64 static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) { - struct kvm_msr_entry tmp; + struct shared_msr_entry tmp; tmp = vmx->guest_msrs[to]; vmx->guest_msrs[to] = vmx->guest_msrs[from]; vmx->guest_msrs[from] = tmp; - tmp = vmx->host_msrs[to]; - vmx->host_msrs[to] = vmx->host_msrs[from]; - vmx->host_msrs[from] = tmp; } -#endif /* * Set up the vmcs to automatically save and restore system @@ -929,15 +901,13 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) */ static void setup_msrs(struct vcpu_vmx *vmx) { - int save_nmsrs; + int save_nmsrs, index; unsigned long *msr_bitmap; vmx_load_host_state(vmx); save_nmsrs = 0; #ifdef CONFIG_X86_64 if (is_long_mode(&vmx->vcpu)) { - int index; - index = __find_msr_index(vmx, MSR_SYSCALL_MASK); if (index >= 0) move_msr_up(vmx, index, save_nmsrs++); @@ -956,9 +926,11 @@ static void setup_msrs(struct vcpu_vmx *vmx) move_msr_up(vmx, index, save_nmsrs++); } #endif - vmx->save_nmsrs = save_nmsrs; + vmx->msr_offset_efer = index = __find_msr_index(vmx, MSR_EFER); + if (index >= 0 && update_transition_efer(vmx)) + move_msr_up(vmx, index, save_nmsrs++); - vmx->msr_offset_efer = __find_msr_index(vmx, MSR_EFER); + vmx->save_nmsrs = save_nmsrs; if (cpu_has_vmx_msr_bitmap()) { if (is_long_mode(&vmx->vcpu)) @@ -1000,7 +972,7 @@ static void guest_write_tsc(u64 guest_tsc, u64 host_tsc) static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) { u64 data; - struct kvm_msr_entry *msr; + struct shared_msr_entry *msr; if (!pdata) { printk(KERN_ERR "BUG: get_msr called with NULL pdata\n"); @@ -1019,9 +991,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) vmx_load_host_state(to_vmx(vcpu)); data = to_vmx(vcpu)->msr_guest_kernel_gs_base; break; +#endif case MSR_EFER: return kvm_get_msr_common(vcpu, msr_index, pdata); -#endif case MSR_IA32_TSC: data = guest_read_tsc(); break; @@ -1035,6 +1007,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) data = vmcs_readl(GUEST_SYSENTER_ESP); break; default: + vmx_load_host_state(to_vmx(vcpu)); msr = find_msr_entry(to_vmx(vcpu), msr_index); if (msr) { vmx_load_host_state(to_vmx(vcpu)); @@ -1056,7 +1029,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) { struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_msr_entry *msr; + struct shared_msr_entry *msr; u64 host_tsc; int ret = 0; @@ -1565,7 +1538,10 @@ continue_rmode: static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) { struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); + struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); + + if (!msr) + return; /* * Force kernel_gs_base reloading before EFER changes, as control @@ -2417,10 +2393,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) if (wrmsr_safe(index, data_low, data_high) < 0) continue; data = data_low | ((u64)data_high << 32); - vmx->host_msrs[j].index = index; - vmx->host_msrs[j].reserved = 0; - vmx->host_msrs[j].data = data; - vmx->guest_msrs[j] = vmx->host_msrs[j]; + vmx->guest_msrs[j].index = i; + vmx->guest_msrs[j].data = 0; ++vmx->nmsrs; } @@ -3821,7 +3795,6 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) __clear_bit(vmx->vpid, vmx_vpid_bitmap); spin_unlock(&vmx_vpid_lock); vmx_free_vmcs(vcpu); - kfree(vmx->host_msrs); kfree(vmx->guest_msrs); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, vmx); @@ -3848,10 +3821,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) goto uninit_vcpu; } - vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!vmx->host_msrs) - goto free_guest_msrs; - vmx->vmcs = alloc_vmcs(); if (!vmx->vmcs) goto free_msrs; @@ -3882,8 +3851,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) free_vmcs: free_vmcs(vmx->vmcs); free_msrs: - kfree(vmx->host_msrs); -free_guest_msrs: kfree(vmx->guest_msrs); uninit_vcpu: kvm_vcpu_uninit(&vmx->vcpu); @@ -4033,7 +4000,12 @@ static struct kvm_x86_ops vmx_x86_ops = { static int __init vmx_init(void) { - int r; + int r, i; + + rdmsrl_safe(MSR_EFER, &host_efer); + + for (i = 0; i < NR_VMX_MSR; ++i) + kvm_define_shared_msr(i, vmx_msr_index[i]); vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL); if (!vmx_io_bitmap_a) -- cgit v1.2.3 From 1655e3a3dc16e21b60d9950e201b38a9894f1bcf Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 25 Oct 2009 17:45:07 +0200 Subject: KVM: remove duplicated task_switch check Probably introduced by a bad merge. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 58c5cddf363d..dbddcc2d2c97 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4525,11 +4525,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); } - /* set back link to prev task only if NT bit is set in eflags - note that old_tss_sel is not used afetr this point */ - if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) - old_tss_sel = 0xffff; - /* set back link to prev task only if NT bit is set in eflags note that old_tss_sel is not used afetr this point */ if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) -- cgit v1.2.3 From 7c93be44a4790b0fd9dddf29c5503cf86c105304 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 26 Oct 2009 16:48:33 -0200 Subject: KVM: VMX: move CR3/PDPTR update to vmx_set_cr3 GUEST_CR3 is updated via kvm_set_cr3 whenever CR3 is modified from outside guest context. Similarly pdptrs are updated via load_pdptrs. Let kvm_set_cr3 perform the update, removing it from the vcpu_run fast path. Signed-off-by: Marcelo Tosatti Acked-by: Acked-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 5 +---- arch/x86/kvm/x86.c | 4 +++- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bf46253149c3..a5f3f3ec69e6 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1737,6 +1737,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) vmcs_write64(EPT_POINTER, eptp); guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 : vcpu->kvm->arch.ept_identity_map_addr; + ept_load_pdptrs(vcpu); } vmx_flush_tlb(vcpu); @@ -3625,10 +3626,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - if (enable_ept && is_paging(vcpu)) { - vmcs_writel(GUEST_CR3, vcpu->arch.cr3); - ept_load_pdptrs(vcpu); - } /* Record the guest's net vcpu time for enforced NMI injections. */ if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) vmx->entry_time = ktime_get(); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index dbddcc2d2c97..719f31eecd3d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4591,8 +4591,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4; kvm_x86_ops->set_cr4(vcpu, sregs->cr4); - if (!is_long_mode(vcpu) && is_pae(vcpu)) + if (!is_long_mode(vcpu) && is_pae(vcpu)) { load_pdptrs(vcpu, vcpu->arch.cr3); + mmu_reset_needed = 1; + } if (mmu_reset_needed) kvm_mmu_reset_context(vcpu); -- cgit v1.2.3 From 5f5c35aad5ccaa8f1bd5d9e12f9f5251f3180093 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 26 Oct 2009 16:50:14 -0200 Subject: KVM: MMU: update invlpg handler comment Large page translations are always synchronized (either in level 3 or level 2), so its not necessary to properly deal with them in the invlpg handler. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 72558f8ff3f5..a6017132fba8 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -467,7 +467,6 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) level = iterator.level; sptep = iterator.sptep; - /* FIXME: properly handle invlpg on large guest pages */ if (level == PT_PAGE_TABLE_LEVEL || ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { -- cgit v1.2.3 From 92c0d900159a4fa582e1c8ebcc1c4a8020defff5 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 29 Oct 2009 11:00:16 +0200 Subject: KVM: VMX: Remove vmx->msr_offset_efer This variable is used to communicate between a caller and a callee; switch to a function argument instead. Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a5f3f3ec69e6..c9cc9596e1a6 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -101,7 +101,6 @@ struct vcpu_vmx { struct shared_msr_entry *guest_msrs; int nmsrs; int save_nmsrs; - int msr_offset_efer; #ifdef CONFIG_X86_64 u64 msr_host_kernel_gs_base; u64 msr_guest_kernel_gs_base; @@ -584,14 +583,11 @@ static void reload_tss(void) load_TR_desc(); } -static bool update_transition_efer(struct vcpu_vmx *vmx) +static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) { - int efer_offset = vmx->msr_offset_efer; u64 guest_efer; u64 ignore_bits; - if (efer_offset < 0) - return false; guest_efer = vmx->vcpu.arch.shadow_efer; /* @@ -926,8 +922,8 @@ static void setup_msrs(struct vcpu_vmx *vmx) move_msr_up(vmx, index, save_nmsrs++); } #endif - vmx->msr_offset_efer = index = __find_msr_index(vmx, MSR_EFER); - if (index >= 0 && update_transition_efer(vmx)) + index = __find_msr_index(vmx, MSR_EFER); + if (index >= 0 && update_transition_efer(vmx, index)) move_msr_up(vmx, index, save_nmsrs++); vmx->save_nmsrs = save_nmsrs; -- cgit v1.2.3 From 3ddea128ad75bd33e88780fe44f44c3717369b98 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 29 Oct 2009 13:44:15 -0200 Subject: KVM: x86: disallow multiple KVM_CREATE_IRQCHIP Otherwise kvm will leak memory on multiple KVM_CREATE_IRQCHIP. Also serialize multiple accesses with kvm->lock. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/irq.h | 6 +++++- arch/x86/kvm/x86.c | 30 ++++++++++++++++++++++-------- 2 files changed, 27 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index c025a2362aae..be399e207d57 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -86,7 +86,11 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) static inline int irqchip_in_kernel(struct kvm *kvm) { - return pic_irqchip(kvm) != NULL; + int ret; + + ret = (pic_irqchip(kvm) != NULL); + smp_rmb(); + return ret; } void kvm_pic_reset(struct kvm_kpic_state *s); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 719f31eecd3d..97f6f9565ac9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2362,25 +2362,39 @@ long kvm_arch_vm_ioctl(struct file *filp, if (r) goto out; break; - case KVM_CREATE_IRQCHIP: + case KVM_CREATE_IRQCHIP: { + struct kvm_pic *vpic; + + mutex_lock(&kvm->lock); + r = -EEXIST; + if (kvm->arch.vpic) + goto create_irqchip_unlock; r = -ENOMEM; - kvm->arch.vpic = kvm_create_pic(kvm); - if (kvm->arch.vpic) { + vpic = kvm_create_pic(kvm); + if (vpic) { r = kvm_ioapic_init(kvm); if (r) { - kfree(kvm->arch.vpic); - kvm->arch.vpic = NULL; - goto out; + kfree(vpic); + goto create_irqchip_unlock; } } else - goto out; + goto create_irqchip_unlock; + smp_wmb(); + kvm->arch.vpic = vpic; + smp_wmb(); r = kvm_setup_default_irq_routing(kvm); if (r) { + mutex_lock(&kvm->irq_lock); kfree(kvm->arch.vpic); kfree(kvm->arch.vioapic); - goto out; + kvm->arch.vpic = NULL; + kvm->arch.vioapic = NULL; + mutex_unlock(&kvm->irq_lock); } + create_irqchip_unlock: + mutex_unlock(&kvm->lock); break; + } case KVM_CREATE_PIT: u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY; goto create_pit; -- cgit v1.2.3 From 2204ae3c96e9a1fed50f7ee19ce092e69d7dfe82 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 29 Oct 2009 13:44:16 -0200 Subject: KVM: x86: disallow KVM_{SET,GET}_LAPIC without allocated in-kernel lapic Otherwise kvm might attempt to dereference a NULL pointer. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 97f6f9565ac9..cd6fe0a5797f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1893,6 +1893,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, switch (ioctl) { case KVM_GET_LAPIC: { + r = -EINVAL; + if (!vcpu->arch.apic) + goto out; lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); r = -ENOMEM; @@ -1908,6 +1911,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; } case KVM_SET_LAPIC: { + r = -EINVAL; + if (!vcpu->arch.apic) + goto out; lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); r = -ENOMEM; if (!lapic) -- cgit v1.2.3 From 4f926bf291863c237188bd2e27222ed801f12094 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 30 Oct 2009 12:46:59 +0100 Subject: KVM: x86: Polish exception injection via KVM_SET_GUEST_DEBUG Decouple KVM_GUESTDBG_INJECT_DB and KVM_GUESTDBG_INJECT_BP from KVM_GUESTDBG_ENABLE, their are actually orthogonal. At this chance, avoid triggering the WARN_ON in kvm_queue_exception if there is already an exception pending and reject such invalid requests. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cd6fe0a5797f..ba8958dca3c4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4656,10 +4656,20 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { unsigned long rflags; - int i; + int i, r; vcpu_load(vcpu); + if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) { + r = -EBUSY; + if (vcpu->arch.exception.pending) + goto unlock_out; + if (dbg->control & KVM_GUESTDBG_INJECT_DB) + kvm_queue_exception(vcpu, DB_VECTOR); + else + kvm_queue_exception(vcpu, BP_VECTOR); + } + /* * Read rflags as long as potentially injected trace flags are still * filtered out. @@ -4695,14 +4705,12 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, kvm_x86_ops->set_guest_debug(vcpu, dbg); - if (vcpu->guest_debug & KVM_GUESTDBG_INJECT_DB) - kvm_queue_exception(vcpu, DB_VECTOR); - else if (vcpu->guest_debug & KVM_GUESTDBG_INJECT_BP) - kvm_queue_exception(vcpu, BP_VECTOR); + r = 0; +unlock_out: vcpu_put(vcpu); - return 0; + return r; } /* -- cgit v1.2.3 From a9c7399d6cda0a092b347f8ee49bbe44f6e1fe66 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 4 Nov 2009 11:54:59 +0200 Subject: KVM: Allow internal errors reported to userspace to carry extra data Usually userspace will freeze the guest so we can inspect it, but some internal state is not available. Add extra data to internal error reporting so we can expose it to the debugger. Extra data is specific to the suberror. Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 1 + arch/x86/kvm/vmx.c | 1 + include/linux/kvm.h | 4 ++++ virt/kvm/kvm_main.c | 1 + 4 files changed, 7 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index a9024797b21f..4c3e5b2314cb 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2800,6 +2800,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) case EMULATE_FAIL: vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; return 0; default: BUG(); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c9cc9596e1a6..c0e66dd58a47 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3352,6 +3352,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) kvm_report_emulation_failure(vcpu, "emulation failure"); vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; ret = 0; goto out; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index ca62b8e056f9..172639e94392 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -251,6 +251,9 @@ struct kvm_run { } dcr; struct { __u32 suberror; + /* Available with KVM_CAP_INTERNAL_ERROR_DATA: */ + __u32 ndata; + __u64 data[16]; } internal; /* Fix the size of the union. */ char padding[256]; @@ -484,6 +487,7 @@ struct kvm_ioeventfd { #define KVM_CAP_XEN_HVM 38 #endif #define KVM_CAP_ADJUST_CLOCK 39 +#define KVM_CAP_INTERNAL_ERROR_DATA 40 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index bd44fb48ac43..f92ba138007a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1653,6 +1653,7 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) #ifdef CONFIG_KVM_APIC_ARCHITECTURE case KVM_CAP_SET_BOOT_CPU_ID: #endif + case KVM_CAP_INTERNAL_ERROR_DATA: return 1; #ifdef CONFIG_HAVE_KVM_IRQCHIP case KVM_CAP_IRQ_ROUTING: -- cgit v1.2.3 From 65ac7264043740572ba804edca03c374d70427c9 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 4 Nov 2009 11:59:01 +0200 Subject: KVM: VMX: Report unexpected simultaneous exceptions as internal errors These happen when we trap an exception when another exception is being delivered; we only expect these with MCEs and page faults. If something unexpected happens, things probably went south and we're better off reporting an internal error and freezing. Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 11 ++++++++--- include/linux/kvm.h | 1 + 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c0e66dd58a47..22fcd27a0b58 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2744,9 +2744,14 @@ static int handle_exception(struct kvm_vcpu *vcpu) return handle_machine_check(vcpu); if ((vect_info & VECTORING_INFO_VALID_MASK) && - !is_page_fault(intr_info)) - printk(KERN_ERR "%s: unexpected, vectoring info 0x%x " - "intr info 0x%x\n", __func__, vect_info, intr_info); + !is_page_fault(intr_info)) { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX; + vcpu->run->internal.ndata = 2; + vcpu->run->internal.data[0] = vect_info; + vcpu->run->internal.data[1] = intr_info; + return 0; + } if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) return 1; /* already handled by vmx_vcpu_run() */ diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 172639e94392..976f4d181858 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -163,6 +163,7 @@ struct kvm_pit_config { /* For KVM_EXIT_INTERNAL_ERROR */ #define KVM_INTERNAL_ERROR_EMULATION 1 +#define KVM_INTERNAL_ERROR_SIMUL_EX 2 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { -- cgit v1.2.3 From 3cfc3092f40bc37c57ba556cfd8de4218f2135ab Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 12 Nov 2009 01:04:25 +0100 Subject: KVM: x86: Add KVM_GET/SET_VCPU_EVENTS This new IOCTL exports all yet user-invisible states related to exceptions, interrupts, and NMIs. Together with appropriate user space changes, this fixes sporadic problems of vmsave/restore, live migration and system reset. [avi: future-proof abi by adding a flags field] Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- Documentation/kvm/api.txt | 49 ++++++++++++++++++++++++++ arch/x86/include/asm/kvm.h | 28 +++++++++++++++ arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/svm.c | 22 ++++++++++++ arch/x86/kvm/vmx.c | 30 ++++++++++++++++ arch/x86/kvm/x86.c | 77 +++++++++++++++++++++++++++++++++++++++++ include/linux/kvm.h | 6 ++++ 7 files changed, 214 insertions(+) (limited to 'arch/x86') diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index 36594ba57723..e1a114161027 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -653,6 +653,55 @@ struct kvm_clock_data { __u32 pad[9]; }; +4.29 KVM_GET_VCPU_EVENTS + +Capability: KVM_CAP_VCPU_EVENTS +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_vcpu_event (out) +Returns: 0 on success, -1 on error + +Gets currently pending exceptions, interrupts, and NMIs as well as related +states of the vcpu. + +struct kvm_vcpu_events { + struct { + __u8 injected; + __u8 nr; + __u8 has_error_code; + __u8 pad; + __u32 error_code; + } exception; + struct { + __u8 injected; + __u8 nr; + __u8 soft; + __u8 pad; + } interrupt; + struct { + __u8 injected; + __u8 pending; + __u8 masked; + __u8 pad; + } nmi; + __u32 sipi_vector; + __u32 flags; /* must be zero */ +}; + +4.30 KVM_SET_VCPU_EVENTS + +Capability: KVM_CAP_VCPU_EVENTS +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_vcpu_event (in) +Returns: 0 on success, -1 on error + +Set pending exceptions, interrupts, and NMIs as well as related states of the +vcpu. + +See KVM_GET_VCPU_EVENTS for the data structure. + + 5. The kvm_run structure Application code obtains a pointer to the kvm_run structure by diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index ef9b4b73cce4..950df434763f 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -20,6 +20,7 @@ #define __KVM_HAVE_MCE #define __KVM_HAVE_PIT_STATE2 #define __KVM_HAVE_XEN_HVM +#define __KVM_HAVE_VCPU_EVENTS /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 @@ -252,4 +253,31 @@ struct kvm_reinject_control { __u8 pit_reinject; __u8 reserved[31]; }; + +/* for KVM_GET/SET_VCPU_EVENTS */ +struct kvm_vcpu_events { + struct { + __u8 injected; + __u8 nr; + __u8 has_error_code; + __u8 pad; + __u32 error_code; + } exception; + struct { + __u8 injected; + __u8 nr; + __u8 soft; + __u8 pad; + } interrupt; + struct { + __u8 injected; + __u8 pending; + __u8 masked; + __u8 pad; + } nmi; + __u32 sipi_vector; + __u32 flags; + __u32 reserved[10]; +}; + #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 26a74b7bb6bc..06e085614dad 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -523,6 +523,8 @@ struct kvm_x86_ops { bool has_error_code, u32 error_code); int (*interrupt_allowed)(struct kvm_vcpu *vcpu); int (*nmi_allowed)(struct kvm_vcpu *vcpu); + bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); + void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 34b700f9e498..3de0b37ec038 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2499,6 +2499,26 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu) !(svm->vcpu.arch.hflags & HF_NMI_MASK); } +static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + return !!(svm->vcpu.arch.hflags & HF_NMI_MASK); +} + +static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + if (masked) { + svm->vcpu.arch.hflags |= HF_NMI_MASK; + svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET); + } else { + svm->vcpu.arch.hflags &= ~HF_NMI_MASK; + svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); + } +} + static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -2946,6 +2966,8 @@ static struct kvm_x86_ops svm_x86_ops = { .queue_exception = svm_queue_exception, .interrupt_allowed = svm_interrupt_allowed, .nmi_allowed = svm_nmi_allowed, + .get_nmi_mask = svm_get_nmi_mask, + .set_nmi_mask = svm_set_nmi_mask, .enable_nmi_window = enable_nmi_window, .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 22fcd27a0b58..778f059ae423 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2639,6 +2639,34 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) GUEST_INTR_STATE_NMI)); } +static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) +{ + if (!cpu_has_virtual_nmis()) + return to_vmx(vcpu)->soft_vnmi_blocked; + else + return !!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & + GUEST_INTR_STATE_NMI); +} + +static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!cpu_has_virtual_nmis()) { + if (vmx->soft_vnmi_blocked != masked) { + vmx->soft_vnmi_blocked = masked; + vmx->vnmi_blocked_time = 0; + } + } else { + if (masked) + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); + else + vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); + } +} + static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) { return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && @@ -3985,6 +4013,8 @@ static struct kvm_x86_ops vmx_x86_ops = { .queue_exception = vmx_queue_exception, .interrupt_allowed = vmx_interrupt_allowed, .nmi_allowed = vmx_nmi_allowed, + .get_nmi_mask = vmx_get_nmi_mask, + .set_nmi_mask = vmx_set_nmi_mask, .enable_nmi_window = enable_nmi_window, .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ba8958dca3c4..35eea30821d6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1342,6 +1342,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_SET_IDENTITY_MAP_ADDR: case KVM_CAP_XEN_HVM: case KVM_CAP_ADJUST_CLOCK: + case KVM_CAP_VCPU_EVENTS: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -1883,6 +1884,61 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, return 0; } +static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + vcpu_load(vcpu); + + events->exception.injected = vcpu->arch.exception.pending; + events->exception.nr = vcpu->arch.exception.nr; + events->exception.has_error_code = vcpu->arch.exception.has_error_code; + events->exception.error_code = vcpu->arch.exception.error_code; + + events->interrupt.injected = vcpu->arch.interrupt.pending; + events->interrupt.nr = vcpu->arch.interrupt.nr; + events->interrupt.soft = vcpu->arch.interrupt.soft; + + events->nmi.injected = vcpu->arch.nmi_injected; + events->nmi.pending = vcpu->arch.nmi_pending; + events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); + + events->sipi_vector = vcpu->arch.sipi_vector; + + events->flags = 0; + + vcpu_put(vcpu); +} + +static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + if (events->flags) + return -EINVAL; + + vcpu_load(vcpu); + + vcpu->arch.exception.pending = events->exception.injected; + vcpu->arch.exception.nr = events->exception.nr; + vcpu->arch.exception.has_error_code = events->exception.has_error_code; + vcpu->arch.exception.error_code = events->exception.error_code; + + vcpu->arch.interrupt.pending = events->interrupt.injected; + vcpu->arch.interrupt.nr = events->interrupt.nr; + vcpu->arch.interrupt.soft = events->interrupt.soft; + if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) + kvm_pic_clear_isr_ack(vcpu->kvm); + + vcpu->arch.nmi_injected = events->nmi.injected; + vcpu->arch.nmi_pending = events->nmi.pending; + kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); + + vcpu->arch.sipi_vector = events->sipi_vector; + + vcpu_put(vcpu); + + return 0; +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2040,6 +2096,27 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); break; } + case KVM_GET_VCPU_EVENTS: { + struct kvm_vcpu_events events; + + kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events); + + r = -EFAULT; + if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events))) + break; + r = 0; + break; + } + case KVM_SET_VCPU_EVENTS: { + struct kvm_vcpu_events events; + + r = -EFAULT; + if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events))) + break; + + r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); + break; + } default: r = -EINVAL; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 976f4d181858..92045a92d714 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -489,6 +489,9 @@ struct kvm_ioeventfd { #endif #define KVM_CAP_ADJUST_CLOCK 39 #define KVM_CAP_INTERNAL_ERROR_DATA 40 +#ifdef __KVM_HAVE_VCPU_EVENTS +#define KVM_CAP_VCPU_EVENTS 41 +#endif #ifdef KVM_CAP_IRQ_ROUTING @@ -672,6 +675,9 @@ struct kvm_clock_data { /* IA64 stack access */ #define KVM_IA64_VCPU_GET_STACK _IOR(KVMIO, 0x9a, void *) #define KVM_IA64_VCPU_SET_STACK _IOW(KVMIO, 0x9b, void *) +/* Available with KVM_CAP_VCPU_EVENTS */ +#define KVM_GET_VCPU_EVENTS _IOR(KVMIO, 0x9f, struct kvm_vcpu_events) +#define KVM_SET_VCPU_EVENTS _IOW(KVMIO, 0xa0, struct kvm_vcpu_events) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) -- cgit v1.2.3 From eb3c79e64a70fb8f7473e30fa07e89c1ecc2c9bb Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 24 Nov 2009 15:20:15 +0200 Subject: KVM: x86 emulator: limit instructions to 15 bytes While we are never normally passed an instruction that exceeds 15 bytes, smp games can cause us to attempt to interpret one, which will cause large latencies in non-preempt hosts. Cc: stable@kernel.org Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 2 +- arch/x86/kvm/emulate.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index b7ed2c423116..7c18e1230f54 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -129,7 +129,7 @@ struct decode_cache { u8 seg_override; unsigned int d; unsigned long regs[NR_VCPU_REGS]; - unsigned long eip; + unsigned long eip, eip_orig; /* modrm */ u8 modrm; u8 modrm_mod; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index d226dff47d77..7e8faea4651e 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -622,6 +622,9 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, { int rc = 0; + /* x86 instructions are limited to 15 bytes. */ + if (eip + size - ctxt->decode.eip_orig > 15) + return X86EMUL_UNHANDLEABLE; eip += ctxt->cs_base; while (size--) { rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++); @@ -880,7 +883,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) /* Shadow copy of register state. Committed on successful emulation. */ memset(c, 0, sizeof(struct decode_cache)); - c->eip = kvm_rip_read(ctxt->vcpu); + c->eip = c->eip_orig = kvm_rip_read(ctxt->vcpu); ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); -- cgit v1.2.3 From 046d87103addc117f0d397196e85189722d4d7de Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Fri, 27 Nov 2009 16:46:26 +0800 Subject: KVM: VMX: Disable unrestricted guest when EPT disabled Otherwise would cause VMEntry failure when using ept=0 on unrestricted guest supported processors. Signed-off-by: Sheng Yang Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 778f059ae423..ed97c6c7e648 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2309,8 +2309,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; if (vmx->vpid == 0) exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; - if (!enable_ept) + if (!enable_ept) { exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; + enable_unrestricted_guest = 0; + } if (!enable_unrestricted_guest) exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; if (!ple_gap) -- cgit v1.2.3 From 3548bab501887a698a887639b54d5ecaf35c387b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sat, 28 Nov 2009 14:18:47 +0200 Subject: KVM: Drop user return notifier when disabling virtualization on a cpu This way, we don't leave a dangling notifier on cpu hotunplug or module unload. In particular, module unload leaves the notifier pointing into freed memory. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 35eea30821d6..106f9f1f78c0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -201,6 +201,14 @@ void kvm_set_shared_msr(unsigned slot, u64 value) } EXPORT_SYMBOL_GPL(kvm_set_shared_msr); +static void drop_user_return_notifiers(void *ignore) +{ + struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); + + if (smsr->registered) + kvm_on_user_return(&smsr->urn); +} + unsigned long segment_base(u16 selector) { struct descriptor_table gdt; @@ -5004,6 +5012,7 @@ int kvm_arch_hardware_enable(void *garbage) void kvm_arch_hardware_disable(void *garbage) { kvm_x86_ops->hardware_disable(garbage); + drop_user_return_notifiers(garbage); } int kvm_arch_hardware_setup(void) -- cgit v1.2.3 From d5696725b2a4c59503f5e0bc33adeee7f30cd45b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 2 Dec 2009 12:28:47 +0200 Subject: KVM: VMX: Fix comparison of guest efer with stale host value update_transition_efer() masks out some efer bits when deciding whether to switch the msr during guest entry; for example, NX is emulated using the mmu so we don't need to disable it, and LMA/LME are handled by the hardware. However, with shared msrs, the comparison is made against a stale value; at the time of the guest switch we may be running with another guest's efer. Fix by deferring the mask/compare to the actual point of guest entry. Noted by Marcelo. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/vmx.c | 9 +++++---- arch/x86/kvm/x86.c | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 06e085614dad..4f865e8b8540 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -812,6 +812,6 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_define_shared_msr(unsigned index, u32 msr); -void kvm_set_shared_msr(unsigned index, u64 val); +void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ed97c6c7e648..d4918d6fc924 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -89,6 +89,7 @@ struct vmcs { struct shared_msr_entry { unsigned index; u64 data; + u64 mask; }; struct vcpu_vmx { @@ -601,12 +602,10 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) if (guest_efer & EFER_LMA) ignore_bits &= ~(u64)EFER_SCE; #endif - if ((guest_efer & ~ignore_bits) == (host_efer & ~ignore_bits)) - return false; - guest_efer &= ~ignore_bits; guest_efer |= host_efer & ignore_bits; vmx->guest_msrs[efer_offset].data = guest_efer; + vmx->guest_msrs[efer_offset].mask = ~ignore_bits; return true; } @@ -657,7 +656,8 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) #endif for (i = 0; i < vmx->save_nmsrs; ++i) kvm_set_shared_msr(vmx->guest_msrs[i].index, - vmx->guest_msrs[i].data); + vmx->guest_msrs[i].data, + vmx->guest_msrs[i].mask); } static void __vmx_load_host_state(struct vcpu_vmx *vmx) @@ -2394,6 +2394,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) data = data_low | ((u64)data_high << 32); vmx->guest_msrs[j].index = i; vmx->guest_msrs[j].data = 0; + vmx->guest_msrs[j].mask = -1ull; ++vmx->nmsrs; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 106f9f1f78c0..ce677b20bf86 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -185,11 +185,11 @@ static void kvm_shared_msr_cpu_online(void) locals->current_value[i] = shared_msrs_global.msrs[i].value; } -void kvm_set_shared_msr(unsigned slot, u64 value) +void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) { struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); - if (value == smsr->current_value[slot]) + if (((value ^ smsr->current_value[slot]) & mask) == 0) return; smsr->current_value[slot] = value; wrmsrl(shared_msrs_global.msrs[slot].msr, value); -- cgit v1.2.3 From 4528752f49c1f4025473d12bc5fa9181085c3f22 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 2 Dec 2009 15:05:56 -0800 Subject: x86, Calgary IOMMU quirk: Find nearest matching Calgary while walking up the PCI tree On a multi-node x3950M2 system, there's a slight oddity in the PCI device tree for all secondary nodes: 30:1e.0 PCI bridge: Intel Corporation 82801 PCI Bridge (rev e1) \-33:00.0 PCI bridge: IBM CalIOC2 PCI-E Root Port (rev 01) \-34:00.0 RAID bus controller: LSI Logic / Symbios Logic MegaRAID SAS 1078 (rev 04) ...as compared to the primary node: 00:1e.0 PCI bridge: Intel Corporation 82801 PCI Bridge (rev e1) \-01:00.0 VGA compatible controller: ATI Technologies Inc ES1000 (rev 02) 03:00.0 PCI bridge: IBM CalIOC2 PCI-E Root Port (rev 01) \-04:00.0 RAID bus controller: LSI Logic / Symbios Logic MegaRAID SAS 1078 (rev 04) In both nodes, the LSI RAID controller hangs off a CalIOC2 device, but on the secondary nodes, the BIOS hides the VGA device and substitutes the device tree ending with the disk controller. It would seem that Calgary devices don't necessarily appear at the top of the PCI tree, which means that the current code to find the Calgary IOMMU that goes with a particular device is buggy. Rather than walk all the way to the top of the PCI device tree and try to match bus number with Calgary descriptor, the code needs to examine each parent of the particular device; if it encounters a Calgary with a matching bus number, simply use that. Otherwise, we BUG() when the bus number of the Calgary doesn't match the bus number of whatever's at the top of the device tree. Extra note: This patch appears to work correctly for the x3950 that came before the x3950 M2. Signed-off-by: Darrick J. Wong Acked-by: Muli Ben-Yehuda Cc: FUJITA Tomonori Cc: Joerg Roedel Cc: Yinghai Lu Cc: Jon D. Mason Cc: Corinna Schultz Cc: LKML-Reference: <20091202230556.GG10295@tux1.beaverton.ibm.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-calgary_64.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 849a0995d970..c563e4c8ff39 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -316,13 +316,15 @@ static inline struct iommu_table *find_iommu_table(struct device *dev) pdev = to_pci_dev(dev); + /* search up the device tree for an iommu */ pbus = pdev->bus; - - /* is the device behind a bridge? Look for the root bus */ - while (pbus->parent) + do { + tbl = pci_iommu(pbus); + if (tbl && tbl->it_busno == pbus->number) + break; + tbl = NULL; pbus = pbus->parent; - - tbl = pci_iommu(pbus); + } while (pbus); BUG_ON(tbl && (tbl->it_busno != pbus->number)); -- cgit v1.2.3 From 57fea8f7ab67ef42b7f84999e49e47f8717a2d5b Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Thu, 3 Dec 2009 19:06:40 +0800 Subject: x86/reboot: Add pci_dev_put in reboot_fixup_32.c for consistency pci_get_device will increase the ref count of found device. Although we're going to reset soon, we should use pci_dev_put to decrease the ref count for consistency. Signed-off-by: Xiaotian Feng Acked-by: H. Peter Anvin Cc: Yinghai Lu LKML-Reference: <1259838400-23833-1-git-send-email-dfeng@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot_fixups_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c index 61a837743fe5..201eab63b05f 100644 --- a/arch/x86/kernel/reboot_fixups_32.c +++ b/arch/x86/kernel/reboot_fixups_32.c @@ -80,6 +80,7 @@ void mach_reboot_fixups(void) continue; cur->reboot_fixup(dev); + pci_dev_put(dev); } } -- cgit v1.2.3 From 7d1849aff6687a135a8da3a75e32a00e3137a5e2 Mon Sep 17 00:00:00 2001 From: Mikael Pettersson Date: Thu, 3 Dec 2009 15:52:44 +0100 Subject: x86, apic: Enable lapic nmi watchdog on AMD Family 11h The x86 lapic nmi watchdog does not recognize AMD Family 11h, resulting in: NMI watchdog: CPU not supported As far as I can see from available documentation (the BKDM), family 11h looks identical to family 10h as far as the PMU is concerned. Extending the check to accept family 11h results in: Testing NMI watchdog ... OK. I've been running with this change on a Turion X2 Ultra ZM-82 laptop for a couple of weeks now without problems. Signed-off-by: Mikael Pettersson Cc: Andreas Herrmann Cc: Joerg Roedel Cc: LKML-Reference: <19223.53436.931768.278021@pilspetsen.it.uu.se> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perfctr-watchdog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index fab786f60ed6..898df9719afb 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -712,7 +712,7 @@ static void probe_nmi_watchdog(void) switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && - boot_cpu_data.x86 != 16) + boot_cpu_data.x86 != 16 && boot_cpu_data.x86 != 17) return; wd_ops = &k7_wd_ops; break; -- cgit v1.2.3 From be012920ecba161ad20303a3f6d9e96c58cf97c7 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Sat, 21 Nov 2009 08:35:55 +0800 Subject: xen: re-register runstate area earlier on resume. This is necessary to ensure the runstate area is available to xen_sched_clock before any calls to printk which will require it in order to provide a timestamp. I chose to pull the xen_setup_runstate_info out of xen_time_init into the caller in order to maintain parity with calling xen_setup_runstate_info separately from calling xen_time_resume. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel --- arch/x86/xen/enlighten.c | 2 ++ arch/x86/xen/time.c | 5 ++--- arch/x86/xen/xen-ops.h | 1 + 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index dfbf70e65860..cb61f77e4496 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -148,6 +148,8 @@ void xen_vcpu_restore(void) HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) BUG(); + xen_setup_runstate_info(cpu); + xen_vcpu_setup(cpu); if (other_cpu && diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 0a5aa44299a5..6bbff94328d2 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -100,7 +100,7 @@ bool xen_vcpu_stolen(int vcpu) return per_cpu(runstate, vcpu).state == RUNSTATE_runnable; } -static void setup_runstate_info(int cpu) +void xen_setup_runstate_info(int cpu) { struct vcpu_register_runstate_memory_area area; @@ -442,8 +442,6 @@ void xen_setup_timer(int cpu) evt->cpumask = cpumask_of(cpu); evt->irq = irq; - - setup_runstate_info(cpu); } void xen_teardown_timer(int cpu) @@ -494,6 +492,7 @@ __init void xen_time_init(void) setup_force_cpu_cap(X86_FEATURE_TSC); + xen_setup_runstate_info(cpu); xen_setup_timer(cpu); xen_setup_cpu_clockevents(); } diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 355fa6b99c9c..32529326683d 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -41,6 +41,7 @@ void __init xen_build_dynamic_phys_to_machine(void); void xen_init_irq_ops(void); void xen_setup_timer(int cpu); +void xen_setup_runstate_info(int cpu); void xen_teardown_timer(int cpu); cycle_t xen_clocksource_read(void); void xen_setup_cpu_clockevents(void); -- cgit v1.2.3 From 3905bb2aa7bb801b31946b37a4635ebac4009051 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 21 Nov 2009 08:46:29 +0800 Subject: xen: restore runstate_info even if !have_vcpu_info_placement Even if have_vcpu_info_placement is not set, we still need to set up the runstate area on each resumed vcpu. Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel --- arch/x86/xen/enlighten.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index cb61f77e4496..a7b49f99a130 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -138,26 +138,23 @@ static void xen_vcpu_setup(int cpu) */ void xen_vcpu_restore(void) { - if (have_vcpu_info_placement) { - int cpu; + int cpu; - for_each_online_cpu(cpu) { - bool other_cpu = (cpu != smp_processor_id()); + for_each_online_cpu(cpu) { + bool other_cpu = (cpu != smp_processor_id()); - if (other_cpu && - HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) - BUG(); + if (other_cpu && + HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) + BUG(); - xen_setup_runstate_info(cpu); + xen_setup_runstate_info(cpu); + if (have_vcpu_info_placement) xen_vcpu_setup(cpu); - if (other_cpu && - HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) - BUG(); - } - - BUG_ON(!have_vcpu_info_placement); + if (other_cpu && + HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) + BUG(); } } -- cgit v1.2.3 From fa24ba62ea2869308ffc9f0b286ac9650b4ca6cb Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Sat, 21 Nov 2009 11:32:49 +0000 Subject: xen: correctly restore pfn_to_mfn_list_list after resume pvops kernels >= 2.6.30 can currently only be saved and restored once. The second attempt to save results in: ERROR Internal error: Frame# in pfn-to-mfn frame list is not in pseudophys ERROR Internal error: entry 0: p2m_frame_list[0] is 0xf2c2c2c2, max 0x120000 ERROR Internal error: Failed to map/save the p2m frame list I finally narrowed it down to: commit cdaead6b4e657f960d6d6f9f380e7dfeedc6a09b Author: Jeremy Fitzhardinge Date: Fri Feb 27 15:34:59 2009 -0800 xen: split construction of p2m mfn tables from registration Build the p2m_mfn_list_list early with the rest of the p2m table, but register it later when the real shared_info structure is in place. Signed-off-by: Jeremy Fitzhardinge The unforeseen side-effect of this change was to cause the mfn list list to not be rebuilt on resume. Prior to this change it would have been rebuilt via xen_post_suspend() -> xen_setup_shared_info() -> xen_setup_mfn_list_list(). Fix by explicitly calling xen_build_mfn_list_list() from xen_post_suspend(). Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel --- arch/x86/xen/mmu.c | 2 +- arch/x86/xen/suspend.c | 2 ++ arch/x86/xen/xen-ops.h | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 3bf7b1d250ce..bf4cd6bfe959 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -185,7 +185,7 @@ static inline unsigned p2m_index(unsigned long pfn) } /* Build the parallel p2m_top_mfn structures */ -static void __init xen_build_mfn_list_list(void) +void xen_build_mfn_list_list(void) { unsigned pfn, idx; diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 95be7b434724..6343a5d8e93c 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -27,6 +27,8 @@ void xen_pre_suspend(void) void xen_post_suspend(int suspend_cancelled) { + xen_build_mfn_list_list(); + xen_setup_shared_info(); if (suspend_cancelled) { diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 32529326683d..f9153a300bce 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -25,6 +25,7 @@ extern struct shared_info *HYPERVISOR_shared_info; void xen_setup_mfn_list_list(void); void xen_setup_shared_info(void); +void xen_build_mfn_list_list(void); void xen_setup_machphys_mapping(void); pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); void xen_ident_map_ISA(void); -- cgit v1.2.3 From f350c7922faad3397c98c81a9e5658f5a1ef0214 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Tue, 24 Nov 2009 10:16:23 +0000 Subject: xen: register timer interrupt with IRQF_TIMER Otherwise the timer is disabled by dpm_suspend_noirq() which in turn prevents correct operation of stop_machine on multi-processor systems and breaks suspend. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel --- arch/x86/xen/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 6bbff94328d2..9d1f853120d8 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -434,7 +434,7 @@ void xen_setup_timer(int cpu) name = ""; irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, - IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, + IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER, name, NULL); evt = &per_cpu(xen_clock_events, cpu); -- cgit v1.2.3 From 028896721ac04f6fa0697f3ecac3f98761746363 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Tue, 24 Nov 2009 09:32:48 -0800 Subject: xen: register runstate on secondary CPUs The commit "xen: re-register runstate area earlier on resume" caused us to never try and setup the runstate area for secondary CPUs. Ensure that we do this... Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel --- arch/x86/xen/smp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index fe03eeed7b48..360f8d8c19cd 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -295,6 +295,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) (unsigned long)task_stack_page(idle) - KERNEL_STACK_OFFSET + THREAD_SIZE; #endif + xen_setup_runstate_info(cpu); xen_setup_timer(cpu); xen_init_lock_cpu(cpu); -- cgit v1.2.3 From 499d19b82b586aef18727b9ae1437f8f37b66e91 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 24 Nov 2009 09:38:25 -0800 Subject: xen: register runstate info for boot CPU early printk timestamping uses sched_clock, which in turn relies on runstate info under Xen. So make sure we set it up before any printks can be called. Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel --- arch/x86/xen/enlighten.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index a7b49f99a130..79f97383cde3 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1181,6 +1181,8 @@ asmlinkage void __init xen_start_kernel(void) xen_raw_console_write("about to get started...\n"); + xen_setup_runstate_info(0); + /* Start the world */ #ifdef CONFIG_X86_32 i386_start_kernel(); -- cgit v1.2.3 From 6aaf5d633bb6cead81b396d861d7bae4b9a0ba7e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 25 Nov 2009 13:15:38 -0800 Subject: xen: use iret for return from 64b kernel to 32b usermode If Xen wants to return to a 32b usermode with sysret it must use the right form. When using VCGF_in_syscall to trigger this, it looks at the code segment and does a 32b sysret if it is FLAT_USER_CS32. However, this is different from __USER32_CS, so it fails to return properly if we use the normal Linux segment. So avoid the whole mess by dropping VCGF_in_syscall and simply use plain iret to return to usermode. Signed-off-by: Jeremy Fitzhardinge Acked-by: Jan Beulich Cc: Stable Kernel --- arch/x86/xen/xen-asm_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 02f496a8dbaa..53adefda4275 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -96,7 +96,7 @@ ENTRY(xen_sysret32) pushq $__USER32_CS pushq %rcx - pushq $VGCF_in_syscall + pushq $0 1: jmp hypercall_iret ENDPATCH(xen_sysret32) RELOC(xen_sysret32, 1b+1) @@ -151,7 +151,7 @@ ENTRY(xen_syscall32_target) ENTRY(xen_sysenter_target) lea 16(%rsp), %rsp /* strip %rcx, %r11 */ mov $-ENOSYS, %rax - pushq $VGCF_in_syscall + pushq $0 jmp hypercall_iret ENDPROC(xen_syscall32_target) ENDPROC(xen_sysenter_target) -- cgit v1.2.3 From f6eafe3665bcc374c66775d58312d1c06c55303f Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 25 Nov 2009 14:12:08 +0000 Subject: xen: call clock resume notifier on all CPUs tick_resume() is never called on secondary processors. Presumably this is because they are offlined for suspend on native and so this is normally taken care of in the CPU onlining path. Under Xen we keep all CPUs online over a suspend. This patch papers over the issue for me but I will investigate a more generic, less hacky, way of doing to the same. tick_suspend is also only called on the boot CPU which I presume should be fixed too. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel Cc: Thomas Gleixner --- arch/x86/xen/suspend.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 6343a5d8e93c..987267f79bf5 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -1,4 +1,5 @@ #include +#include #include #include @@ -46,7 +47,19 @@ void xen_post_suspend(int suspend_cancelled) } +static void xen_vcpu_notify_restore(void *data) +{ + unsigned long reason = (unsigned long)data; + + /* Boot processor notified via generic timekeeping_resume() */ + if ( smp_processor_id() == 0) + return; + + clockevents_notify(reason, NULL); +} + void xen_arch_resume(void) { - /* nothing */ + smp_call_function(xen_vcpu_notify_restore, + (void *)CLOCK_EVT_NOTIFY_RESUME, 1); } -- cgit v1.2.3 From af901ca181d92aac3a7dc265144a9081a86d8f39 Mon Sep 17 00:00:00 2001 From: André Goddard Rosa Date: Sat, 14 Nov 2009 13:09:05 -0200 Subject: tree-wide: fix assorted typos all over the place MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That is "success", "unknown", "through", "performance", "[re|un]mapping" , "access", "default", "reasonable", "[con]currently", "temperature" , "channel", "[un]used", "application", "example","hierarchy", "therefore" , "[over|under]flow", "contiguous", "threshold", "enough" and others. Signed-off-by: André Goddard Rosa Signed-off-by: Jiri Kosina --- Documentation/ABI/testing/procfs-diskstats | 2 +- Documentation/ABI/testing/sysfs-block | 2 +- Documentation/DocBook/mtdnand.tmpl | 2 +- Documentation/DocBook/v4l/videodev2.h.xml | 2 +- Documentation/DocBook/writing-an-alsa-driver.tmpl | 2 +- Documentation/dvb/README.dvb-usb | 2 +- Documentation/lguest/lguest.c | 2 +- Documentation/scsi/ChangeLog.megaraid_sas | 2 +- Documentation/spi/spi-summary | 2 +- Documentation/sysctl/vm.txt | 2 +- Documentation/video4linux/gspca.txt | 2 +- Documentation/vm/page-types.c | 2 +- arch/alpha/mm/numa.c | 2 +- arch/arm/common/scoop.c | 2 +- .../mach-bcmring/include/mach/csp/dmacHw_priv.h | 2 +- arch/arm/mach-bcmring/include/mach/dma.h | 2 +- arch/arm/mach-lh7a40x/include/mach/hardware.h | 2 +- arch/arm/mach-orion5x/pci.c | 2 +- arch/arm/mach-pxa/include/mach/palmld.h | 2 +- arch/arm/mach-pxa/include/mach/palmt5.h | 2 +- arch/arm/mach-pxa/include/mach/palmtc.h | 2 +- arch/arm/mach-pxa/include/mach/palmte2.h | 2 +- arch/arm/mach-pxa/include/mach/palmtx.h | 2 +- arch/arm/mach-pxa/include/mach/palmz72.h | 2 +- arch/arm/mach-s3c6400/setup-sdhci.c | 2 +- arch/arm/mach-s3c6410/setup-sdhci.c | 2 +- arch/arm/mach-sa1100/dma.c | 2 +- arch/arm/plat-mxc/include/mach/iomux-mx3.h | 2 +- arch/arm/plat-mxc/include/mach/iomux-mxc91231.h | 2 +- arch/arm/plat-mxc/pwm.c | 2 +- arch/arm/plat-omap/dma.c | 2 +- arch/arm/plat-omap/include/mach/omap16xx.h | 2 +- arch/arm/plat-s3c24xx/include/plat/map.h | 2 +- arch/avr32/boards/hammerhead/Kconfig | 2 +- arch/blackfin/kernel/traps.c | 2 +- .../mach-bf518/include/mach/defBF51x_base.h | 4 +- .../mach-bf527/include/mach/defBF52x_base.h | 4 +- arch/blackfin/mach-bf537/include/mach/defBF534.h | 4 +- arch/blackfin/mach-bf548/include/mach/defBF544.h | 4 +- arch/blackfin/mach-bf548/include/mach/defBF547.h | 4 +- arch/blackfin/mach-bf548/include/mach/defBF548.h | 4 +- arch/blackfin/mach-bf548/include/mach/defBF549.h | 4 +- arch/cris/mm/fault.c | 2 +- arch/ia64/hp/common/sba_iommu.c | 2 +- arch/ia64/ia32/ia32_entry.S | 2 +- arch/ia64/include/asm/perfmon_default_smpl.h | 2 +- arch/ia64/include/asm/sn/shubio.h | 2 +- arch/ia64/kernel/esi.c | 2 +- arch/ia64/kernel/perfmon.c | 2 +- arch/m68k/ifpsp060/src/fpsp.S | 28 +++++----- arch/m68k/ifpsp060/src/pfpsp.S | 26 ++++----- arch/m68k/include/asm/bootinfo.h | 2 +- arch/microblaze/lib/memcpy.c | 2 +- arch/microblaze/lib/memmove.c | 2 +- arch/microblaze/lib/memset.c | 2 +- arch/mips/include/asm/mach-pnx833x/gpio.h | 2 +- arch/mips/include/asm/sgi/ioc.h | 2 +- arch/mips/include/asm/sibyte/sb1250_mac.h | 2 +- arch/mips/include/asm/sn/sn0/hubio.h | 2 +- arch/mips/kernel/smtc.c | 2 +- arch/mips/math-emu/dp_sub.c | 2 +- arch/mips/txx9/generic/smsc_fdc37m81x.c | 2 +- arch/powerpc/include/asm/reg_fsl_emb.h | 2 +- arch/powerpc/kernel/kgdb.c | 2 +- arch/powerpc/kernel/tau_6xx.c | 2 +- arch/powerpc/oprofile/op_model_cell.c | 4 +- arch/powerpc/platforms/52xx/mpc52xx_pci.c | 2 +- arch/powerpc/platforms/powermac/pci.c | 2 +- arch/powerpc/sysdev/dart_iommu.c | 2 +- arch/s390/math-emu/math.c | 4 +- arch/x86/include/asm/desc_defs.h | 4 +- arch/x86/include/asm/mmzone_32.h | 2 +- arch/x86/include/asm/uv/uv_bau.h | 2 +- arch/x86/kernel/acpi/boot.c | 2 +- arch/x86/kernel/amd_iommu.c | 4 +- arch/x86/kernel/cpu/perf_event.c | 2 +- arch/x86/kernel/kprobes.c | 4 +- arch/x86/mm/kmmio.c | 4 +- block/blk-iopoll.c | 2 +- drivers/ata/ata_piix.c | 2 +- drivers/ata/sata_fsl.c | 6 +-- drivers/atm/iphase.c | 2 +- drivers/base/dd.c | 2 +- drivers/bluetooth/btmrvl_sdio.c | 2 +- drivers/bluetooth/hci_ldisc.c | 2 +- drivers/char/mem.c | 2 +- drivers/char/mspec.c | 2 +- drivers/char/n_r3964.c | 2 +- drivers/char/rio/route.h | 2 +- drivers/crypto/hifn_795x.c | 2 +- drivers/dma/at_hdmac.c | 2 +- drivers/firewire/core-topology.c | 2 +- drivers/gpu/drm/drm_crtc.c | 4 +- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/intel_fb.c | 2 +- drivers/gpu/drm/i915/intel_sdvo.c | 2 +- drivers/gpu/drm/radeon/r600.c | 4 +- drivers/gpu/drm/radeon/radeon_fb.c | 2 +- drivers/gpu/drm/radeon/radeon_state.c | 2 +- drivers/gpu/drm/radeon/radeon_ttm.c | 2 +- drivers/gpu/drm/radeon/rv770.c | 4 +- drivers/gpu/drm/ttm/ttm_bo_util.c | 2 +- drivers/hwmon/adm1029.c | 2 +- drivers/hwmon/lm93.c | 2 +- drivers/ieee1394/dv1394.c | 2 +- drivers/infiniband/hw/ipath/ipath_iba6110.c | 2 +- drivers/infiniband/hw/ipath/ipath_sd7220.c | 4 +- drivers/infiniband/hw/mlx4/qp.c | 2 +- drivers/input/serio/hp_sdc.c | 2 +- drivers/input/serio/hp_sdc_mlc.c | 2 +- drivers/input/touchscreen/atmel-wm97xx.c | 2 +- drivers/input/touchscreen/mainstone-wm97xx.c | 4 +- drivers/input/touchscreen/zylonite-wm97xx.c | 2 +- drivers/isdn/capi/capidrv.c | 2 +- drivers/isdn/hardware/eicon/di.c | 2 +- drivers/isdn/hardware/eicon/maintidi.c | 4 +- drivers/isdn/hardware/mISDN/hfcsusb.c | 2 +- drivers/isdn/hardware/mISDN/hfcsusb.h | 2 +- drivers/isdn/hardware/mISDN/mISDNisar.c | 2 +- drivers/isdn/hisax/hfc_usb.c | 2 +- drivers/isdn/i4l/isdn_ppp.c | 6 +-- drivers/isdn/i4l/isdn_ttyfax.c | 2 +- drivers/isdn/mISDN/dsp_core.c | 2 +- drivers/isdn/mISDN/tei.c | 2 +- drivers/macintosh/therm_windtunnel.c | 2 +- drivers/media/common/saa7146_i2c.c | 2 +- drivers/media/dvb/dvb-core/dvb_frontend.h | 2 +- drivers/media/dvb/dvb-usb/anysee.c | 2 +- drivers/media/dvb/dvb-usb/dibusb-mb.c | 2 +- drivers/media/dvb/dvb-usb/dvb-usb-remote.c | 2 +- drivers/media/dvb/dvb-usb/usb-urb.c | 4 +- drivers/media/dvb/frontends/au8522_decoder.c | 2 +- drivers/media/dvb/frontends/cx24110.c | 4 +- drivers/media/dvb/frontends/cx24113.c | 2 +- drivers/media/dvb/frontends/dib3000mb.c | 2 +- drivers/media/dvb/frontends/lgdt330x.c | 4 +- drivers/media/dvb/frontends/stb0899_drv.c | 2 +- drivers/media/dvb/ttpci/av7110.c | 4 +- drivers/media/dvb/ttpci/budget-patch.c | 2 +- drivers/media/radio/radio-mr800.c | 2 +- drivers/media/video/cx231xx/cx231xx-avcore.c | 8 +-- drivers/media/video/cx23885/cx23885-dvb.c | 2 +- drivers/media/video/cx88/cx88-core.c | 2 +- drivers/media/video/davinci/dm355_ccdc.c | 2 +- drivers/media/video/davinci/vpss.c | 2 +- drivers/media/video/gspca/sonixb.c | 2 +- drivers/media/video/gspca/spca500.c | 2 +- drivers/media/video/gspca/spca501.c | 6 +-- drivers/media/video/gspca/sunplus.c | 2 +- drivers/media/video/gspca/zc3xx.c | 2 +- drivers/media/video/pvrusb2/pvrusb2-hdw-internal.h | 2 +- drivers/media/video/s2255drv.c | 2 +- drivers/media/video/zoran/zoran.h | 2 +- drivers/message/i2o/i2o_block.c | 2 +- drivers/message/i2o/iop.c | 4 +- drivers/misc/sgi-gru/grufile.c | 2 +- drivers/mmc/host/s3cmci.c | 2 +- drivers/mtd/devices/slram.c | 2 +- drivers/mtd/nand/diskonchip.c | 2 +- drivers/mtd/nand/nand_ecc.c | 2 +- drivers/mtd/nand/s3c2410.c | 2 +- drivers/net/82596.c | 2 +- drivers/net/amd8111e.c | 7 ++- drivers/net/appletalk/cops.c | 2 +- drivers/net/ariadne.h | 2 +- drivers/net/atl1c/atl1c_main.c | 2 +- drivers/net/benet/be_cmds.h | 2 +- drivers/net/benet/be_main.c | 2 +- drivers/net/bnx2x_reg.h | 2 +- drivers/net/cxgb3/sge.c | 2 +- drivers/net/ehea/ehea_ethtool.c | 4 +- drivers/net/hamradio/baycom_ser_fdx.c | 2 +- drivers/net/iseries_veth.c | 2 +- drivers/net/lasi_82596.c | 2 +- drivers/net/lib82596.c | 2 +- drivers/net/mlx4/en_rx.c | 2 +- drivers/net/mlx4/en_tx.c | 2 +- drivers/net/mlx4/mlx4_en.h | 2 +- drivers/net/ps3_gelic_net.c | 2 +- drivers/net/sis900.c | 4 +- drivers/net/skfp/h/smc.h | 8 +-- drivers/net/skfp/skfddi.c | 2 +- drivers/net/smsc911x.c | 2 +- drivers/net/smsc911x.h | 2 +- drivers/net/spider_net.c | 2 +- drivers/net/stmmac/gmac.c | 2 +- drivers/net/stmmac/gmac.h | 4 +- drivers/net/tokenring/smctr.c | 2 +- drivers/net/ucc_geth.c | 2 +- drivers/net/ucc_geth.h | 20 +++---- drivers/net/usb/smsc95xx.c | 2 +- drivers/net/wan/lmc/lmc_main.c | 2 +- drivers/net/wimax/i2400m/rx.c | 2 +- drivers/net/wireless/ath/ath5k/phy.c | 6 +-- drivers/net/wireless/ath/ath9k/rc.c | 2 +- drivers/net/wireless/ipw2x00/ipw2100.c | 6 +-- drivers/net/wireless/ipw2x00/ipw2200.c | 8 +-- drivers/net/wireless/ipw2x00/libipw_module.c | 2 +- drivers/net/wireless/iwmc3200wifi/hal.c | 2 +- drivers/net/wireless/iwmc3200wifi/rx.c | 2 +- drivers/net/wireless/libertas/if_sdio.c | 2 +- drivers/net/wireless/prism54/isl_ioctl.c | 4 +- drivers/net/wireless/rt2x00/rt2400pci.h | 2 +- drivers/net/wireless/rt2x00/rt2500pci.h | 2 +- drivers/net/wireless/rt2x00/rt2500usb.h | 2 +- drivers/net/wireless/rt2x00/rt61pci.h | 2 +- drivers/net/wireless/rt2x00/rt73usb.h | 2 +- drivers/net/wireless/wavelan_cs.c | 2 +- drivers/net/wireless/zd1211rw/zd_mac.c | 2 +- drivers/parisc/ccio-dma.c | 2 +- drivers/platform/x86/thinkpad_acpi.c | 2 +- drivers/pnp/pnpbios/rsparser.c | 8 +-- drivers/ps3/ps3-sys-manager.c | 2 +- drivers/rtc/rtc-v3020.c | 2 +- drivers/s390/char/fs3270.c | 2 +- drivers/s390/cio/chp.c | 2 +- drivers/s390/cio/cmf.c | 2 +- drivers/sbus/char/envctrl.c | 4 +- drivers/scsi/53c700.c | 2 +- drivers/scsi/aacraid/aacraid.h | 6 +-- drivers/scsi/aacraid/comminit.c | 2 +- drivers/scsi/aic7xxx/aic79xx.seq | 2 +- drivers/scsi/aic7xxx/aic79xx_core.c | 2 +- drivers/scsi/aic7xxx/aic7xxx_core.c | 2 +- drivers/scsi/bfa/include/defs/bfa_defs_pport.h | 2 +- drivers/scsi/bfa/include/defs/bfa_defs_tsensor.h | 2 +- drivers/scsi/hptiop.c | 2 +- drivers/scsi/libfc/fc_lport.c | 2 +- drivers/scsi/libiscsi_tcp.c | 2 +- drivers/scsi/lpfc/lpfc_attr.c | 4 +- drivers/scsi/lpfc/lpfc_els.c | 4 +- drivers/scsi/lpfc/lpfc_init.c | 62 +++++++++++----------- drivers/scsi/lpfc/lpfc_sli.c | 10 ++-- drivers/scsi/megaraid.h | 2 +- drivers/scsi/megaraid/mbox_defs.h | 2 +- drivers/scsi/megaraid/megaraid_mbox.c | 2 +- drivers/scsi/mpt2sas/mpt2sas_scsih.c | 2 +- drivers/scsi/ncr53c8xx.c | 2 +- drivers/scsi/pmcraid.c | 6 +-- drivers/scsi/pmcraid.h | 6 +-- drivers/scsi/scsi_netlink.c | 2 +- drivers/scsi/scsi_transport_sas.c | 6 +-- drivers/scsi/sym53c8xx_2/sym_glue.c | 2 +- drivers/scsi/sym53c8xx_2/sym_hipd.c | 2 +- drivers/scsi/sym53c8xx_2/sym_hipd.h | 2 +- drivers/serial/8250_pnp.c | 4 +- drivers/serial/pmac_zilog.h | 2 +- drivers/serial/ucc_uart.c | 2 +- drivers/telephony/ixj.c | 4 +- drivers/usb/atm/ueagle-atm.c | 2 +- drivers/usb/class/usbtmc.c | 2 +- drivers/usb/core/message.c | 2 +- drivers/usb/gadget/f_acm.c | 2 +- drivers/usb/gadget/pxa27x_udc.c | 2 +- drivers/usb/host/fhci-sched.c | 2 +- drivers/usb/wusbcore/crypto.c | 2 +- drivers/usb/wusbcore/wa-xfer.c | 4 +- drivers/uwb/i1480/dfu/usb.c | 2 +- drivers/uwb/wlp/txrx.c | 2 +- drivers/video/aty/atyfb_base.c | 4 +- drivers/video/backlight/atmel-pwm-bl.c | 2 +- drivers/video/backlight/tosa_lcd.c | 2 +- drivers/video/console/sticore.c | 2 +- drivers/video/gbefb.c | 2 +- drivers/video/stifb.c | 4 +- drivers/video/tdfxfb.c | 2 +- drivers/video/via/dvi.c | 4 +- drivers/video/vt8623fb.c | 2 +- drivers/watchdog/coh901327_wdt.c | 2 +- drivers/watchdog/machzwd.c | 2 +- drivers/watchdog/wdrtas.c | 6 +-- fs/binfmt_elf.c | 2 +- fs/bio.c | 2 +- fs/btrfs/extent_map.c | 2 +- fs/cifs/README | 2 +- fs/cifs/cifsglob.h | 2 +- fs/cifs/inode.c | 4 +- fs/cifs/smbdes.c | 2 +- fs/dlm/plock.c | 2 +- fs/ext4/inode.c | 6 +-- fs/ext4/mballoc.c | 2 +- fs/jffs2/compr.c | 2 +- fs/jffs2/readinode.c | 2 +- fs/jffs2/xattr.c | 2 +- fs/jfs/jfs_dmap.c | 4 +- fs/ncpfs/ioctl.c | 2 +- fs/ntfs/compress.c | 2 +- fs/ntfs/file.c | 4 +- fs/ntfs/logfile.c | 2 +- fs/ocfs2/alloc.c | 2 +- fs/ocfs2/dlm/dlmmaster.c | 2 +- fs/ocfs2/dlmglue.c | 2 +- fs/ocfs2/journal.c | 2 +- fs/ocfs2/refcounttree.c | 2 +- fs/omfs/bitmap.c | 2 +- fs/ubifs/recovery.c | 2 +- fs/xfs/quota/xfs_dquot.h | 2 +- include/asm-generic/memory_model.h | 2 +- include/asm-generic/unistd.h | 2 +- include/linux/chio.h | 2 +- include/linux/mfd/ezx-pcap.h | 4 +- include/linux/pktcdvd.h | 2 +- include/linux/serial_reg.h | 8 +-- include/linux/videodev2.h | 2 +- include/net/sctp/structs.h | 2 +- include/net/tcp.h | 2 +- include/net/wimax.h | 2 +- include/sound/wm8993.h | 2 +- kernel/perf_event.c | 4 +- lib/Kconfig.debug | 2 +- lib/decompress_bunzip2.c | 2 +- lib/dma-debug.c | 2 +- lib/swiotlb.c | 2 +- mm/filemap.c | 2 +- mm/memcontrol.c | 4 +- mm/memory-failure.c | 2 +- net/ipv4/netfilter/ipt_ECN.c | 2 +- net/irda/irlap.c | 14 ++--- net/irda/irlap_event.c | 2 +- net/irda/irlmp.c | 4 +- net/mac80211/mesh_pathtbl.c | 4 +- net/netlabel/netlabel_domainhash.c | 2 +- net/sctp/sm_sideeffect.c | 2 +- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 2 +- net/wimax/op-reset.c | 2 +- scripts/kconfig/mconf.c | 2 +- security/selinux/netlabel.c | 2 +- security/selinux/ss/services.c | 2 +- sound/Kconfig | 2 +- sound/isa/cs423x/cs4236.c | 2 +- sound/isa/opti9xx/miro.c | 2 +- sound/isa/opti9xx/opti92x-ad1848.c | 2 +- sound/oss/dmasound/dmasound_paula.c | 2 +- sound/pci/ca0106/ca0106_proc.c | 2 +- sound/pci/cs46xx/imgs/cwcdma.asp | 9 ++-- sound/pci/emu10k1/emu10k1x.c | 2 +- sound/pci/hda/patch_cmedia.c | 2 +- sound/pci/hda/patch_realtek.c | 2 +- sound/pci/rme9652/hdspm.c | 4 +- sound/soc/codecs/uda134x.c | 4 +- sound/soc/codecs/wm8903.c | 6 +-- sound/soc/codecs/wm8993.c | 4 +- sound/soc/s3c24xx/s3c24xx_simtec.c | 2 +- sound/soc/s6000/s6000-pcm.c | 2 +- sound/sound_core.c | 2 +- 345 files changed, 516 insertions(+), 508 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/ABI/testing/procfs-diskstats b/Documentation/ABI/testing/procfs-diskstats index 99233902e09e..f91a973a37fe 100644 --- a/Documentation/ABI/testing/procfs-diskstats +++ b/Documentation/ABI/testing/procfs-diskstats @@ -8,7 +8,7 @@ Description: 1 - major number 2 - minor mumber 3 - device name - 4 - reads completed succesfully + 4 - reads completed successfully 5 - reads merged 6 - sectors read 7 - time spent reading (ms) diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block index 5f3bedaf8e35..d2f90334bb93 100644 --- a/Documentation/ABI/testing/sysfs-block +++ b/Documentation/ABI/testing/sysfs-block @@ -4,7 +4,7 @@ Contact: Jerome Marchand Description: The /sys/block//stat files displays the I/O statistics of disk . They contain 11 fields: - 1 - reads completed succesfully + 1 - reads completed successfully 2 - reads merged 3 - sectors read 4 - time spent reading (ms) diff --git a/Documentation/DocBook/mtdnand.tmpl b/Documentation/DocBook/mtdnand.tmpl index df0d089d0fb9..f508a8a27fea 100644 --- a/Documentation/DocBook/mtdnand.tmpl +++ b/Documentation/DocBook/mtdnand.tmpl @@ -362,7 +362,7 @@ module_exit(board_cleanup); Multiple chip control - The nand driver can control chip arrays. Therefor the + The nand driver can control chip arrays. Therefore the board driver must provide an own select_chip function. This function must (de)select the requested chip. The function pointer in the nand_chip structure must diff --git a/Documentation/DocBook/v4l/videodev2.h.xml b/Documentation/DocBook/v4l/videodev2.h.xml index 97002060ac4f..26303e58345f 100644 --- a/Documentation/DocBook/v4l/videodev2.h.xml +++ b/Documentation/DocBook/v4l/videodev2.h.xml @@ -492,7 +492,7 @@ struct v4l2_jpegcompression { * you do, leave them untouched. * Inluding less markers will make the * resulting code smaller, but there will - * be fewer aplications which can read it. + * be fewer applications which can read it. * The presence of the APP and COM marker * is influenced by APP_len and COM_len * ONLY, not by this property! */ diff --git a/Documentation/DocBook/writing-an-alsa-driver.tmpl b/Documentation/DocBook/writing-an-alsa-driver.tmpl index 7a2e0e98986a..0d0f7b4d4b1a 100644 --- a/Documentation/DocBook/writing-an-alsa-driver.tmpl +++ b/Documentation/DocBook/writing-an-alsa-driver.tmpl @@ -5318,7 +5318,7 @@ struct _snd_pcm_runtime { pages of the given size and map them onto the virtually contiguous memory. The virtual pointer is addressed in runtime->dma_area. The physical address (runtime->dma_addr) is set to zero, - because the buffer is physically non-contigous. + because the buffer is physically non-contiguous. The physical address table is set up in sgbuf->table. You can get the physical address at a certain offset via snd_pcm_sgbuf_get_addr(). diff --git a/Documentation/dvb/README.dvb-usb b/Documentation/dvb/README.dvb-usb index bf2a9cdfe7bb..c8238e44ed6b 100644 --- a/Documentation/dvb/README.dvb-usb +++ b/Documentation/dvb/README.dvb-usb @@ -85,7 +85,7 @@ http://www.linuxtv.org/wiki/index.php/DVB_USB - moved transfer control (pid filter, fifo control) from usb driver to frontend, it seems better settled there (added xfer_ops-struct) - created a common files for frontends (mc/p/mb) - 2004-09-28 - added support for a new device (Unkown, vendor ID is Hyper-Paltek) + 2004-09-28 - added support for a new device (Unknown, vendor ID is Hyper-Paltek) 2004-09-20 - added support for a new device (Compro DVB-U2000), thanks to Amaury Demol for reporting - changed usb TS transfer method (several urbs, stopping transfer diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 098de5bce00a..42208511b5c0 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -304,7 +304,7 @@ static void *map_zeroed_pages(unsigned int num) addr = mmap(NULL, getpagesize() * num, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, fd, 0); if (addr == MAP_FAILED) - err(1, "Mmaping %u pages of /dev/zero", num); + err(1, "Mmapping %u pages of /dev/zero", num); /* * One neat mmap feature is that you can close the fd, and it diff --git a/Documentation/scsi/ChangeLog.megaraid_sas b/Documentation/scsi/ChangeLog.megaraid_sas index c851ef497795..84524e0cf9c3 100644 --- a/Documentation/scsi/ChangeLog.megaraid_sas +++ b/Documentation/scsi/ChangeLog.megaraid_sas @@ -185,7 +185,7 @@ ii. FW enables WCE bit in Mode Sense cmd for drives that are configured Disks are exposed with WCE=1. User is advised to enable Write Back mode only when the controller has battery backup. At this time Synhronize cache is not supported by the FW. Driver will short-cycle - the cmd and return sucess without sending down to FW. + the cmd and return success without sending down to FW. 1 Release Date : Sun Jan. 14 11:21:32 PDT 2007 - Sumant Patro /Bo Yang diff --git a/Documentation/spi/spi-summary b/Documentation/spi/spi-summary index deab51ddc33e..4884cb33845d 100644 --- a/Documentation/spi/spi-summary +++ b/Documentation/spi/spi-summary @@ -538,7 +538,7 @@ SPI MESSAGE QUEUE The bulk of the driver will be managing the I/O queue fed by transfer(). That queue could be purely conceptual. For example, a driver used only -for low-frequency sensor acess might be fine using synchronous PIO. +for low-frequency sensor access might be fine using synchronous PIO. But the queue will probably be very real, using message->queue, PIO, often DMA (especially if the root filesystem is in SPI flash), and diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index a6e360d2055c..fc5790d36cd9 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -370,7 +370,7 @@ The default is 1 percent. mmap_min_addr This file indicates the amount of address space which a user process will -be restricted from mmaping. Since kernel null dereference bugs could +be restricted from mmapping. Since kernel null dereference bugs could accidentally operate based on the information in the first couple of pages of memory userspace processes should not be allowed to write to them. By default this value is set to 0 and no protections will be enforced by the diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt index 3f61825be499..6b29555b58b7 100644 --- a/Documentation/video4linux/gspca.txt +++ b/Documentation/video4linux/gspca.txt @@ -6,7 +6,7 @@ The modules are: xxxx vend:prod ---- -spca501 0000:0000 MystFromOri Unknow Camera +spca501 0000:0000 MystFromOri Unknown Camera m5602 0402:5602 ALi Video Camera Controller spca501 040a:0002 Kodak DVC-325 spca500 040a:0300 Kodak EZ200 diff --git a/Documentation/vm/page-types.c b/Documentation/vm/page-types.c index 3ec4f2a22585..aa7f4d0639c4 100644 --- a/Documentation/vm/page-types.c +++ b/Documentation/vm/page-types.c @@ -301,7 +301,7 @@ static char *page_flag_name(uint64_t flags) present = (flags >> i) & 1; if (!page_flag_names[i]) { if (present) - fatal("unkown flag bit %d\n", i); + fatal("unknown flag bit %d\n", i); continue; } buf[j++] = present ? page_flag_names[i][0] : '_'; diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c index 10b403554b65..7b2c56d8f930 100644 --- a/arch/alpha/mm/numa.c +++ b/arch/alpha/mm/numa.c @@ -197,7 +197,7 @@ setup_memory_node(int nid, void *kernel_end) } if (bootmap_start == -1) - panic("couldn't find a contigous place for the bootmap"); + panic("couldn't find a contiguous place for the bootmap"); /* Allocate the bootmap and mark the whole MM as reserved. */ bootmap_size = init_bootmem_node(NODE_DATA(nid), bootmap_start, diff --git a/arch/arm/common/scoop.c b/arch/arm/common/scoop.c index 7713a08bb10c..37bda5f3dde3 100644 --- a/arch/arm/common/scoop.c +++ b/arch/arm/common/scoop.c @@ -82,7 +82,7 @@ static int scoop_gpio_get(struct gpio_chip *chip, unsigned offset) { struct scoop_dev *sdev = container_of(chip, struct scoop_dev, gpio); - /* XXX: I'm usure, but it seems so */ + /* XXX: I'm unsure, but it seems so */ return ioread16(sdev->base + SCOOP_GPRR) & (1 << (offset + 1)); } diff --git a/arch/arm/mach-bcmring/include/mach/csp/dmacHw_priv.h b/arch/arm/mach-bcmring/include/mach/csp/dmacHw_priv.h index 375066ad0186..cbf334d1c761 100644 --- a/arch/arm/mach-bcmring/include/mach/csp/dmacHw_priv.h +++ b/arch/arm/mach-bcmring/include/mach/csp/dmacHw_priv.h @@ -83,7 +83,7 @@ typedef struct { * @brief Get next available transaction width * * -* @return On sucess : Next avail able transaction width +* @return On success : Next available transaction width * On failure : dmacHw_TRANSACTION_WIDTH_8 * * @note diff --git a/arch/arm/mach-bcmring/include/mach/dma.h b/arch/arm/mach-bcmring/include/mach/dma.h index 847980c85c88..1f2c5319c056 100644 --- a/arch/arm/mach-bcmring/include/mach/dma.h +++ b/arch/arm/mach-bcmring/include/mach/dma.h @@ -651,7 +651,7 @@ int dma_map_add_region(DMA_MemMap_t *memMap, /* Stores state information about t /** * Creates a descriptor ring from a memory mapping. * -* @return 0 on sucess, error code otherwise. +* @return 0 on success, error code otherwise. */ /****************************************************************************/ diff --git a/arch/arm/mach-lh7a40x/include/mach/hardware.h b/arch/arm/mach-lh7a40x/include/mach/hardware.h index 48e827d2fa56..59d2ace35217 100644 --- a/arch/arm/mach-lh7a40x/include/mach/hardware.h +++ b/arch/arm/mach-lh7a40x/include/mach/hardware.h @@ -31,7 +31,7 @@ /* * This __REG() version gives the same results as the one above, except * that we are fooling gcc somehow so it generates far better and smaller - * assembly code for access to contigous registers. It's a shame that gcc + * assembly code for access to contiguous registers. It's a shame that gcc * doesn't guess this by itself. */ #include diff --git a/arch/arm/mach-orion5x/pci.c b/arch/arm/mach-orion5x/pci.c index 36dc5413cc97..bdf96eb523bc 100644 --- a/arch/arm/mach-orion5x/pci.c +++ b/arch/arm/mach-orion5x/pci.c @@ -463,7 +463,7 @@ static void __init orion5x_setup_pci_wins(struct mbus_dram_target_info *dram) writel(win_enable, PCI_BAR_ENABLE); /* - * Disable automatic update of address remaping when writing to BARs. + * Disable automatic update of address remapping when writing to BARs. */ orion5x_setbits(PCI_ADDR_DECODE_CTRL, 1); } diff --git a/arch/arm/mach-pxa/include/mach/palmld.h b/arch/arm/mach-pxa/include/mach/palmld.h index 8721b8010221..ae536e86d8e8 100644 --- a/arch/arm/mach-pxa/include/mach/palmld.h +++ b/arch/arm/mach-pxa/include/mach/palmld.h @@ -91,7 +91,7 @@ /* BATTERY */ #define PALMLD_BAT_MAX_VOLTAGE 4000 /* 4.00V maximum voltage */ #define PALMLD_BAT_MIN_VOLTAGE 3550 /* 3.55V critical voltage */ -#define PALMLD_BAT_MAX_CURRENT 0 /* unknokn */ +#define PALMLD_BAT_MAX_CURRENT 0 /* unknown */ #define PALMLD_BAT_MIN_CURRENT 0 /* unknown */ #define PALMLD_BAT_MAX_CHARGE 1 /* unknown */ #define PALMLD_BAT_MIN_CHARGE 1 /* unknown */ diff --git a/arch/arm/mach-pxa/include/mach/palmt5.h b/arch/arm/mach-pxa/include/mach/palmt5.h index d15662aba008..6baf7469d4ec 100644 --- a/arch/arm/mach-pxa/include/mach/palmt5.h +++ b/arch/arm/mach-pxa/include/mach/palmt5.h @@ -66,7 +66,7 @@ /* BATTERY */ #define PALMT5_BAT_MAX_VOLTAGE 4000 /* 4.00v current voltage */ #define PALMT5_BAT_MIN_VOLTAGE 3550 /* 3.55v critical voltage */ -#define PALMT5_BAT_MAX_CURRENT 0 /* unknokn */ +#define PALMT5_BAT_MAX_CURRENT 0 /* unknown */ #define PALMT5_BAT_MIN_CURRENT 0 /* unknown */ #define PALMT5_BAT_MAX_CHARGE 1 /* unknown */ #define PALMT5_BAT_MIN_CHARGE 1 /* unknown */ diff --git a/arch/arm/mach-pxa/include/mach/palmtc.h b/arch/arm/mach-pxa/include/mach/palmtc.h index 3dc9b074ab46..3f9dd3fd4638 100644 --- a/arch/arm/mach-pxa/include/mach/palmtc.h +++ b/arch/arm/mach-pxa/include/mach/palmtc.h @@ -68,7 +68,7 @@ /* BATTERY */ #define PALMTC_BAT_MAX_VOLTAGE 4000 /* 4.00V maximum voltage */ #define PALMTC_BAT_MIN_VOLTAGE 3550 /* 3.55V critical voltage */ -#define PALMTC_BAT_MAX_CURRENT 0 /* unknokn */ +#define PALMTC_BAT_MAX_CURRENT 0 /* unknown */ #define PALMTC_BAT_MIN_CURRENT 0 /* unknown */ #define PALMTC_BAT_MAX_CHARGE 1 /* unknown */ #define PALMTC_BAT_MIN_CHARGE 1 /* unknown */ diff --git a/arch/arm/mach-pxa/include/mach/palmte2.h b/arch/arm/mach-pxa/include/mach/palmte2.h index 12361341f9d8..f89e989a7637 100644 --- a/arch/arm/mach-pxa/include/mach/palmte2.h +++ b/arch/arm/mach-pxa/include/mach/palmte2.h @@ -59,7 +59,7 @@ /* BATTERY */ #define PALMTE2_BAT_MAX_VOLTAGE 4000 /* 4.00v current voltage */ #define PALMTE2_BAT_MIN_VOLTAGE 3550 /* 3.55v critical voltage */ -#define PALMTE2_BAT_MAX_CURRENT 0 /* unknokn */ +#define PALMTE2_BAT_MAX_CURRENT 0 /* unknown */ #define PALMTE2_BAT_MIN_CURRENT 0 /* unknown */ #define PALMTE2_BAT_MAX_CHARGE 1 /* unknown */ #define PALMTE2_BAT_MIN_CHARGE 1 /* unknown */ diff --git a/arch/arm/mach-pxa/include/mach/palmtx.h b/arch/arm/mach-pxa/include/mach/palmtx.h index 1be0db6ed55e..10abc4f2e8e4 100644 --- a/arch/arm/mach-pxa/include/mach/palmtx.h +++ b/arch/arm/mach-pxa/include/mach/palmtx.h @@ -94,7 +94,7 @@ /* BATTERY */ #define PALMTX_BAT_MAX_VOLTAGE 4000 /* 4.00v current voltage */ #define PALMTX_BAT_MIN_VOLTAGE 3550 /* 3.55v critical voltage */ -#define PALMTX_BAT_MAX_CURRENT 0 /* unknokn */ +#define PALMTX_BAT_MAX_CURRENT 0 /* unknown */ #define PALMTX_BAT_MIN_CURRENT 0 /* unknown */ #define PALMTX_BAT_MAX_CHARGE 1 /* unknown */ #define PALMTX_BAT_MIN_CHARGE 1 /* unknown */ diff --git a/arch/arm/mach-pxa/include/mach/palmz72.h b/arch/arm/mach-pxa/include/mach/palmz72.h index 2806ef69ba5a..2bbcf70dd935 100644 --- a/arch/arm/mach-pxa/include/mach/palmz72.h +++ b/arch/arm/mach-pxa/include/mach/palmz72.h @@ -49,7 +49,7 @@ /* Battery */ #define PALMZ72_BAT_MAX_VOLTAGE 4000 /* 4.00v current voltage */ #define PALMZ72_BAT_MIN_VOLTAGE 3550 /* 3.55v critical voltage */ -#define PALMZ72_BAT_MAX_CURRENT 0 /* unknokn */ +#define PALMZ72_BAT_MAX_CURRENT 0 /* unknown */ #define PALMZ72_BAT_MIN_CURRENT 0 /* unknown */ #define PALMZ72_BAT_MAX_CHARGE 1 /* unknown */ #define PALMZ72_BAT_MIN_CHARGE 1 /* unknown */ diff --git a/arch/arm/mach-s3c6400/setup-sdhci.c b/arch/arm/mach-s3c6400/setup-sdhci.c index b93dafbee1f4..1039937403be 100644 --- a/arch/arm/mach-s3c6400/setup-sdhci.c +++ b/arch/arm/mach-s3c6400/setup-sdhci.c @@ -30,7 +30,7 @@ char *s3c6400_hsmmc_clksrcs[4] = { [0] = "hsmmc", [1] = "hsmmc", [2] = "mmc_bus", - /* [3] = "48m", - note not succesfully used yet */ + /* [3] = "48m", - note not successfully used yet */ }; void s3c6400_setup_sdhci_cfg_card(struct platform_device *dev, diff --git a/arch/arm/mach-s3c6410/setup-sdhci.c b/arch/arm/mach-s3c6410/setup-sdhci.c index 20666f3bd478..816d2d9f9ef8 100644 --- a/arch/arm/mach-s3c6410/setup-sdhci.c +++ b/arch/arm/mach-s3c6410/setup-sdhci.c @@ -30,7 +30,7 @@ char *s3c6410_hsmmc_clksrcs[4] = { [0] = "hsmmc", [1] = "hsmmc", [2] = "mmc_bus", - /* [3] = "48m", - note not succesfully used yet */ + /* [3] = "48m", - note not successfully used yet */ }; diff --git a/arch/arm/mach-sa1100/dma.c b/arch/arm/mach-sa1100/dma.c index cb4521a6f42d..ad660350c296 100644 --- a/arch/arm/mach-sa1100/dma.c +++ b/arch/arm/mach-sa1100/dma.c @@ -65,7 +65,7 @@ static irqreturn_t dma_irq_handler(int irq, void *dev_id) /** - * sa1100_request_dma - allocate one of the SA11x0's DMA chanels + * sa1100_request_dma - allocate one of the SA11x0's DMA channels * @device: The SA11x0 peripheral targeted by this request * @device_id: An ascii name for the claiming device * @callback: Function to be called when the DMA completes diff --git a/arch/arm/plat-mxc/include/mach/iomux-mx3.h b/arch/arm/plat-mxc/include/mach/iomux-mx3.h index 446f86763816..0c7802bbeccb 100644 --- a/arch/arm/plat-mxc/include/mach/iomux-mx3.h +++ b/arch/arm/plat-mxc/include/mach/iomux-mx3.h @@ -112,7 +112,7 @@ enum iomux_gp_func { * setups a single pin: * - reserves the pin so that it is not claimed by another driver * - setups the iomux according to the configuration - * - if the pin is configured as a GPIO, we claim it throug kernel gpiolib + * - if the pin is configured as a GPIO, we claim it through kernel gpiolib */ int mxc_iomux_alloc_pin(const unsigned int pin, const char *label); /* diff --git a/arch/arm/plat-mxc/include/mach/iomux-mxc91231.h b/arch/arm/plat-mxc/include/mach/iomux-mxc91231.h index 9f13061192c8..3887f3fe29d4 100644 --- a/arch/arm/plat-mxc/include/mach/iomux-mxc91231.h +++ b/arch/arm/plat-mxc/include/mach/iomux-mxc91231.h @@ -48,7 +48,7 @@ * setups a single pin: * - reserves the pin so that it is not claimed by another driver * - setups the iomux according to the configuration - * - if the pin is configured as a GPIO, we claim it throug kernel gpiolib + * - if the pin is configured as a GPIO, we claim it through kernel gpiolib */ int mxc_iomux_alloc_pin(const unsigned int pin_mode, const char *label); /* diff --git a/arch/arm/plat-mxc/pwm.c b/arch/arm/plat-mxc/pwm.c index 5cdbd605ac05..4ff6dfe04283 100644 --- a/arch/arm/plat-mxc/pwm.c +++ b/arch/arm/plat-mxc/pwm.c @@ -94,7 +94,7 @@ int pwm_config(struct pwm_device *pwm, int duty_ns, int period_ns) * register to follow the ratio of duty_ns vs. period_ns * accordingly. * - * This is good enought for programming the brightness of + * This is good enough for programming the brightness of * the LCD backlight. * * The real implementation would divide PERCLK[0] first by diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c index b53125f41293..0e308913291b 100644 --- a/arch/arm/plat-omap/dma.c +++ b/arch/arm/plat-omap/dma.c @@ -1232,7 +1232,7 @@ static void create_dma_lch_chain(int lch_head, int lch_queue) * OMAP_DMA_DYNAMIC_CHAIN * @params - Channel parameters * - * @return - Succes : 0 + * @return - Success : 0 * Failure: -EINVAL/-ENOMEM */ int omap_request_dma_chain(int dev_id, const char *dev_name, diff --git a/arch/arm/plat-omap/include/mach/omap16xx.h b/arch/arm/plat-omap/include/mach/omap16xx.h index 0e69b504c25f..7560b4d583a3 100644 --- a/arch/arm/plat-omap/include/mach/omap16xx.h +++ b/arch/arm/plat-omap/include/mach/omap16xx.h @@ -124,7 +124,7 @@ #define TIPB_SWITCH_BASE (0xfffbc800) #define OMAP16XX_MMCSD2_SSW_MPU_CONF (TIPB_SWITCH_BASE + 0x160) -/* UART3 Registers Maping through MPU bus */ +/* UART3 Registers Mapping through MPU bus */ #define UART3_RHR (OMAP_UART3_BASE + 0) #define UART3_THR (OMAP_UART3_BASE + 0) #define UART3_DLL (OMAP_UART3_BASE + 0) diff --git a/arch/arm/plat-s3c24xx/include/plat/map.h b/arch/arm/plat-s3c24xx/include/plat/map.h index c4d133436fc7..bd534d32b993 100644 --- a/arch/arm/plat-s3c24xx/include/plat/map.h +++ b/arch/arm/plat-s3c24xx/include/plat/map.h @@ -64,7 +64,7 @@ /* the calculation for the VA of this must ensure that * it is the same distance apart from the UART in the * phsyical address space, as the initial mapping for the IO - * is done as a 1:1 maping. This puts it (currently) at + * is done as a 1:1 mapping. This puts it (currently) at * 0xFA800000, which is not in the way of any current mapping * by the base system. */ diff --git a/arch/avr32/boards/hammerhead/Kconfig b/arch/avr32/boards/hammerhead/Kconfig index fda2331f9789..5c13d785cc70 100644 --- a/arch/avr32/boards/hammerhead/Kconfig +++ b/arch/avr32/boards/hammerhead/Kconfig @@ -24,7 +24,7 @@ config BOARD_HAMMERHEAD_SND bool "Atmel AC97 Sound support" help This enables Sound support for the Hammerhead board. You may - also go trough the ALSA settings to get it working. + also go through the ALSA settings to get it working. Choose 'Y' here if you have ordered a Corona daugther board and want to make your board funky. diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c index 6b7325d634af..78cb3d38f899 100644 --- a/arch/blackfin/kernel/traps.c +++ b/arch/blackfin/kernel/traps.c @@ -619,7 +619,7 @@ asmlinkage notrace void trap_c(struct pt_regs *fp) /* * Similar to get_user, do some address checking, then dereference - * Return true on sucess, false on bad address + * Return true on success, false on bad address */ static bool get_instruction(unsigned short *val, unsigned short *address) { diff --git a/arch/blackfin/mach-bf518/include/mach/defBF51x_base.h b/arch/blackfin/mach-bf518/include/mach/defBF51x_base.h index e06f4112c695..f9fd2b2a2956 100644 --- a/arch/blackfin/mach-bf518/include/mach/defBF51x_base.h +++ b/arch/blackfin/mach-bf518/include/mach/defBF51x_base.h @@ -542,7 +542,7 @@ #define HMDMA0_CONTROL 0xFFC03300 /* Handshake MDMA0 Control Register */ #define HMDMA0_ECINIT 0xFFC03304 /* HMDMA0 Initial Edge Count Register */ #define HMDMA0_BCINIT 0xFFC03308 /* HMDMA0 Initial Block Count Register */ -#define HMDMA0_ECURGENT 0xFFC0330C /* HMDMA0 Urgent Edge Count Threshhold Register */ +#define HMDMA0_ECURGENT 0xFFC0330C /* HMDMA0 Urgent Edge Count Threshold Register */ #define HMDMA0_ECOVERFLOW 0xFFC03310 /* HMDMA0 Edge Count Overflow Interrupt Register */ #define HMDMA0_ECOUNT 0xFFC03314 /* HMDMA0 Current Edge Count Register */ #define HMDMA0_BCOUNT 0xFFC03318 /* HMDMA0 Current Block Count Register */ @@ -550,7 +550,7 @@ #define HMDMA1_CONTROL 0xFFC03340 /* Handshake MDMA1 Control Register */ #define HMDMA1_ECINIT 0xFFC03344 /* HMDMA1 Initial Edge Count Register */ #define HMDMA1_BCINIT 0xFFC03348 /* HMDMA1 Initial Block Count Register */ -#define HMDMA1_ECURGENT 0xFFC0334C /* HMDMA1 Urgent Edge Count Threshhold Register */ +#define HMDMA1_ECURGENT 0xFFC0334C /* HMDMA1 Urgent Edge Count Threshold Register */ #define HMDMA1_ECOVERFLOW 0xFFC03350 /* HMDMA1 Edge Count Overflow Interrupt Register */ #define HMDMA1_ECOUNT 0xFFC03354 /* HMDMA1 Current Edge Count Register */ #define HMDMA1_BCOUNT 0xFFC03358 /* HMDMA1 Current Block Count Register */ diff --git a/arch/blackfin/mach-bf527/include/mach/defBF52x_base.h b/arch/blackfin/mach-bf527/include/mach/defBF52x_base.h index f821700716ee..b9dbb73d7ef0 100644 --- a/arch/blackfin/mach-bf527/include/mach/defBF52x_base.h +++ b/arch/blackfin/mach-bf527/include/mach/defBF52x_base.h @@ -544,7 +544,7 @@ #define HMDMA0_CONTROL 0xFFC03300 /* Handshake MDMA0 Control Register */ #define HMDMA0_ECINIT 0xFFC03304 /* HMDMA0 Initial Edge Count Register */ #define HMDMA0_BCINIT 0xFFC03308 /* HMDMA0 Initial Block Count Register */ -#define HMDMA0_ECURGENT 0xFFC0330C /* HMDMA0 Urgent Edge Count Threshhold Register */ +#define HMDMA0_ECURGENT 0xFFC0330C /* HMDMA0 Urgent Edge Count Threshold Register */ #define HMDMA0_ECOVERFLOW 0xFFC03310 /* HMDMA0 Edge Count Overflow Interrupt Register */ #define HMDMA0_ECOUNT 0xFFC03314 /* HMDMA0 Current Edge Count Register */ #define HMDMA0_BCOUNT 0xFFC03318 /* HMDMA0 Current Block Count Register */ @@ -552,7 +552,7 @@ #define HMDMA1_CONTROL 0xFFC03340 /* Handshake MDMA1 Control Register */ #define HMDMA1_ECINIT 0xFFC03344 /* HMDMA1 Initial Edge Count Register */ #define HMDMA1_BCINIT 0xFFC03348 /* HMDMA1 Initial Block Count Register */ -#define HMDMA1_ECURGENT 0xFFC0334C /* HMDMA1 Urgent Edge Count Threshhold Register */ +#define HMDMA1_ECURGENT 0xFFC0334C /* HMDMA1 Urgent Edge Count Threshold Register */ #define HMDMA1_ECOVERFLOW 0xFFC03350 /* HMDMA1 Edge Count Overflow Interrupt Register */ #define HMDMA1_ECOUNT 0xFFC03354 /* HMDMA1 Current Edge Count Register */ #define HMDMA1_BCOUNT 0xFFC03358 /* HMDMA1 Current Block Count Register */ diff --git a/arch/blackfin/mach-bf537/include/mach/defBF534.h b/arch/blackfin/mach-bf537/include/mach/defBF534.h index cebb14feb1ba..a6d20ca57683 100644 --- a/arch/blackfin/mach-bf537/include/mach/defBF534.h +++ b/arch/blackfin/mach-bf537/include/mach/defBF534.h @@ -934,7 +934,7 @@ #define HMDMA0_CONTROL 0xFFC03300 /* Handshake MDMA0 Control Register */ #define HMDMA0_ECINIT 0xFFC03304 /* HMDMA0 Initial Edge Count Register */ #define HMDMA0_BCINIT 0xFFC03308 /* HMDMA0 Initial Block Count Register */ -#define HMDMA0_ECURGENT 0xFFC0330C /* HMDMA0 Urgent Edge Count Threshhold Register */ +#define HMDMA0_ECURGENT 0xFFC0330C /* HMDMA0 Urgent Edge Count Threshold Register */ #define HMDMA0_ECOVERFLOW 0xFFC03310 /* HMDMA0 Edge Count Overflow Interrupt Register */ #define HMDMA0_ECOUNT 0xFFC03314 /* HMDMA0 Current Edge Count Register */ #define HMDMA0_BCOUNT 0xFFC03318 /* HMDMA0 Current Block Count Register */ @@ -942,7 +942,7 @@ #define HMDMA1_CONTROL 0xFFC03340 /* Handshake MDMA1 Control Register */ #define HMDMA1_ECINIT 0xFFC03344 /* HMDMA1 Initial Edge Count Register */ #define HMDMA1_BCINIT 0xFFC03348 /* HMDMA1 Initial Block Count Register */ -#define HMDMA1_ECURGENT 0xFFC0334C /* HMDMA1 Urgent Edge Count Threshhold Register */ +#define HMDMA1_ECURGENT 0xFFC0334C /* HMDMA1 Urgent Edge Count Threshold Register */ #define HMDMA1_ECOVERFLOW 0xFFC03350 /* HMDMA1 Edge Count Overflow Interrupt Register */ #define HMDMA1_ECOUNT 0xFFC03354 /* HMDMA1 Current Edge Count Register */ #define HMDMA1_BCOUNT 0xFFC03358 /* HMDMA1 Current Block Count Register */ diff --git a/arch/blackfin/mach-bf548/include/mach/defBF544.h b/arch/blackfin/mach-bf548/include/mach/defBF544.h index dd414ae4ba4c..39f588dcd382 100644 --- a/arch/blackfin/mach-bf548/include/mach/defBF544.h +++ b/arch/blackfin/mach-bf548/include/mach/defBF544.h @@ -491,7 +491,7 @@ #define HMDMA0_CONTROL 0xffc04500 /* Handshake MDMA0 Control Register */ #define HMDMA0_ECINIT 0xffc04504 /* Handshake MDMA0 Initial Edge Count Register */ #define HMDMA0_BCINIT 0xffc04508 /* Handshake MDMA0 Initial Block Count Register */ -#define HMDMA0_ECURGENT 0xffc0450c /* Handshake MDMA0 Urgent Edge Count Threshhold Register */ +#define HMDMA0_ECURGENT 0xffc0450c /* Handshake MDMA0 Urgent Edge Count Threshold Register */ #define HMDMA0_ECOVERFLOW 0xffc04510 /* Handshake MDMA0 Edge Count Overflow Interrupt Register */ #define HMDMA0_ECOUNT 0xffc04514 /* Handshake MDMA0 Current Edge Count Register */ #define HMDMA0_BCOUNT 0xffc04518 /* Handshake MDMA0 Current Block Count Register */ @@ -501,7 +501,7 @@ #define HMDMA1_CONTROL 0xffc04540 /* Handshake MDMA1 Control Register */ #define HMDMA1_ECINIT 0xffc04544 /* Handshake MDMA1 Initial Edge Count Register */ #define HMDMA1_BCINIT 0xffc04548 /* Handshake MDMA1 Initial Block Count Register */ -#define HMDMA1_ECURGENT 0xffc0454c /* Handshake MDMA1 Urgent Edge Count Threshhold Register */ +#define HMDMA1_ECURGENT 0xffc0454c /* Handshake MDMA1 Urgent Edge Count Threshold Register */ #define HMDMA1_ECOVERFLOW 0xffc04550 /* Handshake MDMA1 Edge Count Overflow Interrupt Register */ #define HMDMA1_ECOUNT 0xffc04554 /* Handshake MDMA1 Current Edge Count Register */ #define HMDMA1_BCOUNT 0xffc04558 /* Handshake MDMA1 Current Block Count Register */ diff --git a/arch/blackfin/mach-bf548/include/mach/defBF547.h b/arch/blackfin/mach-bf548/include/mach/defBF547.h index 5a9dbabe0a68..c4dcf302d9f5 100644 --- a/arch/blackfin/mach-bf548/include/mach/defBF547.h +++ b/arch/blackfin/mach-bf548/include/mach/defBF547.h @@ -470,7 +470,7 @@ #define HMDMA0_CONTROL 0xffc04500 /* Handshake MDMA0 Control Register */ #define HMDMA0_ECINIT 0xffc04504 /* Handshake MDMA0 Initial Edge Count Register */ #define HMDMA0_BCINIT 0xffc04508 /* Handshake MDMA0 Initial Block Count Register */ -#define HMDMA0_ECURGENT 0xffc0450c /* Handshake MDMA0 Urgent Edge Count Threshhold Register */ +#define HMDMA0_ECURGENT 0xffc0450c /* Handshake MDMA0 Urgent Edge Count Threshold Register */ #define HMDMA0_ECOVERFLOW 0xffc04510 /* Handshake MDMA0 Edge Count Overflow Interrupt Register */ #define HMDMA0_ECOUNT 0xffc04514 /* Handshake MDMA0 Current Edge Count Register */ #define HMDMA0_BCOUNT 0xffc04518 /* Handshake MDMA0 Current Block Count Register */ @@ -480,7 +480,7 @@ #define HMDMA1_CONTROL 0xffc04540 /* Handshake MDMA1 Control Register */ #define HMDMA1_ECINIT 0xffc04544 /* Handshake MDMA1 Initial Edge Count Register */ #define HMDMA1_BCINIT 0xffc04548 /* Handshake MDMA1 Initial Block Count Register */ -#define HMDMA1_ECURGENT 0xffc0454c /* Handshake MDMA1 Urgent Edge Count Threshhold Register */ +#define HMDMA1_ECURGENT 0xffc0454c /* Handshake MDMA1 Urgent Edge Count Threshold Register */ #define HMDMA1_ECOVERFLOW 0xffc04550 /* Handshake MDMA1 Edge Count Overflow Interrupt Register */ #define HMDMA1_ECOUNT 0xffc04554 /* Handshake MDMA1 Current Edge Count Register */ #define HMDMA1_BCOUNT 0xffc04558 /* Handshake MDMA1 Current Block Count Register */ diff --git a/arch/blackfin/mach-bf548/include/mach/defBF548.h b/arch/blackfin/mach-bf548/include/mach/defBF548.h index 82cd593f7391..a5079980968c 100644 --- a/arch/blackfin/mach-bf548/include/mach/defBF548.h +++ b/arch/blackfin/mach-bf548/include/mach/defBF548.h @@ -853,7 +853,7 @@ #define HMDMA0_CONTROL 0xffc04500 /* Handshake MDMA0 Control Register */ #define HMDMA0_ECINIT 0xffc04504 /* Handshake MDMA0 Initial Edge Count Register */ #define HMDMA0_BCINIT 0xffc04508 /* Handshake MDMA0 Initial Block Count Register */ -#define HMDMA0_ECURGENT 0xffc0450c /* Handshake MDMA0 Urgent Edge Count Threshhold Register */ +#define HMDMA0_ECURGENT 0xffc0450c /* Handshake MDMA0 Urgent Edge Count Threshold Register */ #define HMDMA0_ECOVERFLOW 0xffc04510 /* Handshake MDMA0 Edge Count Overflow Interrupt Register */ #define HMDMA0_ECOUNT 0xffc04514 /* Handshake MDMA0 Current Edge Count Register */ #define HMDMA0_BCOUNT 0xffc04518 /* Handshake MDMA0 Current Block Count Register */ @@ -863,7 +863,7 @@ #define HMDMA1_CONTROL 0xffc04540 /* Handshake MDMA1 Control Register */ #define HMDMA1_ECINIT 0xffc04544 /* Handshake MDMA1 Initial Edge Count Register */ #define HMDMA1_BCINIT 0xffc04548 /* Handshake MDMA1 Initial Block Count Register */ -#define HMDMA1_ECURGENT 0xffc0454c /* Handshake MDMA1 Urgent Edge Count Threshhold Register */ +#define HMDMA1_ECURGENT 0xffc0454c /* Handshake MDMA1 Urgent Edge Count Threshold Register */ #define HMDMA1_ECOVERFLOW 0xffc04550 /* Handshake MDMA1 Edge Count Overflow Interrupt Register */ #define HMDMA1_ECOUNT 0xffc04554 /* Handshake MDMA1 Current Edge Count Register */ #define HMDMA1_BCOUNT 0xffc04558 /* Handshake MDMA1 Current Block Count Register */ diff --git a/arch/blackfin/mach-bf548/include/mach/defBF549.h b/arch/blackfin/mach-bf548/include/mach/defBF549.h index 6fc6e39ab61b..f7f043560c6f 100644 --- a/arch/blackfin/mach-bf548/include/mach/defBF549.h +++ b/arch/blackfin/mach-bf548/include/mach/defBF549.h @@ -1024,7 +1024,7 @@ #define HMDMA0_CONTROL 0xffc04500 /* Handshake MDMA0 Control Register */ #define HMDMA0_ECINIT 0xffc04504 /* Handshake MDMA0 Initial Edge Count Register */ #define HMDMA0_BCINIT 0xffc04508 /* Handshake MDMA0 Initial Block Count Register */ -#define HMDMA0_ECURGENT 0xffc0450c /* Handshake MDMA0 Urgent Edge Count Threshhold Register */ +#define HMDMA0_ECURGENT 0xffc0450c /* Handshake MDMA0 Urgent Edge Count Threshold Register */ #define HMDMA0_ECOVERFLOW 0xffc04510 /* Handshake MDMA0 Edge Count Overflow Interrupt Register */ #define HMDMA0_ECOUNT 0xffc04514 /* Handshake MDMA0 Current Edge Count Register */ #define HMDMA0_BCOUNT 0xffc04518 /* Handshake MDMA0 Current Block Count Register */ @@ -1034,7 +1034,7 @@ #define HMDMA1_CONTROL 0xffc04540 /* Handshake MDMA1 Control Register */ #define HMDMA1_ECINIT 0xffc04544 /* Handshake MDMA1 Initial Edge Count Register */ #define HMDMA1_BCINIT 0xffc04548 /* Handshake MDMA1 Initial Block Count Register */ -#define HMDMA1_ECURGENT 0xffc0454c /* Handshake MDMA1 Urgent Edge Count Threshhold Register */ +#define HMDMA1_ECURGENT 0xffc0454c /* Handshake MDMA1 Urgent Edge Count Threshold Register */ #define HMDMA1_ECOVERFLOW 0xffc04550 /* Handshake MDMA1 Edge Count Overflow Interrupt Register */ #define HMDMA1_ECOUNT 0xffc04554 /* Handshake MDMA1 Current Edge Count Register */ #define HMDMA1_BCOUNT 0xffc04558 /* Handshake MDMA1 Current Block Count Register */ diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c index 4a7cdd9ea1ee..380df1a73a6e 100644 --- a/arch/cris/mm/fault.c +++ b/arch/cris/mm/fault.c @@ -209,7 +209,7 @@ do_page_fault(unsigned long address, struct pt_regs *regs, /* Are we prepared to handle this kernel fault? * * (The kernel has valid exception-points in the source - * when it acesses user-memory. When it fails in one + * when it accesses user-memory. When it fails in one * of those points, we find it in a table and do a jump * to some fixup code that loads an appropriate error * code) diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 674a8374c6d9..f332e3fe4237 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -1381,7 +1381,7 @@ sba_coalesce_chunks(struct ioc *ioc, struct device *dev, #endif /* - ** Not virtually contigous. + ** Not virtually contiguous. ** Terminate prev chunk. ** Start a new chunk. ** diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S index af9405cd70e5..02d1fb732951 100644 --- a/arch/ia64/ia32/ia32_entry.S +++ b/arch/ia64/ia32/ia32_entry.S @@ -79,7 +79,7 @@ GLOBAL_ENTRY(ia32_ret_from_clone) (p6) br.cond.spnt .ia32_strace_check_retval ;; // prevent RAW on r8 END(ia32_ret_from_clone) - // fall thrugh + // fall through GLOBAL_ENTRY(ia32_ret_from_syscall) PT_REGS_UNWIND_INFO(0) diff --git a/arch/ia64/include/asm/perfmon_default_smpl.h b/arch/ia64/include/asm/perfmon_default_smpl.h index 48822c0811d8..74724b24c2b7 100644 --- a/arch/ia64/include/asm/perfmon_default_smpl.h +++ b/arch/ia64/include/asm/perfmon_default_smpl.h @@ -67,7 +67,7 @@ typedef struct { unsigned long ip; /* where did the overflow interrupt happened */ unsigned long tstamp; /* ar.itc when entering perfmon intr. handler */ - unsigned short cpu; /* cpu on which the overfow occured */ + unsigned short cpu; /* cpu on which the overflow occured */ unsigned short set; /* event set active when overflow ocurred */ int tgid; /* thread group id (for NPTL, this is getpid()) */ } pfm_default_smpl_entry_t; diff --git a/arch/ia64/include/asm/sn/shubio.h b/arch/ia64/include/asm/sn/shubio.h index 22a6f18a5313..6052422a22b3 100644 --- a/arch/ia64/include/asm/sn/shubio.h +++ b/arch/ia64/include/asm/sn/shubio.h @@ -3289,7 +3289,7 @@ typedef ii_icrb0_e_u_t icrbe_t; #define IIO_IIDSR_LVL_SHIFT 0 #define IIO_IIDSR_LVL_MASK 0x000000ff -/* Xtalk timeout threshhold register (IIO_IXTT) */ +/* Xtalk timeout threshold register (IIO_IXTT) */ #define IXTT_RRSP_TO_SHFT 55 /* read response timeout */ #define IXTT_RRSP_TO_MASK (0x1FULL << IXTT_RRSP_TO_SHFT) #define IXTT_RRSP_PS_SHFT 32 /* read responsed TO prescalar */ diff --git a/arch/ia64/kernel/esi.c b/arch/ia64/kernel/esi.c index d5764a3d74af..b091111270cb 100644 --- a/arch/ia64/kernel/esi.c +++ b/arch/ia64/kernel/esi.c @@ -84,7 +84,7 @@ static int __init esi_init (void) case ESI_DESC_ENTRY_POINT: break; default: - printk(KERN_WARNING "Unkown table type %d found in " + printk(KERN_WARNING "Unknown table type %d found in " "ESI table, ignoring rest of table\n", *p); return -ENODEV; } diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index f1782705b1f7..b3a1cb3e6b25 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -3523,7 +3523,7 @@ pfm_use_debug_registers(struct task_struct *task) * IA64_THREAD_DBG_VALID set. This indicates a task which was * able to use the debug registers for debugging purposes via * ptrace(). Therefore we know it was not using them for - * perfmormance monitoring, so we only decrement the number + * performance monitoring, so we only decrement the number * of "ptraced" debug register users to keep the count up to date */ int diff --git a/arch/m68k/ifpsp060/src/fpsp.S b/arch/m68k/ifpsp060/src/fpsp.S index 6c1a9a217887..73613b5f1ee5 100644 --- a/arch/m68k/ifpsp060/src/fpsp.S +++ b/arch/m68k/ifpsp060/src/fpsp.S @@ -753,7 +753,7 @@ fovfl_ovfl_on: bra.l _real_ovfl -# overflow occurred but is disabled. meanwhile, inexact is enabled. therefore, +# overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore, # we must jump to real_inex(). fovfl_inex_on: @@ -1015,7 +1015,7 @@ funfl_unfl_on2: bra.l _real_unfl -# undeflow occurred but is disabled. meanwhile, inexact is enabled. therefore, +# underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore, # we must jump to real_inex(). funfl_inex_on: @@ -2963,7 +2963,7 @@ iea_disabled: tst.w %d0 # is instr fmovm? bmi.b iea_dis_fmovm # yes -# instruction is using an extended precision immediate operand. therefore, +# instruction is using an extended precision immediate operand. Therefore, # the total instruction length is 16 bytes. iea_dis_immed: mov.l &0x10,%d0 # 16 bytes of instruction @@ -9624,7 +9624,7 @@ sok_dnrm: bge.b sok_norm2 # thank goodness no # the multiply factor that we're trying to create should be a denorm -# for the multiply to work. therefore, we're going to actually do a +# for the multiply to work. Therefore, we're going to actually do a # multiply with a denorm which will cause an unimplemented data type # exception to be put into the machine which will be caught and corrected # later. we don't do this with the DENORMs above because this method @@ -12216,7 +12216,7 @@ fin_sd_unfl_dis: # # operand will underflow AND underflow or inexact is enabled. -# therefore, we must return the result rounded to extended precision. +# Therefore, we must return the result rounded to extended precision. # fin_sd_unfl_ena: mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) @@ -12746,7 +12746,7 @@ fdiv_zero_load_p: # # The destination was In Range and the source was a ZERO. The result, -# therefore, is an INF w/ the proper sign. +# Therefore, is an INF w/ the proper sign. # So, determine the sign and return a new INF (w/ the j-bit cleared). # global fdiv_inf_load # global for fsgldiv @@ -12996,7 +12996,7 @@ fneg_sd_unfl_dis: # # operand will underflow AND underflow is enabled. -# therefore, we must return the result rounded to extended precision. +# Therefore, we must return the result rounded to extended precision. # fneg_sd_unfl_ena: mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) @@ -13611,7 +13611,7 @@ fabs_sd_unfl_dis: # # operand will underflow AND underflow is enabled. -# therefore, we must return the result rounded to extended precision. +# Therefore, we must return the result rounded to extended precision. # fabs_sd_unfl_ena: mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) @@ -14973,7 +14973,7 @@ fadd_zero_2: # # the ZEROes have opposite signs: -# - therefore, we return +ZERO if the rounding modes are RN,RZ, or RP. +# - Therefore, we return +ZERO if the rounding modes are RN,RZ, or RP. # - -ZERO is returned in the case of RM. # fadd_zero_2_chk_rm: @@ -15425,7 +15425,7 @@ fsub_zero_2: # # the ZEROes have the same signs: -# - therefore, we return +ZERO if the rounding mode is RN,RZ, or RP +# - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP # - -ZERO is returned in the case of RM. # fsub_zero_2_chk_rm: @@ -15693,7 +15693,7 @@ fsqrt_sd_unfl_dis: # # operand will underflow AND underflow is enabled. -# therefore, we must return the result rounded to extended precision. +# Therefore, we must return the result rounded to extended precision. # fsqrt_sd_unfl_ena: mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) @@ -21000,7 +21000,7 @@ fout_pack_type: tst.l %d0 bne.b fout_pack_set # "mantissa" is all zero which means that the answer is zero. but, the '040 -# algorithm allows the exponent to be non-zero. the 881/2 do not. therefore, +# algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore, # if the mantissa is zero, I will zero the exponent, too. # the question now is whether the exponents sign bit is allowed to be non-zero # for a zero, also... @@ -21743,7 +21743,7 @@ denorm_set_stky: rts # # -# dnrm_lp(): normalize exponent/mantissa to specified threshhold # +# dnrm_lp(): normalize exponent/mantissa to specified threshold # # # # INPUT: # # %a0 : points to the operand to be denormalized # @@ -22402,7 +22402,7 @@ unnorm_shift: bgt.b unnorm_nrm_zero # yes; denorm only until exp = 0 # -# exponent would not go < 0. therefore, number stays normalized +# exponent would not go < 0. Therefore, number stays normalized # sub.w %d0, %d1 # shift exponent value mov.w FTEMP_EX(%a0), %d0 # load old exponent diff --git a/arch/m68k/ifpsp060/src/pfpsp.S b/arch/m68k/ifpsp060/src/pfpsp.S index 51b9f7d879dd..e71ba0ab013c 100644 --- a/arch/m68k/ifpsp060/src/pfpsp.S +++ b/arch/m68k/ifpsp060/src/pfpsp.S @@ -752,7 +752,7 @@ fovfl_ovfl_on: bra.l _real_ovfl -# overflow occurred but is disabled. meanwhile, inexact is enabled. therefore, +# overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore, # we must jump to real_inex(). fovfl_inex_on: @@ -1014,7 +1014,7 @@ funfl_unfl_on2: bra.l _real_unfl -# undeflow occurred but is disabled. meanwhile, inexact is enabled. therefore, +# underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore, # we must jump to real_inex(). funfl_inex_on: @@ -2962,7 +2962,7 @@ iea_disabled: tst.w %d0 # is instr fmovm? bmi.b iea_dis_fmovm # yes -# instruction is using an extended precision immediate operand. therefore, +# instruction is using an extended precision immediate operand. Therefore, # the total instruction length is 16 bytes. iea_dis_immed: mov.l &0x10,%d0 # 16 bytes of instruction @@ -5865,7 +5865,7 @@ denorm_set_stky: rts # # -# dnrm_lp(): normalize exponent/mantissa to specified threshhold # +# dnrm_lp(): normalize exponent/mantissa to specified threshold # # # # INPUT: # # %a0 : points to the operand to be denormalized # @@ -6524,7 +6524,7 @@ unnorm_shift: bgt.b unnorm_nrm_zero # yes; denorm only until exp = 0 # -# exponent would not go < 0. therefore, number stays normalized +# exponent would not go < 0. Therefore, number stays normalized # sub.w %d0, %d1 # shift exponent value mov.w FTEMP_EX(%a0), %d0 # load old exponent @@ -7901,7 +7901,7 @@ fout_pack_type: tst.l %d0 bne.b fout_pack_set # "mantissa" is all zero which means that the answer is zero. but, the '040 -# algorithm allows the exponent to be non-zero. the 881/2 do not. therefore, +# algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore, # if the mantissa is zero, I will zero the exponent, too. # the question now is whether the exponents sign bit is allowed to be non-zero # for a zero, also... @@ -8647,7 +8647,7 @@ fin_sd_unfl_dis: # # operand will underflow AND underflow or inexact is enabled. -# therefore, we must return the result rounded to extended precision. +# Therefore, we must return the result rounded to extended precision. # fin_sd_unfl_ena: mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) @@ -9177,7 +9177,7 @@ fdiv_zero_load_p: # # The destination was In Range and the source was a ZERO. The result, -# therefore, is an INF w/ the proper sign. +# Therefore, is an INF w/ the proper sign. # So, determine the sign and return a new INF (w/ the j-bit cleared). # global fdiv_inf_load # global for fsgldiv @@ -9427,7 +9427,7 @@ fneg_sd_unfl_dis: # # operand will underflow AND underflow is enabled. -# therefore, we must return the result rounded to extended precision. +# Therefore, we must return the result rounded to extended precision. # fneg_sd_unfl_ena: mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) @@ -10042,7 +10042,7 @@ fabs_sd_unfl_dis: # # operand will underflow AND underflow is enabled. -# therefore, we must return the result rounded to extended precision. +# Therefore, we must return the result rounded to extended precision. # fabs_sd_unfl_ena: mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) @@ -11404,7 +11404,7 @@ fadd_zero_2: # # the ZEROes have opposite signs: -# - therefore, we return +ZERO if the rounding modes are RN,RZ, or RP. +# - Therefore, we return +ZERO if the rounding modes are RN,RZ, or RP. # - -ZERO is returned in the case of RM. # fadd_zero_2_chk_rm: @@ -11856,7 +11856,7 @@ fsub_zero_2: # # the ZEROes have the same signs: -# - therefore, we return +ZERO if the rounding mode is RN,RZ, or RP +# - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP # - -ZERO is returned in the case of RM. # fsub_zero_2_chk_rm: @@ -12124,7 +12124,7 @@ fsqrt_sd_unfl_dis: # # operand will underflow AND underflow is enabled. -# therefore, we must return the result rounded to extended precision. +# Therefore, we must return the result rounded to extended precision. # fsqrt_sd_unfl_ena: mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) diff --git a/arch/m68k/include/asm/bootinfo.h b/arch/m68k/include/asm/bootinfo.h index fb8a06b9ab6a..67e7a78ad96b 100644 --- a/arch/m68k/include/asm/bootinfo.h +++ b/arch/m68k/include/asm/bootinfo.h @@ -145,7 +145,7 @@ struct bi_record { /* * Macintosh hardware profile data - unused, see macintosh.h for - * resonable type values + * reasonable type values */ #define BI_MAC_VIA1BASE 0x8010 /* Mac VIA1 base address (always present) */ diff --git a/arch/microblaze/lib/memcpy.c b/arch/microblaze/lib/memcpy.c index 6a907c58a4bc..cc2108b6b260 100644 --- a/arch/microblaze/lib/memcpy.c +++ b/arch/microblaze/lib/memcpy.c @@ -9,7 +9,7 @@ * It is based on demo code originally Copyright 2001 by Intel Corp, taken from * http://www.embedded.com/showArticle.jhtml?articleID=19205567 * - * Attempts were made, unsuccesfully, to contact the original + * Attempts were made, unsuccessfully, to contact the original * author of this code (Michael Morrow, Intel). Below is the original * copyright notice. * diff --git a/arch/microblaze/lib/memmove.c b/arch/microblaze/lib/memmove.c index d4e9f49a71f7..0929198c5e68 100644 --- a/arch/microblaze/lib/memmove.c +++ b/arch/microblaze/lib/memmove.c @@ -9,7 +9,7 @@ * It is based on demo code originally Copyright 2001 by Intel Corp, taken from * http://www.embedded.com/showArticle.jhtml?articleID=19205567 * - * Attempts were made, unsuccesfully, to contact the original + * Attempts were made, unsuccessfully, to contact the original * author of this code (Michael Morrow, Intel). Below is the original * copyright notice. * diff --git a/arch/microblaze/lib/memset.c b/arch/microblaze/lib/memset.c index 941dc8f94b03..4df851d41a29 100644 --- a/arch/microblaze/lib/memset.c +++ b/arch/microblaze/lib/memset.c @@ -9,7 +9,7 @@ * It is based on demo code originally Copyright 2001 by Intel Corp, taken from * http://www.embedded.com/showArticle.jhtml?articleID=19205567 * - * Attempts were made, unsuccesfully, to contact the original + * Attempts were made, unsuccessfully, to contact the original * author of this code (Michael Morrow, Intel). Below is the original * copyright notice. * diff --git a/arch/mips/include/asm/mach-pnx833x/gpio.h b/arch/mips/include/asm/mach-pnx833x/gpio.h index 8de0eb9c98a3..ed3a88da70f6 100644 --- a/arch/mips/include/asm/mach-pnx833x/gpio.h +++ b/arch/mips/include/asm/mach-pnx833x/gpio.h @@ -24,7 +24,7 @@ /* BIG FAT WARNING: races danger! No protections exist here. Current users are only early init code, - when locking is not needed because no cuncurency yet exists there, + when locking is not needed because no concurrency yet exists there, and GPIO IRQ dispatcher, which does locking. However, if many uses will ever happen, proper locking will be needed - including locking between different uses diff --git a/arch/mips/include/asm/sgi/ioc.h b/arch/mips/include/asm/sgi/ioc.h index 343ed15f8dc4..57a971904cfe 100644 --- a/arch/mips/include/asm/sgi/ioc.h +++ b/arch/mips/include/asm/sgi/ioc.h @@ -164,7 +164,7 @@ struct sgioc_regs { u32 _unused5; u8 _write[3]; volatile u8 write; -#define SGIOC_WRITE_NTHRESH 0x01 /* use 4.5db threshhold */ +#define SGIOC_WRITE_NTHRESH 0x01 /* use 4.5db threshold */ #define SGIOC_WRITE_TPSPEED 0x02 /* use 100ohm TP speed */ #define SGIOC_WRITE_EPSEL 0x04 /* force cable mode: 1=AUI 0=TP */ #define SGIOC_WRITE_EASEL 0x08 /* 1=autoselect 0=manual cable selection */ diff --git a/arch/mips/include/asm/sibyte/sb1250_mac.h b/arch/mips/include/asm/sibyte/sb1250_mac.h index b6faf08ca81d..591b9061fd8e 100644 --- a/arch/mips/include/asm/sibyte/sb1250_mac.h +++ b/arch/mips/include/asm/sibyte/sb1250_mac.h @@ -212,7 +212,7 @@ #define G_MAC_TXD_WEIGHT1(x) _SB_GETVALUE(x, S_MAC_TXD_WEIGHT1, M_MAC_TXD_WEIGHT1) /* - * MAC Fifo Threshhold registers (Table 9-14) + * MAC Fifo Threshold registers (Table 9-14) * Register: MAC_THRSH_CFG_0 * Register: MAC_THRSH_CFG_1 * Register: MAC_THRSH_CFG_2 diff --git a/arch/mips/include/asm/sn/sn0/hubio.h b/arch/mips/include/asm/sn/sn0/hubio.h index d0c29d4de084..31c76c021bb6 100644 --- a/arch/mips/include/asm/sn/sn0/hubio.h +++ b/arch/mips/include/asm/sn/sn0/hubio.h @@ -825,7 +825,7 @@ typedef union iprb_u { struct { u64 rsvd1: 15, error: 1, /* Widget rcvd wr resp pkt w/ error */ - ovflow: 5, /* Over flow count. perf measurement */ + ovflow: 5, /* Overflow count. perf measurement */ fire_and_forget: 1, /* Launch Write without response */ mode: 2, /* Widget operation Mode */ rsvd2: 2, diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c index 24630fd8ef60..a38e3ee95515 100644 --- a/arch/mips/kernel/smtc.c +++ b/arch/mips/kernel/smtc.c @@ -1331,7 +1331,7 @@ void smtc_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu) if (!((asid += ASID_INC) & ASID_MASK) ) { if (cpu_has_vtag_icache) flush_icache_all(); - /* Traverse all online CPUs (hack requires contigous range) */ + /* Traverse all online CPUs (hack requires contiguous range) */ for_each_online_cpu(i) { /* * We don't need to worry about our own CPU, nor those of diff --git a/arch/mips/math-emu/dp_sub.c b/arch/mips/math-emu/dp_sub.c index b30c5b1f1a2c..a2127d685a0d 100644 --- a/arch/mips/math-emu/dp_sub.c +++ b/arch/mips/math-emu/dp_sub.c @@ -110,7 +110,7 @@ ieee754dp ieee754dp_sub(ieee754dp x, ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; - /* FAAL THOROUGH */ + /* FALL THROUGH */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): /* normalize ym,ye */ diff --git a/arch/mips/txx9/generic/smsc_fdc37m81x.c b/arch/mips/txx9/generic/smsc_fdc37m81x.c index a2b2d62d88e3..8ebc3848f3ac 100644 --- a/arch/mips/txx9/generic/smsc_fdc37m81x.c +++ b/arch/mips/txx9/generic/smsc_fdc37m81x.c @@ -117,7 +117,7 @@ unsigned long __init smsc_fdc37m81x_init(unsigned long port) if (chip_id == SMSC_FDC37M81X_CHIP_ID) smsc_fdc37m81x_config_end(); else { - printk(KERN_WARNING "%s: unknow chip id 0x%02x\n", __func__, + printk(KERN_WARNING "%s: unknown chip id 0x%02x\n", __func__, chip_id); g_smsc_fdc37m81x_base = 0; } diff --git a/arch/powerpc/include/asm/reg_fsl_emb.h b/arch/powerpc/include/asm/reg_fsl_emb.h index 1e180a594589..0de404dfee8b 100644 --- a/arch/powerpc/include/asm/reg_fsl_emb.h +++ b/arch/powerpc/include/asm/reg_fsl_emb.h @@ -39,7 +39,7 @@ #define PMRN_PMLCB2 0x112 /* PM Local Control B2 */ #define PMRN_PMLCB3 0x113 /* PM Local Control B3 */ -#define PMLCB_THRESHMUL_MASK 0x0700 /* Threshhold Multiple Field */ +#define PMLCB_THRESHMUL_MASK 0x0700 /* Threshold Multiple Field */ #define PMLCB_THRESHMUL_SHIFT 8 #define PMLCB_THRESHOLD_MASK 0x003f /* Threshold Field */ diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 641c74bb8e27..b6bd1eaa1c24 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -52,7 +52,7 @@ static struct hard_trap_info { 0x2030, 0x08 /* SIGFPE */ }, /* spe fp data */ { 0x2040, 0x08 /* SIGFPE */ }, /* spe fp data */ { 0x2050, 0x08 /* SIGFPE */ }, /* spe fp round */ - { 0x2060, 0x0e /* SIGILL */ }, /* performace monitor */ + { 0x2060, 0x0e /* SIGILL */ }, /* performance monitor */ { 0x2900, 0x08 /* SIGFPE */ }, /* apu unavailable */ { 0x3100, 0x0e /* SIGALRM */ }, /* fixed interval timer */ { 0x3200, 0x02 /* SIGINT */ }, /* watchdog */ diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index c3a56d65c5a9..a753b72efbc0 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -59,7 +59,7 @@ void set_thresholds(unsigned long cpu) mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TIE | THRM1_TID); /* setup THRM2, - * threshold, valid bit, enable interrupts, interrupt when above threshhold + * threshold, valid bit, enable interrupts, interrupt when above threshold */ mtspr (SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | THRM1_TIE); #else diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c index 52c98edcd703..2c9e52267292 100644 --- a/arch/powerpc/oprofile/op_model_cell.c +++ b/arch/powerpc/oprofile/op_model_cell.c @@ -1594,7 +1594,7 @@ static void cell_handle_interrupt_spu(struct pt_regs *regs, * to a latch. The new values (interrupt setting bits, reset * counter value etc.) are not copied to the actual registers * until the performance monitor is enabled. In order to get - * this to work as desired, the permormance monitor needs to + * this to work as desired, the performance monitor needs to * be disabled while writing to the latches. This is a * HW design issue. */ @@ -1668,7 +1668,7 @@ static void cell_handle_interrupt_ppu(struct pt_regs *regs, * to a latch. The new values (interrupt setting bits, reset * counter value etc.) are not copied to the actual registers * until the performance monitor is enabled. In order to get - * this to work as desired, the permormance monitor needs to + * this to work as desired, the performance monitor needs to * be disabled while writing to the latches. This is a * HW design issue. */ diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pci.c b/arch/powerpc/platforms/52xx/mpc52xx_pci.c index dd43114e9684..da110bd88346 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_pci.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_pci.c @@ -100,7 +100,7 @@ const struct of_device_id mpc52xx_pci_ids[] __initdata = { }; /* ======================================================================== */ -/* PCI configuration acess */ +/* PCI configuration access */ /* ======================================================================== */ static int diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c index e81403b245b5..ab2027cdf893 100644 --- a/arch/powerpc/platforms/powermac/pci.c +++ b/arch/powerpc/platforms/powermac/pci.c @@ -302,7 +302,7 @@ static void __init setup_chaos(struct pci_controller *hose, * 1 -> Skip the device but act as if the access was successfull * (return 0xff's on reads, eventually, cache config space * accesses in a later version) - * -1 -> Hide the device (unsuccessful acess) + * -1 -> Hide the device (unsuccessful access) */ static int u3_ht_skip_device(struct pci_controller *hose, struct pci_bus *bus, unsigned int devfn) diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index ae3c4db86fe8..bafc3f85360d 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -160,7 +160,7 @@ static int dart_build(struct iommu_table *tbl, long index, dp = ((unsigned int*)tbl->it_base) + index; - /* On U3, all memory is contigous, so we can move this + /* On U3, all memory is contiguous, so we can move this * out of the loop. */ l = npages; diff --git a/arch/s390/math-emu/math.c b/arch/s390/math-emu/math.c index 3ee78ccb617d..cd4e9c168dd7 100644 --- a/arch/s390/math-emu/math.c +++ b/arch/s390/math-emu/math.c @@ -2088,7 +2088,7 @@ int math_emu_ldr(__u8 *opcode) { __u16 opc = *((__u16 *) opcode); if ((opc & 0x90) == 0) { /* test if rx in {0,2,4,6} */ - /* we got an exception therfore ry can't be in {0,2,4,6} */ + /* we got an exception therefore ry can't be in {0,2,4,6} */ asm volatile( /* load rx from fp_regs.fprs[ry] */ " bras 1,0f\n" " ld 0,0(%1)\n" @@ -2118,7 +2118,7 @@ int math_emu_ler(__u8 *opcode) { __u16 opc = *((__u16 *) opcode); if ((opc & 0x90) == 0) { /* test if rx in {0,2,4,6} */ - /* we got an exception therfore ry can't be in {0,2,4,6} */ + /* we got an exception therefore ry can't be in {0,2,4,6} */ asm volatile( /* load rx from fp_regs.fprs[ry] */ " bras 1,0f\n" " le 0,0(%1)\n" diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h index 9d6684849fd9..278441f39856 100644 --- a/arch/x86/include/asm/desc_defs.h +++ b/arch/x86/include/asm/desc_defs.h @@ -12,9 +12,9 @@ #include /* - * FIXME: Acessing the desc_struct through its fields is more elegant, + * FIXME: Accessing the desc_struct through its fields is more elegant, * and should be the one valid thing to do. However, a lot of open code - * still touches the a and b acessors, and doing this allow us to do it + * still touches the a and b accessors, and doing this allow us to do it * incrementally. We keep the signature as a struct, rather than an union, * so we can get rid of it transparently in the future -- glommer */ diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index ede6998bd92c..91df7c51806c 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h @@ -47,7 +47,7 @@ static inline void resume_map_numa_kva(pgd_t *pgd) {} /* * generic node memory support, the following assumptions apply: * - * 1) memory comes in 64Mb contigious chunks which are either present or not + * 1) memory comes in 64Mb contiguous chunks which are either present or not * 2) we will not have more than 64Gb in total * * for now assume that 64Gb is max amount of RAM for whole system diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 80e2984f521c..b414d2b401f6 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -55,7 +55,7 @@ #define DESC_STATUS_SOURCE_TIMEOUT 3 /* - * source side threshholds at which message retries print a warning + * source side thresholds at which message retries print a warning */ #define SOURCE_TIMEOUT_LIMIT 20 #define DESTINATION_TIMEOUT_LIMIT 20 diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 67e929b89875..1c2c4838d35c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1122,7 +1122,7 @@ static int __init acpi_parse_madt_ioapic_entries(void) if (!acpi_sci_override_gsi) acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0); - /* Fill in identity legacy mapings where no override */ + /* Fill in identity legacy mappings where no override */ mp_config_acpi_legacy_irqs(); count = diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0285521e0a99..42ac5e000995 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1637,7 +1637,7 @@ retry: goto out; /* - * aperture was sucessfully enlarged by 128 MB, try + * aperture was successfully enlarged by 128 MB, try * allocation again */ goto retry; @@ -2396,7 +2396,7 @@ int __init amd_iommu_init_passthrough(void) struct pci_dev *dev = NULL; u16 devid, devid2; - /* allocate passthroug domain */ + /* allocate passthrough domain */ pt_domain = protection_domain_alloc(); if (!pt_domain) return -ENOMEM; diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index b5801c311846..35be5802ac1e 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1229,7 +1229,7 @@ x86_perf_event_set_period(struct perf_event *event, return 0; /* - * If we are way outside a reasoable range then just skip forward: + * If we are way outside a reasonable range then just skip forward: */ if (unlikely(left <= -period)) { left = period; diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7b5169d2b000..7d377379fa4a 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -514,7 +514,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, /* * Interrupts are disabled on entry as trap3 is an interrupt gate and they - * remain disabled thorough out this function. + * remain disabled throughout this function. */ static int __kprobes kprobe_handler(struct pt_regs *regs) { @@ -851,7 +851,7 @@ no_change: /* * Interrupts are disabled on entry as trap1 is an interrupt gate and they - * remain disabled thoroughout this function. + * remain disabled throughout this function. */ static int __kprobes post_kprobe_handler(struct pt_regs *regs) { diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 16ccbd77917f..d16d576beebf 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -203,7 +203,7 @@ static void disarm_kmmio_fault_page(struct kmmio_fault_page *f) */ /* * Interrupts are disabled on entry as trap3 is an interrupt gate - * and they remain disabled thorough out this function. + * and they remain disabled throughout this function. */ int kmmio_handler(struct pt_regs *regs, unsigned long addr) { @@ -302,7 +302,7 @@ no_kmmio: /* * Interrupts are disabled on entry as trap1 is an interrupt gate - * and they remain disabled thorough out this function. + * and they remain disabled throughout this function. * This must always get called as the pair to kmmio_handler(). */ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c index ca564202ed7a..58916afbbda5 100644 --- a/block/blk-iopoll.c +++ b/block/blk-iopoll.c @@ -28,7 +28,7 @@ static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll); * Description: * Add this blk_iopoll structure to the pending poll list and trigger the * raise of the blk iopoll softirq. The driver must already have gotten a - * succesful return from blk_iopoll_sched_prep() before calling this. + * successful return from blk_iopoll_sched_prep() before calling this. **/ void blk_iopoll_sched(struct blk_iopoll *iop) { diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c index 9ac4e378992e..3aadded05a05 100644 --- a/drivers/ata/ata_piix.c +++ b/drivers/ata/ata_piix.c @@ -599,7 +599,7 @@ static const struct ich_laptop ich_laptop[] = { { 0x27DF, 0x1028, 0x02b0 }, /* ICH7 on unknown Dell */ { 0x27DF, 0x1043, 0x1267 }, /* ICH7 on Asus W5F */ { 0x27DF, 0x103C, 0x30A1 }, /* ICH7 on HP Compaq nc2400 */ - { 0x27DF, 0x103C, 0x361a }, /* ICH7 on unkown HP */ + { 0x27DF, 0x103C, 0x361a }, /* ICH7 on unknown HP */ { 0x27DF, 0x1071, 0xD221 }, /* ICH7 on Hercules EC-900 */ { 0x27DF, 0x152D, 0x0778 }, /* ICH7 on unknown Intel */ { 0x24CA, 0x1025, 0x0061 }, /* ICH4 on ACER Aspire 2023WLMi */ diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index d344db42a002..0d9d2f20788a 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -43,9 +43,9 @@ enum { /* * SATA-FSL host controller supports a max. of (15+1) direct PRDEs, and * chained indirect PRDEs upto a max count of 63. - * We are allocating an array of 63 PRDEs contigiously, but PRDE#15 will + * We are allocating an array of 63 PRDEs contiguously, but PRDE#15 will * be setup as an indirect descriptor, pointing to it's next - * (contigious) PRDE. Though chained indirect PRDE arrays are + * (contiguous) PRDE. Though chained indirect PRDE arrays are * supported,it will be more efficient to use a direct PRDT and * a single chain/link to indirect PRDE array/PRDT. */ @@ -314,7 +314,7 @@ static unsigned int sata_fsl_fill_sg(struct ata_queued_cmd *qc, void *cmd_desc, u32 ttl_dwords = 0; /* - * NOTE : direct & indirect prdt's are contigiously allocated + * NOTE : direct & indirect prdt's are contiguously allocated */ struct prde *prd = (struct prde *)&((struct command_desc *) cmd_desc)->prdt; diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c index b2c1b37ab2e4..f734b345ac71 100644 --- a/drivers/atm/iphase.c +++ b/drivers/atm/iphase.c @@ -1132,7 +1132,7 @@ static int rx_pkt(struct atm_dev *dev) IF_ERR(printk(" cause: packet time out\n");) } else { - IF_ERR(printk(" cause: buffer over flow\n");) + IF_ERR(printk(" cause: buffer overflow\n");) } goto out_free_desc; } diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 979d159b5cd1..ee95c76bfd3d 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -188,7 +188,7 @@ EXPORT_SYMBOL_GPL(wait_for_device_probe); * @dev: device to try to bind to the driver * * This function returns -ENODEV if the device is not registered, - * 1 if the device is bound sucessfully and 0 otherwise. + * 1 if the device is bound successfully and 0 otherwise. * * This function must be called with @dev->sem held. When called for a * USB interface, @dev->parent->sem must be held as well. diff --git a/drivers/bluetooth/btmrvl_sdio.c b/drivers/bluetooth/btmrvl_sdio.c index 5b33b85790f2..63bfc5436799 100644 --- a/drivers/bluetooth/btmrvl_sdio.c +++ b/drivers/bluetooth/btmrvl_sdio.c @@ -535,7 +535,7 @@ static int btmrvl_sdio_card_to_host(struct btmrvl_private *priv) break; default: - BT_ERR("Unknow packet type:%d", type); + BT_ERR("Unknown packet type:%d", type); print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, payload, blksz * buf_block_len); diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index 4895f0e05322..aa0919386b8c 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -214,7 +214,7 @@ static int hci_uart_send_frame(struct sk_buff *skb) struct hci_uart *hu; if (!hdev) { - BT_ERR("Frame for uknown device (hdev=NULL)"); + BT_ERR("Frame for unknown device (hdev=NULL)"); return -ENODEV; } diff --git a/drivers/char/mem.c b/drivers/char/mem.c index a074fceb67d3..42e65cf8ab52 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -5,7 +5,7 @@ * * Added devfs support. * Jan-11-1998, C. Scott Ananian - * Shared /dev/zero mmaping support, Feb 2000, Kanoj Sarcar + * Shared /dev/zero mmapping support, Feb 2000, Kanoj Sarcar */ #include diff --git a/drivers/char/mspec.c b/drivers/char/mspec.c index 1997270bb6f4..ecb89d798e35 100644 --- a/drivers/char/mspec.c +++ b/drivers/char/mspec.c @@ -248,7 +248,7 @@ static const struct vm_operations_struct mspec_vm_ops = { /* * mspec_mmap * - * Called when mmaping the device. Initializes the vma with a fault handler + * Called when mmapping the device. Initializes the vma with a fault handler * and private data structure necessary to allocate, track, and free the * underlying pages. */ diff --git a/drivers/char/n_r3964.c b/drivers/char/n_r3964.c index 6934025a1ac1..c1d8b54c816d 100644 --- a/drivers/char/n_r3964.c +++ b/drivers/char/n_r3964.c @@ -602,7 +602,7 @@ static void receive_char(struct r3964_info *pInfo, const unsigned char c) } break; case R3964_WAIT_FOR_RX_REPEAT: - /* FALLTROUGH */ + /* FALLTHROUGH */ case R3964_IDLE: if (c == STX) { /* Prevent rx_queue from overflow: */ diff --git a/drivers/char/rio/route.h b/drivers/char/rio/route.h index 20ed73f3fd7b..46e963771c30 100644 --- a/drivers/char/rio/route.h +++ b/drivers/char/rio/route.h @@ -67,7 +67,7 @@ typedef struct COST_ROUTE COST_ROUTE; struct COST_ROUTE { unsigned char cost; /* Cost down this link */ - unsigned char route[NODE_BYTES]; /* Nodes thorough this route */ + unsigned char route[NODE_BYTES]; /* Nodes through this route */ }; typedef struct ROUTE_STR ROUTE_STR; diff --git a/drivers/crypto/hifn_795x.c b/drivers/crypto/hifn_795x.c index 5f753fc08730..09ad9154d86c 100644 --- a/drivers/crypto/hifn_795x.c +++ b/drivers/crypto/hifn_795x.c @@ -863,7 +863,7 @@ static int hifn_init_pubrng(struct hifn_device *dev) dev->dmareg |= HIFN_DMAIER_PUBDONE; hifn_write_1(dev, HIFN_1_DMA_IER, dev->dmareg); - dprintk("Chip %s: Public key engine has been sucessfully " + dprintk("Chip %s: Public key engine has been successfully " "initialised.\n", dev->name); } diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 7585c4164bd5..c52ac9efd0bf 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -99,7 +99,7 @@ static struct at_desc *atc_alloc_descriptor(struct dma_chan *chan, } /** - * atc_desc_get - get a unsused descriptor from free_list + * atc_desc_get - get an unused descriptor from free_list * @atchan: channel we want a new descriptor for */ static struct at_desc *atc_desc_get(struct at_dma_chan *atchan) diff --git a/drivers/firewire/core-topology.c b/drivers/firewire/core-topology.c index fddf2b358936..d373d17257e9 100644 --- a/drivers/firewire/core-topology.c +++ b/drivers/firewire/core-topology.c @@ -183,7 +183,7 @@ static inline struct fw_node *fw_node(struct list_head *l) * This function builds the tree representation of the topology given * by the self IDs from the latest bus reset. During the construction * of the tree, the function checks that the self IDs are valid and - * internally consistent. On succcess this function returns the + * internally consistent. On success this function returns the * fw_node corresponding to the local card otherwise NULL. */ static struct fw_node *build_tree(struct fw_card *card, diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 5cae0b3eee9b..3f7c500b2115 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -272,7 +272,7 @@ EXPORT_SYMBOL(drm_mode_object_find); * functions & device file and adds it to the master fd list. * * RETURNS: - * Zero on success, error code on falure. + * Zero on success, error code on failure. */ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, const struct drm_framebuffer_funcs *funcs) @@ -2328,7 +2328,7 @@ int drm_mode_connector_property_set_ioctl(struct drm_device *dev, } else if (connector->funcs->set_property) ret = connector->funcs->set_property(connector, property, out_resp->value); - /* store the property value if succesful */ + /* store the property value if successful */ if (!ret) drm_connector_property_set_value(connector, property, out_resp->value); out: diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index abfc27b0c2ea..a2a3fa599923 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1309,7 +1309,7 @@ out_free_list: * i915_gem_release_mmap - remove physical page mappings * @obj: obj in question * - * Preserve the reservation of the mmaping with the DRM core code, but + * Preserve the reservation of the mmapping with the DRM core code, but * relinquish ownership of the pages back to the system. * * It is vital that we remove the page mapping if we have mapped a tiled diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c index 2b0fe54cd92c..40fcf6fdef38 100644 --- a/drivers/gpu/drm/i915/intel_fb.c +++ b/drivers/gpu/drm/i915/intel_fb.c @@ -70,7 +70,7 @@ static struct drm_fb_helper_funcs intel_fb_helper_funcs = { /** - * Curretly it is assumed that the old framebuffer is reused. + * Currently it is assumed that the old framebuffer is reused. * * LOCKING * caller should hold the mode config lock. diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 083bec2e50f9..e7fa3279e2f8 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -2726,7 +2726,7 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) /* Wrap with our custom algo which switches to DDC mode */ intel_output->ddc_bus->algo = &intel_sdvo_i2c_bit_algo; - /* In defaut case sdvo lvds is false */ + /* In default case sdvo lvds is false */ intel_sdvo_get_capabilities(intel_output, &sdvo_priv->caps); if (intel_sdvo_output_setup(intel_output, diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 609719490ec2..00c739c44848 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -389,11 +389,11 @@ int r600_mc_init(struct radeon_device *rdev) * AGP so that GPU can catch out of VRAM/AGP access */ if (rdev->mc.gtt_location > rdev->mc.mc_vram_size) { - /* Enought place before */ + /* Enough place before */ rdev->mc.vram_location = rdev->mc.gtt_location - rdev->mc.mc_vram_size; } else if (tmp > rdev->mc.mc_vram_size) { - /* Enought place after */ + /* Enough place after */ rdev->mc.vram_location = rdev->mc.gtt_location + rdev->mc.gtt_size; } else { diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index b38c4c8e2c61..d10eb43645c8 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -59,7 +59,7 @@ static struct fb_ops radeonfb_ops = { }; /** - * Curretly it is assumed that the old framebuffer is reused. + * Currently it is assumed that the old framebuffer is reused. * * LOCKING * caller should hold the mode config lock. diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c index 38537d971a3e..067167cb39ca 100644 --- a/drivers/gpu/drm/radeon/radeon_state.c +++ b/drivers/gpu/drm/radeon/radeon_state.c @@ -1950,7 +1950,7 @@ static void radeon_apply_surface_regs(int surf_index, * Note that refcount can be at most 2, since during a free refcount=3 * might mean we have to allocate a new surface which might not always * be available. - * For example : we allocate three contigous surfaces ABC. If B is + * For example : we allocate three contiguous surfaces ABC. If B is * freed, we suddenly need two surfaces to store A and C, which might * not always be available. */ diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 765bd184b6fc..5a664000bf70 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -372,7 +372,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, new_mem->mem_type == TTM_PL_SYSTEM) || (old_mem->mem_type == TTM_PL_SYSTEM && new_mem->mem_type == TTM_PL_TT)) { - /* bind is enought */ + /* bind is enough */ radeon_move_null(bo, new_mem); return 0; } diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 595ac638039d..9e9826ace305 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -807,11 +807,11 @@ int rv770_mc_init(struct radeon_device *rdev) * AGP so that GPU can catch out of VRAM/AGP access */ if (rdev->mc.gtt_location > rdev->mc.mc_vram_size) { - /* Enought place before */ + /* Enough place before */ rdev->mc.vram_location = rdev->mc.gtt_location - rdev->mc.mc_vram_size; } else if (tmp > rdev->mc.mc_vram_size) { - /* Enought place after */ + /* Enough place after */ rdev->mc.vram_location = rdev->mc.gtt_location + rdev->mc.gtt_size; } else { diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index c70927ecda21..61c5572d2b91 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -427,7 +427,7 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo, /* * We need to use vmap to get the desired page protection - * or to make the buffer object look contigous. + * or to make the buffer object look contiguous. */ prot = (mem->placement & TTM_PL_FLAG_CACHED) ? PAGE_KERNEL : diff --git a/drivers/hwmon/adm1029.c b/drivers/hwmon/adm1029.c index 36718150b475..e845b75ccee4 100644 --- a/drivers/hwmon/adm1029.c +++ b/drivers/hwmon/adm1029.c @@ -432,7 +432,7 @@ static int adm1029_remove(struct i2c_client *client) } /* -function that update the status of the chips (temperature for exemple) +function that update the status of the chips (temperature for example) */ static struct adm1029_data *adm1029_update_device(struct device *dev) { diff --git a/drivers/hwmon/lm93.c b/drivers/hwmon/lm93.c index fc36cadf36fb..c48a284b8314 100644 --- a/drivers/hwmon/lm93.c +++ b/drivers/hwmon/lm93.c @@ -928,7 +928,7 @@ static void lm93_update_client_common(struct lm93_data *data, data->prochot_interval = lm93_read_byte(client, LM93_REG_PROCHOT_INTERVAL); - /* Fan Boost Termperature registers */ + /* Fan Boost Temperature registers */ for (i = 0; i < 4; i++) data->boost[i] = lm93_read_byte(client, LM93_REG_BOOST(i)); diff --git a/drivers/ieee1394/dv1394.c b/drivers/ieee1394/dv1394.c index 2cd00b5b45b4..9fd4a0d3206e 100644 --- a/drivers/ieee1394/dv1394.c +++ b/drivers/ieee1394/dv1394.c @@ -125,7 +125,7 @@ 0 - no debugging messages 1 - some debugging messages, but none during DMA frame transmission 2 - lots of messages, including during DMA frame transmission - (will cause undeflows if your machine is too slow!) + (will cause underflows if your machine is too slow!) */ #define DV1394_DEBUG_LEVEL 0 diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index 4bd39c8af80f..37d12e5efa49 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -381,7 +381,7 @@ static const ipath_err_t infinipath_hwe_htclnkbbyte1crcerr = #define IPATH_GPIO_SCL \ (1ULL << (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT)) -/* keep the code below somewhat more readonable; not used elsewhere */ +/* keep the code below somewhat more readable; not used elsewhere */ #define _IPATH_HTLINK0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr | \ infinipath_hwe_htclnkabyte1crcerr) #define _IPATH_HTLINK1_CRCBITS (infinipath_hwe_htclnkbbyte0crcerr | \ diff --git a/drivers/infiniband/hw/ipath/ipath_sd7220.c b/drivers/infiniband/hw/ipath/ipath_sd7220.c index aa47eb549520..2a68d9f624dd 100644 --- a/drivers/infiniband/hw/ipath/ipath_sd7220.c +++ b/drivers/infiniband/hw/ipath/ipath_sd7220.c @@ -614,7 +614,7 @@ static int epb_trans(struct ipath_devdata *dd, u16 reg, u64 i_val, u64 *o_vp) * @wd: Write Data - value to set in register * @mask: ones where data should be spliced into reg. * - * Basic register read/modify/write, with un-needed acesses elided. That is, + * Basic register read/modify/write, with un-needed accesses elided. That is, * a mask of zero will prevent write, while a mask of 0xFF will prevent read. * returns current (presumed, if a write was done) contents of selected * register, or <0 if errors. @@ -989,7 +989,7 @@ static struct rxeq_init { /* Set DFELTHFDR/HDR thresholds */ RXEQ_VAL(7, 8, 0, 0, 0, 0), /* FDR */ RXEQ_VAL(7, 0x21, 0, 0, 0, 0), /* HDR */ - /* Set TLTHFDR/HDR theshold */ + /* Set TLTHFDR/HDR threshold */ RXEQ_VAL(7, 9, 2, 2, 2, 2), /* FDR */ RXEQ_VAL(7, 0x23, 2, 2, 2, 2), /* HDR */ /* Set Preamp setting 2 (ZFR/ZCNT) */ diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 219b10397b4d..256a00c6aeea 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -352,7 +352,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, * anymore, so we do this only if selective signaling is off. * * Further, on 32-bit platforms, we can't use vmap() to make - * the QP buffer virtually contigious. Thus we have to use + * the QP buffer virtually contiguous. Thus we have to use * constant-sized WRs to make sure a WR is always fully within * a single page-sized chunk. * diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c index 1c9410d1822c..bcc2d30ec245 100644 --- a/drivers/input/serio/hp_sdc.c +++ b/drivers/input/serio/hp_sdc.c @@ -955,7 +955,7 @@ static int __init hp_sdc_init_hppa(struct parisc_device *d) INIT_DELAYED_WORK(&moduleloader_work, request_module_delayed); ret = hp_sdc_init(); - /* after sucessfull initialization give SDC some time to settle + /* after successfull initialization give SDC some time to settle * and then load the hp_sdc_mlc upper layer driver */ if (!ret) schedule_delayed_work(&moduleloader_work, diff --git a/drivers/input/serio/hp_sdc_mlc.c b/drivers/input/serio/hp_sdc_mlc.c index 820e51673b26..7d2b820ef58d 100644 --- a/drivers/input/serio/hp_sdc_mlc.c +++ b/drivers/input/serio/hp_sdc_mlc.c @@ -125,7 +125,7 @@ static void hp_sdc_mlc_isr (int irq, void *dev_id, break; default: - printk(KERN_WARNING PREFIX "Unkown HIL Error status (%x)!\n", data); + printk(KERN_WARNING PREFIX "Unknown HIL Error status (%x)!\n", data); break; } diff --git a/drivers/input/touchscreen/atmel-wm97xx.c b/drivers/input/touchscreen/atmel-wm97xx.c index 35377f583e28..a12242f77e23 100644 --- a/drivers/input/touchscreen/atmel-wm97xx.c +++ b/drivers/input/touchscreen/atmel-wm97xx.c @@ -59,7 +59,7 @@ #define ATMEL_WM97XX_AC97C_IRQ (29) #define ATMEL_WM97XX_GPIO_DEFAULT (32+16) /* Pin 16 on port B. */ #else -#error Unkown CPU, this driver only supports AT32AP700X CPUs. +#error Unknown CPU, this driver only supports AT32AP700X CPUs. #endif struct continuous { diff --git a/drivers/input/touchscreen/mainstone-wm97xx.c b/drivers/input/touchscreen/mainstone-wm97xx.c index 8fc3b08deb3b..6cdcf2a6e036 100644 --- a/drivers/input/touchscreen/mainstone-wm97xx.c +++ b/drivers/input/touchscreen/mainstone-wm97xx.c @@ -14,7 +14,7 @@ * * Notes: * This is a wm97xx extended touch driver to capture touch - * data in a continuous manner on the Intel XScale archictecture + * data in a continuous manner on the Intel XScale architecture * * Features: * - codecs supported:- WM9705, WM9712, WM9713 @@ -131,7 +131,7 @@ static int wm97xx_acc_pen_down(struct wm97xx *wm) /* When the AC97 queue has been drained we need to allow time * to buffer up samples otherwise we end up spinning polling * for samples. The controller can't have a suitably low - * threashold set to use the notifications it gives. + * threshold set to use the notifications it gives. */ schedule_timeout_uninterruptible(1); diff --git a/drivers/input/touchscreen/zylonite-wm97xx.c b/drivers/input/touchscreen/zylonite-wm97xx.c index 41e4359c277c..eca54dbdf493 100644 --- a/drivers/input/touchscreen/zylonite-wm97xx.c +++ b/drivers/input/touchscreen/zylonite-wm97xx.c @@ -96,7 +96,7 @@ static int wm97xx_acc_pen_down(struct wm97xx *wm) /* When the AC97 queue has been drained we need to allow time * to buffer up samples otherwise we end up spinning polling * for samples. The controller can't have a suitably low - * threashold set to use the notifications it gives. + * threshold set to use the notifications it gives. */ msleep(1); diff --git a/drivers/isdn/capi/capidrv.c b/drivers/isdn/capi/capidrv.c index 3e6d17f42a98..66b7d7a86474 100644 --- a/drivers/isdn/capi/capidrv.c +++ b/drivers/isdn/capi/capidrv.c @@ -830,7 +830,7 @@ static void handle_controller(_cmsg * cmsg) case 0: break; case 1: s = "unknown class"; break; case 2: s = "unknown function"; break; - default: s = "unkown error"; break; + default: s = "unknown error"; break; } if (s) printk(KERN_INFO "capidrv-%d: %s from controller 0x%x function %d: %s\n", diff --git a/drivers/isdn/hardware/eicon/di.c b/drivers/isdn/hardware/eicon/di.c index b029d130eb21..cb14ae3e7154 100644 --- a/drivers/isdn/hardware/eicon/di.c +++ b/drivers/isdn/hardware/eicon/di.c @@ -806,7 +806,7 @@ static void xdi_xlog_request (byte Adapter, byte Id, DELIVERY - indication entered isdn_rc function RNR=... - application had returned RNR=... after the look ahead callback - RNum=0 - aplication had not returned any buffer to copy + RNum=0 - application had not returned any buffer to copy this indication and will copy it self COMPLETE - XDI had copied the data to the buffers provided bu the application and is about to issue the diff --git a/drivers/isdn/hardware/eicon/maintidi.c b/drivers/isdn/hardware/eicon/maintidi.c index 23960cb6eaab..e7cfb3b5647f 100644 --- a/drivers/isdn/hardware/eicon/maintidi.c +++ b/drivers/isdn/hardware/eicon/maintidi.c @@ -385,7 +385,7 @@ static int SuperTraceMessageInput (void* hLib) { } break; default: - diva_mnt_internal_dprintf (0, DLI_ERR, "Unknon IDI Ind (DMA mode): %02x", Ind); + diva_mnt_internal_dprintf (0, DLI_ERR, "Unknown IDI Ind (DMA mode): %02x", Ind); } p += (this_ind_length+1); total_length -= (4 + this_ind_length); @@ -420,7 +420,7 @@ static int SuperTraceMessageInput (void* hLib) { } break; default: - diva_mnt_internal_dprintf (0, DLI_ERR, "Unknon IDI Ind: %02x", Ind); + diva_mnt_internal_dprintf (0, DLI_ERR, "Unknown IDI Ind: %02x", Ind); } } } diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c index fc46a26cb14f..a64bb6c67ba7 100644 --- a/drivers/isdn/hardware/mISDN/hfcsusb.c +++ b/drivers/isdn/hardware/mISDN/hfcsusb.c @@ -721,7 +721,7 @@ hfcsusb_setup_bch(struct bchannel *bch, int protocol) switch (protocol) { case (-1): /* used for init */ bch->state = -1; - /* fall trough */ + /* fall through */ case (ISDN_P_NONE): if (bch->state == ISDN_P_NONE) return 0; /* already in idle state */ diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.h b/drivers/isdn/hardware/mISDN/hfcsusb.h index 43efe7358fa3..369196adae03 100644 --- a/drivers/isdn/hardware/mISDN/hfcsusb.h +++ b/drivers/isdn/hardware/mISDN/hfcsusb.h @@ -150,7 +150,7 @@ symbolic(struct hfcusb_symbolic_list list[], const int num) for (i = 0; list[i].name != NULL; i++) if (list[i].num == num) return list[i].name; - return ""; + return ""; } /* USB descriptor need to contain one of the following EndPoint combination: */ diff --git a/drivers/isdn/hardware/mISDN/mISDNisar.c b/drivers/isdn/hardware/mISDN/mISDNisar.c index de352a17673a..09095c747110 100644 --- a/drivers/isdn/hardware/mISDN/mISDNisar.c +++ b/drivers/isdn/hardware/mISDN/mISDNisar.c @@ -860,7 +860,7 @@ isar_pump_statev_modem(struct isar_ch *ch, u8 devt) { pr_debug("%s: pump stev GSTN CLEAR\n", ch->is->name); break; default: - pr_info("u%s: nknown pump stev %x\n", ch->is->name, devt); + pr_info("u%s: unknown pump stev %x\n", ch->is->name, devt); break; } } diff --git a/drivers/isdn/hisax/hfc_usb.c b/drivers/isdn/hisax/hfc_usb.c index 9de54202c90c..ad5831f37d84 100644 --- a/drivers/isdn/hisax/hfc_usb.c +++ b/drivers/isdn/hisax/hfc_usb.c @@ -1086,7 +1086,7 @@ hfc_usb_l2l1(struct hisax_if *my_hisax_if, int pr, void *arg) break; default: DBG(HFCUSB_DBG_STATES, - "HFC_USB: hfc_usb_d_l2l1: unkown state : %#x", pr); + "HFC_USB: hfc_usb_d_l2l1: unknown state : %#x", pr); break; } } diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c index 2d14b64202a3..0f4ea7d16a15 100644 --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c @@ -836,7 +836,7 @@ isdn_ppp_write(int min, struct file *file, const char __user *buf, int count) unsigned short hl; struct sk_buff *skb; /* - * we need to reserve enought space in front of + * we need to reserve enough space in front of * sk_buff. old call to dev_alloc_skb only reserved * 16 bytes, now we are looking what the driver want */ @@ -1326,7 +1326,7 @@ isdn_ppp_xmit(struct sk_buff *skb, struct net_device *netdev) struct sk_buff *new_skb; unsigned short hl; /* - * we need to reserve enought space in front of + * we need to reserve enough space in front of * sk_buff. old call to dev_alloc_skb only reserved * 16 bytes, now we are looking what the driver want. */ @@ -1685,7 +1685,7 @@ static void isdn_ppp_mp_receive(isdn_net_dev * net_dev, isdn_net_local * lp, * * try to accomplish several tasks: * - reassemble any complete fragment sequence (non-null 'start' - * indicates there is a continguous sequence present) + * indicates there is a contiguous sequence present) * - discard any incomplete sequences that are below minseq -- due * to the fact that sender always increment sequence number, if there * is an incomplete sequence below minseq, no new fragments would diff --git a/drivers/isdn/i4l/isdn_ttyfax.c b/drivers/isdn/i4l/isdn_ttyfax.c index 78f7660c1d0e..4c41f191d4e2 100644 --- a/drivers/isdn/i4l/isdn_ttyfax.c +++ b/drivers/isdn/i4l/isdn_ttyfax.c @@ -470,7 +470,7 @@ isdn_tty_cmd_FCLASS2(char **p, modem_info * info) } return 0; } - /* BADMUL=value - dummy 0=disable errorchk disabled (treshold multiplier) */ + /* BADMUL=value - dummy 0=disable errorchk disabled (threshold multiplier) */ if (!strncmp(p[0], "BADMUL", 6)) { p[0] += 6; switch (*p[0]) { diff --git a/drivers/isdn/mISDN/dsp_core.c b/drivers/isdn/mISDN/dsp_core.c index 77ee2867c8b4..43ff4d3b046e 100644 --- a/drivers/isdn/mISDN/dsp_core.c +++ b/drivers/isdn/mISDN/dsp_core.c @@ -110,7 +110,7 @@ * crossconnections and conferences via software if not possible through * hardware. If hardware capability is available, hardware is used. * - * Echo: Is generated by CMX and is used to check performane of hard and + * Echo: Is generated by CMX and is used to check performance of hard and * software CMX. * * The CMX has special functions for conferences with one, two and more diff --git a/drivers/isdn/mISDN/tei.c b/drivers/isdn/mISDN/tei.c index e04bad6c5baf..6d4da6095885 100644 --- a/drivers/isdn/mISDN/tei.c +++ b/drivers/isdn/mISDN/tei.c @@ -725,7 +725,7 @@ tei_id_ver_tout_net(struct FsmInst *fi, int event, void *arg) if (tm->rcnt == 1) { if (*debug & DEBUG_L2_TEI) tm->tei_m.printdebug(fi, - "check req for tei %d sucessful\n", tm->l2->tei); + "check req for tei %d successful\n", tm->l2->tei); mISDN_FsmChangeState(fi, ST_TEI_NOP); } else if (tm->rcnt > 1) { /* duplicate assignment; remove */ diff --git a/drivers/macintosh/therm_windtunnel.c b/drivers/macintosh/therm_windtunnel.c index 8b9364434aa0..3fbe41b0ac07 100644 --- a/drivers/macintosh/therm_windtunnel.c +++ b/drivers/macintosh/therm_windtunnel.c @@ -15,7 +15,7 @@ * * WARNING: This driver has only been testen on Apple's * 1.25 MHz Dual G4 (March 03). It is tuned for a CPU - * temperatur around 57 C. + * temperature around 57 C. * * Copyright (C) 2003, 2004 Samuel Rydh (samuel@ibrium.se) * diff --git a/drivers/media/common/saa7146_i2c.c b/drivers/media/common/saa7146_i2c.c index 7e8f56815998..48cb154c7a46 100644 --- a/drivers/media/common/saa7146_i2c.c +++ b/drivers/media/common/saa7146_i2c.c @@ -98,7 +98,7 @@ static int saa7146_i2c_msg_cleanup(const struct i2c_msg *m, int num, __le32 *op) op_count++; - /* loop throgh all bytes of message i */ + /* loop through all bytes of message i */ for(j = 0; j < m[i].len; j++) { /* write back all bytes that could have been read */ m[i].buf[j] = (le32_to_cpu(op[op_count/3]) >> ((3-(op_count%3))*8)); diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.h b/drivers/media/dvb/dvb-core/dvb_frontend.h index 810f07d63246..52e4ce4304ee 100644 --- a/drivers/media/dvb/dvb-core/dvb_frontend.h +++ b/drivers/media/dvb/dvb-core/dvb_frontend.h @@ -160,7 +160,7 @@ struct tuner_state { * search callback possible return status * * DVBFE_ALGO_SEARCH_SUCCESS - * The frontend search algorithm completed and returned succesfully + * The frontend search algorithm completed and returned successfully * * DVBFE_ALGO_SEARCH_ASLEEP * The frontend search algorithm is sleeping diff --git a/drivers/media/dvb/dvb-usb/anysee.c b/drivers/media/dvb/dvb-usb/anysee.c index 2ae7f648effe..bb69f3719f9a 100644 --- a/drivers/media/dvb/dvb-usb/anysee.c +++ b/drivers/media/dvb/dvb-usb/anysee.c @@ -344,7 +344,7 @@ static int anysee_frontend_attach(struct dvb_usb_adapter *adap) if (ret) return ret; - err("Unkown Anysee version: %02x %02x %02x. "\ + err("Unknown Anysee version: %02x %02x %02x. "\ "Please report the .", hw_info[0], hw_info[1], hw_info[2]); diff --git a/drivers/media/dvb/dvb-usb/dibusb-mb.c b/drivers/media/dvb/dvb-usb/dibusb-mb.c index eeef50bff4f9..5c0126dc1ff9 100644 --- a/drivers/media/dvb/dvb-usb/dibusb-mb.c +++ b/drivers/media/dvb/dvb-usb/dibusb-mb.c @@ -242,7 +242,7 @@ static struct dvb_usb_device_properties dibusb1_1_properties = { { &dibusb_dib3000mb_table[9], &dibusb_dib3000mb_table[11], NULL }, { &dibusb_dib3000mb_table[10], &dibusb_dib3000mb_table[12], NULL }, }, - { "Unkown USB1.1 DVB-T device ???? please report the name to the author", + { "Unknown USB1.1 DVB-T device ???? please report the name to the author", { &dibusb_dib3000mb_table[13], NULL }, { &dibusb_dib3000mb_table[14], NULL }, }, diff --git a/drivers/media/dvb/dvb-usb/dvb-usb-remote.c b/drivers/media/dvb/dvb-usb/dvb-usb-remote.c index edde87c6aa3a..6b5ded9e7d5d 100644 --- a/drivers/media/dvb/dvb-usb/dvb-usb-remote.c +++ b/drivers/media/dvb/dvb-usb/dvb-usb-remote.c @@ -259,7 +259,7 @@ int dvb_usb_nec_rc_key_to_event(struct dvb_usb_device *d, *state = REMOTE_KEY_REPEAT; break; default: - deb_err("unkown type of remote status: %d\n",keybuf[0]); + deb_err("unknown type of remote status: %d\n",keybuf[0]); break; } return 0; diff --git a/drivers/media/dvb/dvb-usb/usb-urb.c b/drivers/media/dvb/dvb-usb/usb-urb.c index 9da2cc95ca13..f9702e3756b6 100644 --- a/drivers/media/dvb/dvb-usb/usb-urb.c +++ b/drivers/media/dvb/dvb-usb/usb-urb.c @@ -56,7 +56,7 @@ static void usb_urb_complete(struct urb *urb) stream->complete(stream, b, urb->actual_length); break; default: - err("unkown endpoint type in completition handler."); + err("unknown endpoint type in completition handler."); return; } usb_submit_urb(urb,GFP_ATOMIC); @@ -228,7 +228,7 @@ int usb_urb_init(struct usb_data_stream *stream, struct usb_data_stream_properti case USB_ISOC: return usb_isoc_urb_init(stream); default: - err("unkown URB-type for data transfer."); + err("unknown URB-type for data transfer."); return -EINVAL; } } diff --git a/drivers/media/dvb/frontends/au8522_decoder.c b/drivers/media/dvb/frontends/au8522_decoder.c index 74981ee923c8..7c6431fe33e0 100644 --- a/drivers/media/dvb/frontends/au8522_decoder.c +++ b/drivers/media/dvb/frontends/au8522_decoder.c @@ -315,7 +315,7 @@ static void setup_decoder_defaults(struct au8522_state *state, u8 input_mode) if (input_mode == AU8522_INPUT_CONTROL_REG081H_SVIDEO_CH13 || input_mode == AU8522_INPUT_CONTROL_REG081H_SVIDEO_CH24) { /* Despite what the table says, for the HVR-950q we still need - to be in CVBS mode for the S-Video input (reason uknown). */ + to be in CVBS mode for the S-Video input (reason unknown). */ /* filter_coef_type = 3; */ filter_coef_type = 5; } else { diff --git a/drivers/media/dvb/frontends/cx24110.c b/drivers/media/dvb/frontends/cx24110.c index ffbcfabd83f0..00a4e8f03304 100644 --- a/drivers/media/dvb/frontends/cx24110.c +++ b/drivers/media/dvb/frontends/cx24110.c @@ -1,4 +1,4 @@ - /* +/* cx24110 - Single Chip Satellite Channel Receiver driver module Copyright (C) 2002 Peter Hettkamp based on @@ -96,7 +96,7 @@ static struct {u8 reg; u8 data;} cx24110_regdata[]= {0x42,0x00}, /* @ middle bytes " */ {0x43,0x00}, /* @ LSB " */ /* leave the carrier tracking loop parameters on default */ - /* leave the bit timing loop parameters at gefault */ + /* leave the bit timing loop parameters at default */ {0x56,0x4d}, /* set the filtune voltage to 2.7V, as recommended by */ /* the cx24108 data sheet for symbol rates above 15MS/s */ {0x57,0x00}, /* @ Filter sigma delta enabled, positive */ diff --git a/drivers/media/dvb/frontends/cx24113.c b/drivers/media/dvb/frontends/cx24113.c index 075b2b57cf09..e9ee55592fd3 100644 --- a/drivers/media/dvb/frontends/cx24113.c +++ b/drivers/media/dvb/frontends/cx24113.c @@ -589,7 +589,7 @@ struct dvb_frontend *cx24113_attach(struct dvb_frontend *fe, info("detected CX24113 variant\n"); break; case REV_CX24113: - info("sucessfully detected\n"); + info("successfully detected\n"); break; default: err("unsupported device id: %x\n", state->rev); diff --git a/drivers/media/dvb/frontends/dib3000mb.c b/drivers/media/dvb/frontends/dib3000mb.c index 136b9d2164d7..ad4c8cfd8090 100644 --- a/drivers/media/dvb/frontends/dib3000mb.c +++ b/drivers/media/dvb/frontends/dib3000mb.c @@ -154,7 +154,7 @@ static int dib3000mb_set_frontend(struct dvb_frontend* fe, case BANDWIDTH_AUTO: return -EOPNOTSUPP; default: - err("unkown bandwidth value."); + err("unknown bandwidth value."); return -EINVAL; } } diff --git a/drivers/media/dvb/frontends/lgdt330x.c b/drivers/media/dvb/frontends/lgdt330x.c index 056387b41a8f..43971e63baa7 100644 --- a/drivers/media/dvb/frontends/lgdt330x.c +++ b/drivers/media/dvb/frontends/lgdt330x.c @@ -479,7 +479,7 @@ static int lgdt3302_read_status(struct dvb_frontend* fe, fe_status_t* status) switch (state->current_modulation) { case QAM_256: case QAM_64: - /* Need to undestand why there are 3 lock levels here */ + /* Need to understand why there are 3 lock levels here */ if ((buf[0] & 0x07) == 0x07) *status |= FE_HAS_CARRIER; break; @@ -520,7 +520,7 @@ static int lgdt3303_read_status(struct dvb_frontend* fe, fe_status_t* status) switch (state->current_modulation) { case QAM_256: case QAM_64: - /* Need to undestand why there are 3 lock levels here */ + /* Need to understand why there are 3 lock levels here */ if ((buf[0] & 0x07) == 0x07) *status |= FE_HAS_CARRIER; else diff --git a/drivers/media/dvb/frontends/stb0899_drv.c b/drivers/media/dvb/frontends/stb0899_drv.c index a04c782fff8d..1570669837ea 100644 --- a/drivers/media/dvb/frontends/stb0899_drv.c +++ b/drivers/media/dvb/frontends/stb0899_drv.c @@ -1571,7 +1571,7 @@ static enum dvbfe_search stb0899_search(struct dvb_frontend *fe, struct dvb_fron * stb0899_track * periodically check the signal level against a specified * threshold level and perform derotator centering. - * called once we have a lock from a succesful search + * called once we have a lock from a successful search * event. * * Will be called periodically called to maintain the diff --git a/drivers/media/dvb/ttpci/av7110.c b/drivers/media/dvb/ttpci/av7110.c index 8d65c652ba50..baf3159a3aa6 100644 --- a/drivers/media/dvb/ttpci/av7110.c +++ b/drivers/media/dvb/ttpci/av7110.c @@ -2425,7 +2425,7 @@ static int __devinit av7110_attach(struct saa7146_dev* dev, * use 0x15 to track VPE interrupts - increase by 1 every vpeirq() is called */ saa7146_write(dev, EC1SSR, (0x03<<2) | 3 ); - /* set event counter 1 treshold to maximum allowed value (rEC p55) */ + /* set event counter 1 threshold to maximum allowed value (rEC p55) */ saa7146_write(dev, ECT1R, 0x3fff ); #endif /* Set RPS1 Address register to point to RPS code (r108 p42) */ @@ -2559,7 +2559,7 @@ static int __devinit av7110_attach(struct saa7146_dev* dev, * use 0x15 to track VPE interrupts - increase by 1 every vpeirq() is called */ saa7146_write(dev, EC1SSR, (0x03<<2) | 3 ); - /* set event counter 1 treshold to maximum allowed value (rEC p55) */ + /* set event counter 1 threshold to maximum allowed value (rEC p55) */ saa7146_write(dev, ECT1R, 0x3fff ); #endif /* Setup BUDGETPATCH MAIN RPS1 "program" (p35) */ diff --git a/drivers/media/dvb/ttpci/budget-patch.c b/drivers/media/dvb/ttpci/budget-patch.c index 60136688a9a4..9c92f9ddd223 100644 --- a/drivers/media/dvb/ttpci/budget-patch.c +++ b/drivers/media/dvb/ttpci/budget-patch.c @@ -456,7 +456,7 @@ static int budget_patch_attach (struct saa7146_dev* dev, struct saa7146_pci_exte // use 0x03 to track RPS1 interrupts - increase by 1 every gpio3 is toggled // use 0x15 to track VPE interrupts - increase by 1 every vpeirq() is called saa7146_write(dev, EC1SSR, (0x03<<2) | 3 ); - // set event counter 1 treshold to maximum allowed value (rEC p55) + // set event counter 1 threshold to maximum allowed value (rEC p55) saa7146_write(dev, ECT1R, 0x3fff ); #endif // Fix VSYNC level diff --git a/drivers/media/radio/radio-mr800.c b/drivers/media/radio/radio-mr800.c index a1239083472d..5f79acb56e48 100644 --- a/drivers/media/radio/radio-mr800.c +++ b/drivers/media/radio/radio-mr800.c @@ -28,7 +28,7 @@ * http://av-usbradio.sourceforge.net/index.php * http://sourceforge.net/projects/av-usbradio/ * Latest release of theirs project was in 2005. - * Probably, this driver could be improved trough using their + * Probably, this driver could be improved through using their * achievements (specifications given). * Also, Faidon Liambotis wrote nice driver for this radio * in 2007. He allowed to use his driver to improve current mr800 radio driver. diff --git a/drivers/media/video/cx231xx/cx231xx-avcore.c b/drivers/media/video/cx231xx/cx231xx-avcore.c index 28f48f41f218..c2174413ab29 100644 --- a/drivers/media/video/cx231xx/cx231xx-avcore.c +++ b/drivers/media/video/cx231xx/cx231xx-avcore.c @@ -2414,9 +2414,11 @@ int cx231xx_gpio_i2c_read_ack(struct cx231xx *dev) cx231xx_info("No ACK after %d msec -GPIO I2C failed!", nInit * 10); - /* readAck - throuth clock stretch ,slave has given a SCL signal, - so the SDA data can be directly read. */ + /* + * readAck + * through clock stretch, slave has given a SCL signal, + * so the SDA data can be directly read. + */ status = cx231xx_get_gpio_bit(dev, dev->gpio_dir, (u8 *)&dev->gpio_val); if ((dev->gpio_val & 1 << dev->board.tuner_sda_gpio) == 0) { diff --git a/drivers/media/video/cx23885/cx23885-dvb.c b/drivers/media/video/cx23885/cx23885-dvb.c index 45e13ee66dc7..16c6a921f40b 100644 --- a/drivers/media/video/cx23885/cx23885-dvb.c +++ b/drivers/media/video/cx23885/cx23885-dvb.c @@ -940,7 +940,7 @@ int cx23885_dvb_register(struct cx23885_tsport *port) int err, i; /* Here we need to allocate the correct number of frontends, - * as reflected in the cards struct. The reality is that currrently + * as reflected in the cards struct. The reality is that currently * no cx23885 boards support this - yet. But, if we don't modify this * code then the second frontend would never be allocated (later) * and fail with error before the attach in dvb_register(). diff --git a/drivers/media/video/cx88/cx88-core.c b/drivers/media/video/cx88/cx88-core.c index cf634606ba9a..b35411160f04 100644 --- a/drivers/media/video/cx88/cx88-core.c +++ b/drivers/media/video/cx88/cx88-core.c @@ -624,7 +624,7 @@ int cx88_reset(struct cx88_core *core) /* setup image format */ cx_andor(MO_COLOR_CTRL, 0x4000, 0x4000); - /* setup FIFO Threshholds */ + /* setup FIFO Thresholds */ cx_write(MO_PDMA_STHRSH, 0x0807); cx_write(MO_PDMA_DTHRSH, 0x0807); diff --git a/drivers/media/video/davinci/dm355_ccdc.c b/drivers/media/video/davinci/dm355_ccdc.c index 56fbefe036ae..314390016370 100644 --- a/drivers/media/video/davinci/dm355_ccdc.c +++ b/drivers/media/video/davinci/dm355_ccdc.c @@ -285,7 +285,7 @@ static int validate_ccdc_param(struct ccdc_config_params_raw *ccdcparam) if ((ccdcparam->med_filt_thres < 0) || (ccdcparam->med_filt_thres > CCDC_MED_FILT_THRESH)) { - dev_dbg(dev, "Invalid value of median filter thresold\n"); + dev_dbg(dev, "Invalid value of median filter threshold\n"); return -EINVAL; } diff --git a/drivers/media/video/davinci/vpss.c b/drivers/media/video/davinci/vpss.c index 6d709ca8cfb0..453236bd7559 100644 --- a/drivers/media/video/davinci/vpss.c +++ b/drivers/media/video/davinci/vpss.c @@ -53,7 +53,7 @@ struct vpss_hw_ops { int (*enable_clock)(enum vpss_clock_sel clock_sel, int en); /* select input to ccdc */ void (*select_ccdc_source)(enum vpss_ccdc_source_sel src_sel); - /* clear wbl overlflow bit */ + /* clear wbl overflow bit */ int (*clear_wbl_overflow)(enum vpss_wbl_sel wbl_sel); }; diff --git a/drivers/media/video/gspca/sonixb.c b/drivers/media/video/gspca/sonixb.c index cf3af8de6e97..e39efb45fa1c 100644 --- a/drivers/media/video/gspca/sonixb.c +++ b/drivers/media/video/gspca/sonixb.c @@ -304,7 +304,7 @@ static const __u8 initOv6650[] = { }; static const __u8 ov6650_sensor_init[][8] = { - /* Bright, contrast, etc are set througth SCBB interface. + /* Bright, contrast, etc are set through SCBB interface. * AVCAP on win2 do not send any data on this controls. */ /* Anyway, some registers appears to alter bright and constrat */ diff --git a/drivers/media/video/gspca/spca500.c b/drivers/media/video/gspca/spca500.c index fab7ef85a6c1..7dbd5eea6cc0 100644 --- a/drivers/media/video/gspca/spca500.c +++ b/drivers/media/video/gspca/spca500.c @@ -589,7 +589,7 @@ static void spca500_reinit(struct gspca_dev *gspca_dev) int err; __u8 Data; - /* some unknow command from Aiptek pocket dv and family300 */ + /* some unknown command from Aiptek pocket dv and family300 */ reg_w(gspca_dev, 0x00, 0x0d01, 0x01); reg_w(gspca_dev, 0x00, 0x0d03, 0x00); diff --git a/drivers/media/video/gspca/spca501.c b/drivers/media/video/gspca/spca501.c index b74a34218da0..66f9f0056146 100644 --- a/drivers/media/video/gspca/spca501.c +++ b/drivers/media/video/gspca/spca501.c @@ -1636,7 +1636,7 @@ static const __u16 spca501c_arowana_init_data[][3] = { {} }; -/* Unknow camera from Ori Usbid 0x0000:0x0000 */ +/* Unknown camera from Ori Usbid 0x0000:0x0000 */ /* Based on snoops from Ori Cohen */ static const __u16 spca501c_mysterious_open_data[][3] = { {0x02, 0x000f, 0x0005}, @@ -1945,7 +1945,7 @@ static int sd_init(struct gspca_dev *gspca_dev) goto error; break; case MystFromOriUnknownCamera: - /* UnKnow Ori CMOS Camera data */ + /* Unknown Ori CMOS Camera data */ if (write_vector(gspca_dev, spca501c_mysterious_open_data)) goto error; break; @@ -1978,7 +1978,7 @@ static int sd_start(struct gspca_dev *gspca_dev) write_vector(gspca_dev, spca501c_arowana_open_data); break; case MystFromOriUnknownCamera: - /* UnKnow CMOS Camera data */ + /* Unknown CMOS Camera data */ write_vector(gspca_dev, spca501c_mysterious_init_data); break; default: diff --git a/drivers/media/video/gspca/sunplus.c b/drivers/media/video/gspca/sunplus.c index aa8f995ce04e..1a9af2ebdbef 100644 --- a/drivers/media/video/gspca/sunplus.c +++ b/drivers/media/video/gspca/sunplus.c @@ -631,7 +631,7 @@ static void spca504A_acknowledged_command(struct gspca_dev *gspca_dev, count = 200; while (--count > 0) { msleep(10); - /* gsmart mini2 write a each wait setting 1 ms is enought */ + /* gsmart mini2 write a each wait setting 1 ms is enough */ /* reg_w_riv(dev, req, idx, val); */ status = reg_r_12(gspca_dev, 0x01, 0x0001, 1); if (status == endcode) { diff --git a/drivers/media/video/gspca/zc3xx.c b/drivers/media/video/gspca/zc3xx.c index cdf3357b4c9f..49c3c1226e0e 100644 --- a/drivers/media/video/gspca/zc3xx.c +++ b/drivers/media/video/gspca/zc3xx.c @@ -7065,7 +7065,7 @@ static int sd_config(struct gspca_dev *gspca_dev, break; default: PDEBUG(D_PROBE, - "Sensor UNKNOW_0 force Tas5130"); + "Sensor UNKNOWN_0 force Tas5130"); sd->sensor = SENSOR_TAS5130CXX; } break; diff --git a/drivers/media/video/pvrusb2/pvrusb2-hdw-internal.h b/drivers/media/video/pvrusb2/pvrusb2-hdw-internal.h index 5b152ff20bd0..5fcad28211d2 100644 --- a/drivers/media/video/pvrusb2/pvrusb2-hdw-internal.h +++ b/drivers/media/video/pvrusb2/pvrusb2-hdw-internal.h @@ -332,7 +332,7 @@ struct pvr2_hdw { /* Bit mask of PVR2_CVAL_INPUT choices which are valid for the hardware */ unsigned int input_avail_mask; - /* Bit mask of PVR2_CVAL_INPUT choices which are currenly allowed */ + /* Bit mask of PVR2_CVAL_INPUT choices which are currently allowed */ unsigned int input_allowed_mask; /* Location of eeprom or a negative number if none */ diff --git a/drivers/media/video/s2255drv.c b/drivers/media/video/s2255drv.c index 9e3262c0ba37..a4c84368eb10 100644 --- a/drivers/media/video/s2255drv.c +++ b/drivers/media/video/s2255drv.c @@ -1985,7 +1985,7 @@ static int save_frame(struct s2255_dev *dev, struct s2255_pipeinfo *pipe_info) wake_up(&dev->fw_data->wait_fw); break; default: - printk(KERN_INFO "s2255 unknwn resp\n"); + printk(KERN_INFO "s2255 unknown resp\n"); } default: pdata++; diff --git a/drivers/media/video/zoran/zoran.h b/drivers/media/video/zoran/zoran.h index d439c76b27e1..cb1de7ea197a 100644 --- a/drivers/media/video/zoran/zoran.h +++ b/drivers/media/video/zoran/zoran.h @@ -106,7 +106,7 @@ struct zoran_params { unsigned long jpeg_markers; /* Which markers should go into the JPEG output. * Unless you exactly know what you do, leave them untouched. * Inluding less markers will make the resulting code - * smaller, but there will be fewer aplications + * smaller, but there will be fewer applications * which can read it. * The presence of the APP and COM marker is * influenced by APP0_len and COM_len ONLY! */ diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index d505b68cd372..e39986a78273 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -940,7 +940,7 @@ static const struct block_device_operations i2o_block_fops = { * Allocate memory for the i2o_block_device struct, gendisk and request * queue and initialize them as far as no additional information is needed. * - * Returns a pointer to the allocated I2O Block device on succes or a + * Returns a pointer to the allocated I2O Block device on success or a * negative error code on failure. */ static struct i2o_block_device *i2o_block_device_alloc(void) diff --git a/drivers/message/i2o/iop.c b/drivers/message/i2o/iop.c index 27cf4af0e13d..e5ab62141503 100644 --- a/drivers/message/i2o/iop.c +++ b/drivers/message/i2o/iop.c @@ -132,7 +132,7 @@ u32 i2o_cntxt_list_add(struct i2o_controller * c, void *ptr) * Removes a previously added pointer from the context list and returns * the matching context id. * - * Returns context id on succes or 0 on failure. + * Returns context id on success or 0 on failure. */ u32 i2o_cntxt_list_remove(struct i2o_controller * c, void *ptr) { @@ -198,7 +198,7 @@ void *i2o_cntxt_list_get(struct i2o_controller *c, u32 context) * @c: controller to which the context list belong * @ptr: pointer to which the context id should be fetched * - * Returns context id which matches to the pointer on succes or 0 on + * Returns context id which matches to the pointer on success or 0 on * failure. */ u32 i2o_cntxt_list_get_ptr(struct i2o_controller * c, void *ptr) diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c index 41c8fe2a928c..ce5eda985ab0 100644 --- a/drivers/misc/sgi-gru/grufile.c +++ b/drivers/misc/sgi-gru/grufile.c @@ -92,7 +92,7 @@ static void gru_vma_close(struct vm_area_struct *vma) /* * gru_file_mmap * - * Called when mmaping the device. Initializes the vma with a fault handler + * Called when mmapping the device. Initializes the vma with a fault handler * and private data structure necessary to allocate, track, and free the * underlying pages. */ diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c index 99b74a351020..941a4d35ef8d 100644 --- a/drivers/mmc/host/s3cmci.c +++ b/drivers/mmc/host/s3cmci.c @@ -1360,7 +1360,7 @@ static struct mmc_host_ops s3cmci_ops = { static struct s3c24xx_mci_pdata s3cmci_def_pdata = { /* This is currently here to avoid a number of if (host->pdata) - * checks. Any zero fields to ensure reaonable defaults are picked. */ + * checks. Any zero fields to ensure reasonable defaults are picked. */ }; #ifdef CONFIG_CPU_FREQ diff --git a/drivers/mtd/devices/slram.c b/drivers/mtd/devices/slram.c index 3aa05cd18ea1..592016a0668f 100644 --- a/drivers/mtd/devices/slram.c +++ b/drivers/mtd/devices/slram.c @@ -18,7 +18,7 @@ to specify the offset instead of the absolute address NOTE: - With slram it's only possible to map a contigous memory region. Therfore + With slram it's only possible to map a contiguous memory region. Therefore if there's a device mapped somewhere in the region specified slram will fail to load (see kernel log if modprobe fails). diff --git a/drivers/mtd/nand/diskonchip.c b/drivers/mtd/nand/diskonchip.c index e51c1ed7ac18..b126cf887476 100644 --- a/drivers/mtd/nand/diskonchip.c +++ b/drivers/mtd/nand/diskonchip.c @@ -1056,7 +1056,7 @@ static struct nand_ecclayout doc200x_oobinfo = { }; /* Find the (I)NFTL Media Header, and optionally also the mirror media header. - On sucessful return, buf will contain a copy of the media header for + On successful return, buf will contain a copy of the media header for further processing. id is the string to scan for, and will presumably be either "ANAND" or "BNAND". If findmirror=1, also look for the mirror media header. The page #s of the found media headers are placed in mh0_page and diff --git a/drivers/mtd/nand/nand_ecc.c b/drivers/mtd/nand/nand_ecc.c index db7ae9d6a296..92320a643275 100644 --- a/drivers/mtd/nand/nand_ecc.c +++ b/drivers/mtd/nand/nand_ecc.c @@ -475,7 +475,7 @@ int __nand_correct_data(unsigned char *buf, * * The b2 shift is there to get rid of the lowest two bits. * We could also do addressbits[b2] >> 1 but for the - * performace it does not make any difference + * performance it does not make any difference */ if (eccsize_mult == 1) byte_addr = (addressbits[b1] << 4) + addressbits[b0]; diff --git a/drivers/mtd/nand/s3c2410.c b/drivers/mtd/nand/s3c2410.c index 11dc7e69c4fb..68b5b3a486a9 100644 --- a/drivers/mtd/nand/s3c2410.c +++ b/drivers/mtd/nand/s3c2410.c @@ -875,7 +875,7 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info, * @info: The controller instance. * @nmtd: The driver version of the MTD instance. * - * This routine is called after the chip probe has succesfully completed + * This routine is called after the chip probe has successfully completed * and the relevant per-chip information updated. This call ensure that * we update the internal state accordingly. * diff --git a/drivers/net/82596.c b/drivers/net/82596.c index ea6b139b812c..1663bc9e45de 100644 --- a/drivers/net/82596.c +++ b/drivers/net/82596.c @@ -19,7 +19,7 @@ TBD: * look at deferring rx frames rather than discarding (as per tulip) * handle tx ring full as per tulip - * performace test to tune rx_copybreak + * performance test to tune rx_copybreak Most of my modifications relate to the braindead big-endian implementation by Intel. When the i596 is operating in diff --git a/drivers/net/amd8111e.c b/drivers/net/amd8111e.c index 4e6359fff0e1..766aabfdfc75 100644 --- a/drivers/net/amd8111e.c +++ b/drivers/net/amd8111e.c @@ -1633,8 +1633,13 @@ static int amd8111e_enable_link_change(struct amd8111e_priv* lp) readl(lp->mmio + CMD7); return 0; } -/* This function is called when a packet transmission fails to complete within a resonable period, on the assumption that an interrupts have been failed or the interface is locked up. This function will reinitialize the hardware */ +/* + * This function is called when a packet transmission fails to complete + * within a reasonable period, on the assumption that an interrupt have + * failed or the interface is locked up. This function will reinitialize + * the hardware. + */ static void amd8111e_tx_timeout(struct net_device *dev) { struct amd8111e_priv* lp = netdev_priv(dev); diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c index b5dc7f550725..9d828aed968a 100644 --- a/drivers/net/appletalk/cops.c +++ b/drivers/net/appletalk/cops.c @@ -120,7 +120,7 @@ static int irq = 5; /* Default IRQ */ * DAYNA driver mode: * Dayna DL2000/DaynaTalk PC (Half Length), COPS LT-95, * Farallon PhoneNET PC III, Farallon PhoneNET PC II - * Other cards possibly supported mode unkown though: + * Other cards possibly supported mode unknown though: * Dayna DL2000 (Full length), COPS LT/M (Micro-Channel) * * Cards NOT supported by this driver but supported by the ltpc.c diff --git a/drivers/net/ariadne.h b/drivers/net/ariadne.h index bb613f292e04..727be5cdd1ea 100644 --- a/drivers/net/ariadne.h +++ b/drivers/net/ariadne.h @@ -244,7 +244,7 @@ struct Am79C960 { #define DLNKTST 0x0010 /* Disable Link Status */ #define DAPC 0x0008 /* Disable Automatic Polarity Correction */ #define MENDECL 0x0004 /* MENDEC Loopback Mode */ -#define LRTTSEL 0x0002 /* Low Receive Treshold/Transmit Mode Select */ +#define LRTTSEL 0x0002 /* Low Receive Threshold/Transmit Mode Select */ #define PORTSEL1 0x0001 /* Port Select Bits */ #define PORTSEL2 0x8000 /* Port Select Bits */ #define INTL 0x4000 /* Internal Loopback */ diff --git a/drivers/net/atl1c/atl1c_main.c b/drivers/net/atl1c/atl1c_main.c index 1372e9a99f5b..96506eacc131 100644 --- a/drivers/net/atl1c/atl1c_main.c +++ b/drivers/net/atl1c/atl1c_main.c @@ -1543,7 +1543,7 @@ static irqreturn_t atl1c_intr(int irq, void *data) if (status & ISR_OVER) if (netif_msg_intr(adapter)) dev_warn(&pdev->dev, - "TX/RX over flow (status = 0x%x)\n", + "TX/RX overflow (status = 0x%x)\n", status & ISR_OVER); /* link event */ diff --git a/drivers/net/benet/be_cmds.h b/drivers/net/benet/be_cmds.h index 49953787e41c..f0bb62b5ca9e 100644 --- a/drivers/net/benet/be_cmds.h +++ b/drivers/net/benet/be_cmds.h @@ -435,7 +435,7 @@ enum be_if_flags { * filtering capabilities. */ struct be_cmd_req_if_create { struct be_cmd_req_hdr hdr; - u32 version; /* ignore currntly */ + u32 version; /* ignore currently */ u32 capability_flags; u32 enable_flags; u8 mac_addr[ETH_ALEN]; diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c index 1f941f027718..02a0908707ed 100644 --- a/drivers/net/benet/be_main.c +++ b/drivers/net/benet/be_main.c @@ -1876,7 +1876,7 @@ int be_load_fw(struct be_adapter *adapter, u8 *func) goto fw_exit; } - dev_info(&adapter->pdev->dev, "Firmware flashed succesfully\n"); + dev_info(&adapter->pdev->dev, "Firmware flashed successfully\n"); fw_exit: release_firmware(fw); diff --git a/drivers/net/bnx2x_reg.h b/drivers/net/bnx2x_reg.h index aa76cbada5e2..732eafdeb0f2 100644 --- a/drivers/net/bnx2x_reg.h +++ b/drivers/net/bnx2x_reg.h @@ -2536,7 +2536,7 @@ /* [RC 1] A flag to indicate that overflow error occurred in one of the queues. */ #define QM_REG_OVFERROR 0x16805c -/* [RC 7] the Q were the qverflow occurs */ +/* [RC 7] the Q where the overflow occurs */ #define QM_REG_OVFQNUM 0x168058 /* [R 16] Pause state for physical queues 15-0 */ #define QM_REG_PAUSESTATE0 0x168410 diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index f86612857a73..56ba872be9c1 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -1285,7 +1285,7 @@ netdev_tx_t t3_eth_xmit(struct sk_buff *skb, struct net_device *dev) /* * We do not use Tx completion interrupts to free DMAd Tx packets. - * This is good for performamce but means that we rely on new Tx + * This is good for performance but means that we rely on new Tx * packets arriving to run the destructors of completed packets, * which open up space in their sockets' send queues. Sometimes * we do not get such new packets causing Tx to stall. A single diff --git a/drivers/net/ehea/ehea_ethtool.c b/drivers/net/ehea/ehea_ethtool.c index d76885223366..75b099ce49c9 100644 --- a/drivers/net/ehea/ehea_ethtool.c +++ b/drivers/net/ehea/ehea_ethtool.c @@ -118,7 +118,7 @@ doit: ret = ehea_set_portspeed(port, sp); if (!ret) - ehea_info("%s: Port speed succesfully set: %dMbps " + ehea_info("%s: Port speed successfully set: %dMbps " "%s Duplex", port->netdev->name, port->port_speed, port->full_duplex == 1 ? "Full" : "Half"); @@ -134,7 +134,7 @@ static int ehea_nway_reset(struct net_device *dev) ret = ehea_set_portspeed(port, EHEA_SPEED_AUTONEG); if (!ret) - ehea_info("%s: Port speed succesfully set: %dMbps " + ehea_info("%s: Port speed successfully set: %dMbps " "%s Duplex", port->netdev->name, port->port_speed, port->full_duplex == 1 ? "Full" : "Half"); diff --git a/drivers/net/hamradio/baycom_ser_fdx.c b/drivers/net/hamradio/baycom_ser_fdx.c index ed60fd664273..0cab992b3d1a 100644 --- a/drivers/net/hamradio/baycom_ser_fdx.c +++ b/drivers/net/hamradio/baycom_ser_fdx.c @@ -35,7 +35,7 @@ * driver only supports standard serial hardware (8250, 16450, 16550A) * * This modem usually draws its supply current out of the otherwise unused - * TXD pin of the serial port. Thus a contignuous stream of 0x00-bytes + * TXD pin of the serial port. Thus a contiguous stream of 0x00-bytes * is transmitted to achieve a positive supply voltage. * * hsk: This is a 4800 baud FSK modem, designed for TNC use. It works fine diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c index aa7286bc4364..8739ba850f82 100644 --- a/drivers/net/iseries_veth.c +++ b/drivers/net/iseries_veth.c @@ -1384,7 +1384,7 @@ static inline void veth_build_dma_list(struct dma_chunk *list, unsigned long done; int i = 1; - /* FIXME: skbs are continguous in real addresses. Do we + /* FIXME: skbs are contiguous in real addresses. Do we * really need to break it into PAGE_SIZE chunks, or can we do * it just at the granularity of iSeries real->absolute * mapping? Indeed, given the way the allocator works, can we diff --git a/drivers/net/lasi_82596.c b/drivers/net/lasi_82596.c index a0c578585a50..b77238dbafb8 100644 --- a/drivers/net/lasi_82596.c +++ b/drivers/net/lasi_82596.c @@ -47,7 +47,7 @@ TBD: * look at deferring rx frames rather than discarding (as per tulip) * handle tx ring full as per tulip - * performace test to tune rx_copybreak + * performance test to tune rx_copybreak Most of my modifications relate to the braindead big-endian implementation by Intel. When the i596 is operating in diff --git a/drivers/net/lib82596.c b/drivers/net/lib82596.c index 51e11c3e53e1..c0dbfc185b53 100644 --- a/drivers/net/lib82596.c +++ b/drivers/net/lib82596.c @@ -47,7 +47,7 @@ TBD: * look at deferring rx frames rather than discarding (as per tulip) * handle tx ring full as per tulip - * performace test to tune rx_copybreak + * performance test to tune rx_copybreak Most of my modifications relate to the braindead big-endian implementation by Intel. When the i596 is operating in diff --git a/drivers/net/mlx4/en_rx.c b/drivers/net/mlx4/en_rx.c index 03b781a7a182..829b9ec9ff67 100644 --- a/drivers/net/mlx4/en_rx.c +++ b/drivers/net/mlx4/en_rx.c @@ -204,7 +204,7 @@ static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv, en_dbg(DRV, priv, "Freeing fragment:%d\n", nr); dma = be64_to_cpu(rx_desc->data[nr].addr); - en_dbg(DRV, priv, "Unmaping buffer at dma:0x%llx\n", (u64) dma); + en_dbg(DRV, priv, "Unmapping buffer at dma:0x%llx\n", (u64) dma); pci_unmap_single(mdev->pdev, dma, skb_frags[nr].size, PCI_DMA_FROMDEVICE); put_page(skb_frags[nr].page); diff --git a/drivers/net/mlx4/en_tx.c b/drivers/net/mlx4/en_tx.c index 8c7279965b44..3d1396af9462 100644 --- a/drivers/net/mlx4/en_tx.c +++ b/drivers/net/mlx4/en_tx.c @@ -47,7 +47,7 @@ enum { static int inline_thold __read_mostly = MAX_INLINE; module_param_named(inline_thold, inline_thold, int, 0444); -MODULE_PARM_DESC(inline_thold, "treshold for using inline data"); +MODULE_PARM_DESC(inline_thold, "threshold for using inline data"); int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, u32 size, diff --git a/drivers/net/mlx4/mlx4_en.h b/drivers/net/mlx4/mlx4_en.h index 4376147b0ea0..82c3ebc584e3 100644 --- a/drivers/net/mlx4/mlx4_en.h +++ b/drivers/net/mlx4/mlx4_en.h @@ -162,7 +162,7 @@ enum { #define MLX4_EN_DEF_RX_PAUSE 1 #define MLX4_EN_DEF_TX_PAUSE 1 -/* Interval between sucessive polls in the Tx routine when polling is used +/* Interval between successive polls in the Tx routine when polling is used instead of interrupts (in per-core Tx rings) - should be power of 2 */ #define MLX4_EN_TX_POLL_MODER 16 #define MLX4_EN_TX_POLL_TIMEOUT (HZ / 4) diff --git a/drivers/net/ps3_gelic_net.c b/drivers/net/ps3_gelic_net.c index b211613e9dbd..86fde1a90a5a 100644 --- a/drivers/net/ps3_gelic_net.c +++ b/drivers/net/ps3_gelic_net.c @@ -296,7 +296,7 @@ static void gelic_card_reset_chain(struct gelic_card *card, * @card: card structure * @descr: descriptor to re-init * - * return 0 on succes, <0 on failure + * return 0 on success, <0 on failure * * allocates a new rx skb, iommu-maps it and attaches it to the descriptor. * Activate the descriptor state-wise diff --git a/drivers/net/sis900.c b/drivers/net/sis900.c index c072f7f36acf..9d94a141555c 100644 --- a/drivers/net/sis900.c +++ b/drivers/net/sis900.c @@ -1760,7 +1760,7 @@ static int sis900_rx(struct net_device *net_dev) sis_priv->rx_ring[entry].bufptr, RX_BUF_SIZE, PCI_DMA_FROMDEVICE); - /* refill the Rx buffer, what if there is not enought + /* refill the Rx buffer, what if there is not enough * memory for new socket buffer ?? */ if ((skb = dev_alloc_skb(RX_BUF_SIZE)) == NULL) { /* @@ -1775,7 +1775,7 @@ static int sis900_rx(struct net_device *net_dev) } /* This situation should never happen, but due to - some unknow bugs, it is possible that + some unknown bugs, it is possible that we are working on NULL sk_buff :-( */ if (sis_priv->rx_skbuff[entry] == NULL) { if (netif_msg_rx_err(sis_priv)) diff --git a/drivers/net/skfp/h/smc.h b/drivers/net/skfp/h/smc.h index 1758d9548361..026a83b9f743 100644 --- a/drivers/net/skfp/h/smc.h +++ b/drivers/net/skfp/h/smc.h @@ -393,10 +393,10 @@ struct smt_config { */ u_long mac_d_max ; /* MAC : D_Max timer value */ - u_long lct_short ; /* LCT : error threshhold */ - u_long lct_medium ; /* LCT : error threshhold */ - u_long lct_long ; /* LCT : error threshhold */ - u_long lct_extended ; /* LCT : error threshhold */ + u_long lct_short ; /* LCT : error threshold */ + u_long lct_medium ; /* LCT : error threshold */ + u_long lct_long ; /* LCT : error threshold */ + u_long lct_extended ; /* LCT : error threshold */ } ; #ifdef DEBUG diff --git a/drivers/net/skfp/skfddi.c b/drivers/net/skfp/skfddi.c index b27156eaf267..db216a728503 100644 --- a/drivers/net/skfp/skfddi.c +++ b/drivers/net/skfp/skfddi.c @@ -1002,7 +1002,7 @@ static int skfp_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) } break; default: - printk("ioctl for %s: unknow cmd: %04x\n", dev->name, ioc.cmd); + printk("ioctl for %s: unknown cmd: %04x\n", dev->name, ioc.cmd); status = -EOPNOTSUPP; } // switch diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c index ccdd196f5297..4a00940d0a54 100644 --- a/drivers/net/smsc911x.c +++ b/drivers/net/smsc911x.c @@ -816,7 +816,7 @@ static int smsc911x_mii_probe(struct net_device *dev) SMSC_TRACE(HW, "Passed Loop Back Test"); #endif /* USE_PHY_WORK_AROUND */ - SMSC_TRACE(HW, "phy initialised succesfully"); + SMSC_TRACE(HW, "phy initialised successfully"); return 0; } diff --git a/drivers/net/smsc911x.h b/drivers/net/smsc911x.h index b5716bd8a597..016360c65ce2 100644 --- a/drivers/net/smsc911x.h +++ b/drivers/net/smsc911x.h @@ -30,7 +30,7 @@ #define SMSC_NAPI_WEIGHT 16 /* implements a PHY loopback test at initialisation time, to ensure a packet - * can be succesfully looped back */ + * can be successfully looped back */ #define USE_PHY_WORK_AROUND #define DPRINTK(nlevel, klevel, fmt, args...) \ diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c index 90e663f4515c..40b51e6bc77b 100644 --- a/drivers/net/spider_net.c +++ b/drivers/net/spider_net.c @@ -409,7 +409,7 @@ spider_net_free_rx_chain_contents(struct spider_net_card *card) * @card: card structure * @descr: descriptor to re-init * - * Return 0 on succes, <0 on failure. + * Return 0 on success, <0 on failure. * * Allocates a new rx skb, iommu-maps it and attaches it to the * descriptor. Mark the descriptor as activated, ready-to-use. diff --git a/drivers/net/stmmac/gmac.c b/drivers/net/stmmac/gmac.c index b624bb5bae0a..52586ee68953 100644 --- a/drivers/net/stmmac/gmac.c +++ b/drivers/net/stmmac/gmac.c @@ -112,7 +112,7 @@ static void gmac_dma_operation_mode(unsigned long ioaddr, int txmode, " (threshold = %d)\n", txmode); csr6 &= ~DMA_CONTROL_TSF; csr6 &= DMA_CONTROL_TC_TX_MASK; - /* Set the transmit threashold */ + /* Set the transmit threshold */ if (txmode <= 32) csr6 |= DMA_CONTROL_TTC_32; else if (txmode <= 64) diff --git a/drivers/net/stmmac/gmac.h b/drivers/net/stmmac/gmac.h index 684a363120a9..2e82d6c9a148 100644 --- a/drivers/net/stmmac/gmac.h +++ b/drivers/net/stmmac/gmac.h @@ -154,14 +154,14 @@ enum rx_tx_priority_ratio { #define DMA_CONTROL_DT 0x04000000 /* Disable Drop TCP/IP csum error */ #define DMA_CONTROL_RSF 0x02000000 /* Receive Store and Forward */ #define DMA_CONTROL_DFF 0x01000000 /* Disaable flushing */ -/* Theshold for Activating the FC */ +/* Threshold for Activating the FC */ enum rfa { act_full_minus_1 = 0x00800000, act_full_minus_2 = 0x00800200, act_full_minus_3 = 0x00800400, act_full_minus_4 = 0x00800600, }; -/* Theshold for Deactivating the FC */ +/* Threshold for Deactivating the FC */ enum rfd { deac_full_minus_1 = 0x00400000, deac_full_minus_2 = 0x00400800, diff --git a/drivers/net/tokenring/smctr.c b/drivers/net/tokenring/smctr.c index ebda61bc4c2f..78e12b5e3ac7 100644 --- a/drivers/net/tokenring/smctr.c +++ b/drivers/net/tokenring/smctr.c @@ -426,7 +426,7 @@ static int smctr_alloc_shared_memory(struct net_device *dev) smctr_malloc(dev, 1L); /* Allocate Non-MAC receive data buffers. - * To guarantee a minimum of 256 contigous memory to + * To guarantee a minimum of 256 contiguous memory to * UM_Receive_Packet's lookahead pointer, before a page * change or ring end is encountered, place each rx buffer on * a 256 byte boundary. diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c index 4469f2451a6f..5e9adbaf6745 100644 --- a/drivers/net/ucc_geth.c +++ b/drivers/net/ucc_geth.c @@ -3798,7 +3798,7 @@ static int ucc_geth_probe(struct of_device* ofdev, const struct of_device_id *ma prop = of_get_property(np, "tx-clock", NULL); if (!prop) { printk(KERN_ERR - "ucc_geth: mising tx-clock-name property\n"); + "ucc_geth: missing tx-clock-name property\n"); return -EINVAL; } if ((*prop < QE_CLK_NONE) || (*prop > QE_CLK24)) { diff --git a/drivers/net/ucc_geth.h b/drivers/net/ucc_geth.h index 03a6ca016d5a..a007e2acf651 100644 --- a/drivers/net/ucc_geth.h +++ b/drivers/net/ucc_geth.h @@ -80,16 +80,16 @@ struct ucc_geth { frames) received that were between 128 (Including FCS length==4) and 255 octets */ u32 txok; /* Total number of octets residing in frames - that where involved in succesfull + that where involved in successfull transmission */ u16 txcf; /* Total number of PAUSE control frames transmitted by this MAC */ u8 res4[0x2]; u32 tmca; /* Total number of frames that were transmitted - succesfully with the group address bit set + successfully with the group address bit set that are not broadcast frames */ u32 tbca; /* Total number of frames transmitted - succesfully that had destination address + successfully that had destination address field equal to the broadcast address */ u32 rxfok; /* Total number of frames received OK */ u32 rxbok; /* Total number of octets received OK */ @@ -98,9 +98,9 @@ struct ucc_geth { HW because it includes octets in frames that never even reach the UCC */ u32 rmca; /* Total number of frames that were received - succesfully with the group address bit set + successfully with the group address bit set that are not broadcast frames */ - u32 rbca; /* Total number of frames received succesfully + u32 rbca; /* Total number of frames received successfully that had destination address equal to the broadcast address */ u32 scar; /* Statistics carry register */ @@ -759,15 +759,15 @@ struct ucc_geth_hardware_statistics { frames) received that were between 128 (Including FCS length==4) and 255 octets */ u32 txok; /* Total number of octets residing in frames - that where involved in succesfull + that where involved in successfull transmission */ u16 txcf; /* Total number of PAUSE control frames transmitted by this MAC */ u32 tmca; /* Total number of frames that were transmitted - succesfully with the group address bit set + successfully with the group address bit set that are not broadcast frames */ u32 tbca; /* Total number of frames transmitted - succesfully that had destination address + successfully that had destination address field equal to the broadcast address */ u32 rxfok; /* Total number of frames received OK */ u32 rxbok; /* Total number of octets received OK */ @@ -776,9 +776,9 @@ struct ucc_geth_hardware_statistics { HW because it includes octets in frames that never even reach the UCC */ u32 rmca; /* Total number of frames that were received - succesfully with the group address bit set + successfully with the group address bit set that are not broadcast frames */ - u32 rbca; /* Total number of frames received succesfully + u32 rbca; /* Total number of frames received successfully that had destination address equal to the broadcast address */ } __attribute__ ((packed)); diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index c6c922247d05..0c3c738d7419 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -748,7 +748,7 @@ static int smsc95xx_phy_initialize(struct usbnet *dev) mii_nway_restart(&dev->mii); if (netif_msg_ifup(dev)) - devdbg(dev, "phy initialised succesfully"); + devdbg(dev, "phy initialised successfully"); return 0; } diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c index 7ea71b33d2e9..ee784e091f67 100644 --- a/drivers/net/wan/lmc/lmc_main.c +++ b/drivers/net/wan/lmc/lmc_main.c @@ -927,7 +927,7 @@ static int __devinit lmc_init_one(struct pci_dev *pdev, sc->lmc_media = &lmc_t1_media; break; default: - printk(KERN_WARNING "%s: LMC UNKOWN CARD!\n", dev->name); + printk(KERN_WARNING "%s: LMC UNKNOWN CARD!\n", dev->name); break; } diff --git a/drivers/net/wimax/i2400m/rx.c b/drivers/net/wimax/i2400m/rx.c index 07c32e68909f..99d27473ba3f 100644 --- a/drivers/net/wimax/i2400m/rx.c +++ b/drivers/net/wimax/i2400m/rx.c @@ -1114,7 +1114,7 @@ error: * device. See the file header for the format. Run all checks on the * buffer header, then run over each payload's descriptors, verify * their consistency and act on each payload's contents. If - * everything is succesful, update the device's statistics. + * everything is successful, update the device's statistics. * * Note: You need to set the skb to contain only the length of the * received buffer; for that, use skb_trim(skb, RECEIVED_SIZE). diff --git a/drivers/net/wireless/ath/ath5k/phy.c b/drivers/net/wireless/ath/ath5k/phy.c index 1a039f2bd732..6f04cc758dcc 100644 --- a/drivers/net/wireless/ath/ath5k/phy.c +++ b/drivers/net/wireless/ath/ath5k/phy.c @@ -117,7 +117,7 @@ static unsigned int ath5k_hw_rfb_op(struct ath5k_hw *ah, /* * This code is used to optimize rf gain on different environments - * (temprature mostly) based on feedback from a power detector. + * (temperature mostly) based on feedback from a power detector. * * It's only used on RF5111 and RF5112, later RF chips seem to have * auto adjustment on hw -notice they have a much smaller BANK 7 and @@ -2675,7 +2675,7 @@ ath5k_setup_channel_powertable(struct ath5k_hw *ah, /* Fill curves in reverse order * from lower power (max gain) * to higher power. Use curve -> idx - * backmaping we did on eeprom init */ + * backmapping we did on eeprom init */ u8 idx = pdg_curve_to_idx[pdg]; /* Grab the needed curves by index */ @@ -2777,7 +2777,7 @@ ath5k_setup_channel_powertable(struct ath5k_hw *ah, /* Now we have a set of curves for this * channel on tmpL (x range is table_max - table_min * and y values are tmpL[pdg][]) sorted in the same - * order as EEPROM (because we've used the backmaping). + * order as EEPROM (because we've used the backmapping). * So for RF5112 it's from higher power to lower power * and for RF2413 it's from lower power to higher power. * For RF5111 we only have one curve. */ diff --git a/drivers/net/wireless/ath/ath9k/rc.c b/drivers/net/wireless/ath/ath9k/rc.c index 1895d63aad0a..0a35ee62a02a 100644 --- a/drivers/net/wireless/ath/ath9k/rc.c +++ b/drivers/net/wireless/ath/ath9k/rc.c @@ -969,7 +969,7 @@ static bool ath_rc_update_per(struct ath_softc *sc, * Since this probe succeeded, we allow the next * probe twice as soon. This allows the maxRate * to move up faster if the probes are - * succesful. + * successful. */ ath_rc_priv->probe_time = now_msec - rate_table->probe_interval / 2; diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c index a741d37fd96f..e1b330023200 100644 --- a/drivers/net/wireless/ipw2x00/ipw2100.c +++ b/drivers/net/wireless/ipw2x00/ipw2100.c @@ -551,7 +551,7 @@ static int ipw2100_get_ordinal(struct ipw2100_priv *priv, u32 ord, /* get number of entries */ field_count = *(((u16 *) & field_info) + 1); - /* abort if no enought memory */ + /* abort if no enough memory */ total_length = field_len * field_count; if (total_length > *len) { *len = total_length; @@ -3044,7 +3044,7 @@ static void ipw2100_tx_send_data(struct ipw2100_priv *priv) IPW_MAX_BDS)) { /* TODO: Support merging buffers if more than * IPW_MAX_BDS are used */ - IPW_DEBUG_INFO("%s: Maximum BD theshold exceeded. " + IPW_DEBUG_INFO("%s: Maximum BD threshold exceeded. " "Increase fragmentation level.\n", priv->net_dev->name); } @@ -6823,7 +6823,7 @@ static int ipw2100_wx_get_range(struct net_device *dev, range->max_qual.updated = 7; /* Updated all three */ range->avg_qual.qual = 70; /* > 8% missed beacons is 'bad' */ - /* TODO: Find real 'good' to 'bad' threshol value for RSSI */ + /* TODO: Find real 'good' to 'bad' threshold value for RSSI */ range->avg_qual.level = 20 + IPW2100_RSSI_TO_DBM; range->avg_qual.noise = 0; range->avg_qual.updated = 7; /* Updated all three */ diff --git a/drivers/net/wireless/ipw2x00/ipw2200.c b/drivers/net/wireless/ipw2x00/ipw2200.c index 9b0f2c0646e0..b2aa960b8346 100644 --- a/drivers/net/wireless/ipw2x00/ipw2200.c +++ b/drivers/net/wireless/ipw2x00/ipw2200.c @@ -787,7 +787,7 @@ static int ipw_get_ordinal(struct ipw_priv *priv, u32 ord, void *val, u32 * len) /* get number of entries */ field_count = *(((u16 *) & field_info) + 1); - /* abort if not enought memory */ + /* abort if not enough memory */ total_len = field_len * field_count; if (total_len > *len) { *len = total_len; @@ -7751,7 +7751,7 @@ static void ipw_rebuild_decrypted_skb(struct ipw_priv *priv, case SEC_LEVEL_0: break; default: - printk(KERN_ERR "Unknow security level %d\n", + printk(KERN_ERR "Unknown security level %d\n", priv->ieee->sec.level); break; } @@ -8917,7 +8917,7 @@ static int ipw_wx_get_range(struct net_device *dev, range->max_qual.updated = 7; /* Updated all three */ range->avg_qual.qual = 70; - /* TODO: Find real 'good' to 'bad' threshol value for RSSI */ + /* TODO: Find real 'good' to 'bad' threshold value for RSSI */ range->avg_qual.level = 0; /* FIXME to real average level */ range->avg_qual.noise = 0; range->avg_qual.updated = 7; /* Updated all three */ @@ -10290,7 +10290,7 @@ static int ipw_tx_skb(struct ipw_priv *priv, struct libipw_txb *txb, case SEC_LEVEL_0: break; default: - printk(KERN_ERR "Unknow security level %d\n", + printk(KERN_ERR "Unknown security level %d\n", priv->ieee->sec.level); break; } diff --git a/drivers/net/wireless/ipw2x00/libipw_module.c b/drivers/net/wireless/ipw2x00/libipw_module.c index be5b809ec97a..20b8a8a20644 100644 --- a/drivers/net/wireless/ipw2x00/libipw_module.c +++ b/drivers/net/wireless/ipw2x00/libipw_module.c @@ -199,7 +199,7 @@ struct net_device *alloc_ieee80211(int sizeof_priv, int monitor) ieee->host_decrypt = 1; ieee->host_mc_decrypt = 1; - /* Host fragementation in Open mode. Default is enabled. + /* Host fragmentation in Open mode. Default is enabled. * Note: host fragmentation is always enabled if host encryption * is enabled. For cards can do hardware encryption, they must do * hardware fragmentation as well. So we don't need a variable diff --git a/drivers/net/wireless/iwmc3200wifi/hal.c b/drivers/net/wireless/iwmc3200wifi/hal.c index c430418248b4..d13c8853ee82 100644 --- a/drivers/net/wireless/iwmc3200wifi/hal.c +++ b/drivers/net/wireless/iwmc3200wifi/hal.c @@ -411,7 +411,7 @@ static void iwm_build_lmac_hdr(struct iwm_priv *iwm, struct iwm_lmac_hdr *hdr, /* * iwm_hal_send_host_cmd(): sends commands to the UMAC or the LMAC. * Sending command to the LMAC is equivalent to sending a - * regular UMAC command with the LMAC passtrough or the LMAC + * regular UMAC command with the LMAC passthrough or the LMAC * wrapper UMAC command IDs. */ int iwm_hal_send_host_cmd(struct iwm_priv *iwm, diff --git a/drivers/net/wireless/iwmc3200wifi/rx.c b/drivers/net/wireless/iwmc3200wifi/rx.c index 771a301003c9..8ddb51a2a977 100644 --- a/drivers/net/wireless/iwmc3200wifi/rx.c +++ b/drivers/net/wireless/iwmc3200wifi/rx.c @@ -1448,7 +1448,7 @@ static void iwm_rx_process_packet(struct iwm_priv *iwm, kfree_skb(packet->skb); break; default: - IWM_ERR(iwm, "Unknow ticket action: %d\n", + IWM_ERR(iwm, "Unknown ticket action: %d\n", le16_to_cpu(ticket_node->ticket->action)); kfree_skb(packet->skb); } diff --git a/drivers/net/wireless/libertas/if_sdio.c b/drivers/net/wireless/libertas/if_sdio.c index 485a8d406525..afe6abecc044 100644 --- a/drivers/net/wireless/libertas/if_sdio.c +++ b/drivers/net/wireless/libertas/if_sdio.c @@ -934,7 +934,7 @@ static int if_sdio_probe(struct sdio_func *func, } if (i == ARRAY_SIZE(if_sdio_models)) { - lbs_pr_err("unkown card model 0x%x\n", card->model); + lbs_pr_err("unknown card model 0x%x\n", card->model); ret = -ENODEV; goto free; } diff --git a/drivers/net/wireless/prism54/isl_ioctl.c b/drivers/net/wireless/prism54/isl_ioctl.c index bc08464d8323..f7f5c793514b 100644 --- a/drivers/net/wireless/prism54/isl_ioctl.c +++ b/drivers/net/wireless/prism54/isl_ioctl.c @@ -1897,7 +1897,7 @@ prism54_get_mac(struct net_device *ndev, struct iw_request_info *info, return 0; } -/* Setting policy also clears the MAC acl, even if we don't change the defaut +/* Setting policy also clears the MAC acl, even if we don't change the default * policy */ @@ -2323,7 +2323,7 @@ prism54_process_trap_helper(islpci_private *priv, enum oid_num_t oid, case DOT11_OID_BEACON: send_formatted_event(priv, - "Received a beacon from an unkown AP", + "Received a beacon from an unknown AP", mlme, 0); break; diff --git a/drivers/net/wireless/rt2x00/rt2400pci.h b/drivers/net/wireless/rt2x00/rt2400pci.h index ccd644104ad1..aced05775693 100644 --- a/drivers/net/wireless/rt2x00/rt2400pci.h +++ b/drivers/net/wireless/rt2x00/rt2400pci.h @@ -35,7 +35,7 @@ /* * Signal information. - * Defaul offset is required for RSSI <-> dBm conversion. + * Default offset is required for RSSI <-> dBm conversion. */ #define DEFAULT_RSSI_OFFSET 100 diff --git a/drivers/net/wireless/rt2x00/rt2500pci.h b/drivers/net/wireless/rt2x00/rt2500pci.h index 54d37957883c..3db9041838a4 100644 --- a/drivers/net/wireless/rt2x00/rt2500pci.h +++ b/drivers/net/wireless/rt2x00/rt2500pci.h @@ -46,7 +46,7 @@ /* * Signal information. - * Defaul offset is required for RSSI <-> dBm conversion. + * Default offset is required for RSSI <-> dBm conversion. */ #define DEFAULT_RSSI_OFFSET 121 diff --git a/drivers/net/wireless/rt2x00/rt2500usb.h b/drivers/net/wireless/rt2x00/rt2500usb.h index b01edca42583..d3000827883a 100644 --- a/drivers/net/wireless/rt2x00/rt2500usb.h +++ b/drivers/net/wireless/rt2x00/rt2500usb.h @@ -46,7 +46,7 @@ /* * Signal information. - * Defaul offset is required for RSSI <-> dBm conversion. + * Default offset is required for RSSI <-> dBm conversion. */ #define DEFAULT_RSSI_OFFSET 120 diff --git a/drivers/net/wireless/rt2x00/rt61pci.h b/drivers/net/wireless/rt2x00/rt61pci.h index 93eb699165cc..77b5116f549b 100644 --- a/drivers/net/wireless/rt2x00/rt61pci.h +++ b/drivers/net/wireless/rt2x00/rt61pci.h @@ -37,7 +37,7 @@ /* * Signal information. - * Defaul offset is required for RSSI <-> dBm conversion. + * Default offset is required for RSSI <-> dBm conversion. */ #define DEFAULT_RSSI_OFFSET 120 diff --git a/drivers/net/wireless/rt2x00/rt73usb.h b/drivers/net/wireless/rt2x00/rt73usb.h index 81fe0be51c42..e194332dac5f 100644 --- a/drivers/net/wireless/rt2x00/rt73usb.h +++ b/drivers/net/wireless/rt2x00/rt73usb.h @@ -37,7 +37,7 @@ /* * Signal information. - * Defaul offset is required for RSSI <-> dBm conversion. + * Default offset is required for RSSI <-> dBm conversion. */ #define DEFAULT_RSSI_OFFSET 120 diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c index 431a20ec6db6..b3b0b5b685c6 100644 --- a/drivers/net/wireless/wavelan_cs.c +++ b/drivers/net/wireless/wavelan_cs.c @@ -4011,7 +4011,7 @@ wavelan_interrupt(int irq, #endif /* Prevent reentrancy. We need to do that because we may have - * multiple interrupt handler running concurently. + * multiple interrupt handler running concurrently. * It is safe because interrupts are disabled before aquiring * the spinlock. */ spin_lock(&lp->spinlock); diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index 6d666359a42f..2b7f96594373 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -312,7 +312,7 @@ static void tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, * zd_mac_tx_failed - callback for failed frames * @dev: the mac80211 wireless device * - * This function is called if a frame couldn't be succesfully be + * This function is called if a frame couldn't be successfully be * transferred. The first frame from the tx queue, will be selected and * reported as error to the upper layers. */ diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index a6b4a5a53d40..f511e70d454c 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -650,7 +650,7 @@ ccio_clear_io_tlb(struct ioc *ioc, dma_addr_t iovp, size_t byte_cnt) * Mark the I/O Pdir entries invalid and blow away the corresponding I/O * TLB entries. * - * FIXME: at some threshhold it might be "cheaper" to just blow + * FIXME: at some threshold it might be "cheaper" to just blow * away the entire I/O TLB instead of individual entries. * * FIXME: Uturn has 256 TLB entries. We don't need to purge every diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index d93108d148fc..36db20a8f892 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -6533,7 +6533,7 @@ static struct ibm_struct volume_driver_data = { * The speeds are stored on handles * (FANA:FAN9), (FANC:FANB), (FANE:FAND). * - * There are three default speed sets, acessible as handles: + * There are three default speed sets, accessible as handles: * FS1L,FS1M,FS1H; FS2L,FS2M,FS2H; FS3L,FS3M,FS3H * * ACPI DSDT switches which set is in use depending on various diff --git a/drivers/pnp/pnpbios/rsparser.c b/drivers/pnp/pnpbios/rsparser.c index 87b4f49a5251..a5135ebe5f07 100644 --- a/drivers/pnp/pnpbios/rsparser.c +++ b/drivers/pnp/pnpbios/rsparser.c @@ -191,7 +191,7 @@ static unsigned char *pnpbios_parse_allocated_resource_data(struct pnp_dev *dev, return (unsigned char *)p; break; - default: /* an unkown tag */ + default: /* an unknown tag */ len_err: dev_err(&dev->dev, "unknown tag %#x length %d\n", tag, len); @@ -405,7 +405,7 @@ pnpbios_parse_resource_option_data(unsigned char *p, unsigned char *end, case SMALL_TAG_END: return p + 2; - default: /* an unkown tag */ + default: /* an unknown tag */ len_err: dev_err(&dev->dev, "unknown tag %#x length %d\n", tag, len); @@ -475,7 +475,7 @@ static unsigned char *pnpbios_parse_compatible_ids(unsigned char *p, return (unsigned char *)p; break; - default: /* an unkown tag */ + default: /* an unknown tag */ len_err: dev_err(&dev->dev, "unknown tag %#x length %d\n", tag, len); @@ -744,7 +744,7 @@ static unsigned char *pnpbios_encode_allocated_resource_data(struct pnp_dev return (unsigned char *)p; break; - default: /* an unkown tag */ + default: /* an unknown tag */ len_err: dev_err(&dev->dev, "unknown tag %#x length %d\n", tag, len); diff --git a/drivers/ps3/ps3-sys-manager.c b/drivers/ps3/ps3-sys-manager.c index 88cb74088611..3cbaf1811bd0 100644 --- a/drivers/ps3/ps3-sys-manager.c +++ b/drivers/ps3/ps3-sys-manager.c @@ -46,7 +46,7 @@ /** * struct ps3_sys_manager_header - System manager message header. * @version: Header version, currently 1. - * @size: Header size in bytes, curently 16. + * @size: Header size in bytes, currently 16. * @payload_size: Message payload size in bytes. * @service_id: Message type, one of enum ps3_sys_manager_service_id. * @request_tag: Unique number to identify reply. diff --git a/drivers/rtc/rtc-v3020.c b/drivers/rtc/rtc-v3020.c index ad164056feb6..434e92fdb966 100644 --- a/drivers/rtc/rtc-v3020.c +++ b/drivers/rtc/rtc-v3020.c @@ -335,7 +335,7 @@ static int rtc_probe(struct platform_device *pdev) goto err_io; } - /* Make sure frequency measurment mode, test modes, and lock + /* Make sure frequency measurement mode, test modes, and lock * are all disabled */ v3020_set_reg(chip, V3020_STATUS_0, 0x0); diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c index 097d3846a828..f54b1eec6ddf 100644 --- a/drivers/s390/char/fs3270.c +++ b/drivers/s390/char/fs3270.c @@ -74,7 +74,7 @@ fs3270_do_io(struct raw3270_view *view, struct raw3270_request *rq) } rc = raw3270_start(view, rq); if (rc == 0) { - /* Started sucessfully. Now wait for completion. */ + /* Started successfully. Now wait for completion. */ wait_event(fp->wait, raw3270_request_final(rq)); } } while (rc == -EACCES); diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c index 8ab51608da55..c268a2e5b7c3 100644 --- a/drivers/s390/cio/chp.c +++ b/drivers/s390/cio/chp.c @@ -65,7 +65,7 @@ static void set_chp_logically_online(struct chp_id chpid, int onoff) chpid_to_chp(chpid)->state = onoff; } -/* On succes return 0 if channel-path is varied offline, 1 if it is varied +/* On success return 0 if channel-path is varied offline, 1 if it is varied * online. Return -ENODEV if channel-path is not registered. */ int chp_get_status(struct chp_id chpid) { diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c index 30f516111307..2985eb439485 100644 --- a/drivers/s390/cio/cmf.c +++ b/drivers/s390/cio/cmf.c @@ -462,7 +462,7 @@ static struct cmb_area cmb_area = { * block of memory, which can not be moved as long as any channel * is active. Therefore, a maximum number of subchannels needs to * be defined somewhere. This is a module parameter, defaulting to - * a resonable value of 1024, or 32 kb of memory. + * a reasonable value of 1024, or 32 kb of memory. * Current kernels don't allow kmalloc with more than 128kb, so the * maximum is 4096. */ diff --git a/drivers/sbus/char/envctrl.c b/drivers/sbus/char/envctrl.c index 58e583b61e60..aa2b60a868ba 100644 --- a/drivers/sbus/char/envctrl.c +++ b/drivers/sbus/char/envctrl.c @@ -92,11 +92,11 @@ #define ENVCTRL_CPUTEMP_MON 1 /* cpu temperature monitor */ #define ENVCTRL_CPUVOLTAGE_MON 2 /* voltage monitor */ #define ENVCTRL_FANSTAT_MON 3 /* fan status monitor */ -#define ENVCTRL_ETHERTEMP_MON 4 /* ethernet temperarture */ +#define ENVCTRL_ETHERTEMP_MON 4 /* ethernet temperature */ /* monitor */ #define ENVCTRL_VOLTAGESTAT_MON 5 /* voltage status monitor */ #define ENVCTRL_MTHRBDTEMP_MON 6 /* motherboard temperature */ -#define ENVCTRL_SCSITEMP_MON 7 /* scsi temperarture */ +#define ENVCTRL_SCSITEMP_MON 7 /* scsi temperature */ #define ENVCTRL_GLOBALADDR_MON 8 /* global address */ /* Child device type. diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c index f5a9addb7050..07ce9bfcdf06 100644 --- a/drivers/scsi/53c700.c +++ b/drivers/scsi/53c700.c @@ -1491,7 +1491,7 @@ NCR_700_intr(int irq, void *dev_id) unsigned long flags; int handled = 0; - /* Use the host lock to serialise acess to the 53c700 + /* Use the host lock to serialise access to the 53c700 * hardware. Note: In future, we may need to take the queue * lock to enter the done routines. When that happens, we * need to ensure that for this driver, the host lock and the diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index cdbdec9f4fb2..83986ed86556 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -526,10 +526,10 @@ struct aac_driver_ident /* * The adapter interface specs all queues to be located in the same - * physically contigous block. The host structure that defines the + * physically contiguous block. The host structure that defines the * commuication queues will assume they are each a separate physically - * contigous memory region that will support them all being one big - * contigous block. + * contiguous memory region that will support them all being one big + * contiguous block. * There is a command and response queue for each level and direction of * commuication. These regions are accessed by both the host and adapter. */ diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c index d598eba630d0..666d5151d628 100644 --- a/drivers/scsi/aacraid/comminit.c +++ b/drivers/scsi/aacraid/comminit.c @@ -226,7 +226,7 @@ static int aac_comm_init(struct aac_dev * dev) spin_lock_init(&dev->fib_lock); /* - * Allocate the physically contigous space for the commuication + * Allocate the physically contiguous space for the commuication * queue headers. */ diff --git a/drivers/scsi/aic7xxx/aic79xx.seq b/drivers/scsi/aic7xxx/aic79xx.seq index 3b66b5ae3d9f..2fb78e35a9e5 100644 --- a/drivers/scsi/aic7xxx/aic79xx.seq +++ b/drivers/scsi/aic7xxx/aic79xx.seq @@ -217,7 +217,7 @@ BEGIN_CRITICAL; scbdma_tohost_done: test CCSCBCTL, CCARREN jz fill_qoutfifo_dmadone; /* - * An SCB has been succesfully uploaded to the host. + * An SCB has been successfully uploaded to the host. * If the SCB was uploaded for some reason other than * bad SCSI status (currently only for underruns), we * queue the SCB for normal completion. Otherwise, we diff --git a/drivers/scsi/aic7xxx/aic79xx_core.c b/drivers/scsi/aic7xxx/aic79xx_core.c index 63b521d615f2..4d419c155ce9 100644 --- a/drivers/scsi/aic7xxx/aic79xx_core.c +++ b/drivers/scsi/aic7xxx/aic79xx_core.c @@ -2487,7 +2487,7 @@ ahd_handle_scsiint(struct ahd_softc *ahd, u_int intstat) /* * Although the driver does not care about the * 'Selection in Progress' status bit, the busy - * LED does. SELINGO is only cleared by a sucessfull + * LED does. SELINGO is only cleared by a successfull * selection, so we must manually clear it to insure * the LED turns off just incase no future successful * selections occur (e.g. no devices on the bus). diff --git a/drivers/scsi/aic7xxx/aic7xxx_core.c b/drivers/scsi/aic7xxx/aic7xxx_core.c index 8dfb59d58992..45aa728a76b2 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_core.c +++ b/drivers/scsi/aic7xxx/aic7xxx_core.c @@ -1733,7 +1733,7 @@ ahc_handle_scsiint(struct ahc_softc *ahc, u_int intstat) /* * Although the driver does not care about the * 'Selection in Progress' status bit, the busy - * LED does. SELINGO is only cleared by a sucessfull + * LED does. SELINGO is only cleared by a successfull * selection, so we must manually clear it to insure * the LED turns off just incase no future successful * selections occur (e.g. no devices on the bus). diff --git a/drivers/scsi/bfa/include/defs/bfa_defs_pport.h b/drivers/scsi/bfa/include/defs/bfa_defs_pport.h index a000bc4e2d4a..bf320412ee24 100644 --- a/drivers/scsi/bfa/include/defs/bfa_defs_pport.h +++ b/drivers/scsi/bfa/include/defs/bfa_defs_pport.h @@ -61,7 +61,7 @@ enum bfa_pport_speed { * Port operational type (in sync with SNIA port type). */ enum bfa_pport_type { - BFA_PPORT_TYPE_UNKNOWN = 1, /* port type is unkown */ + BFA_PPORT_TYPE_UNKNOWN = 1, /* port type is unknown */ BFA_PPORT_TYPE_TRUNKED = 2, /* Trunked mode */ BFA_PPORT_TYPE_NPORT = 5, /* P2P with switched fabric */ BFA_PPORT_TYPE_NLPORT = 6, /* public loop */ diff --git a/drivers/scsi/bfa/include/defs/bfa_defs_tsensor.h b/drivers/scsi/bfa/include/defs/bfa_defs_tsensor.h index 31881d218515..ade763dbc8ce 100644 --- a/drivers/scsi/bfa/include/defs/bfa_defs_tsensor.h +++ b/drivers/scsi/bfa/include/defs/bfa_defs_tsensor.h @@ -25,7 +25,7 @@ * Temperature sensor status values */ enum bfa_tsensor_status { - BFA_TSENSOR_STATUS_UNKNOWN = 1, /* unkown status */ + BFA_TSENSOR_STATUS_UNKNOWN = 1, /* unknown status */ BFA_TSENSOR_STATUS_FAULTY = 2, /* sensor is faulty */ BFA_TSENSOR_STATUS_BELOW_MIN = 3, /* temperature below mininum */ BFA_TSENSOR_STATUS_NOMINAL = 4, /* normal temperature */ diff --git a/drivers/scsi/hptiop.c b/drivers/scsi/hptiop.c index a0e7e711ff9d..5be67a6fca93 100644 --- a/drivers/scsi/hptiop.c +++ b/drivers/scsi/hptiop.c @@ -834,7 +834,7 @@ static int hptiop_reset_hba(struct hptiop_hba *hba) atomic_read(&hba->resetting) == 0, 60 * HZ); if (atomic_read(&hba->resetting)) { - /* IOP is in unkown state, abort reset */ + /* IOP is in unknown state, abort reset */ printk(KERN_ERR "scsi%d: reset failed\n", hba->host->host_no); return -1; } diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index bd2f77197447..6486ae4591b8 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -56,7 +56,7 @@ * at the same time. * * When discovery succeeds or fails a callback is made to the lport as - * notification. Currently, succesful discovery causes the lport to take no + * notification. Currently, successful discovery causes the lport to take no * action. A failure will cause the lport to reset. There is likely a circular * locking problem with this implementation. */ diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c index 2e0746d70303..ca25ee5190b0 100644 --- a/drivers/scsi/libiscsi_tcp.c +++ b/drivers/scsi/libiscsi_tcp.c @@ -1004,7 +1004,7 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task) * iscsi_tcp_task_xmit - xmit normal PDU task * @task: iscsi command task * - * We're expected to return 0 when everything was transmitted succesfully, + * We're expected to return 0 when everything was transmitted successfully, * -EAGAIN if there's still data in the queue, or != 0 for any other kind * of error. */ diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index e1a30a16a9fa..9bd19aa14249 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -654,7 +654,7 @@ lpfc_selective_reset(struct lpfc_hba *phba) * Notes: * Assumes any error from lpfc_selective_reset() will be negative. * If lpfc_selective_reset() returns zero then the length of the buffer - * is returned which indicates succcess + * is returned which indicates success * * Returns: * -EINVAL if the buffer does not contain the string "selective" @@ -3147,7 +3147,7 @@ sysfs_ctlreg_write(struct kobject *kobj, struct bin_attribute *bin_attr, * sysfs_ctlreg_read - Read method for reading from ctlreg * @kobj: kernel kobject that contains the kernel class device. * @bin_attr: kernel attributes passed to us. - * @buf: if succesful contains the data from the adapter IOREG space. + * @buf: if successful contains the data from the adapter IOREG space. * @off: offset into buffer to beginning of data. * @count: bytes to transfer. * diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 45337cd23feb..a14ab4580d4e 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -802,7 +802,7 @@ lpfc_cmpl_els_flogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* FLOGI completes successfully */ lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, - "0101 FLOGI completes sucessfully " + "0101 FLOGI completes successfully " "Data: x%x x%x x%x x%x\n", irsp->un.ulpWord[4], sp->cmn.e_d_tov, sp->cmn.w2.r_a_tov, sp->cmn.edtovResolution); @@ -4133,7 +4133,7 @@ lpfc_els_rcv_rscn(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, /* Indicate we are walking fc_rscn_id_list on this vport */ vport->fc_rscn_flush = 1; spin_unlock_irq(shost->host_lock); - /* Get the array count after sucessfully have the token */ + /* Get the array count after successfully have the token */ rscn_cnt = vport->fc_rscn_id_cnt; /* If we are already processing an RSCN, save the received * RSCN payload buffer, cmdiocb->context2 to process later. diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 562d8cee874b..82f8ab5c72cd 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -645,7 +645,7 @@ lpfc_hba_down_prep(struct lpfc_hba *phba) * down the SLI Layer. * * Return codes - * 0 - sucess. + * 0 - success. * Any other value - error. **/ static int @@ -700,7 +700,7 @@ lpfc_hba_down_post_s3(struct lpfc_hba *phba) * down the SLI Layer. * * Return codes - * 0 - sucess. + * 0 - success. * Any other value - error. **/ static int @@ -755,7 +755,7 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba) * uninitialization after the HBA is reset when bring down the SLI Layer. * * Return codes - * 0 - sucess. + * 0 - success. * Any other value - error. **/ int @@ -1254,7 +1254,7 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba) * routine from the API jump table function pointer from the lpfc_hba struct. * * Return codes - * 0 - sucess. + * 0 - success. * Any other value - error. **/ void @@ -3124,7 +3124,7 @@ static void lpfc_log_intr_mode(struct lpfc_hba *phba, uint32_t intr_mode) * PCI devices. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static int @@ -3220,7 +3220,7 @@ lpfc_reset_hba(struct lpfc_hba *phba) * support the SLI-3 HBA device it attached to. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static int @@ -3321,7 +3321,7 @@ lpfc_sli_driver_resource_unset(struct lpfc_hba *phba) * support the SLI-4 HBA device it attached to. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static int @@ -3642,7 +3642,7 @@ lpfc_init_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp) * device specific resource setup to support the HBA device it attached to. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static int @@ -3688,7 +3688,7 @@ lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba) * device specific resource setup to support the HBA device it attached to. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static int @@ -3753,7 +3753,7 @@ lpfc_free_iocb_list(struct lpfc_hba *phba) * list and set up the IOCB tag array accordingly. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static int @@ -3872,7 +3872,7 @@ lpfc_free_active_sgl(struct lpfc_hba *phba) * list and set up the sgl xritag tag array accordingly. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static int @@ -3986,7 +3986,7 @@ out_free_mem: * enabled and the driver is reinitializing the device. * * Return codes - * 0 - sucessful + * 0 - successful * ENOMEM - No availble memory * EIO - The mailbox failed to complete successfully. **/ @@ -4146,7 +4146,7 @@ lpfc_sli4_remove_rpi_hdrs(struct lpfc_hba *phba) * PCI device data structure is set. * * Return codes - * pointer to @phba - sucessful + * pointer to @phba - successful * NULL - error **/ static struct lpfc_hba * @@ -4202,7 +4202,7 @@ lpfc_hba_free(struct lpfc_hba *phba) * host with it. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static int @@ -4365,7 +4365,7 @@ lpfc_post_init_setup(struct lpfc_hba *phba) * with SLI-3 interface spec. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static int @@ -4662,7 +4662,7 @@ lpfc_sli4_bar2_register_memmap(struct lpfc_hba *phba, uint32_t vf) * this routine. * * Return codes - * 0 - sucessful + * 0 - successful * ENOMEM - could not allocated memory. **/ static int @@ -4761,7 +4761,7 @@ lpfc_destroy_bootstrap_mbox(struct lpfc_hba *phba) * allocation for the port. * * Return codes - * 0 - sucessful + * 0 - successful * ENOMEM - No availble memory * EIO - The mailbox failed to complete successfully. **/ @@ -4861,7 +4861,7 @@ lpfc_sli4_read_config(struct lpfc_hba *phba) * HBA consistent with the SLI-4 interface spec. * * Return codes - * 0 - sucessful + * 0 - successful * ENOMEM - No availble memory * EIO - The mailbox failed to complete successfully. **/ @@ -4910,7 +4910,7 @@ lpfc_setup_endian_order(struct lpfc_hba *phba) * we just use some constant number as place holder. * * Return codes - * 0 - sucessful + * 0 - successful * ENOMEM - No availble memory * EIO - The mailbox failed to complete successfully. **/ @@ -5218,7 +5218,7 @@ out_error: * operation. * * Return codes - * 0 - sucessful + * 0 - successful * ENOMEM - No availble memory * EIO - The mailbox failed to complete successfully. **/ @@ -5286,7 +5286,7 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba) * operation. * * Return codes - * 0 - sucessful + * 0 - successful * ENOMEM - No availble memory * EIO - The mailbox failed to complete successfully. **/ @@ -5552,7 +5552,7 @@ out_error: * operation. * * Return codes - * 0 - sucessful + * 0 - successful * ENOMEM - No availble memory * EIO - The mailbox failed to complete successfully. **/ @@ -5599,7 +5599,7 @@ lpfc_sli4_queue_unset(struct lpfc_hba *phba) * Later, this can be used for all the slow-path events. * * Return codes - * 0 - sucessful + * 0 - successful * -ENOMEM - No availble memory **/ static int @@ -5760,7 +5760,7 @@ lpfc_sli4_cq_event_release_all(struct lpfc_hba *phba) * all resources assigned to the PCI function which originates this request. * * Return codes - * 0 - sucessful + * 0 - successful * ENOMEM - No availble memory * EIO - The mailbox failed to complete successfully. **/ @@ -5923,7 +5923,7 @@ lpfc_sli4_fcfi_unreg(struct lpfc_hba *phba, uint16_t fcfi) * with SLI-4 interface spec. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static int @@ -6052,7 +6052,7 @@ lpfc_sli4_pci_mem_unset(struct lpfc_hba *phba) * will be left with MSI-X enabled and leaks its vectors. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static int @@ -6184,7 +6184,7 @@ lpfc_sli_disable_msix(struct lpfc_hba *phba) * is done in this function. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error */ static int @@ -6243,7 +6243,7 @@ lpfc_sli_disable_msi(struct lpfc_hba *phba) * MSI-X -> MSI -> IRQ. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static uint32_t @@ -6333,7 +6333,7 @@ lpfc_sli_disable_intr(struct lpfc_hba *phba) * enabled and leaks its vectors. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static int @@ -6443,7 +6443,7 @@ lpfc_sli4_disable_msix(struct lpfc_hba *phba) * which is done in this function. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static int @@ -6508,7 +6508,7 @@ lpfc_sli4_disable_msi(struct lpfc_hba *phba) * MSI-X -> MSI -> IRQ. * * Return codes - * 0 - sucessful + * 0 - successful * other values - error **/ static uint32_t diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 43cbe336f1f8..42d4f3dae1d6 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -1794,7 +1794,7 @@ lpfc_sli_handle_mb_event(struct lpfc_hba *phba) */ if (lpfc_sli_chk_mbx_command(pmbox->mbxCommand) == MBX_SHUTDOWN) { - /* Unknow mailbox command compl */ + /* Unknown mailbox command compl */ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, "(%d):0323 Unknown Mailbox command " "x%x (x%x) Cmpl\n", @@ -4163,7 +4163,7 @@ lpfc_sli4_read_fcoe_params(struct lpfc_hba *phba, * addition, this routine gets the port vpd data. * * Return codes - * 0 - sucessful + * 0 - successful * ENOMEM - could not allocated memory. **/ static int @@ -11091,7 +11091,7 @@ lpfc_sli4_handle_received_buffer(struct lpfc_hba *phba) * sequential. * * Return codes - * 0 - sucessful + * 0 - successful * EIO - The mailbox failed to complete successfully. * When this error occurs, the driver is not guaranteed * to have any rpi regions posted to the device and @@ -11129,7 +11129,7 @@ lpfc_sli4_post_all_rpi_hdrs(struct lpfc_hba *phba) * maps up to 64 rpi context regions. * * Return codes - * 0 - sucessful + * 0 - successful * ENOMEM - No available memory * EIO - The mailbox failed to complete successfully. **/ @@ -11191,7 +11191,7 @@ lpfc_sli4_post_rpi_hdr(struct lpfc_hba *phba, struct lpfc_rpi_hdr *rpi_page) * PAGE_SIZE modulo 64 rpi context headers. * * Returns - * A nonzero rpi defined as rpi_base <= rpi < max_rpi if sucessful + * A nonzero rpi defined as rpi_base <= rpi < max_rpi if successful * LPFC_RPI_ALLOC_ERROR if no rpis are available. **/ int diff --git a/drivers/scsi/megaraid.h b/drivers/scsi/megaraid.h index 512c2cc1a33f..d310f49d077e 100644 --- a/drivers/scsi/megaraid.h +++ b/drivers/scsi/megaraid.h @@ -381,7 +381,7 @@ typedef struct { u8 battery_status; /* * BIT 0: battery module missing * BIT 1: VBAD - * BIT 2: temprature high + * BIT 2: temperature high * BIT 3: battery pack missing * BIT 4,5: * 00 - charge complete diff --git a/drivers/scsi/megaraid/mbox_defs.h b/drivers/scsi/megaraid/mbox_defs.h index b25b74764ec3..ce2487a888ed 100644 --- a/drivers/scsi/megaraid/mbox_defs.h +++ b/drivers/scsi/megaraid/mbox_defs.h @@ -497,7 +497,7 @@ typedef struct { * @inserted_drive : channel:Id of inserted drive * @battery_status : bit 0: battery module missing * bit 1: VBAD - * bit 2: temprature high + * bit 2: temperature high * bit 3: battery pack missing * bit 4,5: * 00 - charge complete diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c index 234f0b7eb21c..f9ae8037a710 100644 --- a/drivers/scsi/megaraid/megaraid_mbox.c +++ b/drivers/scsi/megaraid/megaraid_mbox.c @@ -2704,7 +2704,7 @@ megaraid_reset_handler(struct scsi_cmnd *scp) } else { con_log(CL_ANN, (KERN_NOTICE - "megaraid mbox: reset sequence completed sucessfully\n")); + "megaraid mbox: reset sequence completed successfully\n")); } diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c index 86ab32d7ab15..756e509d495c 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c +++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c @@ -2894,7 +2894,7 @@ _scsih_normalize_sense(char *sense_buffer, struct sense_info *data) #ifdef CONFIG_SCSI_MPT2SAS_LOGGING /** - * _scsih_scsi_ioc_info - translated non-succesfull SCSI_IO request + * _scsih_scsi_ioc_info - translated non-successfull SCSI_IO request * @ioc: per adapter object * @scmd: pointer to scsi command object * @mpi_reply: reply mf payload returned from firmware diff --git a/drivers/scsi/ncr53c8xx.c b/drivers/scsi/ncr53c8xx.c index e3c482aa87b5..a2d569828308 100644 --- a/drivers/scsi/ncr53c8xx.c +++ b/drivers/scsi/ncr53c8xx.c @@ -6495,7 +6495,7 @@ static void ncr_int_ma (struct ncb *np) ** we force a SIR_NEGO_PROTO interrupt (it is a hack that avoids ** bloat for such a should_not_happen situation). ** In all other situation, we reset the BUS. - ** Are these assumptions reasonnable ? (Wait and see ...) + ** Are these assumptions reasonable ? (Wait and see ...) */ unexpected_phase: dsp -= 8; diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index f7c70e2a8224..d3d39f86fcf7 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -3342,7 +3342,7 @@ static int pmcraid_chr_fasync(int fd, struct file *filep, int mode) * @direction : data transfer direction * * Return value - * 0 on sucess, non-zero error code on failure + * 0 on success, non-zero error code on failure */ static int pmcraid_build_passthrough_ioadls( struct pmcraid_cmd *cmd, @@ -3401,7 +3401,7 @@ static int pmcraid_build_passthrough_ioadls( * @direction: data transfer direction * * Return value - * 0 on sucess, non-zero error code on failure + * 0 on success, non-zero error code on failure */ static void pmcraid_release_passthrough_ioadls( struct pmcraid_cmd *cmd, @@ -3429,7 +3429,7 @@ static void pmcraid_release_passthrough_ioadls( * @arg: pointer to pmcraid_passthrough_buffer user buffer * * Return value - * 0 on sucess, non-zero error code on failure + * 0 on success, non-zero error code on failure */ static long pmcraid_ioctl_passthrough( struct pmcraid_instance *pinstance, diff --git a/drivers/scsi/pmcraid.h b/drivers/scsi/pmcraid.h index 3441b3f90827..2752b56cad56 100644 --- a/drivers/scsi/pmcraid.h +++ b/drivers/scsi/pmcraid.h @@ -771,11 +771,11 @@ static struct pmcraid_ioasc_error pmcraid_ioasc_error_table[] = { {0x01180600, IOASC_LOG_LEVEL_MUST, "Recovered Error, soft media error, sector reassignment suggested"}, {0x015D0000, IOASC_LOG_LEVEL_MUST, - "Recovered Error, failure prediction thresold exceeded"}, + "Recovered Error, failure prediction threshold exceeded"}, {0x015D9200, IOASC_LOG_LEVEL_MUST, - "Recovered Error, soft Cache Card Battery error thresold"}, + "Recovered Error, soft Cache Card Battery error threshold"}, {0x015D9200, IOASC_LOG_LEVEL_MUST, - "Recovered Error, soft Cache Card Battery error thresold"}, + "Recovered Error, soft Cache Card Battery error threshold"}, {0x02048000, IOASC_LOG_LEVEL_MUST, "Not Ready, IOA Reset Required"}, {0x02408500, IOASC_LOG_LEVEL_MUST, diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c index 723fdecd91bd..0fd6ae6911ad 100644 --- a/drivers/scsi/scsi_netlink.c +++ b/drivers/scsi/scsi_netlink.c @@ -613,7 +613,7 @@ EXPORT_SYMBOL_GPL(scsi_nl_send_transport_msg); * @data_buf: pointer to vendor unique data buffer * * Returns: - * 0 on succesful return + * 0 on successful return * otherwise, failing error code * * Notes: diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index fd47cb1bee1b..f27e52d963d3 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -666,7 +666,7 @@ EXPORT_SYMBOL(sas_phy_add); * * Note: * This function must only be called on a PHY that has not - * sucessfully been added using sas_phy_add(). + * successfully been added using sas_phy_add(). */ void sas_phy_free(struct sas_phy *phy) { @@ -896,7 +896,7 @@ EXPORT_SYMBOL(sas_port_add); * * Note: * This function must only be called on a PORT that has not - * sucessfully been added using sas_port_add(). + * successfully been added using sas_port_add(). */ void sas_port_free(struct sas_port *port) { @@ -1476,7 +1476,7 @@ EXPORT_SYMBOL(sas_rphy_add); * * Note: * This function must only be called on a remote - * PHY that has not sucessfully been added using + * PHY that has not successfully been added using * sas_rphy_add() (or has been sas_rphy_remove()'d) */ void sas_rphy_free(struct sas_rphy *rphy) diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c index 45374d66d26a..2b38f6ad6e11 100644 --- a/drivers/scsi/sym53c8xx_2/sym_glue.c +++ b/drivers/scsi/sym53c8xx_2/sym_glue.c @@ -1864,7 +1864,7 @@ static pci_ers_result_t sym2_io_slot_dump(struct pci_dev *pdev) * * This routine is similar to sym_set_workarounds(), except * that, at this point, we already know that the device was - * succesfully intialized at least once before, and so most + * successfully intialized at least once before, and so most * of the steps taken there are un-needed here. */ static void sym2_reset_workarounds(struct pci_dev *pdev) diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.c b/drivers/scsi/sym53c8xx_2/sym_hipd.c index 297deb817a5d..a7bc8b7b09ac 100644 --- a/drivers/scsi/sym53c8xx_2/sym_hipd.c +++ b/drivers/scsi/sym53c8xx_2/sym_hipd.c @@ -2692,7 +2692,7 @@ static void sym_int_ma (struct sym_hcb *np) * we force a SIR_NEGO_PROTO interrupt (it is a hack that avoids * bloat for such a should_not_happen situation). * In all other situation, we reset the BUS. - * Are these assumptions reasonnable ? (Wait and see ...) + * Are these assumptions reasonable ? (Wait and see ...) */ unexpected_phase: dsp -= 8; diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.h b/drivers/scsi/sym53c8xx_2/sym_hipd.h index 053e63c86822..5a80cbac3f92 100644 --- a/drivers/scsi/sym53c8xx_2/sym_hipd.h +++ b/drivers/scsi/sym53c8xx_2/sym_hipd.h @@ -54,7 +54,7 @@ * * SYM_OPT_LIMIT_COMMAND_REORDERING * When this option is set, the driver tries to limit tagged - * command reordering to some reasonnable value. + * command reordering to some reasonable value. * (set for Linux) */ #if 0 diff --git a/drivers/serial/8250_pnp.c b/drivers/serial/8250_pnp.c index d71dfe398940..36ede02ceacf 100644 --- a/drivers/serial/8250_pnp.c +++ b/drivers/serial/8250_pnp.c @@ -361,9 +361,9 @@ static const struct pnp_device_id pnp_dev_table[] = { { "LTS0001", 0 }, /* Rockwell's (PORALiNK) 33600 INT PNP */ { "WCI0003", 0 }, - /* Unkown PnP modems */ + /* Unknown PnP modems */ { "PNPCXXX", UNKNOWN_DEV }, - /* More unkown PnP modems */ + /* More unknown PnP modems */ { "PNPDXXX", UNKNOWN_DEV }, { "", 0 } }; diff --git a/drivers/serial/pmac_zilog.h b/drivers/serial/pmac_zilog.h index 570b0d925e83..f6e77f12acd5 100644 --- a/drivers/serial/pmac_zilog.h +++ b/drivers/serial/pmac_zilog.h @@ -73,7 +73,7 @@ static inline struct uart_pmac_port *pmz_get_port_A(struct uart_pmac_port *uap) } /* - * Register acessors. Note that we don't need to enforce a recovery + * Register accessors. Note that we don't need to enforce a recovery * delay on PCI PowerMac hardware, it's dealt in HW by the MacIO chip, * though if we try to use this driver on older machines, we might have * to add it back diff --git a/drivers/serial/ucc_uart.c b/drivers/serial/ucc_uart.c index 0c08f286a2ef..46de564aaea0 100644 --- a/drivers/serial/ucc_uart.c +++ b/drivers/serial/ucc_uart.c @@ -313,7 +313,7 @@ static void qe_uart_stop_tx(struct uart_port *port) * This function will attempt to stuff of all the characters from the * kernel's transmit buffer into TX BDs. * - * A return value of non-zero indicates that it sucessfully stuffed all + * A return value of non-zero indicates that it successfully stuffed all * characters from the kernel buffer. * * A return value of zero indicates that there are still characters in the diff --git a/drivers/telephony/ixj.c b/drivers/telephony/ixj.c index 40de151f2789..e89304c72568 100644 --- a/drivers/telephony/ixj.c +++ b/drivers/telephony/ixj.c @@ -4190,7 +4190,7 @@ static void ixj_aec_start(IXJ *j, int level) ixj_WriteDSPCommand(0x1224, j); ixj_WriteDSPCommand(0xE014, j); - ixj_WriteDSPCommand(0x0003, j); /* Lock threashold at 3dB */ + ixj_WriteDSPCommand(0x0003, j); /* Lock threshold at 3dB */ ixj_WriteDSPCommand(0xE338, j); /* Set Echo Suppresser Attenuation to 0dB */ @@ -4235,7 +4235,7 @@ static void ixj_aec_start(IXJ *j, int level) ixj_WriteDSPCommand(0x1224, j); ixj_WriteDSPCommand(0xE014, j); - ixj_WriteDSPCommand(0x0003, j); /* Lock threashold at 3dB */ + ixj_WriteDSPCommand(0x0003, j); /* Lock threshold at 3dB */ ixj_WriteDSPCommand(0xE338, j); /* Set Echo Suppresser Attenuation to 0dB */ diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c index d171b563e94c..bba4d3eabe0f 100644 --- a/drivers/usb/atm/ueagle-atm.c +++ b/drivers/usb/atm/ueagle-atm.c @@ -1958,7 +1958,7 @@ static void uea_dispatch_cmv_e1(struct uea_softc *sc, struct intr_pkt *intr) goto bad1; /* FIXME : ADI930 reply wrong preambule (func = 2, sub = 2) to - * the first MEMACESS cmv. Ignore it... + * the first MEMACCESS cmv. Ignore it... */ if (cmv->bFunction != dsc->function) { if (UEA_CHIP_VERSION(sc) == ADI930 diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index 2473cf0c6b1d..b4bd2411c666 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -1,5 +1,5 @@ /** - * drivers/usb/class/usbtmc.c - USB Test & Measurment class driver + * drivers/usb/class/usbtmc.c - USB Test & Measurement class driver * * Copyright (C) 2007 Stefan Kopp, Gechingen, Germany * Copyright (C) 2008 Novell, Inc. diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index da718e84d58d..e80f1af438c8 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -1890,7 +1890,7 @@ static void cancel_async_set_config(struct usb_device *udev) * routine gets around the normal restrictions by using a work thread to * submit the change-config request. * - * Returns 0 if the request was succesfully queued, error code otherwise. + * Returns 0 if the request was successfully queued, error code otherwise. * The caller has no way to know whether the queued request will eventually * succeed. */ diff --git a/drivers/usb/gadget/f_acm.c b/drivers/usb/gadget/f_acm.c index 7953948bfe4a..4e3657808b0f 100644 --- a/drivers/usb/gadget/f_acm.c +++ b/drivers/usb/gadget/f_acm.c @@ -432,7 +432,7 @@ static void acm_disable(struct usb_function *f) * @length: size of data * Context: irqs blocked, acm->lock held, acm_notify_req non-null * - * Returns zero on sucess or a negative errno. + * Returns zero on success or a negative errno. * * See section 6.3.5 of the CDC 1.1 specification for information * about the only notification we issue: SerialState change. diff --git a/drivers/usb/gadget/pxa27x_udc.c b/drivers/usb/gadget/pxa27x_udc.c index 1937d8c7b433..adda1208a1ec 100644 --- a/drivers/usb/gadget/pxa27x_udc.c +++ b/drivers/usb/gadget/pxa27x_udc.c @@ -1524,7 +1524,7 @@ static int pxa_udc_get_frame(struct usb_gadget *_gadget) * pxa_udc_wakeup - Force udc device out of suspend * @_gadget: usb gadget * - * Returns 0 if succesfull, error code otherwise + * Returns 0 if successfull, error code otherwise */ static int pxa_udc_wakeup(struct usb_gadget *_gadget) { diff --git a/drivers/usb/host/fhci-sched.c b/drivers/usb/host/fhci-sched.c index 62a226b61670..00a29855d0c4 100644 --- a/drivers/usb/host/fhci-sched.c +++ b/drivers/usb/host/fhci-sched.c @@ -627,7 +627,7 @@ irqreturn_t fhci_irq(struct usb_hcd *hcd) /* - * Process normal completions(error or sucess) and clean the schedule. + * Process normal completions(error or success) and clean the schedule. * * This is the main path for handing urbs back to drivers. The only other patth * is process_del_list(),which unlinks URBs by scanning EDs,instead of scanning diff --git a/drivers/usb/wusbcore/crypto.c b/drivers/usb/wusbcore/crypto.c index 9ec7fd5da489..9579cf4c38bf 100644 --- a/drivers/usb/wusbcore/crypto.c +++ b/drivers/usb/wusbcore/crypto.c @@ -111,7 +111,7 @@ struct aes_ccm_b1 { * * CCM uses Ax blocks to generate a keystream with which the MIC and * the message's payload are encoded. A0 always encrypts/decrypts the - * MIC. Ax (x>0) are used for the sucesive payload blocks. + * MIC. Ax (x>0) are used for the successive payload blocks. * * The x is the counter, and is increased for each block. */ diff --git a/drivers/usb/wusbcore/wa-xfer.c b/drivers/usb/wusbcore/wa-xfer.c index 613a5fc490d3..489b47833e2c 100644 --- a/drivers/usb/wusbcore/wa-xfer.c +++ b/drivers/usb/wusbcore/wa-xfer.c @@ -558,7 +558,7 @@ static void wa_seg_dto_cb(struct urb *urb) /* * Callback for the segment request * - * If succesful transition state (unless already transitioned or + * If successful transition state (unless already transitioned or * outbound transfer); otherwise, take a note of the error, mark this * segment done and try completion. * @@ -1364,7 +1364,7 @@ segment_aborted: /* * Callback for the IN data phase * - * If succesful transition state; otherwise, take a note of the + * If successful transition state; otherwise, take a note of the * error, mark this segment done and try completion. * * Note we don't access until we are sure that the transfer hasn't diff --git a/drivers/uwb/i1480/dfu/usb.c b/drivers/uwb/i1480/dfu/usb.c index c7080d497311..0bb665a0c024 100644 --- a/drivers/uwb/i1480/dfu/usb.c +++ b/drivers/uwb/i1480/dfu/usb.c @@ -229,7 +229,7 @@ void i1480_usb_neep_cb(struct urb *urb) * will verify it. * * Set i1480->evt_result with the result of getting the event or its - * size (if succesful). + * size (if successful). * * Delivers the data directly to i1480->evt_buf */ diff --git a/drivers/uwb/wlp/txrx.c b/drivers/uwb/wlp/txrx.c index 86a853b84119..7350ed6909f8 100644 --- a/drivers/uwb/wlp/txrx.c +++ b/drivers/uwb/wlp/txrx.c @@ -282,7 +282,7 @@ EXPORT_SYMBOL_GPL(wlp_receive_frame); * and transmission will be done by the calling function. * @dst: On return this will contain the device address to which the * frame is destined. - * @returns: 0 on success no tx : WLP header sucessfully applied to skb buffer, + * @returns: 0 on success no tx : WLP header successfully applied to skb buffer, * calling function can proceed with tx * 1 on success with tx : WLP will take over transmission of this * frame diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c index 913b4a47ae52..1ddeb4c34763 100644 --- a/drivers/video/aty/atyfb_base.c +++ b/drivers/video/aty/atyfb_base.c @@ -3276,7 +3276,7 @@ static void __devinit aty_init_lcd(struct atyfb_par *par, u32 bios_base) txtformat = "24 bit interface"; break; default: - txtformat = "unkown format"; + txtformat = "unknown format"; } } else { switch (format & 7) { @@ -3299,7 +3299,7 @@ static void __devinit aty_init_lcd(struct atyfb_par *par, u32 bios_base) txtformat = "262144 colours (FDPI-2 mode)"; break; default: - txtformat = "unkown format"; + txtformat = "unknown format"; } } PRINTKI("%s%s %s monitor detected: %s\n", diff --git a/drivers/video/backlight/atmel-pwm-bl.c b/drivers/video/backlight/atmel-pwm-bl.c index 505c0823a105..2cf7ba52f67c 100644 --- a/drivers/video/backlight/atmel-pwm-bl.c +++ b/drivers/video/backlight/atmel-pwm-bl.c @@ -158,7 +158,7 @@ static int atmel_pwm_bl_probe(struct platform_device *pdev) goto err_free_pwm; } - /* Turn display off by defatult. */ + /* Turn display off by default. */ retval = gpio_direction_output(pwmbl->gpio_on, 0 ^ pdata->on_active_low); if (retval) diff --git a/drivers/video/backlight/tosa_lcd.c b/drivers/video/backlight/tosa_lcd.c index 50ec17dfc517..fa32b94a4546 100644 --- a/drivers/video/backlight/tosa_lcd.c +++ b/drivers/video/backlight/tosa_lcd.c @@ -177,7 +177,7 @@ static int __devinit tosa_lcd_probe(struct spi_device *spi) if (!data) return -ENOMEM; - data->is_vga = true; /* defaut to VGA mode */ + data->is_vga = true; /* default to VGA mode */ /* * bits_per_word cannot be configured in platform data diff --git a/drivers/video/console/sticore.c b/drivers/video/console/sticore.c index 857b3668b3ba..6468a297e341 100644 --- a/drivers/video/console/sticore.c +++ b/drivers/video/console/sticore.c @@ -436,7 +436,7 @@ sti_init_glob_cfg(struct sti_struct *sti, (offs < PCI_BASE_ADDRESS_0 || offs > PCI_BASE_ADDRESS_5)) { printk (KERN_WARNING - "STI pci region maping for region %d (%02x) can't be mapped\n", + "STI pci region mapping for region %d (%02x) can't be mapped\n", i,sti->rm_entry[i]); continue; } diff --git a/drivers/video/gbefb.c b/drivers/video/gbefb.c index 1a83709f9611..492e6e64b653 100644 --- a/drivers/video/gbefb.c +++ b/drivers/video/gbefb.c @@ -701,7 +701,7 @@ static int gbefb_set_par(struct fb_info *info) blocks of 512x128, 256x128 or 128x128 pixels, respectively for 8bit, 16bit and 32 bit modes (64 kB). They cover the screen with partial tiles on the right and/or bottom of the screen if needed. - For exemple in 640x480 8 bit mode the mapping is: + For example in 640x480 8 bit mode the mapping is: <-------- 640 -----> <---- 512 ----><128|384 offscreen> diff --git a/drivers/video/stifb.c b/drivers/video/stifb.c index 6120f0c526fe..876648e15e9d 100644 --- a/drivers/video/stifb.c +++ b/drivers/video/stifb.c @@ -756,9 +756,9 @@ hyperResetPlanes(struct stifb_info *fb, int enable) if (fb->info.var.bits_per_pixel == 32) controlPlaneReg = 0x04000F00; else - controlPlaneReg = 0x00000F00; /* 0x00000800 should be enought, but lets clear all 4 bits */ + controlPlaneReg = 0x00000F00; /* 0x00000800 should be enough, but lets clear all 4 bits */ else - controlPlaneReg = 0x00000F00; /* 0x00000100 should be enought, but lets clear all 4 bits */ + controlPlaneReg = 0x00000F00; /* 0x00000100 should be enough, but lets clear all 4 bits */ switch (enable) { case ENABLE: diff --git a/drivers/video/tdfxfb.c b/drivers/video/tdfxfb.c index ff43c8885028..980548390048 100644 --- a/drivers/video/tdfxfb.c +++ b/drivers/video/tdfxfb.c @@ -52,7 +52,7 @@ * * 0.1.3 (released 1999-11-02) added Attila's panning support, code * reorg, hwcursor address page size alignment - * (for mmaping both frame buffer and regs), + * (for mmapping both frame buffer and regs), * and my changes to get rid of hardcoded * VGA i/o register locations (uses PCI * configuration info now) diff --git a/drivers/video/via/dvi.c b/drivers/video/via/dvi.c index c5c32b6b6e6c..67b36932212b 100644 --- a/drivers/video/via/dvi.c +++ b/drivers/video/via/dvi.c @@ -467,7 +467,7 @@ static int dvi_get_panel_size_from_DDCv1(void) default: viaparinfo->tmds_setting_info->dvi_panel_size = VIA_RES_1024X768; - DEBUG_MSG(KERN_INFO "Unknow panel size max resolution = %d !\ + DEBUG_MSG(KERN_INFO "Unknown panel size max resolution = %d !\ set default panel size.\n", max_h); break; } @@ -534,7 +534,7 @@ static int dvi_get_panel_size_from_DDCv2(void) default: viaparinfo->tmds_setting_info->dvi_panel_size = VIA_RES_1024X768; - DEBUG_MSG(KERN_INFO "Unknow panel size max resolution = %d!\ + DEBUG_MSG(KERN_INFO "Unknown panel size max resolution = %d!\ set default panel size.\n", HSize); break; } diff --git a/drivers/video/vt8623fb.c b/drivers/video/vt8623fb.c index 3df17dc8c3d7..65ccd215d496 100644 --- a/drivers/video/vt8623fb.c +++ b/drivers/video/vt8623fb.c @@ -446,7 +446,7 @@ static int vt8623fb_set_par(struct fb_info *info) svga_wseq_mask(0x1E, 0xF0, 0xF0); // DI/DVP bus svga_wseq_mask(0x2A, 0x0F, 0x0F); // DI/DVP bus - svga_wseq_mask(0x16, 0x08, 0xBF); // FIFO read treshold + svga_wseq_mask(0x16, 0x08, 0xBF); // FIFO read threshold vga_wseq(NULL, 0x17, 0x1F); // FIFO depth vga_wseq(NULL, 0x18, 0x4E); svga_wseq_mask(0x1A, 0x08, 0x08); // enable MMIO ? diff --git a/drivers/watchdog/coh901327_wdt.c b/drivers/watchdog/coh901327_wdt.c index 381026c0bd7b..923cc68dba26 100644 --- a/drivers/watchdog/coh901327_wdt.c +++ b/drivers/watchdog/coh901327_wdt.c @@ -508,7 +508,7 @@ void coh901327_watchdog_reset(void) * deactivating the watchdog before it is shut down by it. * * NOTE: on future versions of the watchdog, this restriction is - * gone: the watchdog will be reloaded with a defaul value (1 min) + * gone: the watchdog will be reloaded with a default value (1 min) * instead of last value, and you can conveniently set the watchdog * timeout to 10ms (value = 1) without any problems. */ diff --git a/drivers/watchdog/machzwd.c b/drivers/watchdog/machzwd.c index b6b3f59ab446..47d719717a3b 100644 --- a/drivers/watchdog/machzwd.c +++ b/drivers/watchdog/machzwd.c @@ -21,7 +21,7 @@ * wd#1 - 2 seconds; * wd#2 - 7.2 ms; * After the expiration of wd#1, it can generate a NMI, SCI, SMI, or - * a system RESET and it starts wd#2 that unconditionaly will RESET + * a system RESET and it starts wd#2 that unconditionally will RESET * the system when the counter reaches zero. * * 14-Dec-2001 Matt Domsch diff --git a/drivers/watchdog/wdrtas.c b/drivers/watchdog/wdrtas.c index 3bde56bce63a..5bfb1f2c5319 100644 --- a/drivers/watchdog/wdrtas.c +++ b/drivers/watchdog/wdrtas.c @@ -542,7 +542,7 @@ static struct notifier_block wdrtas_notifier = { /** * wdrtas_get_tokens - reads in RTAS tokens * - * returns 0 on succes, <0 on failure + * returns 0 on success, <0 on failure * * wdrtas_get_tokens reads in the tokens for the RTAS calls used in * this watchdog driver. It tolerates, if "get-sensor-state" and @@ -598,7 +598,7 @@ static void wdrtas_unregister_devs(void) /** * wdrtas_register_devs - registers the misc dev handlers * - * returns 0 on succes, <0 on failure + * returns 0 on success, <0 on failure * * wdrtas_register_devs registers the watchdog and temperature watchdog * misc devs @@ -630,7 +630,7 @@ static int wdrtas_register_devs(void) /** * wdrtas_init - init function of the watchdog driver * - * returns 0 on succes, <0 on failure + * returns 0 on success, <0 on failure * * registers the file handlers and the reboot notifier */ diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index b9b3bb51b1e4..d15ea1790bfb 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -767,7 +767,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) current->mm->start_stack = bprm->p; - /* Now we do a little grungy work by mmaping the ELF image into + /* Now we do a little grungy work by mmapping the ELF image into the correct location in memory. */ for(i = 0, elf_ppnt = elf_phdata; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { diff --git a/fs/bio.c b/fs/bio.c index 12da5db8682c..2bd671a08e3b 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -272,7 +272,7 @@ EXPORT_SYMBOL(bio_init); * for a &struct bio to become free. If a %NULL @bs is passed in, we will * fall back to just using @kmalloc to allocate the required memory. * - * Note that the caller must set ->bi_destructor on succesful return + * Note that the caller must set ->bi_destructor on successful return * of a bio, to do the appropriate freeing of the bio once the reference * count drops to zero. **/ diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 2c726b7b9faa..6815d2a84b94 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -256,7 +256,7 @@ out: * Insert @em into @tree or perform a simple forward/backward merge with * existing mappings. The extent_map struct passed in will be inserted * into the tree directly, with an additional reference taken, or a - * reference dropped if the merge attempt was sucessfull. + * reference dropped if the merge attempt was successfull. */ int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) diff --git a/fs/cifs/README b/fs/cifs/README index 79c1a93400be..a727b7cb075f 100644 --- a/fs/cifs/README +++ b/fs/cifs/README @@ -423,7 +423,7 @@ A partial list of the supported mount options follows: source name to use to represent the client netbios machine name when doing the RFC1001 netbios session initialize. direct Do not do inode data caching on files opened on this mount. - This precludes mmaping files on this mount. In some cases + This precludes mmapping files on this mount. In some cases with fast networks and little or no caching benefits on the client (e.g. when the application is doing large sequential reads bigger than page size without rereading the same data) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 5d0fde18039c..4b35f7ec0583 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -39,7 +39,7 @@ /* * MAX_REQ is the maximum number of requests that WE will send - * on one socket concurently. It also matches the most common + * on one socket concurrently. It also matches the most common * value of max multiplex returned by servers. We may * eventually want to use the negotiated value (in case * future servers can handle more) when we are more confident that diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 5e2492535daa..83580213fcac 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -917,8 +917,8 @@ undo_setattr: /* * If dentry->d_inode is null (usually meaning the cached dentry * is a negative dentry) then we would attempt a standard SMB delete, but - * if that fails we can not attempt the fall back mechanisms on EACESS - * but will return the EACESS to the caller. Note that the VFS does not call + * if that fails we can not attempt the fall back mechanisms on EACCESS + * but will return the EACCESS to the caller. Note that the VFS does not call * unlink on negative dentries currently. */ int cifs_unlink(struct inode *dir, struct dentry *dentry) diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c index 224a1f478966..b6b6dcb500bf 100644 --- a/fs/cifs/smbdes.c +++ b/fs/cifs/smbdes.c @@ -371,7 +371,7 @@ E_P24(unsigned char *p21, const unsigned char *c8, unsigned char *p24) smbhash(p24 + 16, c8, p21 + 14, 1); } -#if 0 /* currently unsued */ +#if 0 /* currently unused */ static void D_P16(unsigned char *p14, unsigned char *in, unsigned char *out) { diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index 16f682e26c07..b540aa5d1f61 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -143,7 +143,7 @@ out: } EXPORT_SYMBOL_GPL(dlm_posix_lock); -/* Returns failure iff a succesful lock operation should be canceled */ +/* Returns failure iff a successful lock operation should be canceled */ static int dlm_plock_callback(struct plock_op *op) { struct file *file; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 618ca95cbb59..0282ec78cf8f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2932,7 +2932,7 @@ retry: ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd); /* - * If we have a contigous extent of pages and we + * If we have a contiguous extent of pages and we * haven't done the I/O yet, map the blocks and submit * them for I/O. */ @@ -5370,7 +5370,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) * worse case, the indexs blocks spread over different block groups * * If datablocks are discontiguous, they are possible to spread over - * different block groups too. If they are contiugous, with flexbg, + * different block groups too. If they are contiuguous, with flexbg, * they could still across block group boundary. * * Also account for superblock, inode, quota and xattr blocks @@ -5446,7 +5446,7 @@ int ext4_writepage_trans_blocks(struct inode *inode) * Calculate the journal credits for a chunk of data modification. * * This is called from DIO, fallocate or whoever calling - * ext4_get_blocks() to map/allocate a chunk of contigous disk blocks. + * ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks. * * journal buffers for data blocks are not included here, as DIO * and fallocate do no need to journal data buffers. diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index bba12824defa..74e495dabe09 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -142,7 +142,7 @@ * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The * value of s_mb_order2_reqs can be tuned via * /sys/fs/ext4//mb_order2_req. If the request len is equal to - * stripe size (sbi->s_stripe), we try to search for contigous block in + * stripe size (sbi->s_stripe), we try to search for contiguous block in * stripe size. This should result in better allocation on RAID setups. If * not, we search in the specific group using bitmap for best extents. The * tunable min_to_scan and max_to_scan control the behaviour here. diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c index f25e70c1b51c..f0294410868d 100644 --- a/fs/jffs2/compr.c +++ b/fs/jffs2/compr.c @@ -177,7 +177,7 @@ uint16_t jffs2_compress(struct jffs2_sb_info *c, struct jffs2_inode_info *f, spin_unlock(&jffs2_compressor_list_lock); break; default: - printk(KERN_ERR "JFFS2: unknow compression mode.\n"); + printk(KERN_ERR "JFFS2: unknown compression mode.\n"); } out: if (ret == JFFS2_COMPR_NONE) { diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index 1a80301004b8..378991cfe40f 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -931,7 +931,7 @@ static inline int read_unknown(struct jffs2_sb_info *c, struct jffs2_raw_node_re * Helper function for jffs2_get_inode_nodes(). * The function detects whether more data should be read and reads it if yes. * - * Returns: 0 on succes; + * Returns: 0 on success; * negative error code on failure. */ static int read_more(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index 082e844ab2db..4b107881acd5 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -31,7 +31,7 @@ * is used to release xattr name/value pair and detach from c->xattrindex. * reclaim_xattr_datum(c) * is used to reclaim xattr name/value pairs on the xattr name/value pair cache when - * memory usage by cache is over c->xdatum_mem_threshold. Currentry, this threshold + * memory usage by cache is over c->xdatum_mem_threshold. Currently, this threshold * is hard coded as 32KiB. * do_verify_xattr_datum(c, xd) * is used to load the xdatum informations without name/value pair from the medium. diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 2bc7d8aa5740..d9b031cf69f5 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -755,7 +755,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) * allocation group. */ if ((blkno & (bmp->db_agsize - 1)) == 0) - /* check if the AG is currenly being written to. + /* check if the AG is currently being written to. * if so, call dbNextAG() to find a non-busy * AG with sufficient free space. */ @@ -3337,7 +3337,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) for (i = 0, n = 0; i < agno; n++) { bmp->db_agfree[n] = 0; /* init collection point */ - /* coalesce cotiguous k AGs; */ + /* coalesce contiguous k AGs; */ for (j = 0; j < k && i < agno; j++, i++) { /* merge AGi to AGn */ bmp->db_agfree[n] += bmp->db_agfree[i]; diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 0d58caf4a6e1..ec8f45f12e05 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -835,7 +835,7 @@ static int ncp_ioctl_need_write(unsigned int cmd) case NCP_IOC_SETROOT: return 0; default: - /* unkown IOCTL command, assume write */ + /* unknown IOCTL command, assume write */ return 1; } } diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index 9669541d0119..08f7530e9341 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c @@ -927,7 +927,7 @@ lock_retry_remap: return 0; ntfs_debug("Failed. Returning error code %s.", err == -EOVERFLOW ? - "EOVERFLOW" : (!err ? "EIO" : "unkown error")); + "EOVERFLOW" : (!err ? "EIO" : "unknown error")); return err < 0 ? err : -EIO; read_err: diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 663c0e341f8b..43179ddd336f 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -399,7 +399,7 @@ static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov, * @cached_page: allocated but as yet unused page * @lru_pvec: lru-buffering pagevec of caller * - * Obtain @nr_pages locked page cache pages from the mapping @maping and + * Obtain @nr_pages locked page cache pages from the mapping @mapping and * starting at index @index. * * If a page is newly created, increment its refcount and add it to the @@ -1281,7 +1281,7 @@ rl_not_mapped_enoent: /* * Copy as much as we can into the pages and return the number of bytes which - * were sucessfully copied. If a fault is encountered then clear the pages + * were successfully copied. If a fault is encountered then clear the pages * out to (ofs + bytes) and return the number of bytes which were copied. */ static inline size_t ntfs_copy_from_user(struct page **pages, diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c index 89b02985c054..4dadcdf3d451 100644 --- a/fs/ntfs/logfile.c +++ b/fs/ntfs/logfile.c @@ -338,7 +338,7 @@ err_out: * copy of the complete multi sector transfer deprotected page. On failure, * *@wrp is undefined. * - * Simillarly, if @lsn is not NULL, on succes *@lsn will be set to the current + * Simillarly, if @lsn is not NULL, on success *@lsn will be set to the current * logfile lsn according to this restart page. On failure, *@lsn is undefined. * * The following error codes are defined: diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 38a42f5d59ff..7c7198a5bc90 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -2398,7 +2398,7 @@ static int ocfs2_leftmost_rec_contains(struct ocfs2_extent_list *el, u32 cpos) * * The array is assumed to be large enough to hold an entire path (tree depth). * - * Upon succesful return from this function: + * Upon successful return from this function: * * - The 'right_path' array will contain a path to the leaf block * whose range contains e_cpos. diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 83bcaf266b35..03ccf9a7b1f4 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2586,7 +2586,7 @@ fail: * is complete everywhere. if the target dies while this is * going on, some nodes could potentially see the target as the * master, so it is important that my recovery finds the migration - * mle and sets the master to UNKNONWN. */ + * mle and sets the master to UNKNOWN. */ /* wait for new node to assert master */ diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 0d38d67194cb..c5e4a49e3a12 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -1855,7 +1855,7 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock) * outstanding lock request, so a cancel convert is * required. We intentionally overwrite 'ret' - if the * cancel fails and the lock was granted, it's easier - * to just bubble sucess back up to the user. + * to just bubble success back up to the user. */ ret = ocfs2_flock_handle_signal(lockres, level); } else if (!ret && (level > lockres->l_level)) { diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 54c16b66327e..bf34c491ae96 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -659,7 +659,7 @@ static int __ocfs2_journal_access(handle_t *handle, default: status = -EINVAL; - mlog(ML_ERROR, "Uknown access type!\n"); + mlog(ML_ERROR, "Unknown access type!\n"); } if (!status && ocfs2_meta_ecc(osb) && triggers) jbd2_journal_set_triggers(bh, &triggers->ot_triggers); diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 60287fc56bcb..d00c658ca150 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -2431,7 +2431,7 @@ out: * we gonna touch and whether we need to create new blocks. * * Normally the refcount blocks store these refcount should be - * continguous also, so that we can get the number easily. + * contiguous also, so that we can get the number easily. * As for meta_ac, we will at most add split 2 refcount record and * 2 more refcount block, so just check it in a rough way. * diff --git a/fs/omfs/bitmap.c b/fs/omfs/bitmap.c index e1c0ec0ae989..082234581d05 100644 --- a/fs/omfs/bitmap.c +++ b/fs/omfs/bitmap.c @@ -85,7 +85,7 @@ out: } /* - * Tries to allocate exactly one block. Returns true if sucessful. + * Tries to allocate exactly one block. Returns true if successful. */ int omfs_allocate_block(struct super_block *sb, u64 block) { diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index f94ddf7efba0..868a55ee080f 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -23,7 +23,7 @@ /* * This file implements functions needed to recover from unclean un-mounts. * When UBIFS is mounted, it checks a flag on the master node to determine if - * an un-mount was completed sucessfully. If not, the process of mounting + * an un-mount was completed successfully. If not, the process of mounting * incorparates additional checking and fixing of on-flash data structures. * UBIFS always cleans away all remnants of an unclean un-mount, so that * errors do not accumulate. However UBIFS defers recovery if it is mounted diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h index 6533ead9b889..a2c16bcee90b 100644 --- a/fs/xfs/quota/xfs_dquot.h +++ b/fs/xfs/quota/xfs_dquot.h @@ -98,7 +98,7 @@ typedef struct xfs_dquot { #define dq_flags q_lists.dqm_flags /* - * Lock hierachy for q_qlock: + * Lock hierarchy for q_qlock: * XFS_QLOCK_NORMAL is the implicit default, * XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2 */ diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index 4c8d0afae711..fb2d63f13f4c 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h @@ -47,7 +47,7 @@ #elif defined(CONFIG_SPARSEMEM_VMEMMAP) -/* memmap is virtually contigious. */ +/* memmap is virtually contiguous. */ #define __pfn_to_page(pfn) (vmemmap + (pfn)) #define __page_to_pfn(page) (unsigned long)((page) - vmemmap) diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h index d76b66acea95..7c38c147e5e6 100644 --- a/include/asm-generic/unistd.h +++ b/include/asm-generic/unistd.h @@ -631,7 +631,7 @@ __SYSCALL(__NR_perf_event_open, sys_perf_event_open) * these are provided for both review and as a porting * help for the C library version. * - * Last chance: are any of these important enought to + * Last chance: are any of these important enough to * enable by default? */ #ifdef __ARCH_WANT_SYSCALL_NO_AT diff --git a/include/linux/chio.h b/include/linux/chio.h index 519248d8b2b6..d9bac7f97282 100644 --- a/include/linux/chio.h +++ b/include/linux/chio.h @@ -21,7 +21,7 @@ * query vendor-specific element types * * accessing elements works by specifing type and unit of the element. - * for eample, storage elements are addressed with type = CHET_ST and + * for example, storage elements are addressed with type = CHET_ST and * unit = 0 .. cp_nslots-1 * */ diff --git a/include/linux/mfd/ezx-pcap.h b/include/linux/mfd/ezx-pcap.h index e5124ceea769..3402042ddc31 100644 --- a/include/linux/mfd/ezx-pcap.h +++ b/include/linux/mfd/ezx-pcap.h @@ -45,7 +45,7 @@ void pcap_set_ts_bits(struct pcap_chip *, u32); #define PCAP_CLEAR_INTERRUPT_REGISTER 0x01ffffff #define PCAP_MASK_ALL_INTERRUPT 0x01ffffff -/* registers acessible by both pcap ports */ +/* registers accessible by both pcap ports */ #define PCAP_REG_ISR 0x0 /* Interrupt Status */ #define PCAP_REG_MSR 0x1 /* Interrupt Mask */ #define PCAP_REG_PSTAT 0x2 /* Processor Status */ @@ -67,7 +67,7 @@ void pcap_set_ts_bits(struct pcap_chip *, u32); #define PCAP_REG_VENDOR_TEST1 0x1e #define PCAP_REG_VENDOR_TEST2 0x1f -/* registers acessible by pcap port 1 only (a1200, e2 & e6) */ +/* registers accessible by pcap port 1 only (a1200, e2 & e6) */ #define PCAP_REG_INT_SEL 0x3 /* Interrupt Select */ #define PCAP_REG_SWCTRL 0x4 /* Switching Regulator Control */ #define PCAP_REG_VREG1 0x5 /* Regulator Bank 1 Control */ diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h index d745f5b6c7b0..76e5053e1fac 100644 --- a/include/linux/pktcdvd.h +++ b/include/linux/pktcdvd.h @@ -30,7 +30,7 @@ /* * use drive write caching -- we need deferred error handling to be - * able to sucessfully recover with this option (drive will return good + * able to successfully recover with this option (drive will return good * status as soon as the cdb is validated). */ #if defined(CONFIG_CDROM_PKTCDVD_WCACHE) diff --git a/include/linux/serial_reg.h b/include/linux/serial_reg.h index 850db2e80510..cf9327c051ad 100644 --- a/include/linux/serial_reg.h +++ b/include/linux/serial_reg.h @@ -216,10 +216,10 @@ #define UART_IIR_TOD 0x08 /* Character Timeout Indication Detected */ -#define UART_FCR_PXAR1 0x00 /* receive FIFO treshold = 1 */ -#define UART_FCR_PXAR8 0x40 /* receive FIFO treshold = 8 */ -#define UART_FCR_PXAR16 0x80 /* receive FIFO treshold = 16 */ -#define UART_FCR_PXAR32 0xc0 /* receive FIFO treshold = 32 */ +#define UART_FCR_PXAR1 0x00 /* receive FIFO threshold = 1 */ +#define UART_FCR_PXAR8 0x40 /* receive FIFO threshold = 8 */ +#define UART_FCR_PXAR16 0x80 /* receive FIFO threshold = 16 */ +#define UART_FCR_PXAR32 0xc0 /* receive FIFO threshold = 32 */ diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index b59e78c57161..dfd4745a955f 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -490,7 +490,7 @@ struct v4l2_jpegcompression { * you do, leave them untouched. * Inluding less markers will make the * resulting code smaller, but there will - * be fewer aplications which can read it. + * be fewer applications which can read it. * The presence of the APP and COM marker * is influenced by APP_len and COM_len * ONLY, not by this property! */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 6e5f0e0c7967..ca4789e4f1e1 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -988,7 +988,7 @@ struct sctp_transport { int init_sent_count; /* state : The current state of this destination, - * : i.e. SCTP_ACTIVE, SCTP_INACTIVE, SCTP_UNKOWN. + * : i.e. SCTP_ACTIVE, SCTP_INACTIVE, SCTP_UNKNOWN. */ int state; diff --git a/include/net/tcp.h b/include/net/tcp.h index 03a49c703377..1827e7f217d1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1260,7 +1260,7 @@ static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_bu skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp) /* This function calculates a "timeout" which is equivalent to the timeout of a - * TCP connection after "boundary" unsucessful, exponentially backed-off + * TCP connection after "boundary" unsuccessful, exponentially backed-off * retransmissions with an initial RTO of TCP_RTO_MIN. */ static inline bool retransmits_timed_out(const struct sock *sk, diff --git a/include/net/wimax.h b/include/net/wimax.h index 2af7bf839f23..3b07f6aad102 100644 --- a/include/net/wimax.h +++ b/include/net/wimax.h @@ -79,7 +79,7 @@ * drivers have to only report state changes due to external * conditions. * - * All API operations are 'atomic', serialized thorough a mutex in the + * All API operations are 'atomic', serialized through a mutex in the * `struct wimax_dev`. * * EXPORTING TO USER SPACE THROUGH GENERIC NETLINK diff --git a/include/sound/wm8993.h b/include/sound/wm8993.h index 9c661f2f8cda..eee19f63c0d8 100644 --- a/include/sound/wm8993.h +++ b/include/sound/wm8993.h @@ -36,7 +36,7 @@ struct wm8993_platform_data { unsigned int micbias1_lvl:1; unsigned int micbias2_lvl:1; - /* Jack detect threashold levels, see datasheet for values */ + /* Jack detect threshold levels, see datasheet for values */ unsigned int jd_scthr:2; unsigned int jd_thr:2; }; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 7f29643c8985..759a629cc4bc 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -419,7 +419,7 @@ static void perf_event_remove_from_context(struct perf_event *event) if (!task) { /* * Per cpu events are removed via an smp call and - * the removal is always sucessful. + * the removal is always successful. */ smp_call_function_single(event->cpu, __perf_event_remove_from_context, @@ -827,7 +827,7 @@ perf_install_in_context(struct perf_event_context *ctx, if (!task) { /* * Per cpu events are installed via an smp call and - * the install is always sucessful. + * the install is always successful. */ smp_call_function_single(cpu, __perf_install_in_context, event, 1); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 234ceb10861f..456b2bc6b1ff 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -105,7 +105,7 @@ config DEBUG_SECTION_MISMATCH bool "Enable full Section mismatch analysis" depends on UNDEFINED # This option is on purpose disabled for now. - # It will be enabled when we are down to a resonable number + # It will be enabled when we are down to a reasonable number # of section mismatch warnings (< 10 for an allyesconfig build) help The section mismatch analysis checks if there are illegal diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index 600f473a5610..76074209f9a2 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -299,7 +299,7 @@ static int INIT get_next_block(struct bunzip_data *bd) again when using them (during symbol decoding).*/ base = hufGroup->base-1; limit = hufGroup->limit-1; - /* Calculate permute[]. Concurently, initialize + /* Calculate permute[]. Concurrently, initialize * temp[] and limit[]. */ pp = 0; for (i = minLen; i <= maxLen; i++) { diff --git a/lib/dma-debug.c b/lib/dma-debug.c index ce6b7eabf674..d9b08e0f7f55 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -259,7 +259,7 @@ static struct dma_debug_entry *hash_bucket_find(struct hash_bucket *bucket, * times. Without a hardware IOMMU this results in the * same device addresses being put into the dma-debug * hash multiple times too. This can result in false - * positives being reported. Therfore we implement a + * positives being reported. Therefore we implement a * best-fit algorithm here which returns the entry from * the hash which fits best to the reference value * instead of the first-fit. diff --git a/lib/swiotlb.c b/lib/swiotlb.c index ac25cd28e807..853907e45868 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -453,7 +453,7 @@ do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) /* * Return the buffer to the free list by setting the corresponding - * entries to indicate the number of contigous entries available. + * entries to indicate the number of contiguous entries available. * While returning the entries to the free list, we merge the entries * with slots below and above the pool being returned. */ diff --git a/mm/filemap.c b/mm/filemap.c index ef169f37156d..c3d3506ecaba 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1844,7 +1844,7 @@ static size_t __iovec_copy_from_user_inatomic(char *vaddr, /* * Copy as much as we can into the page and return the number of bytes which - * were sucessfully copied. If a fault is encountered then return the number of + * were successfully copied. If a fault is encountered then return the number of * bytes which were copied. */ size_t iov_iter_copy_from_user_atomic(struct page *page, diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7226e60e52af..c31a310aa146 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -209,7 +209,7 @@ struct mem_cgroup { int prev_priority; /* for recording reclaim priority */ /* - * While reclaiming in a hiearchy, we cache the last child we + * While reclaiming in a hierarchy, we cache the last child we * reclaimed from. */ int last_scanned_child; @@ -2466,7 +2466,7 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft, cgroup_lock(); /* - * If parent's use_hiearchy is set, we can't make any modifications + * If parent's use_hierarchy is set, we can't make any modifications * in the child subtrees. If it is unset, then the change can * occur, provided the current cgroup has no children. * diff --git a/mm/memory-failure.c b/mm/memory-failure.c index dacc64183874..1ac49fef95ab 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -174,7 +174,7 @@ static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno, list_for_each_entry_safe (tk, next, to_kill, nd) { if (doit) { /* - * In case something went wrong with munmaping + * In case something went wrong with munmapping * make sure the process doesn't catch the * signal and then access the memory. Just kill it. * the signal handlers diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index f7e2fa0974dc..16ad251c9725 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -50,7 +50,7 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo) struct tcphdr _tcph, *tcph; __be16 oldval; - /* Not enought header? */ + /* Not enough header? */ tcph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); if (!tcph) return false; diff --git a/net/irda/irlap.c b/net/irda/irlap.c index 356e65b1dc42..783c5f367d29 100644 --- a/net/irda/irlap.c +++ b/net/irda/irlap.c @@ -450,10 +450,10 @@ void irlap_disconnect_request(struct irlap_cb *self) /* Check if we are in the right state for disconnecting */ switch (self->state) { - case LAP_XMIT_P: /* FALLTROUGH */ - case LAP_XMIT_S: /* FALLTROUGH */ - case LAP_CONN: /* FALLTROUGH */ - case LAP_RESET_WAIT: /* FALLTROUGH */ + case LAP_XMIT_P: /* FALLTHROUGH */ + case LAP_XMIT_S: /* FALLTHROUGH */ + case LAP_CONN: /* FALLTHROUGH */ + case LAP_RESET_WAIT: /* FALLTHROUGH */ case LAP_RESET_CHECK: irlap_do_event(self, DISCONNECT_REQUEST, NULL, NULL); break; @@ -485,9 +485,9 @@ void irlap_disconnect_indication(struct irlap_cb *self, LAP_REASON reason) IRDA_DEBUG(1, "%s(), Sending reset request!\n", __func__); irlap_do_event(self, RESET_REQUEST, NULL, NULL); break; - case LAP_NO_RESPONSE: /* FALLTROUGH */ - case LAP_DISC_INDICATION: /* FALLTROUGH */ - case LAP_FOUND_NONE: /* FALLTROUGH */ + case LAP_NO_RESPONSE: /* FALLTHROUGH */ + case LAP_DISC_INDICATION: /* FALLTHROUGH */ + case LAP_FOUND_NONE: /* FALLTHROUGH */ case LAP_MEDIA_BUSY: irlmp_link_disconnect_indication(self->notify.instance, self, reason, NULL); diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c index c5c51959e3ce..94a9884d7146 100644 --- a/net/irda/irlap_event.c +++ b/net/irda/irlap_event.c @@ -1741,7 +1741,7 @@ static int irlap_state_reset(struct irlap_cb *self, IRLAP_EVENT event, * Function irlap_state_xmit_s (event, skb, info) * * XMIT_S, The secondary station has been given the right to transmit, - * and we therefor do not expect to receive any transmissions from other + * and we therefore do not expect to receive any transmissions from other * stations. */ static int irlap_state_xmit_s(struct irlap_cb *self, IRLAP_EVENT event, diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index 7bf5b913828b..0e7d8bde145d 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -105,7 +105,7 @@ int __init irlmp_init(void) init_timer(&irlmp->discovery_timer); - /* Do discovery every 3 seconds, conditionaly */ + /* Do discovery every 3 seconds, conditionally */ if (sysctl_discovery) irlmp_start_discovery_timer(irlmp, sysctl_discovery_timeout*HZ); @@ -1842,7 +1842,7 @@ LM_REASON irlmp_convert_lap_reason( LAP_REASON lap_reason) reason = LM_CONNECT_FAILURE; break; default: - IRDA_DEBUG(1, "%s(), Unknow IrLAP disconnect reason %d!\n", + IRDA_DEBUG(1, "%s(), Unknown IrLAP disconnect reason %d!\n", __func__, lap_reason); reason = LM_LAP_DISCONNECT; break; diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 751c4d0e2b36..719ddbc9e48c 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -244,7 +244,7 @@ struct mesh_path *mesh_path_lookup_by_idx(int idx, struct ieee80211_sub_if_data * @addr: destination address of the path (ETH_ALEN length) * @sdata: local subif * - * Returns: 0 on sucess + * Returns: 0 on success * * State: the initial state of the new path is set to 0 */ @@ -530,7 +530,7 @@ static void mesh_path_node_reclaim(struct rcu_head *rp) * @addr: dst address (ETH_ALEN length) * @sdata: local subif * - * Returns: 0 if succesful + * Returns: 0 if successful */ int mesh_path_del(u8 *addr, struct ieee80211_sub_if_data *sdata) { diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 7a10bbe02c13..c5d9f97ef217 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -682,7 +682,7 @@ struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain, * buckets and @skip_chain entries. For each entry in the table call * @callback, if @callback returns a negative value stop 'walking' through the * table and return. Updates the values in @skip_bkt and @skip_chain on - * return. Returns zero on succcess, negative values on failure. + * return. Returns zero on success, negative values on failure. * */ int netlbl_domhsh_walk(u32 *skip_bkt, diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 8674d4919556..29d8501bf156 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -719,7 +719,7 @@ static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds, if (sctp_style(sk, TCP)) { /* Change the sk->sk_state of a TCP-style socket that has - * sucessfully completed a connect() call. + * successfully completed a connect() call. */ if (sctp_state(asoc, ESTABLISHED) && sctp_sstate(sk, CLOSED)) sk->sk_state = SCTP_SS_ESTABLISHED; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index f11be72a1a80..b15e1ebb2bfa 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -54,7 +54,7 @@ * Assumptions: * - head[0] is physically contiguous. * - tail[0] is physically contiguous. - * - pages[] is not physically or virtually contigous and consists of + * - pages[] is not physically or virtually contiguous and consists of * PAGE_SIZE elements. * * Output: diff --git a/net/wimax/op-reset.c b/net/wimax/op-reset.c index ca269178c4d4..35f370091f4f 100644 --- a/net/wimax/op-reset.c +++ b/net/wimax/op-reset.c @@ -62,7 +62,7 @@ * Called when wanting to reset the device for any reason. Device is * taken back to power on status. * - * This call blocks; on succesful return, the device has completed the + * This call blocks; on successful return, the device has completed the * reset process and is ready to operate. */ int wimax_reset(struct wimax_dev *wimax_dev) diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c index d82953573588..8413cf38ed27 100644 --- a/scripts/kconfig/mconf.c +++ b/scripts/kconfig/mconf.c @@ -213,7 +213,7 @@ load_config_help[] = N_( "to modify that configuration.\n" "\n" "If you are uncertain, then you have probably never used alternate\n" - "configuration files. You should therefor leave this blank to abort.\n"), + "configuration files. You should therefore leave this blank to abort.\n"), save_config_text[] = N_( "Enter a filename to which this configuration should be saved " "as an alternate. Leave blank to abort."), diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c index e68823741ad5..2534400317c5 100644 --- a/security/selinux/netlabel.c +++ b/security/selinux/netlabel.c @@ -204,7 +204,7 @@ int selinux_netlbl_skbuff_getsid(struct sk_buff *skb, * * Description * Call the NetLabel mechanism to set the label of a packet using @sid. - * Returns zero on auccess, negative values on failure. + * Returns zero on success, negative values on failure. * */ int selinux_netlbl_skbuff_setsid(struct sk_buff *skb, diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index ff17820d35ec..5914eeb0b339 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -741,7 +741,7 @@ int security_bounded_transition(u32 old_sid, u32 new_sid) goto out; } - /* type/domain unchaned */ + /* type/domain unchanged */ if (old_context->type == new_context->type) { rc = 0; goto out; diff --git a/sound/Kconfig b/sound/Kconfig index 4b5365ad6b46..fcad760f5691 100644 --- a/sound/Kconfig +++ b/sound/Kconfig @@ -55,7 +55,7 @@ config SOUND_OSS_CORE_PRECLAIM Please read Documentation/feature-removal-schedule.txt for details. - If unusre, say Y. + If unsure, say Y. source "sound/oss/dmasound/Kconfig" diff --git a/sound/isa/cs423x/cs4236.c b/sound/isa/cs423x/cs4236.c index a076a6ce8071..a828baaab636 100644 --- a/sound/isa/cs423x/cs4236.c +++ b/sound/isa/cs423x/cs4236.c @@ -177,7 +177,7 @@ static struct pnp_card_device_id snd_cs423x_pnpids[] = { { .id = "CSC0437", .devs = { { "CSC0000" }, { "CSC0010" }, { "CSC0003" } } }, /* Digital PC 5000 Onboard - CS4236B */ { .id = "CSC0735", .devs = { { "CSC0000" }, { "CSC0010" } } }, - /* some uknown CS4236B */ + /* some unknown CS4236B */ { .id = "CSC0b35", .devs = { { "CSC0000" }, { "CSC0010" }, { "CSC0003" } } }, /* Intel PR440FX Onboard sound */ { .id = "CSC0b36", .devs = { { "CSC0000" }, { "CSC0010" }, { "CSC0003" } } }, diff --git a/sound/isa/opti9xx/miro.c b/sound/isa/opti9xx/miro.c index 02e30d7c6a93..ddad60ef3f37 100644 --- a/sound/isa/opti9xx/miro.c +++ b/sound/isa/opti9xx/miro.c @@ -137,7 +137,7 @@ struct snd_miro { static void snd_miro_proc_init(struct snd_miro * miro); static char * snd_opti9xx_names[] = { - "unkown", + "unknown", "82C928", "82C929", "82C924", "82C925", "82C930", "82C931", "82C933" diff --git a/sound/isa/opti9xx/opti92x-ad1848.c b/sound/isa/opti9xx/opti92x-ad1848.c index 5cd555325b9d..848007508ffd 100644 --- a/sound/isa/opti9xx/opti92x-ad1848.c +++ b/sound/isa/opti9xx/opti92x-ad1848.c @@ -185,7 +185,7 @@ MODULE_DEVICE_TABLE(pnp_card, snd_opti9xx_pnpids); #endif static char * snd_opti9xx_names[] = { - "unkown", + "unknown", "82C928", "82C929", "82C924", "82C925", "82C930", "82C931", "82C933" diff --git a/sound/oss/dmasound/dmasound_paula.c b/sound/oss/dmasound/dmasound_paula.c index 06e9e88e4c05..bb14e4c67e89 100644 --- a/sound/oss/dmasound/dmasound_paula.c +++ b/sound/oss/dmasound/dmasound_paula.c @@ -657,7 +657,7 @@ static int AmiStateInfo(char *buffer, size_t space) len += sprintf(buffer+len, "\tsound.volume_right = %d [0...64]\n", dmasound.volume_right); if (len >= space) { - printk(KERN_ERR "dmasound_paula: overlowed state buffer alloc.\n") ; + printk(KERN_ERR "dmasound_paula: overflowed state buffer alloc.\n") ; len = space ; } return len; diff --git a/sound/pci/ca0106/ca0106_proc.c b/sound/pci/ca0106/ca0106_proc.c index c62b7d10ec61..8d13092300da 100644 --- a/sound/pci/ca0106/ca0106_proc.c +++ b/sound/pci/ca0106/ca0106_proc.c @@ -233,7 +233,7 @@ static void snd_ca0106_proc_dump_iec958( struct snd_info_buffer *buffer, u32 val snd_iprintf(buffer, "user-defined\n"); break; default: - snd_iprintf(buffer, "unkown\n"); + snd_iprintf(buffer, "unknown\n"); break; } snd_iprintf(buffer, "Sample Bits: "); diff --git a/sound/pci/cs46xx/imgs/cwcdma.asp b/sound/pci/cs46xx/imgs/cwcdma.asp index 09d24c76f034..a65e1193c89a 100644 --- a/sound/pci/cs46xx/imgs/cwcdma.asp +++ b/sound/pci/cs46xx/imgs/cwcdma.asp @@ -26,10 +26,11 @@ // // // The purpose of this code is very simple: make it possible to tranfser -// the samples 'as they are' with no alteration from a PCMreader SCB (DMA from host) -// to any other SCB. This is useful for AC3 throug SPDIF. SRC (source rate converters) -// task always alters the samples in some how, however it's from 48khz -> 48khz. The -// alterations are not audible, but AC3 wont work. +// the samples 'as they are' with no alteration from a PCMreader +// SCB (DMA from host) to any other SCB. This is useful for AC3 through SPDIF. +// SRC (source rate converters) task always alters the samples in somehow, +// however it's from 48khz -> 48khz. +// The alterations are not audible, but AC3 wont work. // // ... // | diff --git a/sound/pci/emu10k1/emu10k1x.c b/sound/pci/emu10k1/emu10k1x.c index 36e08bd2b3cc..360e3809a60b 100644 --- a/sound/pci/emu10k1/emu10k1x.c +++ b/sound/pci/emu10k1/emu10k1x.c @@ -184,7 +184,7 @@ MODULE_PARM_DESC(enable, "Enable the EMU10K1X soundcard."); * The hardware has 3 channels for playback and 1 for capture. * - channel 0 is the front channel * - channel 1 is the rear channel - * - channel 2 is the center/lfe chanel + * - channel 2 is the center/lfe channel * Volume is controlled by the AC97 for the front and rear channels by * the PCM Playback Volume, Sigmatel Surround Playback Volume and * Surround Playback Volume. The Sigmatel 4-Speaker Stereo switch affects diff --git a/sound/pci/hda/patch_cmedia.c b/sound/pci/hda/patch_cmedia.c index 780e1a72114a..8917071d5b6a 100644 --- a/sound/pci/hda/patch_cmedia.c +++ b/sound/pci/hda/patch_cmedia.c @@ -66,7 +66,7 @@ struct cmi_spec { struct hda_pcm pcm_rec[2]; /* PCM information */ - /* pin deafault configuration */ + /* pin default configuration */ hda_nid_t pin_nid[NUM_PINS]; unsigned int def_conf[NUM_PINS]; unsigned int pin_def_confs; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ff20048504b6..872731eb49e8 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6619,7 +6619,7 @@ static struct hda_input_mux alc889A_mb31_capture_source = { /* Front Mic (0x01) unused */ { "Line", 0x2 }, /* Line 2 (0x03) unused */ - /* CD (0x04) unsused? */ + /* CD (0x04) unused? */ }, }; diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index 0dce331a2a3b..a1b10d1a384d 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -3017,7 +3017,7 @@ snd_hdspm_proc_read_madi(struct snd_info_entry * entry, insel = "Coaxial"; break; default: - insel = "Unkown"; + insel = "Unknown"; } switch (hdspm->control_register & HDSPM_SyncRefMask) { @@ -3028,7 +3028,7 @@ snd_hdspm_proc_read_madi(struct snd_info_entry * entry, syncref = "MADI"; break; default: - syncref = "Unkown"; + syncref = "Unknown"; } snd_iprintf(buffer, "Inputsel = %s, SyncRef = %s\n", insel, syncref); diff --git a/sound/soc/codecs/uda134x.c b/sound/soc/codecs/uda134x.c index c33b92edbded..8ce1c9b2e5b8 100644 --- a/sound/soc/codecs/uda134x.c +++ b/sound/soc/codecs/uda134x.c @@ -101,7 +101,7 @@ static int uda134x_write(struct snd_soc_codec *codec, unsigned int reg, pr_debug("%s reg: %02X, value:%02X\n", __func__, reg, value); if (reg >= UDA134X_REGS_NUM) { - printk(KERN_ERR "%s unkown register: reg: %u", + printk(KERN_ERR "%s unknown register: reg: %u", __func__, reg); return -EINVAL; } @@ -552,7 +552,7 @@ static int uda134x_soc_probe(struct platform_device *pdev) ARRAY_SIZE(uda1341_snd_controls)); break; default: - printk(KERN_ERR "%s unkown codec type: %d", + printk(KERN_ERR "%s unknown codec type: %d", __func__, pd->model); return -EINVAL; } diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c index fe1307b500cf..d72347d90b70 100644 --- a/sound/soc/codecs/wm8903.c +++ b/sound/soc/codecs/wm8903.c @@ -607,7 +607,7 @@ SOC_SINGLE("Right Input PGA Common Mode Switch", WM8903_ANALOGUE_RIGHT_INPUT_1, SOC_SINGLE("DRC Switch", WM8903_DRC_0, 15, 1, 0), SOC_ENUM("DRC Compressor Slope R0", drc_slope_r0), SOC_ENUM("DRC Compressor Slope R1", drc_slope_r1), -SOC_SINGLE_TLV("DRC Compressor Threashold Volume", WM8903_DRC_3, 5, 124, 1, +SOC_SINGLE_TLV("DRC Compressor Threshold Volume", WM8903_DRC_3, 5, 124, 1, drc_tlv_thresh), SOC_SINGLE_TLV("DRC Volume", WM8903_DRC_3, 0, 30, 1, drc_tlv_amp), SOC_SINGLE_TLV("DRC Minimum Gain Volume", WM8903_DRC_1, 2, 3, 1, drc_tlv_min), @@ -617,11 +617,11 @@ SOC_ENUM("DRC Decay Rate", drc_decay), SOC_ENUM("DRC FF Delay", drc_ff_delay), SOC_SINGLE("DRC Anticlip Switch", WM8903_DRC_0, 1, 1, 0), SOC_SINGLE("DRC QR Switch", WM8903_DRC_0, 2, 1, 0), -SOC_SINGLE_TLV("DRC QR Threashold Volume", WM8903_DRC_0, 6, 3, 0, drc_tlv_max), +SOC_SINGLE_TLV("DRC QR Threshold Volume", WM8903_DRC_0, 6, 3, 0, drc_tlv_max), SOC_ENUM("DRC QR Decay Rate", drc_qr_decay), SOC_SINGLE("DRC Smoothing Switch", WM8903_DRC_0, 3, 1, 0), SOC_SINGLE("DRC Smoothing Hysteresis Switch", WM8903_DRC_0, 0, 1, 0), -SOC_ENUM("DRC Smoothing Threashold", drc_smoothing), +SOC_ENUM("DRC Smoothing Threshold", drc_smoothing), SOC_SINGLE_TLV("DRC Startup Volume", WM8903_DRC_0, 6, 18, 0, drc_tlv_startup), SOC_DOUBLE_R_TLV("Digital Capture Volume", WM8903_ADC_DIGITAL_VOLUME_LEFT, diff --git a/sound/soc/codecs/wm8993.c b/sound/soc/codecs/wm8993.c index d9987999e92c..bc033687b220 100644 --- a/sound/soc/codecs/wm8993.c +++ b/sound/soc/codecs/wm8993.c @@ -689,7 +689,7 @@ SOC_DOUBLE_TLV("Digital Sidetone Volume", WM8993_DIGITAL_SIDE_TONE, SOC_SINGLE("DRC Switch", WM8993_DRC_CONTROL_1, 15, 1, 0), SOC_ENUM("DRC Path", drc_path), -SOC_SINGLE_TLV("DRC Compressor Threashold Volume", WM8993_DRC_CONTROL_2, +SOC_SINGLE_TLV("DRC Compressor Threshold Volume", WM8993_DRC_CONTROL_2, 2, 60, 1, drc_comp_threash), SOC_SINGLE_TLV("DRC Compressor Amplitude Volume", WM8993_DRC_CONTROL_3, 11, 30, 1, drc_comp_amp), @@ -709,7 +709,7 @@ SOC_SINGLE_TLV("DRC Quick Release Volume", WM8993_DRC_CONTROL_3, 2, 3, 0, SOC_ENUM("DRC Quick Release Rate", drc_qr_rate), SOC_SINGLE("DRC Smoothing Switch", WM8993_DRC_CONTROL_1, 11, 1, 0), SOC_SINGLE("DRC Smoothing Hysteresis Switch", WM8993_DRC_CONTROL_1, 8, 1, 0), -SOC_ENUM("DRC Smoothing Hysteresis Threashold", drc_smooth), +SOC_ENUM("DRC Smoothing Hysteresis Threshold", drc_smooth), SOC_SINGLE_TLV("DRC Startup Volume", WM8993_DRC_CONTROL_4, 8, 18, 0, drc_startup_tlv), diff --git a/sound/soc/s3c24xx/s3c24xx_simtec.c b/sound/soc/s3c24xx/s3c24xx_simtec.c index 1966e0d5652d..3c7ccb78b6ab 100644 --- a/sound/soc/s3c24xx/s3c24xx_simtec.c +++ b/sound/soc/s3c24xx/s3c24xx_simtec.c @@ -270,7 +270,7 @@ static int attach_gpio_amp(struct device *dev, gpio_direction_output(pd->amp_gain[1], 0); } - /* note, curently we assume GPA0 isn't valid amp */ + /* note, currently we assume GPA0 isn't valid amp */ if (pdata->amp_gpio > 0) { ret = gpio_request(pd->amp_gpio, "gpio-amp"); if (ret) { diff --git a/sound/soc/s6000/s6000-pcm.c b/sound/soc/s6000/s6000-pcm.c index 83b8028e209d..81d6f983f51e 100644 --- a/sound/soc/s6000/s6000-pcm.c +++ b/sound/soc/s6000/s6000-pcm.c @@ -196,7 +196,7 @@ static int s6000_pcm_start(struct snd_pcm_substream *substream) 0 /* destination skip after chunk (impossible) */, 4 /* 16 byte burst size */, -1 /* don't conserve bandwidth */, - 0 /* low watermark irq descriptor theshold */, + 0 /* low watermark irq descriptor threshold */, 0 /* disable hardware timestamps */, 1 /* enable channel */); diff --git a/sound/sound_core.c b/sound/sound_core.c index 49c998186592..dbca7c909a31 100644 --- a/sound/sound_core.c +++ b/sound/sound_core.c @@ -353,7 +353,7 @@ static struct sound_unit *chains[SOUND_STEP]; * @dev: device pointer * * Allocate a special sound device by minor number from the sound - * subsystem. The allocated number is returned on succes. On failure + * subsystem. The allocated number is returned on success. On failure * a negative error code is returned. */ -- cgit v1.2.3 From 6070d81eb5f2d4943223c96e7609a53cdc984364 Mon Sep 17 00:00:00 2001 From: Adam Buchbinder Date: Fri, 4 Dec 2009 15:47:01 -0500 Subject: tree-wide: fix misspelling of "definition" in comments "Definition" is misspelled "defintion" in several comments; this patch fixes them. No code changes. Signed-off-by: Adam Buchbinder Signed-off-by: Jiri Kosina --- arch/arm/mach-bcmring/include/csp/reg.h | 2 +- arch/arm/mach-bcmring/include/mach/csp/mm_addr.h | 2 +- arch/arm/mach-u300/include/mach/u300-regs.h | 2 +- arch/sh/include/mach-common/mach/titan.h | 2 +- arch/x86/include/asm/sigcontext.h | 4 ++-- drivers/block/cciss_cmd.h | 2 +- drivers/gpu/drm/radeon/atombios.h | 16 ++++++++-------- drivers/media/video/cx18/cx18-av-core.h | 2 +- drivers/media/video/cx18/cx18-mailbox.h | 2 +- drivers/net/wireless/ath/ath5k/base.h | 2 +- drivers/s390/net/qeth_core_mpc.h | 2 +- drivers/scsi/advansys.c | 2 +- drivers/scsi/aic94xx/aic94xx_reg_def.h | 2 +- drivers/scsi/bfa/include/protocol/ct.h | 4 ++-- drivers/scsi/dmx3191d.c | 2 +- drivers/scsi/wd7000.c | 2 +- drivers/staging/otus/80211core/pub_zfi.h | 2 +- drivers/staging/otus/zdcompat.h | 2 +- drivers/staging/rtl8192su/r8192S_phyreg.h | 2 +- fs/cifs/cifspdu.h | 2 +- fs/compat_ioctl.c | 2 +- include/linux/cciss_ioctl.h | 2 +- include/linux/in6.h | 2 +- include/linux/usb/wusb.h | 2 +- 24 files changed, 33 insertions(+), 33 deletions(-) (limited to 'arch/x86') diff --git a/arch/arm/mach-bcmring/include/csp/reg.h b/arch/arm/mach-bcmring/include/csp/reg.h index e5f60bf5a1f3..56654d23c3d7 100644 --- a/arch/arm/mach-bcmring/include/csp/reg.h +++ b/arch/arm/mach-bcmring/include/csp/reg.h @@ -16,7 +16,7 @@ /** * @file reg.h * -* @brief Generic register defintions used in CSP +* @brief Generic register definitions used in CSP */ /****************************************************************************/ diff --git a/arch/arm/mach-bcmring/include/mach/csp/mm_addr.h b/arch/arm/mach-bcmring/include/mach/csp/mm_addr.h index 86bb58d4f58c..ad58cf873377 100644 --- a/arch/arm/mach-bcmring/include/mach/csp/mm_addr.h +++ b/arch/arm/mach-bcmring/include/mach/csp/mm_addr.h @@ -16,7 +16,7 @@ /** * @file mm_addr.h * -* @brief Memory Map address defintions +* @brief Memory Map address definitions * * @note * None diff --git a/arch/arm/mach-u300/include/mach/u300-regs.h b/arch/arm/mach-u300/include/mach/u300-regs.h index 88333dfb19fc..56721a0cd2af 100644 --- a/arch/arm/mach-u300/include/mach/u300-regs.h +++ b/arch/arm/mach-u300/include/mach/u300-regs.h @@ -6,7 +6,7 @@ * Copyright (C) 2006-2009 ST-Ericsson AB * License terms: GNU General Public License (GPL) version 2 * Basic register address definitions in physical memory and - * some block defintions for core devices like the timer. + * some block definitions for core devices like the timer. * Author: Linus Walleij */ diff --git a/arch/sh/include/mach-common/mach/titan.h b/arch/sh/include/mach-common/mach/titan.h index 03f3583c8918..4a674d27cbb8 100644 --- a/arch/sh/include/mach-common/mach/titan.h +++ b/arch/sh/include/mach-common/mach/titan.h @@ -1,5 +1,5 @@ /* - * Platform defintions for Titan + * Platform definitions for Titan */ #ifndef _ASM_SH_TITAN_H #define _ASM_SH_TITAN_H diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h index 72e5a4491661..04459d25e66e 100644 --- a/arch/x86/include/asm/sigcontext.h +++ b/arch/x86/include/asm/sigcontext.h @@ -124,7 +124,7 @@ struct sigcontext { * fpstate is really (struct _fpstate *) or (struct _xstate *) * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end - * of extended memory layout. See comments at the defintion of + * of extended memory layout. See comments at the definition of * (struct _fpx_sw_bytes) */ void __user *fpstate; /* zero when no FPU/extended context */ @@ -219,7 +219,7 @@ struct sigcontext { * fpstate is really (struct _fpstate *) or (struct _xstate *) * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end - * of extended memory layout. See comments at the defintion of + * of extended memory layout. See comments at the definition of * (struct _fpx_sw_bytes) */ void __user *fpstate; /* zero when no FPU/extended context */ diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h index dbaed1ea0da3..8098fccdbec4 100644 --- a/drivers/block/cciss_cmd.h +++ b/drivers/block/cciss_cmd.h @@ -5,7 +5,7 @@ //########################################################################### #define CISS_VERSION "1.00" -//general boundary defintions +//general boundary definitions #define SENSEINFOBYTES 32//note that this value may vary between host implementations #define MAXSGENTRIES 31 #define MAXREPLYQS 256 diff --git a/drivers/gpu/drm/radeon/atombios.h b/drivers/gpu/drm/radeon/atombios.h index 5d402086bc47..811178907413 100644 --- a/drivers/gpu/drm/radeon/atombios.h +++ b/drivers/gpu/drm/radeon/atombios.h @@ -1141,7 +1141,7 @@ typedef struct _LVDS_ENCODER_CONTROL_PARAMETERS { /* ucTableFormatRevision=1,ucTableContentRevision=2 */ typedef struct _LVDS_ENCODER_CONTROL_PARAMETERS_V2 { USHORT usPixelClock; /* in 10KHz; for bios convenient */ - UCHAR ucMisc; /* see PANEL_ENCODER_MISC_xx defintions below */ + UCHAR ucMisc; /* see PANEL_ENCODER_MISC_xx definitions below */ UCHAR ucAction; /* 0: turn off encoder */ /* 1: setup and turn on encoder */ UCHAR ucTruncate; /* bit0=0: Disable truncate */ @@ -1424,7 +1424,7 @@ typedef struct _ATOM_MULTIMEDIA_CONFIG_INFO { /* Structures used in FirmwareInfoTable */ /****************************************************************************/ -/* usBIOSCapability Defintion: */ +/* usBIOSCapability Definition: */ /* Bit 0 = 0: Bios image is not Posted, =1:Bios image is Posted; */ /* Bit 1 = 0: Dual CRTC is not supported, =1: Dual CRTC is supported; */ /* Bit 2 = 0: Extended Desktop is not supported, =1: Extended Desktop is supported; */ @@ -2386,7 +2386,7 @@ typedef struct _ATOM_ANALOG_TV_INFO_V1_2 { } ATOM_ANALOG_TV_INFO_V1_2; /**************************************************************************/ -/* VRAM usage and their defintions */ +/* VRAM usage and their definitions */ /* One chunk of VRAM used by Bios are for HWICON surfaces,EDID data. */ /* Current Mode timing and Dail Timing and/or STD timing data EACH device. They can be broken down as below. */ @@ -3046,7 +3046,7 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO { #define ATOM_S0_SYSTEM_POWER_STATE_VALUE_DC 2 #define ATOM_S0_SYSTEM_POWER_STATE_VALUE_LITEAC 3 -/* Byte aligned defintion for BIOS usage */ +/* Byte aligned definition for BIOS usage */ #define ATOM_S0_CRT1_MONOb0 0x01 #define ATOM_S0_CRT1_COLORb0 0x02 #define ATOM_S0_CRT1_MASKb0 (ATOM_S0_CRT1_MONOb0+ATOM_S0_CRT1_COLORb0) @@ -3131,7 +3131,7 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO { #define ATOM_S2_DISPLAY_ROTATION_DEGREE_SHIFT 30 #define ATOM_S2_DISPLAY_ROTATION_ANGLE_MASK 0xC0000000L -/* Byte aligned defintion for BIOS usage */ +/* Byte aligned definition for BIOS usage */ #define ATOM_S2_TV1_STANDARD_MASKb0 0x0F #define ATOM_S2_CURRENT_BL_LEVEL_MASKb1 0xFF #define ATOM_S2_CRT1_DPMS_STATEb2 0x01 @@ -3190,7 +3190,7 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO { #define ATOM_S3_ALLOW_FAST_PWR_SWITCH 0x40000000L #define ATOM_S3_RQST_GPU_USE_MIN_PWR 0x80000000L -/* Byte aligned defintion for BIOS usage */ +/* Byte aligned definition for BIOS usage */ #define ATOM_S3_CRT1_ACTIVEb0 0x01 #define ATOM_S3_LCD1_ACTIVEb0 0x02 #define ATOM_S3_TV1_ACTIVEb0 0x04 @@ -3230,7 +3230,7 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO { #define ATOM_S4_LCD1_REFRESH_MASK 0x0000FF00L #define ATOM_S4_LCD1_REFRESH_SHIFT 8 -/* Byte aligned defintion for BIOS usage */ +/* Byte aligned definition for BIOS usage */ #define ATOM_S4_LCD1_PANEL_ID_MASKb0 0x0FF #define ATOM_S4_LCD1_REFRESH_MASKb1 ATOM_S4_LCD1_PANEL_ID_MASKb0 #define ATOM_S4_VRAM_INFO_MASKb2 ATOM_S4_LCD1_PANEL_ID_MASKb0 @@ -3310,7 +3310,7 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO { #define ATOM_S6_VRI_BRIGHTNESS_CHANGE 0x40000000L #define ATOM_S6_CONFIG_DISPLAY_CHANGE_MASK 0x80000000L -/* Byte aligned defintion for BIOS usage */ +/* Byte aligned definition for BIOS usage */ #define ATOM_S6_DEVICE_CHANGEb0 0x01 #define ATOM_S6_SCALER_CHANGEb0 0x02 #define ATOM_S6_LID_CHANGEb0 0x04 diff --git a/drivers/media/video/cx18/cx18-av-core.h b/drivers/media/video/cx18/cx18-av-core.h index 9b84a0c58e0e..cafb7e99b9a0 100644 --- a/drivers/media/video/cx18/cx18-av-core.h +++ b/drivers/media/video/cx18/cx18-av-core.h @@ -294,7 +294,7 @@ struct cx18_av_state { #define CXADEC_QAM_CONST_DEC 0x924 #define CXADEC_QAM_ROTATOR_FREQ 0x948 -/* Bit defintions / settings used in Mako Audio */ +/* Bit definitions / settings used in Mako Audio */ #define CXADEC_PREF_MODE_MONO_LANGA 0 #define CXADEC_PREF_MODE_MONO_LANGB 1 #define CXADEC_PREF_MODE_MONO_LANGC 2 diff --git a/drivers/media/video/cx18/cx18-mailbox.h b/drivers/media/video/cx18/cx18-mailbox.h index e23aaac5b280..522ad534034c 100644 --- a/drivers/media/video/cx18/cx18-mailbox.h +++ b/drivers/media/video/cx18/cx18-mailbox.h @@ -41,7 +41,7 @@ struct cx18; /* * This structure is used by CPU to provide completed buffers information * Its structure is dictrated by the layout of the SCB, required by the - * firmware, but its defintion needs to be here, instead of in cx18-scb.h, + * firmware, but its definition needs to be here, instead of in cx18-scb.h, * for mailbox work order scheduling */ struct cx18_mdl_ack { diff --git a/drivers/net/wireless/ath/ath5k/base.h b/drivers/net/wireless/ath/ath5k/base.h index c79a65044b67..4da029410467 100644 --- a/drivers/net/wireless/ath/ath5k/base.h +++ b/drivers/net/wireless/ath/ath5k/base.h @@ -36,7 +36,7 @@ */ /* - * Defintions for the Atheros Wireless LAN controller driver. + * Definitions for the Atheros Wireless LAN controller driver. */ #ifndef _DEV_ATH_ATHVAR_H #define _DEV_ATH_ATHVAR_H diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index eecb2ee62e85..de7bccef49f0 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -507,7 +507,7 @@ extern unsigned char ULP_ENABLE[]; (PDU_ENCAPSULATION(buffer) + 0x17) #define QETH_ULP_ENABLE_RESP_LINK_TYPE(buffer) \ (PDU_ENCAPSULATION(buffer) + 0x2b) -/* Layer 2 defintions */ +/* Layer 2 definitions */ #define QETH_PROT_LAYER2 0x08 #define QETH_PROT_TCPIP 0x03 #define QETH_PROT_OSN2 0x0a diff --git a/drivers/scsi/advansys.c b/drivers/scsi/advansys.c index b756041f0b26..22626abdb630 100644 --- a/drivers/scsi/advansys.c +++ b/drivers/scsi/advansys.c @@ -7969,7 +7969,7 @@ static int advansys_reset(struct scsi_cmnd *scp) ASC_DBG(1, "before AscInitAsc1000Driver()\n"); status = AscInitAsc1000Driver(asc_dvc); - /* Refer to ASC_IERR_* defintions for meaning of 'err_code'. */ + /* Refer to ASC_IERR_* definitions for meaning of 'err_code'. */ if (asc_dvc->err_code) { scmd_printk(KERN_INFO, scp, "SCSI bus reset error: " "0x%x\n", asc_dvc->err_code); diff --git a/drivers/scsi/aic94xx/aic94xx_reg_def.h b/drivers/scsi/aic94xx/aic94xx_reg_def.h index a43e8cdf4ee4..28aaf349c111 100644 --- a/drivers/scsi/aic94xx/aic94xx_reg_def.h +++ b/drivers/scsi/aic94xx/aic94xx_reg_def.h @@ -1,5 +1,5 @@ /* - * Aic94xx SAS/SATA driver hardware registers defintions. + * Aic94xx SAS/SATA driver hardware registers definitions. * * Copyright (C) 2004 Adaptec, Inc. All rights reserved. * Copyright (C) 2004 David Chaw diff --git a/drivers/scsi/bfa/include/protocol/ct.h b/drivers/scsi/bfa/include/protocol/ct.h index c59d6630b070..79d16a9ab281 100644 --- a/drivers/scsi/bfa/include/protocol/ct.h +++ b/drivers/scsi/bfa/include/protocol/ct.h @@ -82,7 +82,7 @@ enum { }; /* - * defintions for CT reason code + * definitions for CT reason code */ enum { CT_RSN_INV_CMD = 0x01, @@ -129,7 +129,7 @@ enum { }; /* - * defintions for the explanation code for all servers + * definitions for the explanation code for all servers */ enum { CT_EXP_AUTH_EXCEPTION = 0xF1, diff --git a/drivers/scsi/dmx3191d.c b/drivers/scsi/dmx3191d.c index fa738ec8692a..207352cc70cc 100644 --- a/drivers/scsi/dmx3191d.c +++ b/drivers/scsi/dmx3191d.c @@ -31,7 +31,7 @@ #include /* - * Defintions for the generic 5380 driver. + * Definitions for the generic 5380 driver. */ #define AUTOSENSE diff --git a/drivers/scsi/wd7000.c b/drivers/scsi/wd7000.c index 093610bcfcce..2f6e9d8eaf71 100644 --- a/drivers/scsi/wd7000.c +++ b/drivers/scsi/wd7000.c @@ -161,7 +161,7 @@ * * 2003/02/12 - Christoph Hellwig * - * Cleaned up host template defintion + * Cleaned up host template definition * Removed now obsolete wd7000.h */ diff --git a/drivers/staging/otus/80211core/pub_zfi.h b/drivers/staging/otus/80211core/pub_zfi.h index a35bd5d41d2c..60b7d1c56dee 100644 --- a/drivers/staging/otus/80211core/pub_zfi.h +++ b/drivers/staging/otus/80211core/pub_zfi.h @@ -20,7 +20,7 @@ #include "../oal_dt.h" /***** Section 1 : Tunable Parameters *****/ -/* The defintions in this section are tunabel parameters */ +/* The definitions in this section are tunabel parameters */ /* Maximum number of BSS that could be scaned */ #define ZM_MAX_BSS 128 diff --git a/drivers/staging/otus/zdcompat.h b/drivers/staging/otus/zdcompat.h index 84ac43356b77..cdcaef54afcd 100644 --- a/drivers/staging/otus/zdcompat.h +++ b/drivers/staging/otus/zdcompat.h @@ -17,7 +17,7 @@ /* Module Name : zdcompat.h */ /* */ /* Abstract */ -/* This module contains function defintion for compatibility. */ +/* This module contains function definition for compatibility. */ /* */ /* NOTES */ /* Platform dependent. */ diff --git a/drivers/staging/rtl8192su/r8192S_phyreg.h b/drivers/staging/rtl8192su/r8192S_phyreg.h index 96c7cfa92542..acf644f430aa 100644 --- a/drivers/staging/rtl8192su/r8192S_phyreg.h +++ b/drivers/staging/rtl8192su/r8192S_phyreg.h @@ -38,7 +38,7 @@ // 2. 0x800/0x900/0xA00/0xC00/0xD00/0xE00 // 3. RF register 0x00-2E // 4. Bit Mask for BB/RF register -// 5. Other defintion for BB/RF R/W +// 5. Other definition for BB/RF R/W // diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 2d07f890a842..3877737f96a6 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -1227,7 +1227,7 @@ typedef struct smb_com_setattr_rsp { /* empty wct response to setattr */ /*******************************************************/ -/* NT Transact structure defintions follow */ +/* NT Transact structure definitions follow */ /* Currently only ioctl, acl (get security descriptor) */ /* and notify are implemented */ /*******************************************************/ diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index f91fd51b32e3..5fc8e52ee306 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -2655,7 +2655,7 @@ COMPATIBLE_IOCTL(TIOCSLTC) #endif #ifdef TIOCSTART /* - * For these two we have defintions in ioctls.h and/or termios.h on + * For these two we have definitions in ioctls.h and/or termios.h on * some architectures but no actual implemention. Some applications * like bash call them if they are defined in the headers, so we provide * entries here to avoid syslog message spew. diff --git a/include/linux/cciss_ioctl.h b/include/linux/cciss_ioctl.h index cb57c30081a8..eb130b4d8e72 100644 --- a/include/linux/cciss_ioctl.h +++ b/include/linux/cciss_ioctl.h @@ -39,7 +39,7 @@ typedef __u32 DriverVer_type; #ifndef CCISS_CMD_H // This defines are duplicated in cciss_cmd.h in the driver directory -//general boundary defintions +//general boundary definitions #define SENSEINFOBYTES 32//note that this value may vary between host implementations //Command Status value diff --git a/include/linux/in6.h b/include/linux/in6.h index 718bf21c5754..010290dd79bb 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -113,7 +113,7 @@ struct in6_flowlabel_req #define IPV6_FLOWINFO_FLOWLABEL 0x000fffff #define IPV6_FLOWINFO_PRIORITY 0x0ff00000 -/* These defintions are obsolete */ +/* These definitions are obsolete */ #define IPV6_PRIORITY_UNCHARACTERIZED 0x0000 #define IPV6_PRIORITY_FILLER 0x0100 #define IPV6_PRIORITY_UNATTENDED 0x0200 diff --git a/include/linux/usb/wusb.h b/include/linux/usb/wusb.h index 429c631d2aad..63ebdcc5dda6 100644 --- a/include/linux/usb/wusb.h +++ b/include/linux/usb/wusb.h @@ -74,7 +74,7 @@ enum { * WUSB defines that CHIDs, CDIDs and CKs are a 16 byte string of * data. In order to avoid confusion and enforce types, we wrap it. * - * Make it packed, as we use it in some hw defintions. + * Make it packed, as we use it in some hw definitions. */ struct wusb_ckhdid { u8 data[16]; -- cgit v1.2.3 From 575939cf548951dde8df0786899ea5a91bb669b2 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 24 Nov 2009 18:05:12 -0800 Subject: x86/PCI: claim SR-IOV BARs in pcibios_allocate_resource This allows us to use the BIOS SR-IOV allocations rather than assigning our own later on. Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- arch/x86/pci/i386.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index b73c09f45210..5dc9e8c63fcd 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -146,16 +146,29 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) } } +struct pci_check_idx_range { + int start; + int end; +}; + static void __init pcibios_allocate_resources(int pass) { struct pci_dev *dev = NULL; - int idx, disabled; + int idx, disabled, i; u16 command; struct resource *r; + struct pci_check_idx_range idx_range[] = { + { PCI_STD_RESOURCES, PCI_STD_RESOURCE_END }, +#ifdef CONFIG_PCI_IOV + { PCI_IOV_RESOURCES, PCI_IOV_RESOURCE_END }, +#endif + }; + for_each_pci_dev(dev) { pci_read_config_word(dev, PCI_COMMAND, &command); - for (idx = 0; idx < PCI_ROM_RESOURCE; idx++) { + for (i = 0; i < ARRAY_SIZE(idx_range); i++) + for (idx = idx_range[i].start; idx <= idx_range[i].end; idx++) { r = &dev->resource[idx]; if (r->parent) /* Already allocated */ continue; -- cgit v1.2.3 From 5d990b627537e59a3a2f039ff588a4750e9c1a6a Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Fri, 4 Dec 2009 12:15:21 -0800 Subject: PCI: add pci_request_acs Commit ae21ee65e8bc228416bbcc8a1da01c56a847a60c "PCI: acs p2p upsteram forwarding enabling" doesn't actually enable ACS. Add a function to pci core to allow an IOMMU to request that ACS be enabled. The existing mechanism of using iommu_found() in the pci core to know when ACS should be enabled doesn't actually work due to initialization order; iommu has only been detected not initialized. Have Intel and AMD IOMMUs request ACS, and Xen does as well during early init of dom0. Cc: Allen Kay Cc: David Woodhouse Cc: Jeremy Fitzhardinge Cc: Joerg Roedel Signed-off-by: Chris Wright Signed-off-by: Jesse Barnes --- arch/x86/kernel/amd_iommu_init.c | 2 ++ arch/x86/xen/enlighten.c | 5 +++++ drivers/pci/dmar.c | 5 ++++- drivers/pci/pci.c | 13 +++++++++++++ drivers/pci/probe.c | 5 +---- include/linux/pci.h | 2 ++ 6 files changed, 27 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index b4b61d462dcc..e60530a5f524 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1330,6 +1330,8 @@ void __init amd_iommu_detect(void) gart_iommu_aperture_disabled = 1; gart_iommu_aperture = 0; #endif + /* Make sure ACS will be enabled */ + pci_request_acs(); } } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 5bccd706232c..e2511bccbc8d 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -1170,7 +1171,11 @@ asmlinkage void __init xen_start_kernel(void) add_preferred_console("xenboot", 0, NULL); add_preferred_console("tty", 0, NULL); add_preferred_console("hvc", 0, NULL); + } else { + /* Make sure ACS will be enabled */ + pci_request_acs(); } + xen_raw_console_write("about to get started...\n"); diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index e01ca4d6b3e6..0e98f6b6f515 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -614,8 +614,11 @@ void __init detect_intel_iommu(void) #endif #ifdef CONFIG_DMAR if (ret && !no_iommu && !iommu_detected && !swiotlb && - !dmar_disabled) + !dmar_disabled) { iommu_detected = 1; + /* Make sure ACS will be enabled */ + pci_request_acs(); + } #endif } early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 6af212c509c5..cd9b375f49d5 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1550,6 +1550,16 @@ void pci_enable_ari(struct pci_dev *dev) bridge->ari_enabled = 1; } +static int pci_acs_enable; + +/** + * pci_request_acs - ask for ACS to be enabled if supported + */ +void pci_request_acs(void) +{ + pci_acs_enable = 1; +} + /** * pci_enable_acs - enable ACS if hardware support it * @dev: the PCI device @@ -1560,6 +1570,9 @@ void pci_enable_acs(struct pci_dev *dev) u16 cap; u16 ctrl; + if (!pci_acs_enable) + return; + if (!pci_is_pcie(dev)) return; diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 2fdffc02a308..98ffb2de22e9 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -10,9 +10,7 @@ #include #include #include -#include #include -#include #include "pci.h" #define CARDBUS_LATENCY_TIMER 176 /* secondary latency timer */ @@ -1029,8 +1027,7 @@ static void pci_init_capabilities(struct pci_dev *dev) pci_iov_init(dev); /* Enable ACS P2P upstream forwarding */ - if (iommu_found() || xen_initial_domain()) - pci_enable_acs(dev); + pci_enable_acs(dev); } void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) diff --git a/include/linux/pci.h b/include/linux/pci.h index 2891c3d3e51a..04771b9c3316 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1328,5 +1328,7 @@ static inline bool pci_is_pcie(struct pci_dev *dev) return !!pci_pcie_cap(dev); } +void pci_request_acs(void); + #endif /* __KERNEL__ */ #endif /* LINUX_PCI_H */ -- cgit v1.2.3 From 4832ddda2ec4df96ea1eed334ae2dbd65fc1f541 Mon Sep 17 00:00:00 2001 From: Leann Ogasawara Date: Fri, 4 Dec 2009 15:42:22 -0800 Subject: x86: ASUS P4S800 reboot=bios quirk Bug reporter noted their system with an ASUS P4S800 motherboard would hang when rebooting unless reboot=b was specified. Their dmidecode didn't contain descriptive System Information for Manufacturer or Product Name, so I used their Base Board Information to create a reboot quirk patch. The bug reporter confirmed this patch resolves the reboot hang. Handle 0x0001, DMI type 1, 25 bytes System Information Manufacturer: System Manufacturer Product Name: System Name Version: System Version Serial Number: SYS-1234567890 UUID: E0BFCD8B-7948-D911-A953-E486B4EEB67F Wake-up Type: Power Switch Handle 0x0002, DMI type 2, 8 bytes Base Board Information Manufacturer: ASUSTeK Computer INC. Product Name: P4S800 Version: REV 1.xx Serial Number: xxxxxxxxxxx BugLink: http://bugs.launchpad.net/bugs/366682 ASUS P4S800 will hang when rebooting unless reboot=b is specified. Add a quirk to reboot through the bios. Signed-off-by: Leann Ogasawara LKML-Reference: <1259972107.4629.275.camel@emiko> Signed-off-by: H. Peter Anvin Cc: --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f93078746e00..6caf26010970 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -259,6 +259,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"), }, }, + { /* Handle problems with rebooting on ASUS P4S800 */ + .callback = set_bios_reboot, + .ident = "ASUS P4S800", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "P4S800"), + }, + }, { } }; -- cgit v1.2.3 From a5fc5eba4dfcc284e6adcd7fdcd5b43182230d2b Mon Sep 17 00:00:00 2001 From: David Daney Date: Fri, 4 Dec 2009 17:44:51 -0800 Subject: x86: Convert BUG() to use unreachable() Use the new unreachable() macro instead of for(;;);. When allyesconfig is built with a GCC-4.5 snapshot on i686 the size of the text segment is reduced by 3987 bytes (from 6827019 to 6823032). Signed-off-by: David Daney Acked-by: "H. Peter Anvin" CC: Thomas Gleixner CC: Ingo Molnar CC: x86@kernel.org Signed-off-by: Linus Torvalds --- arch/x86/include/asm/bug.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index d9cf1cd156d2..f654d1bb17fb 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -22,14 +22,14 @@ do { \ ".popsection" \ : : "i" (__FILE__), "i" (__LINE__), \ "i" (sizeof(struct bug_entry))); \ - for (;;) ; \ + unreachable(); \ } while (0) #else #define BUG() \ do { \ asm volatile("ud2"); \ - for (;;) ; \ + unreachable(); \ } while (0) #endif -- cgit v1.2.3 From 2f0993e0fb663c49e4d1e02654f6203246be4817 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 5 Dec 2009 07:06:10 +0100 Subject: hw-breakpoints: Drop callback and task parameters from modify helper Drop the callback and task parameters from modify_user_hw_breakpoint(). For now we have no user that need to modify a breakpoint to the point of changing its handler or its task context. Signed-off-by: Frederic Weisbecker Cc: "K. Prasad" --- arch/x86/kernel/ptrace.c | 4 ++-- include/linux/hw_breakpoint.h | 9 ++------- kernel/hw_breakpoint.c | 7 +++---- 3 files changed, 7 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 04d182a7cfdb..dbb395572ae2 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -618,7 +618,7 @@ ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, attr.bp_type = gen_type; attr.disabled = disabled; - return modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); + return modify_user_hw_breakpoint(bp, &attr); } /* @@ -740,7 +740,7 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, attr = bp->attr; attr.bp_addr = addr; - bp = modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); + bp = modify_user_hw_breakpoint(bp, &attr); } /* * CHECKME: the previous code returned -EIO if the addr wasn't a diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index a03daed08c59..d33096e0dbd4 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -57,10 +57,7 @@ register_user_hw_breakpoint(struct perf_event_attr *attr, /* FIXME: only change from the attr, and don't unregister */ extern struct perf_event * -modify_user_hw_breakpoint(struct perf_event *bp, - struct perf_event_attr *attr, - perf_callback_t triggered, - struct task_struct *tsk); +modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr); /* * Kernel breakpoints are not associated with any particular thread. @@ -97,9 +94,7 @@ register_user_hw_breakpoint(struct perf_event_attr *attr, struct task_struct *tsk) { return NULL; } static inline struct perf_event * modify_user_hw_breakpoint(struct perf_event *bp, - struct perf_event_attr *attr, - perf_callback_t triggered, - struct task_struct *tsk) { return NULL; } + struct perf_event_attr *attr) { return NULL; } static inline struct perf_event * register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr, perf_callback_t triggered, diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index cf5ee1628411..2d10b012828f 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -312,9 +312,7 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); * @tsk: pointer to 'task_struct' of the process to which the address belongs */ struct perf_event * -modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr, - perf_callback_t triggered, - struct task_struct *tsk) +modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr) { /* * FIXME: do it without unregistering @@ -323,7 +321,8 @@ modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr, */ unregister_hw_breakpoint(bp); - return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered); + return perf_event_create_kernel_counter(attr, -1, bp->ctx->task->pid, + bp->callback); } EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); -- cgit v1.2.3 From b326e9560a28fc3e950637ef51847ed8f05c1335 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 5 Dec 2009 09:44:31 +0100 Subject: hw-breakpoints: Use overflow handler instead of the event callback struct perf_event::event callback was called when a breakpoint triggers. But this is a rather opaque callback, pretty tied-only to the breakpoint API and not really integrated into perf as it triggers even when we don't overflow. We prefer to use overflow_handler() as it fits into the perf events rules, being called only when we overflow. Reported-by: Peter Zijlstra Signed-off-by: Frederic Weisbecker Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: "K. Prasad" --- arch/x86/kernel/hw_breakpoint.c | 5 ++--- arch/x86/kernel/ptrace.c | 9 ++++++--- include/linux/hw_breakpoint.h | 25 +++++++++++-------------- include/linux/perf_event.h | 13 +++++++------ kernel/hw_breakpoint.c | 17 +++++------------ kernel/perf_event.c | 24 +++++++++--------------- kernel/trace/trace_ksym.c | 5 +++-- samples/hw_breakpoint/data_breakpoint.c | 7 +++++-- 8 files changed, 48 insertions(+), 57 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index d42f65ac4927..05d5fec64a94 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -362,8 +362,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp, return ret; } - if (bp->callback) - ret = arch_store_info(bp); + ret = arch_store_info(bp); if (ret < 0) return ret; @@ -519,7 +518,7 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) break; } - (bp->callback)(bp, args->regs); + perf_bp_event(bp, args->regs); rcu_read_unlock(); } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index dbb395572ae2..b361d28061d0 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -555,7 +555,9 @@ static int genregs_set(struct task_struct *target, return ret; } -static void ptrace_triggered(struct perf_event *bp, void *data) +static void ptrace_triggered(struct perf_event *bp, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs) { int i; struct thread_struct *thread = &(current->thread); @@ -599,7 +601,7 @@ ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, { int err; int gen_len, gen_type; - DEFINE_BREAKPOINT_ATTR(attr); + struct perf_event_attr attr; /* * We shoud have at least an inactive breakpoint at this @@ -721,9 +723,10 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, { struct perf_event *bp; struct thread_struct *t = &tsk->thread; - DEFINE_BREAKPOINT_ATTR(attr); + struct perf_event_attr attr; if (!t->ptrace_bps[nr]) { + hw_breakpoint_init(&attr); /* * Put stub len and type to register (reserve) an inactive but * correct bp diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index d33096e0dbd4..4d14a384a01e 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -20,19 +20,16 @@ enum { #ifdef CONFIG_HAVE_HW_BREAKPOINT -/* As it's for in-kernel or ptrace use, we want it to be pinned */ -#define DEFINE_BREAKPOINT_ATTR(name) \ -struct perf_event_attr name = { \ - .type = PERF_TYPE_BREAKPOINT, \ - .size = sizeof(name), \ - .pinned = 1, \ -}; - static inline void hw_breakpoint_init(struct perf_event_attr *attr) { attr->type = PERF_TYPE_BREAKPOINT; attr->size = sizeof(*attr); + /* + * As it's for in-kernel or ptrace use, we want it to be pinned + * and to call its callback every hits. + */ attr->pinned = 1; + attr->sample_period = 1; } static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) @@ -52,7 +49,7 @@ static inline int hw_breakpoint_len(struct perf_event *bp) extern struct perf_event * register_user_hw_breakpoint(struct perf_event_attr *attr, - perf_callback_t triggered, + perf_overflow_handler_t triggered, struct task_struct *tsk); /* FIXME: only change from the attr, and don't unregister */ @@ -64,12 +61,12 @@ modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr); */ extern struct perf_event * register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr, - perf_callback_t triggered, + perf_overflow_handler_t triggered, int cpu); extern struct perf_event ** register_wide_hw_breakpoint(struct perf_event_attr *attr, - perf_callback_t triggered); + perf_overflow_handler_t triggered); extern int register_perf_hw_breakpoint(struct perf_event *bp); extern int __register_perf_hw_breakpoint(struct perf_event *bp); @@ -90,18 +87,18 @@ static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) static inline struct perf_event * register_user_hw_breakpoint(struct perf_event_attr *attr, - perf_callback_t triggered, + perf_overflow_handler_t triggered, struct task_struct *tsk) { return NULL; } static inline struct perf_event * modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr) { return NULL; } static inline struct perf_event * register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr, - perf_callback_t triggered, + perf_overflow_handler_t triggered, int cpu) { return NULL; } static inline struct perf_event ** register_wide_hw_breakpoint(struct perf_event_attr *attr, - perf_callback_t triggered) { return NULL; } + perf_overflow_handler_t triggered) { return NULL; } static inline int register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } static inline int diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 84bd28a0ffab..d2f2667430da 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -565,10 +565,13 @@ struct perf_pending_entry { void (*func)(struct perf_pending_entry *); }; -typedef void (*perf_callback_t)(struct perf_event *, void *); - struct perf_sample_data; +typedef void (*perf_callback_t)(struct perf_event *, void *); +typedef void (*perf_overflow_handler_t)(struct perf_event *, int, + struct perf_sample_data *, + struct pt_regs *regs); + /** * struct perf_event - performance event kernel representation: */ @@ -660,9 +663,7 @@ struct perf_event { struct pid_namespace *ns; u64 id; - void (*overflow_handler)(struct perf_event *event, - int nmi, struct perf_sample_data *data, - struct pt_regs *regs); + perf_overflow_handler_t overflow_handler; #ifdef CONFIG_EVENT_PROFILE struct event_filter *filter; @@ -779,7 +780,7 @@ extern struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, pid_t pid, - perf_callback_t callback); + perf_overflow_handler_t callback); extern u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running); diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 2d10b012828f..b600fc27f161 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -259,7 +259,7 @@ void release_bp_slot(struct perf_event *bp) } -int __register_perf_hw_breakpoint(struct perf_event *bp) +int register_perf_hw_breakpoint(struct perf_event *bp) { int ret; @@ -276,19 +276,12 @@ int __register_perf_hw_breakpoint(struct perf_event *bp) * This is a quick hack that will be removed soon, once we remove * the tmp breakpoints from ptrace */ - if (!bp->attr.disabled || bp->callback == perf_bp_event) + if (!bp->attr.disabled || !bp->overflow_handler) ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task); return ret; } -int register_perf_hw_breakpoint(struct perf_event *bp) -{ - bp->callback = perf_bp_event; - - return __register_perf_hw_breakpoint(bp); -} - /** * register_user_hw_breakpoint - register a hardware breakpoint for user space * @attr: breakpoint attributes @@ -297,7 +290,7 @@ int register_perf_hw_breakpoint(struct perf_event *bp) */ struct perf_event * register_user_hw_breakpoint(struct perf_event_attr *attr, - perf_callback_t triggered, + perf_overflow_handler_t triggered, struct task_struct *tsk) { return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered); @@ -322,7 +315,7 @@ modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr) unregister_hw_breakpoint(bp); return perf_event_create_kernel_counter(attr, -1, bp->ctx->task->pid, - bp->callback); + bp->overflow_handler); } EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); @@ -347,7 +340,7 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint); */ struct perf_event ** register_wide_hw_breakpoint(struct perf_event_attr *attr, - perf_callback_t triggered) + perf_overflow_handler_t triggered) { struct perf_event **cpu_events, **pevent, *bp; long err; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 6b7ddba1dd64..fd43ff4ac860 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4286,15 +4286,8 @@ static void bp_perf_event_destroy(struct perf_event *event) static const struct pmu *bp_perf_event_init(struct perf_event *bp) { int err; - /* - * The breakpoint is already filled if we haven't created the counter - * through perf syscall - * FIXME: manage to get trigerred to NULL if it comes from syscalls - */ - if (!bp->callback) - err = register_perf_hw_breakpoint(bp); - else - err = __register_perf_hw_breakpoint(bp); + + err = register_perf_hw_breakpoint(bp); if (err) return ERR_PTR(err); @@ -4390,7 +4383,7 @@ perf_event_alloc(struct perf_event_attr *attr, struct perf_event_context *ctx, struct perf_event *group_leader, struct perf_event *parent_event, - perf_callback_t callback, + perf_overflow_handler_t overflow_handler, gfp_t gfpflags) { const struct pmu *pmu; @@ -4433,10 +4426,10 @@ perf_event_alloc(struct perf_event_attr *attr, event->state = PERF_EVENT_STATE_INACTIVE; - if (!callback && parent_event) - callback = parent_event->callback; + if (!overflow_handler && parent_event) + overflow_handler = parent_event->overflow_handler; - event->callback = callback; + event->overflow_handler = overflow_handler; if (attr->disabled) event->state = PERF_EVENT_STATE_OFF; @@ -4776,7 +4769,8 @@ err_put_context: */ struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, - pid_t pid, perf_callback_t callback) + pid_t pid, + perf_overflow_handler_t overflow_handler) { struct perf_event *event; struct perf_event_context *ctx; @@ -4793,7 +4787,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, } event = perf_event_alloc(attr, cpu, ctx, NULL, - NULL, callback, GFP_KERNEL); + NULL, overflow_handler, GFP_KERNEL); if (IS_ERR(event)) { err = PTR_ERR(event); goto err_put_context; diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index ddfa0fd43bc0..acb87d4a4ac1 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -79,11 +79,12 @@ void ksym_collect_stats(unsigned long hbp_hit_addr) } #endif /* CONFIG_PROFILE_KSYM_TRACER */ -void ksym_hbp_handler(struct perf_event *hbp, void *data) +void ksym_hbp_handler(struct perf_event *hbp, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs) { struct ring_buffer_event *event; struct ksym_trace_entry *entry; - struct pt_regs *regs = data; struct ring_buffer *buffer; int pc; diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c index 29525500df00..c69cbe9b2426 100644 --- a/samples/hw_breakpoint/data_breakpoint.c +++ b/samples/hw_breakpoint/data_breakpoint.c @@ -41,7 +41,9 @@ module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO); MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any" " write operations on the kernel symbol"); -static void sample_hbp_handler(struct perf_event *temp, void *data) +static void sample_hbp_handler(struct perf_event *bp, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs) { printk(KERN_INFO "%s value is changed\n", ksym_name); dump_stack(); @@ -51,8 +53,9 @@ static void sample_hbp_handler(struct perf_event *temp, void *data) static int __init hw_break_module_init(void) { int ret; - DEFINE_BREAKPOINT_ATTR(attr); + struct perf_event_attr attr; + hw_breakpoint_init(&attr); attr.bp_addr = kallsyms_lookup_name(ksym_name); attr.bp_len = HW_BREAKPOINT_LEN_4; attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; -- cgit v1.2.3 From 7f33f9c5cc3c99aeaf4d266a7ed502b828115a53 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 5 Dec 2009 12:01:17 +0100 Subject: x86/perf: Exclude the debug stack from the callchains Dumping the callchains from breakpoint events with perf gives strange results: 3.75% perf [kernel] [k] _raw_read_unlock | --- _raw_read_unlock perf_callchain perf_prepare_sample __perf_event_overflow perf_swevent_overflow perf_swevent_add perf_bp_event hw_breakpoint_exceptions_notify notifier_call_chain __atomic_notifier_call_chain atomic_notifier_call_chain notify_die do_debug debug munmap We are infected with all the debug stack. Like the nmi stack, the debug stack is undesired as it is part of the profiling path, not helpful for the user. Ignore it. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: "K. Prasad" --- arch/x86/kernel/cpu/perf_event.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index c1bbed1021d9..d35f26076ae5 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2287,7 +2287,7 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip) static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); -static DEFINE_PER_CPU(int, in_nmi_frame); +static DEFINE_PER_CPU(int, in_ignored_frame); static void @@ -2303,8 +2303,9 @@ static void backtrace_warning(void *data, char *msg) static int backtrace_stack(void *data, char *name) { - per_cpu(in_nmi_frame, smp_processor_id()) = - x86_is_stack_id(NMI_STACK, name); + per_cpu(in_ignored_frame, smp_processor_id()) = + x86_is_stack_id(NMI_STACK, name) || + x86_is_stack_id(DEBUG_STACK, name); return 0; } @@ -2313,7 +2314,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) { struct perf_callchain_entry *entry = data; - if (per_cpu(in_nmi_frame, smp_processor_id())) + if (per_cpu(in_ignored_frame, smp_processor_id())) return; if (reliable) -- cgit v1.2.3 From b625b3b3b740e177a1148594cd3ad5ff52f35315 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 6 Dec 2009 00:52:56 +0100 Subject: x86: Fixup wrong debug exception frame link in stacktraces While dumping a stacktrace, the end of the exception stack won't link the frame pointer to the previous stack. The interrupted stack will then be considered as unreliable and ignored by perf, as the frame pointer is unreliable itself. This happens because we overwrite the frame pointer that links to the interrupted frame with the address of the exception stack. This is done in order to reserve space inside. But rbp has been chosen here only because it is not a scratch register, so that the address of the exception stack remains in rbp after calling do_debug(), we can then release the exception stack space without the need to retrieve its address again. But we can pick another non-scratch register to do that, so that we preserve the link to the interrupted stack frame in the stacktraces. Just randomly choose r12. Every registers are saved just before and restored just after calling do_debug(). And r12 is not used in the middle, which makes it a perfect candidate. Example: perf record -g -a -c 1 -f -e mem:$(tasklist_lock_addr):rw Before: 44.18% [k] _raw_read_lock | | --- |--6.31%-- waitid | |--4.26%-- writev | |--3.63%-- __select | |--3.15%-- __waitpid | | | |--28.57%-- 0x8b52e00000139f | | | |--28.57%-- 0x8b52e0000013c6 | | | |--14.29%-- 0x7fde786dc000 | | | |--14.29%-- 0x62696c2f7273752f | | | --14.29%-- 0x1ea9df800000000 | |--3.00%-- __poll After: 43.94% [k] _raw_read_lock | --- _read_lock | |--60.53%-- send_sigio | __kill_fasync | kill_fasync | evdev_pass_event | evdev_event | input_pass_event | input_handle_event | input_event | synaptics_process_byte | psmouse_handle_byte | psmouse_interrupt | serio_interrupt | i8042_interrupt | handle_IRQ_event | handle_edge_irq | handle_irq | __irqentry_text_start | ret_from_intr | | | |--30.43%-- __select | | | |--17.39%-- 0x454f15 | | | |--13.04%-- __read | | | |--13.04%-- vread_hpet | | | |--13.04%-- _xcb_lock_io | | | --13.04%-- 0x7f630878ce87 Note: it does not only affect perf events but also other stacktraces in x86-64. They were considered as unreliable once we quit the debug stack frame. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: "K. Prasad" Cc: Thomas Gleixner Cc: "H. Peter Anvin" --- arch/x86/kernel/entry_64.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 722df1b1152d..0f08a0cea3e0 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1076,10 +1076,10 @@ ENTRY(\sym) TRACE_IRQS_OFF movq %rsp,%rdi /* pt_regs pointer */ xorl %esi,%esi /* no error code */ - PER_CPU(init_tss, %rbp) - subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) + PER_CPU(init_tss, %r12) + subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12) call \do_sym - addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) + addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12) jmp paranoid_exit /* %ebx: no swapgs flag */ CFI_ENDPROC END(\sym) -- cgit v1.2.3 From af2d8289f57e427836be482c6f72cca674028121 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 6 Dec 2009 05:34:27 +0100 Subject: x86: Fixup wrong irq frame link in stacktraces When we enter in irq, two things can happen to preserve the link to the previous frame pointer: - If we were in an irq already, we don't switch to the irq stack as we are inside. We just need to save the previous frame pointer and to link the new one to the previous. - Otherwise we need another level of indirection. We enter the irq with the previous stack. We save the previous bp inside and make bp pointing to its saved address. Then we switch to the irq stack and push bp another time but to the new stack. This makes two levels to dereference instead of one. In the second case, the current stacktrace code omits the second level and loses the frame pointer accuracy. The stack that follows will then be considered as unreliable. Handling that makes the perf callchain happier. Before: 43.94% [k] _raw_read_lock | --- _read_lock | |--60.53%-- send_sigio | __kill_fasync | kill_fasync | evdev_pass_event | evdev_event | input_pass_event | input_handle_event | input_event | synaptics_process_byte | psmouse_handle_byte | psmouse_interrupt | serio_interrupt | i8042_interrupt | handle_IRQ_event | handle_edge_irq | handle_irq | __irqentry_text_start | ret_from_intr | | | |--30.43%-- __select | | | |--17.39%-- 0x454f15 | | | |--13.04%-- __read | | | |--13.04%-- vread_hpet | | | |--13.04%-- _xcb_lock_io | | | --13.04%-- 0x7f630878ce8 After: 50.00% [k] _raw_read_lock | --- _read_lock | |--98.97%-- send_sigio | __kill_fasync | kill_fasync | evdev_pass_event | evdev_event | input_pass_event | input_handle_event | input_event | | | |--96.88%-- synaptics_process_byte | | psmouse_handle_byte | | psmouse_interrupt | | serio_interrupt | | i8042_interrupt | | handle_IRQ_event | | handle_edge_irq | | handle_irq | | __irqentry_text_start | | ret_from_intr | | | | | |--39.78%-- __const_udelay | | | | | | | |--91.89%-- ath5k_hw_register_timeout | | | | ath5k_hw_noise_floor_calibration | | | | ath5k_hw_reset | | | | ath5k_reset | | | | ath5k_config | | | | ieee80211_hw_config | | | | | | | | | |--88.24%-- ieee80211_scan_work | | | | | worker_thread | | | | | kthread | | | | | child_rip | | | | | | | | | --11.76%-- ieee80211_scan_completed | | | | ieee80211_scan_work | | | | worker_thread | | | | kthread | | | | child_rip | | | | | | | --8.11%-- ath5k_hw_noise_floor_calibration | | | ath5k_hw_reset | | | ath5k_reset | | | ath5k_config Note: This does not only affect perf events but also x86-64 stacktraces. They were considered as unreliable once we quit the irq stack frame. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: "K. Prasad" Cc: Thomas Gleixner Cc: "H. Peter Anvin" --- arch/x86/kernel/dumpstack_64.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index a071e6be177e..004b8aa6a35f 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -101,6 +101,35 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, return NULL; } +static inline int +in_irq_stack(unsigned long *stack, unsigned long *irq_stack, + unsigned long *irq_stack_end) +{ + return (stack >= irq_stack && stack < irq_stack_end); +} + +/* + * We are returning from the irq stack and go to the previous one. + * If the previous stack is also in the irq stack, then bp in the first + * frame of the irq stack points to the previous, interrupted one. + * Otherwise we have another level of indirection: We first save + * the bp of the previous stack, then we switch the stack to the irq one + * and save a new bp that links to the previous one. + * (See save_args()) + */ +static inline unsigned long +fixup_bp_irq_link(unsigned long bp, unsigned long *stack, + unsigned long *irq_stack, unsigned long *irq_stack_end) +{ +#ifdef CONFIG_FRAME_POINTER + struct stack_frame *frame = (struct stack_frame *)bp; + + if (!in_irq_stack(stack, irq_stack, irq_stack_end)) + return (unsigned long)frame->next_frame; +#endif + return bp; +} + /* * x86-64 can have up to three kernel stacks: * process stack @@ -173,7 +202,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, irq_stack = irq_stack_end - (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack); - if (stack >= irq_stack && stack < irq_stack_end) { + if (in_irq_stack(stack, irq_stack, irq_stack_end)) { if (ops->stack(data, "IRQ") < 0) break; bp = print_context_stack(tinfo, stack, bp, @@ -184,6 +213,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, * pointer (index -1 to end) in the IRQ stack: */ stack = (unsigned long *) (irq_stack_end[-1]); + bp = fixup_bp_irq_link(bp, stack, irq_stack, + irq_stack_end); irq_stack_end = NULL; ops->stack(data, "EOI"); continue; -- cgit v1.2.3 From 8055039c2a2454c7159dcbde3161943b757a6e0e Mon Sep 17 00:00:00 2001 From: Shaun Patterson Date: Sat, 5 Dec 2009 10:41:34 -0500 Subject: x86: Fix typo in arch/x86/mm/kmmio.c Signed-off-by: Shaun Patterson Cc: Jiri Kosina Cc: pq@iki.fi LKML-Reference: <1260027694.10074.170.camel@linux-4lgc.site> Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 16ccbd77917f..72f157247ab1 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -490,7 +490,7 @@ static void remove_kmmio_fault_pages(struct rcu_head *head) * 2. remove_kmmio_fault_pages() * Remove the pages from kmmio_page_table. * 3. rcu_free_kmmio_fault_pages() - * Actally free the kmmio_fault_page structs as with RCU. + * Actually free the kmmio_fault_page structs as with RCU. */ void unregister_kmmio_probe(struct kmmio_probe *p) { -- cgit v1.2.3 From be2bf0a2dfbba785860284968fa055006eb1610e Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 6 Dec 2009 12:40:59 +0100 Subject: x86, perf probe: Fix warning in test_get_len() Fix the following warning: arch/x86/tools/test_get_len.c: In function "main": arch/x86/tools/test_get_len.c:116: warning: unused variable "c" Signed-off-by: Jean Delvare Cc: Masami Hiramatsu Signed-off-by: Ingo Molnar --- arch/x86/tools/test_get_len.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c index d8214dc03fa7..bee8d6ac2691 100644 --- a/arch/x86/tools/test_get_len.c +++ b/arch/x86/tools/test_get_len.c @@ -113,7 +113,7 @@ int main(int argc, char **argv) char line[BUFSIZE], sym[BUFSIZE] = ""; unsigned char insn_buf[16]; struct insn insn; - int insns = 0, c; + int insns = 0; int warnings = 0; parse_args(argc, argv); -- cgit v1.2.3 From cbe5c34c8c1f8ed1afbe6273f4ad57fcfad7822f Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Sun, 6 Dec 2009 20:14:29 +0900 Subject: x86: Compile insn.c and inat.c only for KPROBES At least, insn.c and inat.c is needed for kprobe for now. So, this compile those only if KPROBES is enabled. Signed-off-by: OGAWA Hirofumi Cc: Masami Hiramatsu LKML-Reference: <878wdg8icq.fsf@devron.myhome.or.jp> Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.debug | 4 ++-- arch/x86/lib/Makefile | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 7d0b681a132b..0e90929da40f 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -187,8 +187,8 @@ config HAVE_MMIOTRACE_SUPPORT def_bool y config X86_DECODER_SELFTEST - bool "x86 instruction decoder selftest" - depends on DEBUG_KERNEL + bool "x86 instruction decoder selftest" + depends on DEBUG_KERNEL && KPROBES ---help--- Perform x86 instruction decoder selftests at build time. This option is useful for checking the sanity of x86 instruction diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index a2d6472895fb..442b3b3b2d80 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -20,7 +20,7 @@ lib-y := delay.o lib-y += thunk_$(BITS).o lib-y += usercopy_$(BITS).o getuser.o putuser.o lib-y += memcpy_$(BITS).o -lib-y += insn.o inat.o +lib-$(CONFIG_KPROBES) += insn.o inat.o obj-y += msr-reg.o msr-reg-export.o -- cgit v1.2.3 From a946d8f11f0da9cfc714248036fcfd3a794d1e27 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 7 Dec 2009 12:59:46 +0100 Subject: x86: Fix bogus warning in apic_noop.apic_write() apic_noop is used to provide dummy apic functions. It's installed when the CPU has no APIC or when the APIC is disabled on the kernel command line. The apic_noop implementation of apic_write() warns when the CPU has an APIC or when the APIC is not disabled. That's bogus. The warning should only happen when the CPU has an APIC _AND_ the APIC is not disabled. apic_noop.apic_read() has the correct check. Signed-off-by: Thomas Gleixner Cc: Cyrill Gorcunov Cc: # in <= .32 this typo resides in native_apic_write_dummy() LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic_noop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index d9acc3bee0f4..e31b9ffe25f5 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -127,7 +127,7 @@ static u32 noop_apic_read(u32 reg) static void noop_apic_write(u32 reg, u32 v) { - WARN_ON_ONCE((cpu_has_apic || !disable_apic)); + WARN_ON_ONCE(cpu_has_apic && !disable_apic); } struct apic apic_noop = { -- cgit v1.2.3 From d32ba45503acf9c23b301eba2397ca2ee322627b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 7 Dec 2009 12:00:33 -0500 Subject: x86 insn: Delete empty or incomplete inat-tables.c Delete empty or incomplete inat-tables.c if gen-insn-attr-x86.awk failed, because it causes a build error if user tries to build kernel next time. Reported-by: Arkadiusz Miskiewicz Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Jens Axboe Cc: Frederic Weisbecker LKML-Reference: <20091207170033.19230.37688.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/lib/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 442b3b3b2d80..45b20e486c2f 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -5,7 +5,7 @@ inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt quiet_cmd_inat_tables = GEN $@ - cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ + cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@ $(obj)/inat-tables.c: $(inat_tables_script) $(inat_tables_maps) $(call cmd,inat_tables) -- cgit v1.2.3 From bc09effabf0c5c6c7021e5ef9af15a23579b32a8 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 8 Dec 2009 11:21:37 +0900 Subject: x86/mce: Set up timer unconditionally mce_timer must be passed to setup_timer() in all cases, no matter whether it is going to be actually used. Otherwise, when the CPU gets brought down, its call to del_timer_sync() will never return, as the timer won't have a base associated, and hence lock_timer_base() will loop infinitely. Signed-off-by: Jan Beulich Signed-off-by: Hidetoshi Seto Cc: LKML-Reference: <4B1DB831.2030801@jp.fujitsu.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index d7ebf25d10ed..a96e5cd256a9 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1388,13 +1388,14 @@ static void __mcheck_cpu_init_timer(void) struct timer_list *t = &__get_cpu_var(mce_timer); int *n = &__get_cpu_var(mce_next_interval); + setup_timer(t, mce_start_timer, smp_processor_id()); + if (mce_ignore_ce) return; *n = check_interval * HZ; if (!*n) return; - setup_timer(t, mce_start_timer, smp_processor_id()); t->expires = round_jiffies(jiffies + *n); add_timer_on(t, smp_processor_id()); } -- cgit v1.2.3 From 5c0e9f28da84c68ce0ae68b7a75faaf862e156e2 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Tue, 8 Dec 2009 16:52:44 +0900 Subject: x86, mce: fix confusion between bank attributes and mce attributes Commit cebe182033f156b430952370fb0f9dbe6e89b081 had an unnecessary, wrong change: &mce_banks[i].attr is equivalent to the former bank_attrs[i], not to mce_attrs[i]. Signed-off-by: Hidetoshi Seto Acked-by: Andi Kleen LKML-Reference: <4B1E05CC.4040703@jp.fujitsu.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index a96e5cd256a9..a8aacd4b513c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1929,7 +1929,7 @@ error2: sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr); error: while (--i >= 0) - sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr); + sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); sysdev_unregister(&per_cpu(mce_dev, cpu)); -- cgit v1.2.3 From f58e1f53de52a70391b6478617311207c7203363 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 8 Dec 2009 22:30:50 -0800 Subject: arch/x86/kernel/microcode*: Use pr_fmt() and remove duplicated KERN_ERR prefix - Use #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - Remove "microcode: " prefix from each pr_ - Fix duplicated KERN_ERR prefix - Coalesce pr_ format strings - Add a space after an exclamation point No other change in output. Signed-off-by: Joe Perches Cc: Andy Whitcroft Cc: Andreas Herrmann LKML-Reference: <1260340250.27677.191.camel@Joe-Laptop.home> Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 40 ++++++++++++++++----------------- arch/x86/kernel/microcode_core.c | 26 ++++++++++++---------- arch/x86/kernel/microcode_intel.c | 47 +++++++++++++++++---------------------- 3 files changed, 53 insertions(+), 60 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 63123d902103..37542b67c57e 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -13,6 +13,9 @@ * Licensed under the terms of the GNU General Public * License version 2. See file COPYING for details. */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -81,7 +84,7 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) memset(csig, 0, sizeof(*csig)); rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); - pr_info("microcode: CPU%d: patch_level=0x%x\n", cpu, csig->rev); + pr_info("CPU%d: patch_level=0x%x\n", cpu, csig->rev); return 0; } @@ -111,8 +114,8 @@ static int get_matching_microcode(int cpu, void *mc, int rev) /* ucode might be chipset specific -- currently we don't support this */ if (mc_header->nb_dev_id || mc_header->sb_dev_id) { - pr_err(KERN_ERR "microcode: CPU%d: loading of chipset " - "specific code not yet supported\n", cpu); + pr_err("CPU%d: loading of chipset specific code not yet supported\n", + cpu); return 0; } @@ -141,12 +144,12 @@ static int apply_microcode_amd(int cpu) /* check current patch id and patch's id for match */ if (rev != mc_amd->hdr.patch_id) { - pr_err("microcode: CPU%d: update failed " - "(for patch_level=0x%x)\n", cpu, mc_amd->hdr.patch_id); + pr_err("CPU%d: update failed (for patch_level=0x%x)\n", + cpu, mc_amd->hdr.patch_id); return -1; } - pr_info("microcode: CPU%d: updated (new patch_level=0x%x)\n", cpu, rev); + pr_info("CPU%d: updated (new patch_level=0x%x)\n", cpu, rev); uci->cpu_sig.rev = rev; return 0; @@ -169,15 +172,14 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) return NULL; if (section_hdr[0] != UCODE_UCODE_TYPE) { - pr_err("microcode: error: invalid type field in " - "container file section header\n"); + pr_err("error: invalid type field in container file section header\n"); return NULL; } total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); if (total_size > size || total_size > UCODE_MAX_SIZE) { - pr_err("microcode: error: size mismatch\n"); + pr_err("error: size mismatch\n"); return NULL; } @@ -206,14 +208,13 @@ static int install_equiv_cpu_table(const u8 *buf) size = buf_pos[2]; if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { - pr_err("microcode: error: invalid type field in " - "container file section header\n"); + pr_err("error: invalid type field in container file section header\n"); return 0; } equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); if (!equiv_cpu_table) { - pr_err("microcode: failed to allocate equivalent CPU table\n"); + pr_err("failed to allocate equivalent CPU table\n"); return 0; } @@ -246,7 +247,7 @@ generic_load_microcode(int cpu, const u8 *data, size_t size) offset = install_equiv_cpu_table(ucode_ptr); if (!offset) { - pr_err("microcode: failed to create equivalent cpu table\n"); + pr_err("failed to create equivalent cpu table\n"); return UCODE_ERROR; } @@ -277,8 +278,7 @@ generic_load_microcode(int cpu, const u8 *data, size_t size) if (!leftover) { vfree(uci->mc); uci->mc = new_mc; - pr_debug("microcode: CPU%d found a matching microcode " - "update with version 0x%x (current=0x%x)\n", + pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", cpu, new_rev, uci->cpu_sig.rev); } else { vfree(new_mc); @@ -300,7 +300,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device) return UCODE_NFOUND; if (*(u32 *)firmware->data != UCODE_MAGIC) { - pr_err("microcode: invalid UCODE_MAGIC (0x%08x)\n", + pr_err("invalid UCODE_MAGIC (0x%08x)\n", *(u32 *)firmware->data); return UCODE_ERROR; } @@ -313,8 +313,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device) static enum ucode_state request_microcode_user(int cpu, const void __user *buf, size_t size) { - pr_info("microcode: AMD microcode update via " - "/dev/cpu/microcode not supported\n"); + pr_info("AMD microcode update via /dev/cpu/microcode not supported\n"); return UCODE_ERROR; } @@ -334,14 +333,13 @@ void init_microcode_amd(struct device *device) WARN_ON(c->x86_vendor != X86_VENDOR_AMD); if (c->x86 < 0x10) { - pr_warning("microcode: AMD CPU family 0x%x not supported\n", - c->x86); + pr_warning("AMD CPU family 0x%x not supported\n", c->x86); return; } supported_cpu = 1; if (request_firmware(&firmware, fw_name, device)) - pr_err("microcode: failed to load file %s\n", fw_name); + pr_err("failed to load file %s\n", fw_name); } void fini_microcode_amd(void) diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index e68aae397869..844c02c65fcb 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -70,6 +70,9 @@ * Fix sigmatch() macro to handle old CPUs with pf == 0. * Thanks to Stuart Swales for pointing out this bug. */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -209,7 +212,7 @@ static ssize_t microcode_write(struct file *file, const char __user *buf, ssize_t ret = -EINVAL; if ((len >> PAGE_SHIFT) > totalram_pages) { - pr_err("microcode: too much data (max %ld pages)\n", totalram_pages); + pr_err("too much data (max %ld pages)\n", totalram_pages); return ret; } @@ -244,7 +247,7 @@ static int __init microcode_dev_init(void) error = misc_register(µcode_dev); if (error) { - pr_err("microcode: can't misc_register on minor=%d\n", MICROCODE_MINOR); + pr_err("can't misc_register on minor=%d\n", MICROCODE_MINOR); return error; } @@ -359,7 +362,7 @@ static enum ucode_state microcode_resume_cpu(int cpu) if (!uci->mc) return UCODE_NFOUND; - pr_debug("microcode: CPU%d updated upon resume\n", cpu); + pr_debug("CPU%d updated upon resume\n", cpu); apply_microcode_on_target(cpu); return UCODE_OK; @@ -379,7 +382,7 @@ static enum ucode_state microcode_init_cpu(int cpu) ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev); if (ustate == UCODE_OK) { - pr_debug("microcode: CPU%d updated upon init\n", cpu); + pr_debug("CPU%d updated upon init\n", cpu); apply_microcode_on_target(cpu); } @@ -406,7 +409,7 @@ static int mc_sysdev_add(struct sys_device *sys_dev) if (!cpu_online(cpu)) return 0; - pr_debug("microcode: CPU%d added\n", cpu); + pr_debug("CPU%d added\n", cpu); err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); if (err) @@ -425,7 +428,7 @@ static int mc_sysdev_remove(struct sys_device *sys_dev) if (!cpu_online(cpu)) return 0; - pr_debug("microcode: CPU%d removed\n", cpu); + pr_debug("CPU%d removed\n", cpu); microcode_fini_cpu(cpu); sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); return 0; @@ -473,15 +476,15 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) microcode_update_cpu(cpu); case CPU_DOWN_FAILED: case CPU_DOWN_FAILED_FROZEN: - pr_debug("microcode: CPU%d added\n", cpu); + pr_debug("CPU%d added\n", cpu); if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) - pr_err("microcode: Failed to create group for CPU%d\n", cpu); + pr_err("Failed to create group for CPU%d\n", cpu); break; case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: /* Suspend is in progress, only remove the interface */ sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); - pr_debug("microcode: CPU%d removed\n", cpu); + pr_debug("CPU%d removed\n", cpu); break; case CPU_DEAD: case CPU_UP_CANCELED_FROZEN: @@ -507,7 +510,7 @@ static int __init microcode_init(void) microcode_ops = init_amd_microcode(); if (!microcode_ops) { - pr_err("microcode: no support for this CPU vendor\n"); + pr_err("no support for this CPU vendor\n"); return -ENODEV; } @@ -541,8 +544,7 @@ static int __init microcode_init(void) register_hotcpu_notifier(&mc_cpu_notifier); pr_info("Microcode Update Driver: v" MICROCODE_VERSION - " ," - " Peter Oruba\n"); + " , Peter Oruba\n"); return 0; } diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 0d334ddd0a96..ebd193e476ca 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -70,6 +70,9 @@ * Fix sigmatch() macro to handle old CPUs with pf == 0. * Thanks to Stuart Swales for pointing out this bug. */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -146,8 +149,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || cpu_has(c, X86_FEATURE_IA64)) { - printk(KERN_ERR "microcode: CPU%d not a capable Intel " - "processor\n", cpu_num); + pr_err("CPU%d not a capable Intel processor\n", cpu_num); return -1; } @@ -165,8 +167,8 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) /* get the current revision from MSR 0x8B */ rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev); - printk(KERN_INFO "microcode: CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n", - cpu_num, csig->sig, csig->pf, csig->rev); + pr_info("CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n", + cpu_num, csig->sig, csig->pf, csig->rev); return 0; } @@ -194,28 +196,24 @@ static int microcode_sanity_check(void *mc) data_size = get_datasize(mc_header); if (data_size + MC_HEADER_SIZE > total_size) { - printk(KERN_ERR "microcode: error! " - "Bad data size in microcode data file\n"); + pr_err("error! Bad data size in microcode data file\n"); return -EINVAL; } if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { - printk(KERN_ERR "microcode: error! " - "Unknown microcode update format\n"); + pr_err("error! Unknown microcode update format\n"); return -EINVAL; } ext_table_size = total_size - (MC_HEADER_SIZE + data_size); if (ext_table_size) { if ((ext_table_size < EXT_HEADER_SIZE) || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { - printk(KERN_ERR "microcode: error! " - "Small exttable size in microcode data file\n"); + pr_err("error! Small exttable size in microcode data file\n"); return -EINVAL; } ext_header = mc + MC_HEADER_SIZE + data_size; if (ext_table_size != exttable_size(ext_header)) { - printk(KERN_ERR "microcode: error! " - "Bad exttable size in microcode data file\n"); + pr_err("error! Bad exttable size in microcode data file\n"); return -EFAULT; } ext_sigcount = ext_header->count; @@ -230,8 +228,7 @@ static int microcode_sanity_check(void *mc) while (i--) ext_table_sum += ext_tablep[i]; if (ext_table_sum) { - printk(KERN_WARNING "microcode: aborting, " - "bad extended signature table checksum\n"); + pr_warning("aborting, bad extended signature table checksum\n"); return -EINVAL; } } @@ -242,7 +239,7 @@ static int microcode_sanity_check(void *mc) while (i--) orig_sum += ((int *)mc)[i]; if (orig_sum) { - printk(KERN_ERR "microcode: aborting, bad checksum\n"); + pr_err("aborting, bad checksum\n"); return -EINVAL; } if (!ext_table_size) @@ -255,7 +252,7 @@ static int microcode_sanity_check(void *mc) - (mc_header->sig + mc_header->pf + mc_header->cksum) + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); if (sum) { - printk(KERN_ERR "microcode: aborting, bad checksum\n"); + pr_err("aborting, bad checksum\n"); return -EINVAL; } } @@ -327,13 +324,11 @@ static int apply_microcode(int cpu) rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); if (val[1] != mc_intel->hdr.rev) { - printk(KERN_ERR "microcode: CPU%d update " - "to revision 0x%x failed\n", - cpu_num, mc_intel->hdr.rev); + pr_err("CPU%d update to revision 0x%x failed\n", + cpu_num, mc_intel->hdr.rev); return -1; } - printk(KERN_INFO "microcode: CPU%d updated to revision " - "0x%x, date = %04x-%02x-%02x \n", + pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x \n", cpu_num, val[1], mc_intel->hdr.date & 0xffff, mc_intel->hdr.date >> 24, @@ -362,8 +357,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, mc_size = get_totalsize(&mc_header); if (!mc_size || mc_size > leftover) { - printk(KERN_ERR "microcode: error!" - "Bad data in microcode data file\n"); + pr_err("error! Bad data in microcode data file\n"); break; } @@ -405,9 +399,8 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, vfree(uci->mc); uci->mc = (struct microcode_intel *)new_mc; - pr_debug("microcode: CPU%d found a matching microcode update with" - " version 0x%x (current=0x%x)\n", - cpu, new_rev, uci->cpu_sig.rev); + pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", + cpu, new_rev, uci->cpu_sig.rev); out: return state; } @@ -429,7 +422,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device) c->x86, c->x86_model, c->x86_mask); if (request_firmware(&firmware, name, device)) { - pr_debug("microcode: data file %s load failed\n", name); + pr_debug("data file %s load failed\n", name); return UCODE_NFOUND; } -- cgit v1.2.3 From 44234adcdce38f83c56e05f808ce656175b4beeb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 9 Dec 2009 09:25:48 +0100 Subject: hw-breakpoints: Modify breakpoints without unregistering them Currently, when ptrace needs to modify a breakpoint, like disabling it, changing its address, type or len, it calls modify_user_hw_breakpoint(). This latter will perform the heavy and racy task of unregistering the old breakpoint and registering a new one. This is racy as someone else might steal the reserved breakpoint slot under us, which is undesired as the breakpoint is only supposed to be modified, sometimes in the middle of a debugging workflow. We don't want our slot to be stolen in the middle. So instead of unregistering/registering the breakpoint, just disable it while we modify its breakpoint fields and re-enable it after if necessary. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Prasad LKML-Reference: <1260347148-5519-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/ptrace.c | 57 ++++++++++++++++++++----------------------- include/linux/hw_breakpoint.h | 4 +-- include/linux/perf_event.h | 5 +++- kernel/hw_breakpoint.c | 42 +++++++++++++++++++++++-------- kernel/perf_event.c | 4 +-- 5 files changed, 66 insertions(+), 46 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index b361d28061d0..7079ddaf0731 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -595,7 +595,7 @@ static unsigned long ptrace_get_dr7(struct perf_event *bp[]) return dr7; } -static struct perf_event * +static int ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, struct task_struct *tsk, int disabled) { @@ -609,11 +609,11 @@ ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, * written the address register first */ if (!bp) - return ERR_PTR(-EINVAL); + return -EINVAL; err = arch_bp_generic_fields(len, type, &gen_len, &gen_type); if (err) - return ERR_PTR(err); + return err; attr = bp->attr; attr.bp_len = gen_len; @@ -658,28 +658,17 @@ restore: if (!second_pass) continue; - thread->ptrace_bps[i] = NULL; - bp = ptrace_modify_breakpoint(bp, len, type, + rc = ptrace_modify_breakpoint(bp, len, type, tsk, 1); - if (IS_ERR(bp)) { - rc = PTR_ERR(bp); - thread->ptrace_bps[i] = NULL; + if (rc) break; - } - thread->ptrace_bps[i] = bp; } continue; } - bp = ptrace_modify_breakpoint(bp, len, type, tsk, 0); - - /* Incorrect bp, or we have a bug in bp API */ - if (IS_ERR(bp)) { - rc = PTR_ERR(bp); - thread->ptrace_bps[i] = NULL; + rc = ptrace_modify_breakpoint(bp, len, type, tsk, 0); + if (rc) break; - } - thread->ptrace_bps[i] = bp; } /* * Make a second pass to free the remaining unused breakpoints @@ -737,26 +726,32 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, attr.disabled = 1; bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk); + + /* + * CHECKME: the previous code returned -EIO if the addr wasn't + * a valid task virtual addr. The new one will return -EINVAL in + * this case. + * -EINVAL may be what we want for in-kernel breakpoints users, + * but -EIO looks better for ptrace, since we refuse a register + * writing for the user. And anyway this is the previous + * behaviour. + */ + if (IS_ERR(bp)) + return PTR_ERR(bp); + + t->ptrace_bps[nr] = bp; } else { + int err; + bp = t->ptrace_bps[nr]; - t->ptrace_bps[nr] = NULL; attr = bp->attr; attr.bp_addr = addr; - bp = modify_user_hw_breakpoint(bp, &attr); + err = modify_user_hw_breakpoint(bp, &attr); + if (err) + return err; } - /* - * CHECKME: the previous code returned -EIO if the addr wasn't a - * valid task virtual addr. The new one will return -EINVAL in this - * case. - * -EINVAL may be what we want for in-kernel breakpoints users, but - * -EIO looks better for ptrace, since we refuse a register writing - * for the user. And anyway this is the previous behaviour. - */ - if (IS_ERR(bp)) - return PTR_ERR(bp); - t->ptrace_bps[nr] = bp; return 0; } diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 42da1ce19ec0..69f07a9f1277 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -55,7 +55,7 @@ register_user_hw_breakpoint(struct perf_event_attr *attr, struct task_struct *tsk); /* FIXME: only change from the attr, and don't unregister */ -extern struct perf_event * +extern int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr); /* @@ -91,7 +91,7 @@ static inline struct perf_event * register_user_hw_breakpoint(struct perf_event_attr *attr, perf_overflow_handler_t triggered, struct task_struct *tsk) { return NULL; } -static inline struct perf_event * +static inline int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr) { return NULL; } static inline struct perf_event * diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index bf3329413e18..64a53f74c9a9 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -872,6 +872,8 @@ extern void perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); extern int perf_swevent_get_recursion_context(void); extern void perf_swevent_put_recursion_context(int rctx); +extern void perf_event_enable(struct perf_event *event); +extern void perf_event_disable(struct perf_event *event); #else static inline void perf_event_task_sched_in(struct task_struct *task, int cpu) { } @@ -902,7 +904,8 @@ static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } static inline int perf_swevent_get_recursion_context(void) { return -1; } static inline void perf_swevent_put_recursion_context(int rctx) { } - +static inline void perf_event_enable(struct perf_event *event) { } +static inline void perf_event_disable(struct perf_event *event) { } #endif #define perf_output_put(handle, x) \ diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 03a0773ac2b2..366eedf949c0 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -320,18 +320,40 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); * @triggered: callback to trigger when we hit the breakpoint * @tsk: pointer to 'task_struct' of the process to which the address belongs */ -struct perf_event * -modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr) +int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr) { - /* - * FIXME: do it without unregistering - * - We don't want to lose our slot - * - If the new bp is incorrect, don't lose the older one - */ - unregister_hw_breakpoint(bp); + u64 old_addr = bp->attr.bp_addr; + int old_type = bp->attr.bp_type; + int old_len = bp->attr.bp_len; + int err = 0; + + perf_event_disable(bp); + + bp->attr.bp_addr = attr->bp_addr; + bp->attr.bp_type = attr->bp_type; + bp->attr.bp_len = attr->bp_len; + + if (attr->disabled) + goto end; - return perf_event_create_kernel_counter(attr, -1, bp->ctx->task->pid, - bp->overflow_handler); + err = arch_validate_hwbkpt_settings(bp, bp->ctx->task); + if (!err) + perf_event_enable(bp); + + if (err) { + bp->attr.bp_addr = old_addr; + bp->attr.bp_type = old_type; + bp->attr.bp_len = old_len; + if (!bp->attr.disabled) + perf_event_enable(bp); + + return err; + } + +end: + bp->attr.disabled = attr->disabled; + + return 0; } EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); diff --git a/kernel/perf_event.c b/kernel/perf_event.c index fd43ff4ac860..3b0cf86eee84 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -567,7 +567,7 @@ static void __perf_event_disable(void *info) * is the current context on this CPU and preemption is disabled, * hence we can't get into perf_event_task_sched_out for this context. */ -static void perf_event_disable(struct perf_event *event) +void perf_event_disable(struct perf_event *event) { struct perf_event_context *ctx = event->ctx; struct task_struct *task = ctx->task; @@ -971,7 +971,7 @@ static void __perf_event_enable(void *info) * perf_event_for_each_child or perf_event_for_each as described * for perf_event_disable. */ -static void perf_event_enable(struct perf_event *event) +void perf_event_enable(struct perf_event *event) { struct perf_event_context *ctx = event->ctx; struct task_struct *task = ctx->task; -- cgit v1.2.3 From 814e2c84a722c45650a9b8f52285d7ba6874f63b Mon Sep 17 00:00:00 2001 From: Andy Isaacson Date: Tue, 8 Dec 2009 00:29:42 -0800 Subject: x86: Factor duplicated code out of __show_regs() into show_regs_common() Unify x86_32 and x86_64 implementations of __show_regs() header, standardizing on the x86_64 format string in the process. Also, 32-bit will now call print_modules. Signed-off-by: Andy Isaacson Cc: Arjan van de Ven Cc: Robert Hancock Cc: Richard Zidlicky Cc: Andrew Morton LKML-Reference: <20091208082942.GA27174@hexapodia.org> [ v2: resolved conflict ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/system.h | 1 + arch/x86/kernel/process.c | 18 ++++++++++++++++++ arch/x86/kernel/process_32.c | 14 +------------- arch/x86/kernel/process_64.c | 16 ++-------------- 4 files changed, 22 insertions(+), 27 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 022a84386de8..ecb544e65382 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -23,6 +23,7 @@ struct task_struct *__switch_to(struct task_struct *prev, struct tss_struct; void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss); +extern void show_regs_common(void); #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 5e2ba634ea15..90cf1250a005 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -10,6 +10,8 @@ #include #include #include +#include +#include #include #include #include @@ -90,6 +92,22 @@ void exit_thread(void) } } +void show_regs_common(void) +{ + const char *board; + + board = dmi_get_system_info(DMI_PRODUCT_NAME); + if (!board) + board = ""; + + printk("\n"); + printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n", + current->pid, current->comm, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version, board); +} + void flush_thread(void) { struct task_struct *tsk = current; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 075580b35682..120b88797a75 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -35,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -128,7 +126,6 @@ void __show_regs(struct pt_regs *regs, int all) unsigned long d0, d1, d2, d3, d6, d7; unsigned long sp; unsigned short ss, gs; - const char *board; if (user_mode_vm(regs)) { sp = regs->sp; @@ -140,16 +137,7 @@ void __show_regs(struct pt_regs *regs, int all) savesegment(gs, gs); } - printk("\n"); - - board = dmi_get_system_info(DMI_PRODUCT_NAME); - if (!board) - board = ""; - printk("Pid: %d, comm: %s %s (%s %.*s) %s\n", - task_pid_nr(current), current->comm, - print_tainted(), init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version, board); + show_regs_common(); printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", (u16)regs->cs, regs->ip, regs->flags, diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index c95c8f4e790a..e5ab0cd0ef36 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -38,7 +37,6 @@ #include #include #include -#include #include #include @@ -163,18 +161,8 @@ void __show_regs(struct pt_regs *regs, int all) unsigned long d0, d1, d2, d3, d6, d7; unsigned int fsindex, gsindex; unsigned int ds, cs, es; - const char *board; - - printk("\n"); - print_modules(); - board = dmi_get_system_info(DMI_PRODUCT_NAME); - if (!board) - board = ""; - printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n", - current->pid, current->comm, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version, board); + + show_regs_common(); printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); printk_address(regs->ip, 1); printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, -- cgit v1.2.3 From a1884b8e558ef6395f6033f9e1b69b332dd040e0 Mon Sep 17 00:00:00 2001 From: Andy Isaacson Date: Tue, 8 Dec 2009 00:30:21 -0800 Subject: x86: Print DMI_BOARD_NAME as well as DMI_PRODUCT_NAME from __show_regs() Robert Hancock observes that DMI_BOARD_NAME is often more useful than DMI_PRODUCT_NAME, especially on standalone motherboards. So, print both. Signed-off-by: Andy Isaacson Cc: Arjan van de Ven Cc: Robert Hancock Cc: Richard Zidlicky Cc: Andrew Morton LKML-Reference: <20091208083021.GB27174@hexapodia.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/process.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 90cf1250a005..7a7bd4e3ec49 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -94,18 +94,21 @@ void exit_thread(void) void show_regs_common(void) { - const char *board; + const char *board, *product; - board = dmi_get_system_info(DMI_PRODUCT_NAME); + board = dmi_get_system_info(DMI_BOARD_NAME); if (!board) board = ""; + product = dmi_get_system_info(DMI_PRODUCT_NAME); + if (!product) + product = ""; printk("\n"); - printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n", + printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s/%s\n", current->pid, current->comm, print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), - init_utsname()->version, board); + init_utsname()->version, board, product); } void flush_thread(void) -- cgit v1.2.3 From e258e4e0b495e6ecbd073d6bef1eafb62a58919a Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 9 Dec 2009 19:01:51 -0500 Subject: x86-32: Add new pt_regs stubs Add new stubs which add the pt_regs pointer as the last arg, matching 64-bit. This will allow these syscalls to be easily merged. Signed-off-by: Brian Gerst LKML-Reference: <1260403316-5679-2-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/entry_32.S | 49 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 50b9c220e121..34dbfa909dd7 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -725,22 +725,49 @@ END(syscall_badsys) /* * System calls that need a pt_regs pointer. */ -#define PTREGSCALL(name) \ +#define PTREGSCALL0(name) \ ALIGN; \ ptregs_##name: \ leal 4(%esp),%eax; \ jmp sys_##name; -PTREGSCALL(iopl) -PTREGSCALL(fork) -PTREGSCALL(clone) -PTREGSCALL(vfork) -PTREGSCALL(execve) -PTREGSCALL(sigaltstack) -PTREGSCALL(sigreturn) -PTREGSCALL(rt_sigreturn) -PTREGSCALL(vm86) -PTREGSCALL(vm86old) +#define PTREGSCALL1(name) \ + ALIGN; \ +ptregs_##name: \ + leal 4(%esp),%edx; \ + movl PT_EBX(%edx),%eax; \ + jmp sys_##name; + +#define PTREGSCALL2(name) \ + ALIGN; \ +ptregs_##name: \ + leal 4(%esp),%ecx; \ + movl PT_ECX(%ecx),%edx; \ + movl PT_EBX(%ecx),%eax; \ + jmp sys_##name; + +#define PTREGSCALL3(name) \ + ALIGN; \ +ptregs_##name: \ + leal 4(%esp),%eax; \ + pushl %eax; \ + movl PT_EDX(%eax),%ecx; \ + movl PT_ECX(%eax),%edx; \ + movl PT_EBX(%eax),%eax; \ + call sys_##name; \ + addl $4,%esp; \ + ret + +PTREGSCALL0(iopl) +PTREGSCALL0(fork) +PTREGSCALL0(clone) +PTREGSCALL0(vfork) +PTREGSCALL0(execve) +PTREGSCALL0(sigaltstack) +PTREGSCALL0(sigreturn) +PTREGSCALL0(rt_sigreturn) +PTREGSCALL0(vm86) +PTREGSCALL0(vm86old) .macro FIXUP_ESPFIX_STACK /* -- cgit v1.2.3 From 27f59559d63375a4d59e7c720a439d9f0b47edad Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 9 Dec 2009 19:01:52 -0500 Subject: x86: Merge sys_iopl Change 32-bit sys_iopl to PTREGSCALL1, and merge with 64-bit. Signed-off-by: Brian Gerst LKML-Reference: <1260403316-5679-3-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/syscalls.h | 6 +----- arch/x86/kernel/entry_32.S | 2 +- arch/x86/kernel/ioport.c | 28 +++++----------------------- 3 files changed, 7 insertions(+), 29 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 372b76edd63f..4b694cd904c4 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -18,6 +18,7 @@ /* Common in X86_32 and X86_64 */ /* kernel/ioport.c */ asmlinkage long sys_ioperm(unsigned long, unsigned long, int); +long sys_iopl(unsigned int, struct pt_regs *); /* kernel/process.c */ int sys_fork(struct pt_regs *); @@ -35,8 +36,6 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *); /* X86_32 only */ #ifdef CONFIG_X86_32 -/* kernel/ioport.c */ -long sys_iopl(struct pt_regs *); /* kernel/process_32.c */ int sys_clone(struct pt_regs *); @@ -70,9 +69,6 @@ int sys_vm86(struct pt_regs *); #else /* CONFIG_X86_32 */ /* X86_64 only */ -/* kernel/ioport.c */ -asmlinkage long sys_iopl(unsigned int, struct pt_regs *); - /* kernel/process_64.c */ asmlinkage long sys_clone(unsigned long, unsigned long, void __user *, void __user *, diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 34dbfa909dd7..ab7fcef37453 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -758,7 +758,7 @@ ptregs_##name: \ addl $4,%esp; \ ret -PTREGSCALL0(iopl) +PTREGSCALL1(iopl) PTREGSCALL0(fork) PTREGSCALL0(clone) PTREGSCALL0(vfork) diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 99c4d308f16b..85ecc7c57ba6 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -103,9 +103,10 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) * on system-call entry - see also fork() and the signal handling * code. */ -static int do_iopl(unsigned int level, struct pt_regs *regs) +long sys_iopl(unsigned int level, struct pt_regs *regs) { unsigned int old = (regs->flags >> 12) & 3; + struct thread_struct *t = ¤t->thread; if (level > 3) return -EINVAL; @@ -115,29 +116,10 @@ static int do_iopl(unsigned int level, struct pt_regs *regs) return -EPERM; } regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12); - - return 0; -} - #ifdef CONFIG_X86_32 -long sys_iopl(struct pt_regs *regs) -{ - unsigned int level = regs->bx; - struct thread_struct *t = ¤t->thread; - int rc; - - rc = do_iopl(level, regs); - if (rc < 0) - goto out; - t->iopl = level << 12; set_iopl_mask(t->iopl); -out: - return rc; -} -#else -asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs) -{ - return do_iopl(level, regs); -} #endif + + return 0; +} -- cgit v1.2.3 From 11cf88bd0b8165b65aaabaee0977e9a3ad474ab7 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 9 Dec 2009 19:01:53 -0500 Subject: x86: Merge sys_execve Change 32-bit sys_execve to PTREGSCALL3, and merge with 64-bit. Signed-off-by: Brian Gerst LKML-Reference: <1260403316-5679-4-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/syscalls.h | 6 ++---- arch/x86/kernel/entry_32.S | 2 +- arch/x86/kernel/process.c | 26 ++++++++++++++++++++++++++ arch/x86/kernel/process_32.c | 25 ------------------------- arch/x86/kernel/process_64.c | 19 ------------------- 5 files changed, 29 insertions(+), 49 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 4b694cd904c4..48c48e508b9f 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -23,6 +23,8 @@ long sys_iopl(unsigned int, struct pt_regs *); /* kernel/process.c */ int sys_fork(struct pt_regs *); int sys_vfork(struct pt_regs *); +long sys_execve(char __user *, char __user * __user *, + char __user * __user *, struct pt_regs *); /* kernel/ldt.c */ asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); @@ -39,7 +41,6 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *); /* kernel/process_32.c */ int sys_clone(struct pt_regs *); -int sys_execve(struct pt_regs *); /* kernel/signal.c */ asmlinkage int sys_sigsuspend(int, int, old_sigset_t); @@ -73,9 +74,6 @@ int sys_vm86(struct pt_regs *); asmlinkage long sys_clone(unsigned long, unsigned long, void __user *, void __user *, struct pt_regs *); -asmlinkage long sys_execve(char __user *, char __user * __user *, - char __user * __user *, - struct pt_regs *); long sys_arch_prctl(int, unsigned long); /* kernel/signal.c */ diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index ab7fcef37453..a96a0d8a0fdb 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -762,7 +762,7 @@ PTREGSCALL1(iopl) PTREGSCALL0(fork) PTREGSCALL0(clone) PTREGSCALL0(vfork) -PTREGSCALL0(execve) +PTREGSCALL3(execve) PTREGSCALL0(sigaltstack) PTREGSCALL0(sigreturn) PTREGSCALL0(rt_sigreturn) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 5e2ba634ea15..bb17bd9334fb 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -235,6 +235,32 @@ int sys_vfork(struct pt_regs *regs) } +/* + * sys_execve() executes a new program. + */ +long sys_execve(char __user *name, char __user * __user *argv, + char __user * __user *envp, struct pt_regs *regs) +{ + long error; + char *filename; + + filename = getname(name); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + return error; + error = do_execve(filename, argv, envp, regs); + +#ifdef CONFIG_X86_32 + if (error == 0) { + /* Make sure we don't return using sysenter.. */ + set_thread_flag(TIF_IRET); + } +#endif + + putname(filename); + return error; +} + /* * Idle related variables and functions */ diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 075580b35682..486e38e2900b 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -451,31 +451,6 @@ int sys_clone(struct pt_regs *regs) return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr); } -/* - * sys_execve() executes a new program. - */ -int sys_execve(struct pt_regs *regs) -{ - int error; - char *filename; - - filename = getname((char __user *) regs->bx); - error = PTR_ERR(filename); - if (IS_ERR(filename)) - goto out; - error = do_execve(filename, - (char __user * __user *) regs->cx, - (char __user * __user *) regs->dx, - regs); - if (error == 0) { - /* Make sure we don't return using sysenter.. */ - set_thread_flag(TIF_IRET); - } - putname(filename); -out: - return error; -} - #define top_esp (THREAD_SIZE - sizeof(unsigned long)) #define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long)) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index c95c8f4e790a..671960d82587 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -520,25 +520,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) return prev_p; } -/* - * sys_execve() executes a new program. - */ -asmlinkage -long sys_execve(char __user *name, char __user * __user *argv, - char __user * __user *envp, struct pt_regs *regs) -{ - long error; - char *filename; - - filename = getname(name); - error = PTR_ERR(filename); - if (IS_ERR(filename)) - return error; - error = do_execve(filename, argv, envp, regs); - putname(filename); - return error; -} - void set_personality_64bit(void) { /* inherit personality from parent */ -- cgit v1.2.3 From 052acad48a566a6dbcccb95e5d22e5e1b7cac8dd Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 9 Dec 2009 19:01:54 -0500 Subject: x86: Merge sys_sigaltstack Change 32-bit sys_sigaltstack to PTREGSCALL2, and merge with 64-bit. Signed-off-by: Brian Gerst LKML-Reference: <1260403316-5679-5-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/syscalls.h | 8 +++----- arch/x86/kernel/entry_32.S | 2 +- arch/x86/kernel/signal.c | 12 +----------- 3 files changed, 5 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 48c48e508b9f..94e0b61fb040 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -31,6 +31,9 @@ asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); /* kernel/signal.c */ long sys_rt_sigreturn(struct pt_regs *); +long sys_sigaltstack(const stack_t __user *, stack_t __user *, + struct pt_regs *); + /* kernel/tls.c */ asmlinkage int sys_set_thread_area(struct user_desc __user *); @@ -46,7 +49,6 @@ int sys_clone(struct pt_regs *); asmlinkage int sys_sigsuspend(int, int, old_sigset_t); asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, struct old_sigaction __user *); -int sys_sigaltstack(struct pt_regs *); unsigned long sys_sigreturn(struct pt_regs *); /* kernel/sys_i386_32.c */ @@ -76,10 +78,6 @@ asmlinkage long sys_clone(unsigned long, unsigned long, struct pt_regs *); long sys_arch_prctl(int, unsigned long); -/* kernel/signal.c */ -asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *, - struct pt_regs *); - /* kernel/sys_x86_64.c */ struct new_utsname; diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index a96a0d8a0fdb..621ef4599416 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -763,7 +763,7 @@ PTREGSCALL0(fork) PTREGSCALL0(clone) PTREGSCALL0(vfork) PTREGSCALL3(execve) -PTREGSCALL0(sigaltstack) +PTREGSCALL2(sigaltstack) PTREGSCALL0(sigreturn) PTREGSCALL0(rt_sigreturn) PTREGSCALL0(vm86) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 74fe6d86dc5d..4fd173cd8e57 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -545,22 +545,12 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, } #endif /* CONFIG_X86_32 */ -#ifdef CONFIG_X86_32 -int sys_sigaltstack(struct pt_regs *regs) -{ - const stack_t __user *uss = (const stack_t __user *)regs->bx; - stack_t __user *uoss = (stack_t __user *)regs->cx; - - return do_sigaltstack(uss, uoss, regs->sp); -} -#else /* !CONFIG_X86_32 */ -asmlinkage long +long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, struct pt_regs *regs) { return do_sigaltstack(uss, uoss, regs->sp); } -#endif /* CONFIG_X86_32 */ /* * Do a signal return; undo the signal stack. -- cgit v1.2.3 From f1382f157fb1175bba008abad0907310a1e459ce Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 9 Dec 2009 19:01:55 -0500 Subject: x86, 32-bit: Convert sys_vm86 & sys_vm86old Convert these to new PTREGSCALL stubs. Signed-off-by: Brian Gerst LKML-Reference: <1260403316-5679-6-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/syscalls.h | 4 ++-- arch/x86/kernel/entry_32.S | 4 ++-- arch/x86/kernel/vm86_32.c | 11 +++++------ 3 files changed, 9 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 94e0b61fb040..df2c51106565 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -66,8 +66,8 @@ asmlinkage int sys_uname(struct old_utsname __user *); asmlinkage int sys_olduname(struct oldold_utsname __user *); /* kernel/vm86_32.c */ -int sys_vm86old(struct pt_regs *); -int sys_vm86(struct pt_regs *); +int sys_vm86old(struct vm86_struct __user *, struct pt_regs *); +int sys_vm86(unsigned long, unsigned long, struct pt_regs *); #else /* CONFIG_X86_32 */ diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 621ef4599416..6c2f25d9b9d5 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -766,8 +766,8 @@ PTREGSCALL3(execve) PTREGSCALL2(sigaltstack) PTREGSCALL0(sigreturn) PTREGSCALL0(rt_sigreturn) -PTREGSCALL0(vm86) -PTREGSCALL0(vm86old) +PTREGSCALL2(vm86) +PTREGSCALL1(vm86old) .macro FIXUP_ESPFIX_STACK /* diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 9c4e62539058..5ffb5622f793 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -197,9 +197,8 @@ out: static int do_vm86_irq_handling(int subfunction, int irqnumber); static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); -int sys_vm86old(struct pt_regs *regs) +int sys_vm86old(struct vm86_struct __user *v86, struct pt_regs *regs) { - struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs->bx; struct kernel_vm86_struct info; /* declare this _on top_, * this avoids wasting of stack space. * This remains on the stack until we @@ -227,7 +226,7 @@ out: } -int sys_vm86(struct pt_regs *regs) +int sys_vm86(unsigned long cmd, unsigned long arg, struct pt_regs *regs) { struct kernel_vm86_struct info; /* declare this _on top_, * this avoids wasting of stack space. @@ -239,12 +238,12 @@ int sys_vm86(struct pt_regs *regs) struct vm86plus_struct __user *v86; tsk = current; - switch (regs->bx) { + switch (cmd) { case VM86_REQUEST_IRQ: case VM86_FREE_IRQ: case VM86_GET_IRQ_BITS: case VM86_GET_AND_RESET_IRQ: - ret = do_vm86_irq_handling(regs->bx, (int)regs->cx); + ret = do_vm86_irq_handling(cmd, (int)arg); goto out; case VM86_PLUS_INSTALL_CHECK: /* @@ -261,7 +260,7 @@ int sys_vm86(struct pt_regs *regs) ret = -EPERM; if (tsk->thread.saved_sp0) goto out; - v86 = (struct vm86plus_struct __user *)regs->cx; + v86 = (struct vm86plus_struct __user *)arg; tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, offsetof(struct kernel_vm86_struct, regs32) - sizeof(info.regs)); -- cgit v1.2.3 From f839bbc5c81b1c92ff8e81c360e9564f7b961b2e Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 9 Dec 2009 19:01:56 -0500 Subject: x86: Merge sys_clone Change 32-bit sys_clone to new PTREGSCALL stub, and merge with 64-bit. Signed-off-by: Brian Gerst LKML-Reference: <1260403316-5679-7-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/syscalls.h | 8 ++------ arch/x86/kernel/entry_32.S | 14 +++++++++++++- arch/x86/kernel/process.c | 9 +++++++++ arch/x86/kernel/process_32.c | 15 --------------- arch/x86/kernel/process_64.c | 9 --------- 5 files changed, 24 insertions(+), 31 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index df2c51106565..b0ce78061708 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -25,6 +25,8 @@ int sys_fork(struct pt_regs *); int sys_vfork(struct pt_regs *); long sys_execve(char __user *, char __user * __user *, char __user * __user *, struct pt_regs *); +long sys_clone(unsigned long, unsigned long, void __user *, + void __user *, struct pt_regs *); /* kernel/ldt.c */ asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); @@ -42,9 +44,6 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *); /* X86_32 only */ #ifdef CONFIG_X86_32 -/* kernel/process_32.c */ -int sys_clone(struct pt_regs *); - /* kernel/signal.c */ asmlinkage int sys_sigsuspend(int, int, old_sigset_t); asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, @@ -73,9 +72,6 @@ int sys_vm86(unsigned long, unsigned long, struct pt_regs *); /* X86_64 only */ /* kernel/process_64.c */ -asmlinkage long sys_clone(unsigned long, unsigned long, - void __user *, void __user *, - struct pt_regs *); long sys_arch_prctl(int, unsigned long); /* kernel/sys_x86_64.c */ diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 6c2f25d9b9d5..6492555d123d 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -760,7 +760,6 @@ ptregs_##name: \ PTREGSCALL1(iopl) PTREGSCALL0(fork) -PTREGSCALL0(clone) PTREGSCALL0(vfork) PTREGSCALL3(execve) PTREGSCALL2(sigaltstack) @@ -769,6 +768,19 @@ PTREGSCALL0(rt_sigreturn) PTREGSCALL2(vm86) PTREGSCALL1(vm86old) +/* Clone is an oddball. The 4th arg is in %edi */ + ALIGN; +ptregs_clone: + leal 4(%esp),%eax + pushl %eax + pushl PT_EDI(%eax) + movl PT_EDX(%eax),%ecx + movl PT_ECX(%eax),%edx + movl PT_EBX(%eax),%eax + call sys_clone + addl $8,%esp + ret + .macro FIXUP_ESPFIX_STACK /* * Switch back for ESPFIX stack to the normal zerobased stack diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index bb17bd9334fb..f3c1a6b3a65e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -234,6 +234,15 @@ int sys_vfork(struct pt_regs *regs) NULL, NULL); } +long +sys_clone(unsigned long clone_flags, unsigned long newsp, + void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) +{ + if (!newsp) + newsp = regs->sp; + return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); +} + /* * sys_execve() executes a new program. diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 486e38e2900b..506d5a7ba17c 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -436,21 +436,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) return prev_p; } -int sys_clone(struct pt_regs *regs) -{ - unsigned long clone_flags; - unsigned long newsp; - int __user *parent_tidptr, *child_tidptr; - - clone_flags = regs->bx; - newsp = regs->cx; - parent_tidptr = (int __user *)regs->dx; - child_tidptr = (int __user *)regs->di; - if (!newsp) - newsp = regs->sp; - return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr); -} - #define top_esp (THREAD_SIZE - sizeof(unsigned long)) #define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long)) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 671960d82587..83019f94b83d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -534,15 +534,6 @@ void set_personality_64bit(void) current->personality &= ~READ_IMPLIES_EXEC; } -asmlinkage long -sys_clone(unsigned long clone_flags, unsigned long newsp, - void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) -{ - if (!newsp) - newsp = regs->sp; - return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); -} - unsigned long get_wchan(struct task_struct *p) { unsigned long stack; -- cgit v1.2.3 From ce9119ad90b1caba550447bfcc0a21850558ca49 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 9 Dec 2009 16:33:44 -0800 Subject: x86-32: Avoid pipeline serialization in PTREGSCALL1 and 2 In the PTREGSCALL1 and 2 macros, we can trivially avoid an unnecessary pipeline serialization, so do so. In PTREGSCALLS3 this is much less clear-cut since we have to push a new value to the stack. Leave it alone for now assuming it is as good as it is going to be; may want to check on Atom or another in-order x86 to see if we can do better. Signed-off-by: H. Peter Anvin Cc: Brian Gerst LKML-Reference: <1260403316-5679-2-git-send-email-brgerst@gmail.com> --- arch/x86/kernel/entry_32.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 6492555d123d..cb12b9bfc9cc 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -735,15 +735,15 @@ ptregs_##name: \ ALIGN; \ ptregs_##name: \ leal 4(%esp),%edx; \ - movl PT_EBX(%edx),%eax; \ + movl (PT_EBX+4)(%esp),%eax; \ jmp sys_##name; #define PTREGSCALL2(name) \ ALIGN; \ ptregs_##name: \ leal 4(%esp),%ecx; \ - movl PT_ECX(%ecx),%edx; \ - movl PT_EBX(%ecx),%eax; \ + movl (PT_ECX+4)(%esp),%edx; \ + movl (PT_EBX+4)(%esp),%eax; \ jmp sys_##name; #define PTREGSCALL3(name) \ -- cgit v1.2.3 From fc380ceed7fe469728ea4acdbda4495ea943ee1c Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 9 Dec 2009 16:54:08 -0800 Subject: x86-64, paravirt: Call set_iopl_mask() on 64 bits set_iopl_mask() is a no-op on 64 bits, but it is also a paravirt hook, so call it even on 64 bits. Signed-off-by: H. Peter Anvin Cc: Jeremy Fitzhardinge Cc: Brian Gerst LKML-Reference: <1260403316-5679-3-git-send-email-brgerst@gmail.com> --- arch/x86/kernel/ioport.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 85ecc7c57ba6..8eec0ec59af2 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -116,10 +116,8 @@ long sys_iopl(unsigned int level, struct pt_regs *regs) return -EPERM; } regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12); -#ifdef CONFIG_X86_32 t->iopl = level << 12; set_iopl_mask(t->iopl); -#endif return 0; } -- cgit v1.2.3 From 5cd476effe9570d2e520cf904d51f5c992972647 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 9 Dec 2009 10:45:33 -0800 Subject: x86: es7000_32.c: Use pr_ and add pr_fmt(fmt) - Added #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - Converted a few printk(KERN_INFO to pr_info( - Stripped "es7000_mipcfg" from pr_debug Signed-off-by: Joe Perches LKML-Reference: <3b4375af246dec5941168858910210937c110af9.1260383912.git.joe@perches.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/es7000_32.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index e85f8fb7f8e7..dd2b5f264643 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -27,6 +27,9 @@ * * http://www.unisys.com */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -223,9 +226,9 @@ static int parse_unisys_oem(char *oemptr) mip_addr = val; mip = (struct mip_reg *)val; mip_reg = __va(mip); - pr_debug("es7000_mipcfg: host_reg = 0x%lx \n", + pr_debug("host_reg = 0x%lx\n", (unsigned long)host_reg); - pr_debug("es7000_mipcfg: mip_reg = 0x%lx \n", + pr_debug("mip_reg = 0x%lx\n", (unsigned long)mip_reg); success++; break; @@ -401,7 +404,7 @@ static void es7000_enable_apic_mode(void) if (!es7000_plat) return; - printk(KERN_INFO "ES7000: Enabling APIC mode.\n"); + pr_info("Enabling APIC mode.\n"); memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); es7000_mip_reg.off_0x00 = MIP_SW_APIC; es7000_mip_reg.off_0x38 = MIP_VALID; @@ -514,8 +517,7 @@ static void es7000_setup_apic_routing(void) { int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); - printk(KERN_INFO - "Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", + pr_info("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", (apic_version[apic] == 0x14) ? "Physical Cluster" : "Logical Cluster", nr_ioapics, cpumask_bits(es7000_target_cpus())[0]); -- cgit v1.2.3 From 40685236b3161b80030a4df34bcbc5941ea59876 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 9 Dec 2009 10:45:34 -0800 Subject: x86: setup_percpu.c: Use pr_ and add pr_fmt(fmt) - Added #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - Stripped PERCPU: from a pr_warning Signed-off-by: Joe Perches LKML-Reference: <7ead24eccbea8f2b11795abad3e2893a98e1e111.1260383912.git.joe@perches.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index d559af913e1f..35abcb8b00e9 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -1,3 +1,5 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -20,9 +22,9 @@ #include #ifdef CONFIG_DEBUG_PER_CPU_MAPS -# define DBG(x...) printk(KERN_DEBUG x) +# define DBG(fmt, ...) pr_dbg(fmt, ##__VA_ARGS__) #else -# define DBG(x...) +# define DBG(fmt, ...) do { if (0) pr_dbg(fmt, ##__VA_ARGS__); } while (0) #endif DEFINE_PER_CPU(int, cpu_number); @@ -116,8 +118,8 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, } else { ptr = __alloc_bootmem_node_nopanic(NODE_DATA(node), size, align, goal); - pr_debug("per cpu data for cpu%d %lu bytes on node%d at " - "%016lx\n", cpu, size, node, __pa(ptr)); + pr_debug("per cpu data for cpu%d %lu bytes on node%d at %016lx\n", + cpu, size, node, __pa(ptr)); } return ptr; #else @@ -198,8 +200,7 @@ void __init setup_per_cpu_areas(void) pcpu_cpu_distance, pcpu_fc_alloc, pcpu_fc_free); if (rc < 0) - pr_warning("PERCPU: %s allocator failed (%d), " - "falling back to page size\n", + pr_warning("%s allocator failed (%d), falling back to page size\n", pcpu_fc_names[pcpu_chosen_fc], rc); } if (rc < 0) -- cgit v1.2.3 From a78d9626f4f6fa7904bfdb071205080743125983 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 9 Dec 2009 10:45:35 -0800 Subject: x86: i8254.c: Add pr_fmt(fmt) - Add pr_fmt(fmt) "pit: " fmt - Strip pit: prefixes from pr_debug Signed-off-by: Joe Perches LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kvm/i8254.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index fab7440c9bb2..296aba49472a 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -29,6 +29,8 @@ * Based on QEMU and Xen. */ +#define pr_fmt(fmt) "pit: " fmt + #include #include "irq.h" @@ -262,7 +264,7 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) static void destroy_pit_timer(struct kvm_timer *pt) { - pr_debug("pit: execute del timer!\n"); + pr_debug("execute del timer!\n"); hrtimer_cancel(&pt->timer); } @@ -284,7 +286,7 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); - pr_debug("pit: create pit timer, interval is %llu nsec\n", interval); + pr_debug("create pit timer, interval is %llu nsec\n", interval); /* TODO The new value only affected after the retriggered */ hrtimer_cancel(&pt->timer); @@ -309,7 +311,7 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val) WARN_ON(!mutex_is_locked(&ps->lock)); - pr_debug("pit: load_count val is %d, channel is %d\n", val, channel); + pr_debug("load_count val is %d, channel is %d\n", val, channel); /* * The largest possible initial count is 0; this is equivalent @@ -395,8 +397,8 @@ static int pit_ioport_write(struct kvm_io_device *this, mutex_lock(&pit_state->lock); if (val != 0) - pr_debug("pit: write addr is 0x%x, len is %d, val is 0x%x\n", - (unsigned int)addr, len, val); + pr_debug("write addr is 0x%x, len is %d, val is 0x%x\n", + (unsigned int)addr, len, val); if (addr == 3) { channel = val >> 6; -- cgit v1.2.3 From 1bd591a5f17f546121fcf0015d72cc3e9c49cc29 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 9 Dec 2009 10:45:36 -0800 Subject: x86: kmmio.c: Add and use pr_fmt(fmt) - Add #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - Strip "kmmio: " from pr_s Signed-off-by: Joe Perches LKML-Reference: <7aa509f8a23933036d39f54bd51e9acc52068049.1260383912.git.joe@perches.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 88612cdcdc3b..68c3e89af5c2 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -5,6 +5,8 @@ * 2008 Pekka Paalanen */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -136,7 +138,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear) pte_t *pte = lookup_address(f->page, &level); if (!pte) { - pr_err("kmmio: no pte for page 0x%08lx\n", f->page); + pr_err("no pte for page 0x%08lx\n", f->page); return -1; } @@ -148,7 +150,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear) clear_pte_presence(pte, clear, &f->old_presence); break; default: - pr_err("kmmio: unexpected page level 0x%x.\n", level); + pr_err("unexpected page level 0x%x.\n", level); return -1; } @@ -170,13 +172,14 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear) static int arm_kmmio_fault_page(struct kmmio_fault_page *f) { int ret; - WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n"); + WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n")); if (f->armed) { - pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n", - f->page, f->count, !!f->old_presence); + pr_warning("double-arm: page 0x%08lx, ref %d, old %d\n", + f->page, f->count, !!f->old_presence); } ret = clear_page_presence(f, true); - WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page); + WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming 0x%08lx failed.\n"), + f->page); f->armed = true; return ret; } @@ -240,24 +243,21 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) * condition needs handling by do_page_fault(), the * page really not being present is the most common. */ - pr_debug("kmmio: secondary hit for 0x%08lx CPU %d.\n", - addr, smp_processor_id()); + pr_debug("secondary hit for 0x%08lx CPU %d.\n", + addr, smp_processor_id()); if (!faultpage->old_presence) - pr_info("kmmio: unexpected secondary hit for " - "address 0x%08lx on CPU %d.\n", addr, - smp_processor_id()); + pr_info("unexpected secondary hit for address 0x%08lx on CPU %d.\n", + addr, smp_processor_id()); } else { /* * Prevent overwriting already in-flight context. * This should not happen, let's hope disarming at * least prevents a panic. */ - pr_emerg("kmmio: recursive probe hit on CPU %d, " - "for address 0x%08lx. Ignoring.\n", - smp_processor_id(), addr); - pr_emerg("kmmio: previous hit was at 0x%08lx.\n", - ctx->addr); + pr_emerg("recursive probe hit on CPU %d, for address 0x%08lx. Ignoring.\n", + smp_processor_id(), addr); + pr_emerg("previous hit was at 0x%08lx.\n", ctx->addr); disarm_kmmio_fault_page(faultpage); } goto no_kmmio_ctx; @@ -316,8 +316,8 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) * something external causing them (f.e. using a debugger while * mmio tracing enabled), or erroneous behaviour */ - pr_warning("kmmio: unexpected debug trap on CPU %d.\n", - smp_processor_id()); + pr_warning("unexpected debug trap on CPU %d.\n", + smp_processor_id()); goto out; } @@ -425,7 +425,7 @@ int register_kmmio_probe(struct kmmio_probe *p) list_add_rcu(&p->list, &kmmio_probes); while (size < size_lim) { if (add_kmmio_fault_page(p->addr + size)) - pr_err("kmmio: Unable to set page fault.\n"); + pr_err("Unable to set page fault.\n"); size += PAGE_SIZE; } out: @@ -511,7 +511,7 @@ void unregister_kmmio_probe(struct kmmio_probe *p) drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC); if (!drelease) { - pr_crit("kmmio: leaking kmmio_fault_page objects.\n"); + pr_crit("leaking kmmio_fault_page objects.\n"); return; } drelease->release_list = release_list; -- cgit v1.2.3 From 3a0340be06a9356eb61f6804107480acbe62c069 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 9 Dec 2009 10:45:37 -0800 Subject: x86: mmio-mod.c: Use pr_fmt - Add #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - Remove #define NAME - Remove NAME from pr_ Signed-off-by: Joe Perches LKML-Reference: <009cb214c45ef932df0242856228f4739cc91408.1260383912.git.joe@perches.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/mmio-mod.c | 71 +++++++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 36 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 132772a8ec57..4c765e9c4664 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -19,6 +19,9 @@ * * Derived from the read-mod example from relay-examples by Tom Zanussi. */ + +#define pr_fmt(fmt) "mmiotrace: " + #define DEBUG 1 #include @@ -36,8 +39,6 @@ #include "pf_in.h" -#define NAME "mmiotrace: " - struct trap_reason { unsigned long addr; unsigned long ip; @@ -96,17 +97,18 @@ static void print_pte(unsigned long address) pte_t *pte = lookup_address(address, &level); if (!pte) { - pr_err(NAME "Error in %s: no pte for page 0x%08lx\n", - __func__, address); + pr_err("Error in %s: no pte for page 0x%08lx\n", + __func__, address); return; } if (level == PG_LEVEL_2M) { - pr_emerg(NAME "4MB pages are not currently supported: " - "0x%08lx\n", address); + pr_emerg("4MB pages are not currently supported: 0x%08lx\n", + address); BUG(); } - pr_info(NAME "pte for 0x%lx: 0x%llx 0x%llx\n", address, + pr_info("pte for 0x%lx: 0x%llx 0x%llx\n", + address, (unsigned long long)pte_val(*pte), (unsigned long long)pte_val(*pte) & _PAGE_PRESENT); } @@ -118,22 +120,21 @@ static void print_pte(unsigned long address) static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) { const struct trap_reason *my_reason = &get_cpu_var(pf_reason); - pr_emerg(NAME "unexpected fault for address: 0x%08lx, " - "last fault for address: 0x%08lx\n", - addr, my_reason->addr); + pr_emerg("unexpected fault for address: 0x%08lx, last fault for address: 0x%08lx\n", + addr, my_reason->addr); print_pte(addr); print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip); print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip); #ifdef __i386__ pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", - regs->ax, regs->bx, regs->cx, regs->dx); + regs->ax, regs->bx, regs->cx, regs->dx); pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", - regs->si, regs->di, regs->bp, regs->sp); + regs->si, regs->di, regs->bp, regs->sp); #else pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n", - regs->ax, regs->cx, regs->dx); + regs->ax, regs->cx, regs->dx); pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n", - regs->si, regs->di, regs->bp, regs->sp); + regs->si, regs->di, regs->bp, regs->sp); #endif put_cpu_var(pf_reason); BUG(); @@ -213,7 +214,7 @@ static void post(struct kmmio_probe *p, unsigned long condition, /* this should always return the active_trace count to 0 */ my_reason->active_traces--; if (my_reason->active_traces) { - pr_emerg(NAME "unexpected post handler"); + pr_emerg("unexpected post handler"); BUG(); } @@ -244,7 +245,7 @@ static void ioremap_trace_core(resource_size_t offset, unsigned long size, }; if (!trace) { - pr_err(NAME "kmalloc failed in ioremap\n"); + pr_err("kmalloc failed in ioremap\n"); return; } @@ -282,8 +283,8 @@ void mmiotrace_ioremap(resource_size_t offset, unsigned long size, if (!is_enabled()) /* recheck and proper locking in *_core() */ return; - pr_debug(NAME "ioremap_*(0x%llx, 0x%lx) = %p\n", - (unsigned long long)offset, size, addr); + pr_debug("ioremap_*(0x%llx, 0x%lx) = %p\n", + (unsigned long long)offset, size, addr); if ((filter_offset) && (offset != filter_offset)) return; ioremap_trace_core(offset, size, addr); @@ -301,7 +302,7 @@ static void iounmap_trace_core(volatile void __iomem *addr) struct remap_trace *tmp; struct remap_trace *found_trace = NULL; - pr_debug(NAME "Unmapping %p.\n", addr); + pr_debug("Unmapping %p.\n", addr); spin_lock_irq(&trace_lock); if (!is_enabled()) @@ -363,9 +364,8 @@ static void clear_trace_list(void) * Caller also ensures is_enabled() cannot change. */ list_for_each_entry(trace, &trace_list, list) { - pr_notice(NAME "purging non-iounmapped " - "trace @0x%08lx, size 0x%lx.\n", - trace->probe.addr, trace->probe.len); + pr_notice("purging non-iounmapped trace @0x%08lx, size 0x%lx.\n", + trace->probe.addr, trace->probe.len); if (!nommiotrace) unregister_kmmio_probe(&trace->probe); } @@ -387,7 +387,7 @@ static void enter_uniprocessor(void) if (downed_cpus == NULL && !alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) { - pr_notice(NAME "Failed to allocate mask\n"); + pr_notice("Failed to allocate mask\n"); goto out; } @@ -395,20 +395,19 @@ static void enter_uniprocessor(void) cpumask_copy(downed_cpus, cpu_online_mask); cpumask_clear_cpu(cpumask_first(cpu_online_mask), downed_cpus); if (num_online_cpus() > 1) - pr_notice(NAME "Disabling non-boot CPUs...\n"); + pr_notice("Disabling non-boot CPUs...\n"); put_online_cpus(); for_each_cpu(cpu, downed_cpus) { err = cpu_down(cpu); if (!err) - pr_info(NAME "CPU%d is down.\n", cpu); + pr_info("CPU%d is down.\n", cpu); else - pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err); + pr_err("Error taking CPU%d down: %d\n", cpu, err); } out: if (num_online_cpus() > 1) - pr_warning(NAME "multiple CPUs still online, " - "may miss events.\n"); + pr_warning("multiple CPUs still online, may miss events.\n"); } /* __ref because leave_uniprocessor calls cpu_up which is __cpuinit, @@ -420,13 +419,13 @@ static void __ref leave_uniprocessor(void) if (downed_cpus == NULL || cpumask_weight(downed_cpus) == 0) return; - pr_notice(NAME "Re-enabling CPUs...\n"); + pr_notice("Re-enabling CPUs...\n"); for_each_cpu(cpu, downed_cpus) { err = cpu_up(cpu); if (!err) - pr_info(NAME "enabled CPU%d.\n", cpu); + pr_info("enabled CPU%d.\n", cpu); else - pr_err(NAME "cannot re-enable CPU%d: %d\n", cpu, err); + pr_err("cannot re-enable CPU%d: %d\n", cpu, err); } } @@ -434,8 +433,8 @@ static void __ref leave_uniprocessor(void) static void enter_uniprocessor(void) { if (num_online_cpus() > 1) - pr_warning(NAME "multiple CPUs are online, may miss events. " - "Suggest booting with maxcpus=1 kernel argument.\n"); + pr_warning("multiple CPUs are online, may miss events. " + "Suggest booting with maxcpus=1 kernel argument.\n"); } static void leave_uniprocessor(void) @@ -450,13 +449,13 @@ void enable_mmiotrace(void) goto out; if (nommiotrace) - pr_info(NAME "MMIO tracing disabled.\n"); + pr_info("MMIO tracing disabled.\n"); kmmio_init(); enter_uniprocessor(); spin_lock_irq(&trace_lock); atomic_inc(&mmiotrace_enabled); spin_unlock_irq(&trace_lock); - pr_info(NAME "enabled.\n"); + pr_info("enabled.\n"); out: mutex_unlock(&mmiotrace_mutex); } @@ -475,7 +474,7 @@ void disable_mmiotrace(void) clear_trace_list(); /* guarantees: no more kmmio callbacks */ leave_uniprocessor(); kmmio_cleanup(); - pr_info(NAME "disabled.\n"); + pr_info("disabled.\n"); out: mutex_unlock(&mmiotrace_mutex); } -- cgit v1.2.3 From b7cc9554bc73641c9ed4d7eb74b2d6e78f20abea Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 10 Dec 2009 11:03:39 +0100 Subject: x86/amd-iommu: Fix passthrough mode The data structure changes to use dev->archdata.iommu field broke the iommu=pt mode because in this case the dev->archdata.iommu was left uninitialized. This moves the inititalization of the devices into the main init function and fixes the problem. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_proto.h | 3 ++- arch/x86/kernel/amd_iommu.c | 39 ++++++++++++++++++++++++++++++++-- arch/x86/kernel/amd_iommu_init.c | 7 ++++++ 3 files changed, 46 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h index 84786fb9a23b..2566e2606224 100644 --- a/arch/x86/include/asm/amd_iommu_proto.h +++ b/arch/x86/include/asm/amd_iommu_proto.h @@ -28,7 +28,8 @@ extern void amd_iommu_flush_all_domains(void); extern void amd_iommu_flush_all_devices(void); extern void amd_iommu_apply_erratum_63(u16 devid); extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); - +extern int amd_iommu_init_devices(void); +extern void amd_iommu_uninit_devices(void); #ifndef CONFIG_AMD_IOMMU_STATS static inline void amd_iommu_stats_init(void) { } diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 32fb09102a13..450dd6ac03d3 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -166,6 +166,43 @@ static void iommu_uninit_device(struct device *dev) { kfree(dev->archdata.iommu); } + +void __init amd_iommu_uninit_devices(void) +{ + struct pci_dev *pdev = NULL; + + for_each_pci_dev(pdev) { + + if (!check_device(&pdev->dev)) + continue; + + iommu_uninit_device(&pdev->dev); + } +} + +int __init amd_iommu_init_devices(void) +{ + struct pci_dev *pdev = NULL; + int ret = 0; + + for_each_pci_dev(pdev) { + + if (!check_device(&pdev->dev)) + continue; + + ret = iommu_init_device(&pdev->dev); + if (ret) + goto out_free; + } + + return 0; + +out_free: + + amd_iommu_uninit_devices(); + + return ret; +} #ifdef CONFIG_AMD_IOMMU_STATS /* @@ -2145,8 +2182,6 @@ static void prealloc_protection_domains(void) if (!check_device(&dev->dev)) continue; - iommu_init_device(&dev->dev); - /* Is there already any domain for it? */ if (domain_for_device(&dev->dev)) continue; diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 7ffc39965233..df01c691d130 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1274,6 +1274,10 @@ static int __init amd_iommu_init(void) if (ret) goto free; + ret = amd_iommu_init_devices(); + if (ret) + goto free; + if (iommu_pass_through) ret = amd_iommu_init_passthrough(); else @@ -1296,6 +1300,9 @@ out: return ret; free: + + amd_iommu_uninit_devices(); + free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, get_order(MAX_DOMAIN_ID/8)); -- cgit v1.2.3 From 8638c4914f34fedc1c13b1cc13f6d1e5a78c46b4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 10 Dec 2009 11:12:25 +0100 Subject: x86/amd-iommu: Fix PCI hotplug with passthrough mode The device change notifier is initialized in the dma_ops initialization path. But this path is never executed for iommu=pt. Move the notifier initialization to IOMMU hardware init code to fix this. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_proto.h | 1 + arch/x86/kernel/amd_iommu.c | 7 +++++-- arch/x86/kernel/amd_iommu_init.c | 2 ++ 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h index 2566e2606224..4d817f9e6e77 100644 --- a/arch/x86/include/asm/amd_iommu_proto.h +++ b/arch/x86/include/asm/amd_iommu_proto.h @@ -30,6 +30,7 @@ extern void amd_iommu_apply_erratum_63(u16 devid); extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); extern int amd_iommu_init_devices(void); extern void amd_iommu_uninit_devices(void); +extern void amd_iommu_init_notifier(void); #ifndef CONFIG_AMD_IOMMU_STATS static inline void amd_iommu_stats_init(void) { } diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 450dd6ac03d3..a83185080e91 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1624,6 +1624,11 @@ static struct notifier_block device_nb = { .notifier_call = device_change_notifier, }; +void amd_iommu_init_notifier(void) +{ + bus_register_notifier(&pci_bus_type, &device_nb); +} + /***************************************************************************** * * The next functions belong to the dma_ops mapping/unmapping code. @@ -2250,8 +2255,6 @@ int __init amd_iommu_init_dma_ops(void) register_iommu(&amd_iommu_ops); - bus_register_notifier(&pci_bus_type, &device_nb); - amd_iommu_stats_init(); return 0; diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index df01c691d130..309a52f96e0b 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1285,6 +1285,8 @@ static int __init amd_iommu_init(void) if (ret) goto free; + amd_iommu_init_notifier(); + enable_iommus(); if (iommu_pass_through) -- cgit v1.2.3 From 6b2f3d1f769be5779b479c37800229d9a4809fc3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 27 Oct 2009 11:05:28 +0100 Subject: vfs: Implement proper O_SYNC semantics While Linux provided an O_SYNC flag basically since day 1, it took until Linux 2.4.0-test12pre2 to actually get it implemented for filesystems, since that day we had generic_osync_around with only minor changes and the great "For now, when the user asks for O_SYNC, we'll actually give O_DSYNC" comment. This patch intends to actually give us real O_SYNC semantics in addition to the O_DSYNC semantics. After Jan's O_SYNC patches which are required before this patch it's actually surprisingly simple, we just need to figure out when to set the datasync flag to vfs_fsync_range and when not. This patch renames the existing O_SYNC flag to O_DSYNC while keeping it's numerical value to keep binary compatibility, and adds a new real O_SYNC flag. To guarantee backwards compatiblity it is defined as expanding to both the O_DSYNC and the new additional binary flag (__O_SYNC) to make sure we are backwards-compatible when compiled against the new headers. This also means that all places that don't care about the differences can just check O_DSYNC and get the right behaviour for O_SYNC, too - only places that actuall care need to check __O_SYNC in addition. Drivers and network filesystems have been updated in a fail safe way to always do the full sync magic if O_DSYNC is set. The few places setting O_SYNC for lower layers are kept that way for now to stay failsafe. We enforce that O_DSYNC is set when __O_SYNC is set early in the open path to make sure we always get these sane options. Note that parisc really screwed up their headers as they already define a O_DSYNC that has always been a no-op. We try to repair it by using it for the new O_DSYNC and redefinining O_SYNC to send both the traditional O_SYNC numerical value _and_ the O_DSYNC one. Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Grant Grundler Cc: "David S. Miller" Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Al Viro Cc: Andreas Dilger Acked-by: Trond Myklebust Acked-by: Kyle McMartin Acked-by: Ulrich Drepper Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Jan Kara --- arch/alpha/include/asm/fcntl.h | 19 ++++++++++++++++--- arch/blackfin/include/asm/fcntl.h | 2 -- arch/mips/include/asm/fcntl.h | 17 ++++++++++++++++- arch/mips/kernel/kspd.c | 1 + arch/mips/loongson/common/mem.c | 2 +- arch/mips/mm/cache.c | 2 +- arch/parisc/include/asm/fcntl.h | 5 ++--- arch/sparc/include/asm/fcntl.h | 19 ++++++++++++++++--- arch/x86/mm/pat.c | 3 +-- drivers/char/mem.c | 6 +++--- drivers/usb/gadget/file_storage.c | 2 +- fs/afs/write.c | 5 +++-- fs/btrfs/file.c | 4 ++-- fs/cifs/dir.c | 3 ++- fs/cifs/file.c | 6 ++++-- fs/namei.c | 9 +++++++++ fs/nfs/file.c | 4 ++-- fs/nfs/write.c | 2 +- fs/ocfs2/file.c | 2 +- fs/sync.c | 5 +++-- fs/ubifs/file.c | 2 +- fs/xfs/linux-2.6/xfs_lrw.c | 2 +- include/asm-generic/fcntl.h | 25 +++++++++++++++++++++---- sound/core/rawmidi.c | 2 +- 24 files changed, 109 insertions(+), 40 deletions(-) (limited to 'arch/x86') diff --git a/arch/alpha/include/asm/fcntl.h b/arch/alpha/include/asm/fcntl.h index 25da0017ec87..21b1117a0c61 100644 --- a/arch/alpha/include/asm/fcntl.h +++ b/arch/alpha/include/asm/fcntl.h @@ -1,8 +1,6 @@ #ifndef _ALPHA_FCNTL_H #define _ALPHA_FCNTL_H -/* open/fcntl - O_SYNC is only implemented on blocks devices and on files - located on an ext2 file system */ #define O_CREAT 01000 /* not fcntl */ #define O_TRUNC 02000 /* not fcntl */ #define O_EXCL 04000 /* not fcntl */ @@ -10,13 +8,28 @@ #define O_NONBLOCK 00004 #define O_APPEND 00010 -#define O_SYNC 040000 +#define O_DSYNC 040000 /* used to be O_SYNC, see below */ #define O_DIRECTORY 0100000 /* must be a directory */ #define O_NOFOLLOW 0200000 /* don't follow links */ #define O_LARGEFILE 0400000 /* will be set by the kernel on every open */ #define O_DIRECT 02000000 /* direct disk access - should check with OSF/1 */ #define O_NOATIME 04000000 #define O_CLOEXEC 010000000 /* set close_on_exec */ +/* + * Before Linux 2.6.32 only O_DSYNC semantics were implemented, but using + * the O_SYNC flag. We continue to use the existing numerical value + * for O_DSYNC semantics now, but using the correct symbolic name for it. + * This new value is used to request true Posix O_SYNC semantics. It is + * defined in this strange way to make sure applications compiled against + * new headers get at least O_DSYNC semantics on older kernels. + * + * This has the nice side-effect that we can simply test for O_DSYNC + * wherever we do not care if O_DSYNC or O_SYNC is used. + * + * Note: __O_SYNC must never be used directly. + */ +#define __O_SYNC 020000000 +#define O_SYNC (__O_SYNC|O_DSYNC) #define F_GETLK 7 #define F_SETLK 8 diff --git a/arch/blackfin/include/asm/fcntl.h b/arch/blackfin/include/asm/fcntl.h index 8727b2b382f1..251c911d59c1 100644 --- a/arch/blackfin/include/asm/fcntl.h +++ b/arch/blackfin/include/asm/fcntl.h @@ -7,8 +7,6 @@ #ifndef _BFIN_FCNTL_H #define _BFIN_FCNTL_H -/* open/fcntl - O_SYNC is only implemented on blocks devices and on files - located on an ext2 file system */ #define O_DIRECTORY 040000 /* must be a directory */ #define O_NOFOLLOW 0100000 /* don't follow links */ #define O_DIRECT 0200000 /* direct disk access hint - currently ignored */ diff --git a/arch/mips/include/asm/fcntl.h b/arch/mips/include/asm/fcntl.h index 2a52333a062d..7c6681aa2ab8 100644 --- a/arch/mips/include/asm/fcntl.h +++ b/arch/mips/include/asm/fcntl.h @@ -10,7 +10,7 @@ #define O_APPEND 0x0008 -#define O_SYNC 0x0010 +#define O_DSYNC 0x0010 /* used to be O_SYNC, see below */ #define O_NONBLOCK 0x0080 #define O_CREAT 0x0100 /* not fcntl */ #define O_TRUNC 0x0200 /* not fcntl */ @@ -18,6 +18,21 @@ #define O_NOCTTY 0x0800 /* not fcntl */ #define FASYNC 0x1000 /* fcntl, for BSD compatibility */ #define O_LARGEFILE 0x2000 /* allow large file opens */ +/* + * Before Linux 2.6.32 only O_DSYNC semantics were implemented, but using + * the O_SYNC flag. We continue to use the existing numerical value + * for O_DSYNC semantics now, but using the correct symbolic name for it. + * This new value is used to request true Posix O_SYNC semantics. It is + * defined in this strange way to make sure applications compiled against + * new headers get at least O_DSYNC semantics on older kernels. + * + * This has the nice side-effect that we can simply test for O_DSYNC + * wherever we do not care if O_DSYNC or O_SYNC is used. + * + * Note: __O_SYNC must never be used directly. + */ +#define __O_SYNC 0x4000 +#define O_SYNC (__O_SYNC|O_DSYNC) #define O_DIRECT 0x8000 /* direct disk access hint */ #define F_GETLK 14 diff --git a/arch/mips/kernel/kspd.c b/arch/mips/kernel/kspd.c index ad4e017ed2f3..80e2ba694bab 100644 --- a/arch/mips/kernel/kspd.c +++ b/arch/mips/kernel/kspd.c @@ -82,6 +82,7 @@ static int sp_stopping; #define MTSP_O_SHLOCK 0x0010 #define MTSP_O_EXLOCK 0x0020 #define MTSP_O_ASYNC 0x0040 +/* XXX: check which of these is actually O_SYNC vs O_DSYNC */ #define MTSP_O_FSYNC O_SYNC #define MTSP_O_NOFOLLOW 0x0100 #define MTSP_O_SYNC 0x0080 diff --git a/arch/mips/loongson/common/mem.c b/arch/mips/loongson/common/mem.c index 7c92f79b6480..e94ef158f980 100644 --- a/arch/mips/loongson/common/mem.c +++ b/arch/mips/loongson/common/mem.c @@ -26,7 +26,7 @@ void __init prom_init_memory(void) /* override of arch/mips/mm/cache.c: __uncached_access */ int __uncached_access(struct file *file, unsigned long addr) { - if (file->f_flags & O_SYNC) + if (file->f_flags & O_DSYNC) return 1; return addr >= __pa(high_memory) || diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c index 694d51f523d1..102b2dfa542a 100644 --- a/arch/mips/mm/cache.c +++ b/arch/mips/mm/cache.c @@ -194,7 +194,7 @@ void __devinit cpu_cache_init(void) int __weak __uncached_access(struct file *file, unsigned long addr) { - if (file->f_flags & O_SYNC) + if (file->f_flags & O_DSYNC) return 1; return addr >= __pa(high_memory); diff --git a/arch/parisc/include/asm/fcntl.h b/arch/parisc/include/asm/fcntl.h index 1e1c824764ee..f357fc693c89 100644 --- a/arch/parisc/include/asm/fcntl.h +++ b/arch/parisc/include/asm/fcntl.h @@ -1,14 +1,13 @@ #ifndef _PARISC_FCNTL_H #define _PARISC_FCNTL_H -/* open/fcntl - O_SYNC is only implemented on blocks devices and on files - located on an ext2 file system */ #define O_APPEND 000000010 #define O_BLKSEEK 000000100 /* HPUX only */ #define O_CREAT 000000400 /* not fcntl */ #define O_EXCL 000002000 /* not fcntl */ #define O_LARGEFILE 000004000 -#define O_SYNC 000100000 +#define __O_SYNC 000100000 +#define O_SYNC (__O_SYNC|O_DSYNC) #define O_NONBLOCK 000200004 /* HPUX has separate NDELAY & NONBLOCK */ #define O_NOCTTY 000400000 /* not fcntl */ #define O_DSYNC 001000000 /* HPUX only */ diff --git a/arch/sparc/include/asm/fcntl.h b/arch/sparc/include/asm/fcntl.h index d4d9c9d852c3..3b9cfb39175e 100644 --- a/arch/sparc/include/asm/fcntl.h +++ b/arch/sparc/include/asm/fcntl.h @@ -1,14 +1,12 @@ #ifndef _SPARC_FCNTL_H #define _SPARC_FCNTL_H -/* open/fcntl - O_SYNC is only implemented on blocks devices and on files - located on an ext2 file system */ #define O_APPEND 0x0008 #define FASYNC 0x0040 /* fcntl, for BSD compatibility */ #define O_CREAT 0x0200 /* not fcntl */ #define O_TRUNC 0x0400 /* not fcntl */ #define O_EXCL 0x0800 /* not fcntl */ -#define O_SYNC 0x2000 +#define O_DSYNC 0x2000 /* used to be O_SYNC, see below */ #define O_NONBLOCK 0x4000 #if defined(__sparc__) && defined(__arch64__) #define O_NDELAY 0x0004 @@ -20,6 +18,21 @@ #define O_DIRECT 0x100000 /* direct disk access hint */ #define O_NOATIME 0x200000 #define O_CLOEXEC 0x400000 +/* + * Before Linux 2.6.32 only O_DSYNC semantics were implemented, but using + * the O_SYNC flag. We continue to use the existing numerical value + * for O_DSYNC semantics now, but using the correct symbolic name for it. + * This new value is used to request true Posix O_SYNC semantics. It is + * defined in this strange way to make sure applications compiled against + * new headers get at least O_DSYNC semantics on older kernels. + * + * This has the nice side-effect that we can simply test for O_DSYNC + * wherever we do not care if O_DSYNC or O_SYNC is used. + * + * Note: __O_SYNC must never be used directly. + */ +#define __O_SYNC 0x800000 +#define O_SYNC (__O_SYNC|O_DSYNC) #define F_GETOWN 5 /* for sockets. */ #define F_SETOWN 6 /* for sockets. */ diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 66b55d6e69ed..ae9648eb1c7f 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -704,9 +704,8 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, if (!range_is_allowed(pfn, size)) return 0; - if (file->f_flags & O_SYNC) { + if (file->f_flags & O_DSYNC) flags = _PAGE_CACHE_UC_MINUS; - } #ifdef CONFIG_X86_32 /* diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 30eff80fed6f..fba76fb55abf 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -43,7 +43,7 @@ static inline int uncached_access(struct file *file, unsigned long addr) { #if defined(CONFIG_IA64) /* - * On ia64, we ignore O_SYNC because we cannot tolerate memory attribute aliases. + * On ia64, we ignore O_DSYNC because we cannot tolerate memory attribute aliases. */ return !(efi_mem_attributes(addr) & EFI_MEMORY_WB); #elif defined(CONFIG_MIPS) @@ -56,9 +56,9 @@ static inline int uncached_access(struct file *file, unsigned long addr) #else /* * Accessing memory above the top the kernel knows about or through a file pointer - * that was marked O_SYNC will be done non-cached. + * that was marked O_DSYNC will be done non-cached. */ - if (file->f_flags & O_SYNC) + if (file->f_flags & O_DSYNC) return 1; return addr >= __pa(high_memory); #endif diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c index 1e6aa504d58a..5e14dbaf65bc 100644 --- a/drivers/usb/gadget/file_storage.c +++ b/drivers/usb/gadget/file_storage.c @@ -1713,7 +1713,7 @@ static int do_write(struct fsg_dev *fsg) } if (fsg->cmnd[1] & 0x08) { // FUA spin_lock(&curlun->filp->f_lock); - curlun->filp->f_flags |= O_SYNC; + curlun->filp->f_flags |= O_DSYNC; spin_unlock(&curlun->filp->f_lock); } } diff --git a/fs/afs/write.c b/fs/afs/write.c index c63a3c8beb73..6be1bc31616a 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -692,8 +692,9 @@ ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov, } /* return error values for O_SYNC and IS_SYNC() */ - if (IS_SYNC(&vnode->vfs_inode) || iocb->ki_filp->f_flags & O_SYNC) { - ret = afs_fsync(iocb->ki_filp, dentry, 1); + if (IS_SYNC(&vnode->vfs_inode) || iocb->ki_filp->f_flags & O_DSYNC) { + ret = afs_fsync(iocb->ki_filp, dentry, + (iocb->ki_filp->f_flags & __O_SYNC) ? 0 : 1); if (ret < 0) result = ret; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 06550affbd27..77f759302e12 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -909,7 +909,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, unsigned long last_index; int will_write; - will_write = ((file->f_flags & O_SYNC) || IS_SYNC(inode) || + will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || (file->f_flags & O_DIRECT)); nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, @@ -1076,7 +1076,7 @@ out_nolock: if (err) num_written = err; - if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { + if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { trans = btrfs_start_transaction(root, 1); ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 1f42f772865a..6ccf7262d1b7 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -214,7 +214,8 @@ int cifs_posix_open(char *full_path, struct inode **pinode, posix_flags |= SMB_O_EXCL; if (oflags & O_TRUNC) posix_flags |= SMB_O_TRUNC; - if (oflags & O_SYNC) + /* be safe and imply O_SYNC for O_DSYNC */ + if (oflags & O_DSYNC) posix_flags |= SMB_O_SYNC; if (oflags & O_DIRECTORY) posix_flags |= SMB_O_DIRECTORY; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 429337eb7afe..057e1dae12ab 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -76,8 +76,10 @@ static inline fmode_t cifs_posix_convert_flags(unsigned int flags) reopening a file. They had their effect on the original open */ if (flags & O_APPEND) posix_flags |= (fmode_t)O_APPEND; - if (flags & O_SYNC) - posix_flags |= (fmode_t)O_SYNC; + if (flags & O_DSYNC) + posix_flags |= (fmode_t)O_DSYNC; + if (flags & __O_SYNC) + posix_flags |= (fmode_t)__O_SYNC; if (flags & O_DIRECTORY) posix_flags |= (fmode_t)O_DIRECTORY; if (flags & O_NOFOLLOW) diff --git a/fs/namei.c b/fs/namei.c index d11f404667e9..b83d38f614ff 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1678,6 +1678,15 @@ struct file *do_filp_open(int dfd, const char *pathname, int will_write; int flag = open_to_namei_flags(open_flag); + /* + * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only + * check for O_DSYNC if the need any syncing at all we enforce it's + * always set instead of having to deal with possibly weird behaviour + * for malicious applications setting only __O_SYNC. + */ + if (open_flag & __O_SYNC) + open_flag |= O_DSYNC; + if (!acc_mode) acc_mode = MAY_OPEN | ACC_MODE(flag); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index f5fdd39e037a..6b891328f332 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -581,7 +581,7 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode) { struct nfs_open_context *ctx; - if (IS_SYNC(inode) || (filp->f_flags & O_SYNC)) + if (IS_SYNC(inode) || (filp->f_flags & O_DSYNC)) return 1; ctx = nfs_file_open_context(filp); if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) @@ -622,7 +622,7 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); result = generic_file_aio_write(iocb, iov, nr_segs, pos); - /* Return error values for O_SYNC and IS_SYNC() */ + /* Return error values for O_DSYNC and IS_SYNC() */ if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode); if (err < 0) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index c84b5cc1a943..b1ce2ea9b93b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -774,7 +774,7 @@ int nfs_updatepage(struct file *file, struct page *page, */ if (nfs_write_pageuptodate(page, inode) && inode->i_flock == NULL && - !(file->f_flags & O_SYNC)) { + !(file->f_flags & O_DSYNC)) { count = max(count + offset, nfs_page_length(page)); offset = 0; } diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index de059f490586..3d30a1c974a8 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2006,7 +2006,7 @@ out_dio: /* buffered aio wouldn't have proper lock coverage today */ BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); - if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) { + if ((file->f_flags & O_DSYNC && !direct_io) || IS_SYNC(inode)) { ret = filemap_fdatawrite_range(file->f_mapping, pos, pos + count - 1); if (ret < 0) diff --git a/fs/sync.c b/fs/sync.c index d104591b066b..b75ca68dc081 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -295,10 +295,11 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd) */ int generic_write_sync(struct file *file, loff_t pos, loff_t count) { - if (!(file->f_flags & O_SYNC) && !IS_SYNC(file->f_mapping->host)) + if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host)) return 0; return vfs_fsync_range(file, file->f_path.dentry, pos, - pos + count - 1, 1); + pos + count - 1, + (file->f_flags & __O_SYNC) ? 0 : 1); } EXPORT_SYMBOL(generic_write_sync); diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 1009adc8d602..eaa3d480bc20 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1401,7 +1401,7 @@ static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov, if (ret < 0) return ret; - if (ret > 0 && (IS_SYNC(inode) || iocb->ki_filp->f_flags & O_SYNC)) { + if (ret > 0 && (IS_SYNC(inode) || iocb->ki_filp->f_flags & O_DSYNC)) { err = ubifs_sync_wbufs_by_inode(c, inode); if (err) return err; diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 072050f8d346..339c52b1a434 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -811,7 +811,7 @@ write_retry: XFS_STATS_ADD(xs_write_bytes, ret); /* Handle various SYNC-type writes */ - if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { + if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { loff_t end = pos + ret - 1; int error2; diff --git a/include/asm-generic/fcntl.h b/include/asm-generic/fcntl.h index 495dc8af4044..681ddf3e844c 100644 --- a/include/asm-generic/fcntl.h +++ b/include/asm-generic/fcntl.h @@ -3,8 +3,6 @@ #include -/* open/fcntl - O_SYNC is only implemented on blocks devices and on files - located on an ext2 file system */ #define O_ACCMODE 00000003 #define O_RDONLY 00000000 #define O_WRONLY 00000001 @@ -27,8 +25,8 @@ #ifndef O_NONBLOCK #define O_NONBLOCK 00004000 #endif -#ifndef O_SYNC -#define O_SYNC 00010000 +#ifndef O_DSYNC +#define O_DSYNC 00010000 /* used to be O_SYNC, see below */ #endif #ifndef FASYNC #define FASYNC 00020000 /* fcntl, for BSD compatibility */ @@ -51,6 +49,25 @@ #ifndef O_CLOEXEC #define O_CLOEXEC 02000000 /* set close_on_exec */ #endif + +/* + * Before Linux 2.6.32 only O_DSYNC semantics were implemented, but using + * the O_SYNC flag. We continue to use the existing numerical value + * for O_DSYNC semantics now, but using the correct symbolic name for it. + * This new value is used to request true Posix O_SYNC semantics. It is + * defined in this strange way to make sure applications compiled against + * new headers get at least O_DSYNC semantics on older kernels. + * + * This has the nice side-effect that we can simply test for O_DSYNC + * wherever we do not care if O_DSYNC or O_SYNC is used. + * + * Note: __O_SYNC must never be used directly. + */ +#ifndef O_SYNC +#define __O_SYNC 04000000 +#define O_SYNC (__O_SYNC|O_DSYNC) +#endif + #ifndef O_NDELAY #define O_NDELAY O_NONBLOCK #endif diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index 2f766123b158..0f5a194695d9 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -1257,7 +1257,7 @@ static ssize_t snd_rawmidi_write(struct file *file, const char __user *buf, break; count -= count1; } - if (file->f_flags & O_SYNC) { + if (file->f_flags & O_DSYNC) { spin_lock_irq(&runtime->lock); while (runtime->avail != runtime->buffer_size) { wait_queue_t wait; -- cgit v1.2.3 From 5e855db5d8fec44e6604eb245aa9077bbd3f0d05 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Thu, 10 Dec 2009 17:08:54 +0800 Subject: perf_event: Fix variable initialization in other codepaths Signed-off-by: Xiao Guangrong Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Paul Mackerras LKML-Reference: <4B20BAA6.7010609@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 4 ++++ kernel/perf_event.c | 1 + 2 files changed, 5 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index d35f26076ae5..1342f236e32a 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1632,6 +1632,7 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc) data.period = event->hw.last_period; data.addr = 0; + data.raw = NULL; regs.ip = 0; /* @@ -1749,6 +1750,7 @@ static int p6_pmu_handle_irq(struct pt_regs *regs) u64 val; data.addr = 0; + data.raw = NULL; cpuc = &__get_cpu_var(cpu_hw_events); @@ -1794,6 +1796,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) u64 ack, status; data.addr = 0; + data.raw = NULL; cpuc = &__get_cpu_var(cpu_hw_events); @@ -1857,6 +1860,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) u64 val; data.addr = 0; + data.raw = NULL; cpuc = &__get_cpu_var(cpu_hw_events); diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 3a5d6c4786bb..d891ec4a8100 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4300,6 +4300,7 @@ void perf_bp_event(struct perf_event *bp, void *data) struct perf_sample_data sample; struct pt_regs *regs = data; + sample.raw = NULL; sample.addr = bp->attr.bp_addr; if (!perf_exclude_event(bp, regs)) -- cgit v1.2.3 From 125580380f418000b1a06d9a54700f1191b6e561 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 10 Dec 2009 19:56:34 +0300 Subject: x86, perf events: Check if we have APIC enabled Ralf Hildebrandt reported this boot warning: | Running a vanilla 2.6.32 as Xen DomU, I'm getting: | | [ 0.000999] CPU: Physical Processor ID: 0 | [ 0.000999] CPU: Processor Core ID: 1 | [ 0.000999] Performance Events: AMD PMU driver. | [ 0.000999] ------------[ cut here ]------------ | [ 0.000999] WARNING: at arch/x86/kernel/apic/apic.c:249 native_apic_write_dummy So we need to check if APIC functionality is available, and not just in the P6 driver but elsewhere as well. Reported-by: Ralf Hildebrandt Signed-off-by: Cyrill Gorcunov Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <20091210165634.GF5086@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 1342f236e32a..18f05eccbb62 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2066,12 +2066,6 @@ static __init int p6_pmu_init(void) x86_pmu = p6_pmu; - if (!cpu_has_apic) { - pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); - pr_info("no hardware sampling interrupt available.\n"); - x86_pmu.apic = 0; - } - return 0; } @@ -2163,6 +2157,16 @@ static __init int amd_pmu_init(void) return 0; } +static void __init pmu_check_apic(void) +{ + if (cpu_has_apic) + return; + + x86_pmu.apic = 0; + pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); + pr_info("no hardware sampling interrupt available.\n"); +} + void __init init_hw_perf_events(void) { int err; @@ -2184,6 +2188,8 @@ void __init init_hw_perf_events(void) return; } + pmu_check_apic(); + pr_cont("%s PMU driver.\n", x86_pmu.name); if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { -- cgit v1.2.3 From 3bd95dfb182969dc6d2a317c150e0df7107608d3 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 9 Dec 2009 12:34:40 -0500 Subject: x86, 64-bit: Move kernel_thread to C Prepare for merging with 32-bit. Signed-off-by: Brian Gerst LKML-Reference: <1260380084-3707-2-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/entry_64.S | 49 +++------------------------------------- arch/x86/kernel/process_64.c | 31 +++++++++++++++++++++++-- arch/x86/kernel/x8664_ksyms_64.c | 2 -- 3 files changed, 32 insertions(+), 50 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 63bca794c8f9..73d9b2c0e217 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1166,63 +1166,20 @@ bad_gs: jmp 2b .previous -/* - * Create a kernel thread. - * - * C extern interface: - * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) - * - * asm input arguments: - * rdi: fn, rsi: arg, rdx: flags - */ -ENTRY(kernel_thread) - CFI_STARTPROC - FAKE_STACK_FRAME $child_rip - SAVE_ALL - - # rdi: flags, rsi: usp, rdx: will be &pt_regs - movq %rdx,%rdi - orq kernel_thread_flags(%rip),%rdi - movq $-1, %rsi - movq %rsp, %rdx - - xorl %r8d,%r8d - xorl %r9d,%r9d - - # clone now - call do_fork - movq %rax,RAX(%rsp) - xorl %edi,%edi - - /* - * It isn't worth to check for reschedule here, - * so internally to the x86_64 port you can rely on kernel_thread() - * not to reschedule the child before returning, this avoids the need - * of hacks for example to fork off the per-CPU idle tasks. - * [Hopefully no generic code relies on the reschedule -AK] - */ - RESTORE_ALL - UNFAKE_STACK_FRAME - ret - CFI_ENDPROC -END(kernel_thread) - -ENTRY(child_rip) +ENTRY(kernel_thread_helper) pushq $0 # fake return address CFI_STARTPROC /* * Here we are in the child and the registers are set as they were * at kernel_thread() invocation in the parent. */ - movq %rdi, %rax - movq %rsi, %rdi - call *%rax + call *%rsi # exit mov %eax, %edi call do_exit ud2 # padding for call trace CFI_ENDPROC -END(child_rip) +END(kernel_thread_helper) /* * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 83019f94b83d..92484c2130c6 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -59,8 +59,6 @@ asmlinkage extern void ret_from_fork(void); DEFINE_PER_CPU(unsigned long, old_rsp); static DEFINE_PER_CPU(unsigned char, is_idle); -unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; - static ATOMIC_NOTIFIER_HEAD(idle_notifier); void idle_notifier_register(struct notifier_block *n) @@ -231,6 +229,35 @@ void show_regs(struct pt_regs *regs) show_trace(NULL, regs, (void *)(regs + 1), regs->bp); } +/* + * This gets run with %si containing the + * function to call, and %di containing + * the "args". + */ +extern void kernel_thread_helper(void); + +/* + * Create a kernel thread + */ +int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) +{ + struct pt_regs regs; + + memset(®s, 0, sizeof(regs)); + + regs.si = (unsigned long) fn; + regs.di = (unsigned long) arg; + + regs.orig_ax = -1; + regs.ip = (unsigned long) kernel_thread_helper; + regs.cs = __KERNEL_CS; + regs.flags = X86_EFLAGS_IF; + + /* Ok, create the new process.. */ + return do_fork(flags | CLONE_VM | CLONE_UNTRACED, ~0UL, ®s, 0, NULL, NULL); +} +EXPORT_SYMBOL(kernel_thread); + void release_thread(struct task_struct *dead_task) { if (dead_task->mm) { diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index a1029769b6f2..9fafaf83b3b8 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -17,8 +17,6 @@ EXPORT_SYMBOL(mcount); #endif -EXPORT_SYMBOL(kernel_thread); - EXPORT_SYMBOL(__get_user_1); EXPORT_SYMBOL(__get_user_2); EXPORT_SYMBOL(__get_user_4); -- cgit v1.2.3 From fa4b8f84383ae197e643a46c36bf58ab8dffc95c Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 9 Dec 2009 12:34:41 -0500 Subject: x86, 64-bit: Use user_mode() to determine new stack pointer in copy_thread() Use user_mode() instead of a magic value for sp to determine when returning to kernel mode. Signed-off-by: Brian Gerst LKML-Reference: <1260380084-3707-3-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/process_64.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 92484c2130c6..00ac66fa5c6b 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -254,7 +254,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) regs.flags = X86_EFLAGS_IF; /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, ~0UL, ®s, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); } EXPORT_SYMBOL(kernel_thread); @@ -312,8 +312,9 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, *childregs = *regs; childregs->ax = 0; - childregs->sp = sp; - if (sp == ~0UL) + if (user_mode(regs)) + childregs->sp = sp; + else childregs->sp = (unsigned long)childregs; p->thread.sp = (unsigned long) childregs; -- cgit v1.2.3 From e840227c141116171c89ab1abb5cc9fee6fdb488 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 9 Dec 2009 12:34:42 -0500 Subject: x86, 32-bit: Use same regs as 64-bit for kernel_thread_helper The arg should be in %eax, but that is clobbered by the return value of clone. The function pointer can be in any register. Also, don't push args onto the stack, since regparm(3) is the normal calling convention now. Signed-off-by: Brian Gerst LKML-Reference: <1260380084-3707-4-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/entry_32.S | 8 ++------ arch/x86/kernel/process_32.c | 8 ++++---- 2 files changed, 6 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index cb12b9bfc9cc..44a8e0dc6737 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1047,12 +1047,8 @@ END(spurious_interrupt_bug) ENTRY(kernel_thread_helper) pushl $0 # fake return address for unwinder CFI_STARTPROC - movl %edx,%eax - push %edx - CFI_ADJUST_CFA_OFFSET 4 - call *%ebx - push %eax - CFI_ADJUST_CFA_OFFSET 4 + movl %edi,%eax + call *%esi call do_exit ud2 # padding for call trace CFI_ENDPROC diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 506d5a7ba17c..bd874d2b6ab1 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -193,8 +193,8 @@ void show_regs(struct pt_regs *regs) } /* - * This gets run with %bx containing the - * function to call, and %dx containing + * This gets run with %si containing the + * function to call, and %di containing * the "args". */ extern void kernel_thread_helper(void); @@ -208,8 +208,8 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) memset(®s, 0, sizeof(regs)); - regs.bx = (unsigned long) fn; - regs.dx = (unsigned long) arg; + regs.si = (unsigned long) fn; + regs.di = (unsigned long) arg; regs.ds = __USER_DS; regs.es = __USER_DS; -- cgit v1.2.3 From f443ff4201dd25cd4dec183f9919ecba90c8edc2 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 9 Dec 2009 12:34:43 -0500 Subject: x86: Sync 32/64-bit kernel_thread Signed-off-by: Brian Gerst LKML-Reference: <1260380084-3707-5-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/process_32.c | 5 ++++- arch/x86/kernel/process_64.c | 11 +++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index bd874d2b6ab1..f2e8b05a4f02 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -211,14 +211,17 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) regs.si = (unsigned long) fn; regs.di = (unsigned long) arg; +#ifdef CONFIG_X86_32 regs.ds = __USER_DS; regs.es = __USER_DS; regs.fs = __KERNEL_PERCPU; regs.gs = __KERNEL_STACK_CANARY; +#endif + regs.orig_ax = -1; regs.ip = (unsigned long) kernel_thread_helper; regs.cs = __KERNEL_CS | get_kernel_rpl(); - regs.flags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; + regs.flags = X86_EFLAGS_IF | 0x2; /* Ok, create the new process.. */ return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 00ac66fa5c6b..d49a9094f6f3 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -248,10 +248,17 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) regs.si = (unsigned long) fn; regs.di = (unsigned long) arg; +#ifdef CONFIG_X86_32 + regs.ds = __USER_DS; + regs.es = __USER_DS; + regs.fs = __KERNEL_PERCPU; + regs.gs = __KERNEL_STACK_CANARY; +#endif + regs.orig_ax = -1; regs.ip = (unsigned long) kernel_thread_helper; - regs.cs = __KERNEL_CS; - regs.flags = X86_EFLAGS_IF; + regs.cs = __KERNEL_CS | get_kernel_rpl(); + regs.flags = X86_EFLAGS_IF | 0x2; /* Ok, create the new process.. */ return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); -- cgit v1.2.3 From df59e7bf439918f523ac29e996ec1eebbed60440 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 9 Dec 2009 12:34:44 -0500 Subject: x86: Merge kernel_thread() Signed-off-by: Brian Gerst LKML-Reference: <1260380084-3707-6-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/process.c | 35 +++++++++++++++++++++++++++++++++++ arch/x86/kernel/process_32.c | 36 ------------------------------------ arch/x86/kernel/process_64.c | 36 ------------------------------------ 3 files changed, 35 insertions(+), 72 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index f3c1a6b3a65e..8705ccedd447 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -243,6 +243,41 @@ sys_clone(unsigned long clone_flags, unsigned long newsp, return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); } +/* + * This gets run with %si containing the + * function to call, and %di containing + * the "args". + */ +extern void kernel_thread_helper(void); + +/* + * Create a kernel thread + */ +int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) +{ + struct pt_regs regs; + + memset(®s, 0, sizeof(regs)); + + regs.si = (unsigned long) fn; + regs.di = (unsigned long) arg; + +#ifdef CONFIG_X86_32 + regs.ds = __USER_DS; + regs.es = __USER_DS; + regs.fs = __KERNEL_PERCPU; + regs.gs = __KERNEL_STACK_CANARY; +#endif + + regs.orig_ax = -1; + regs.ip = (unsigned long) kernel_thread_helper; + regs.cs = __KERNEL_CS | get_kernel_rpl(); + regs.flags = X86_EFLAGS_IF | 0x2; + + /* Ok, create the new process.. */ + return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); +} +EXPORT_SYMBOL(kernel_thread); /* * sys_execve() executes a new program. diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index f2e8b05a4f02..ccf234266a2e 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -192,42 +192,6 @@ void show_regs(struct pt_regs *regs) show_trace(NULL, regs, ®s->sp, regs->bp); } -/* - * This gets run with %si containing the - * function to call, and %di containing - * the "args". - */ -extern void kernel_thread_helper(void); - -/* - * Create a kernel thread - */ -int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) -{ - struct pt_regs regs; - - memset(®s, 0, sizeof(regs)); - - regs.si = (unsigned long) fn; - regs.di = (unsigned long) arg; - -#ifdef CONFIG_X86_32 - regs.ds = __USER_DS; - regs.es = __USER_DS; - regs.fs = __KERNEL_PERCPU; - regs.gs = __KERNEL_STACK_CANARY; -#endif - - regs.orig_ax = -1; - regs.ip = (unsigned long) kernel_thread_helper; - regs.cs = __KERNEL_CS | get_kernel_rpl(); - regs.flags = X86_EFLAGS_IF | 0x2; - - /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); -} -EXPORT_SYMBOL(kernel_thread); - void release_thread(struct task_struct *dead_task) { BUG_ON(dead_task->mm); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index d49a9094f6f3..1a362c5bec37 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -229,42 +229,6 @@ void show_regs(struct pt_regs *regs) show_trace(NULL, regs, (void *)(regs + 1), regs->bp); } -/* - * This gets run with %si containing the - * function to call, and %di containing - * the "args". - */ -extern void kernel_thread_helper(void); - -/* - * Create a kernel thread - */ -int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) -{ - struct pt_regs regs; - - memset(®s, 0, sizeof(regs)); - - regs.si = (unsigned long) fn; - regs.di = (unsigned long) arg; - -#ifdef CONFIG_X86_32 - regs.ds = __USER_DS; - regs.es = __USER_DS; - regs.fs = __KERNEL_PERCPU; - regs.gs = __KERNEL_STACK_CANARY; -#endif - - regs.orig_ax = -1; - regs.ip = (unsigned long) kernel_thread_helper; - regs.cs = __KERNEL_CS | get_kernel_rpl(); - regs.flags = X86_EFLAGS_IF | 0x2; - - /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); -} -EXPORT_SYMBOL(kernel_thread); - void release_thread(struct task_struct *dead_task) { if (dead_task->mm) { -- cgit v1.2.3 From ebb682f522411abbe358059a256a8672ec0bd55b Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Wed, 9 Dec 2009 13:36:45 -0500 Subject: x86, AMD: Fix stale cpuid4_info shared_map data in shared_cpu_map cpumasks The per_cpu cpuid4_info shared_map can contain stale data when CPUs are added and removed. The stale data can lead to a NULL pointer derefernce panic on a remove of a CPU that has had siblings previously removed. This patch resolves the panic by verifying a cpu is actually online before adding it to the shared_cpu_map, only examining cpus that are part of the same lower level cache, and by updating other siblings lowest level cache maps when a cpu is added. Signed-off-by: Prarit Bhargava LKML-Reference: <20091209183336.17855.98708.sendpatchset@prarit.bos.redhat.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/intel_cacheinfo.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 6c40f6b5b340..63ada177b40c 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -507,18 +507,19 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) { struct _cpuid4_info *this_leaf, *sibling_leaf; unsigned long num_threads_sharing; - int index_msb, i; + int index_msb, i, sibling; struct cpuinfo_x86 *c = &cpu_data(cpu); if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) { - struct cpuinfo_x86 *d; - for_each_online_cpu(i) { + for_each_cpu(i, c->llc_shared_map) { if (!per_cpu(cpuid4_info, i)) continue; - d = &cpu_data(i); this_leaf = CPUID4_INFO_IDX(i, index); - cpumask_copy(to_cpumask(this_leaf->shared_cpu_map), - d->llc_shared_map); + for_each_cpu(sibling, c->llc_shared_map) { + if (!cpu_online(sibling)) + continue; + set_bit(sibling, this_leaf->shared_cpu_map); + } } return; } -- cgit v1.2.3 From 893f38d144a4d96d2483cd7c3801d26e1b2c23e9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 10 Dec 2009 13:07:22 -0800 Subject: x86: Use find_e820() instead of hard coded trampoline address Jens found the following crash/regression: [ 0.000000] found SMP MP-table at [ffff8800000fdd80] fdd80 [ 0.000000] Kernel panic - not syncing: Overlapping early reservations 12-f011 MP-table mpc to 0-fff BIOS data page and [ 0.000000] Kernel panic - not syncing: Overlapping early reservations 12-f011 MP-table mpc to 6000-7fff TRAMPOLINE and bisected it to b24c2a9 ("x86: Move find_smp_config() earlier and avoid bootmem usage"). It turns out the BIOS is using the first 64k for mptable, without reserving it. So try to find good range for the real-mode trampoline instead of hard coding it, in case some bios tries to use that range for sth. Reported-by: Jens Axboe Signed-off-by: Yinghai Lu Tested-by: Jens Axboe Cc: Randy Dunlap LKML-Reference: <4B21630A.6000308@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/trampoline.h | 1 - arch/x86/kernel/e820.c | 11 ++++++++++- arch/x86/kernel/head32.c | 2 -- arch/x86/kernel/head64.c | 2 -- arch/x86/kernel/mpparse.c | 3 --- arch/x86/kernel/setup.c | 13 ++++++++----- arch/x86/kernel/trampoline.c | 20 +++++++++----------- 7 files changed, 27 insertions(+), 25 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h index 90f06c25221d..cb507bb05d79 100644 --- a/arch/x86/include/asm/trampoline.h +++ b/arch/x86/include/asm/trampoline.h @@ -16,7 +16,6 @@ extern unsigned long initial_code; extern unsigned long initial_gs; #define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE) -#define TRAMPOLINE_BASE 0x6000 extern unsigned long setup_trampoline(void); extern void __init reserve_trampoline_memory(void); diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index d17d482a04f4..f50447d961c0 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -732,7 +732,16 @@ struct early_res { char overlap_ok; }; static struct early_res early_res[MAX_EARLY_RES] __initdata = { - { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ + { 0, PAGE_SIZE, "BIOS data page", 1 }, /* BIOS data page */ +#ifdef CONFIG_X86_32 + /* + * But first pinch a few for the stack/trampoline stuff + * FIXME: Don't need the extra page at 4K, but need to fix + * trampoline before removing it. (see the GDT stuff) + */ + { PAGE_SIZE, PAGE_SIZE, "EX TRAMPOLINE", 1 }, +#endif + {} }; diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 4f8e2507e8f3..5051b94c9069 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -29,8 +29,6 @@ static void __init i386_default_early_setup(void) void __init i386_start_kernel(void) { - reserve_trampoline_memory(); - reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); #ifdef CONFIG_BLK_DEV_INITRD diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 0b06cd778fd9..b5a9896ca1e7 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -98,8 +98,6 @@ void __init x86_64_start_reservations(char *real_mode_data) { copy_bootdata(__va(real_mode_data)); - reserve_trampoline_memory(); - reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); #ifdef CONFIG_BLK_DEV_INITRD diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 35a57c963df9..40b54ceb68b5 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -945,9 +945,6 @@ void __init early_reserve_e820_mpc_new(void) { if (enable_update_mptable && alloc_mptable) { u64 startt = 0; -#ifdef CONFIG_X86_TRAMPOLINE - startt = TRAMPOLINE_BASE; -#endif mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4); } } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 946a311a25c9..f7b8b9894b22 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -73,6 +73,7 @@ #include #include +#include #include #include #include @@ -875,6 +876,13 @@ void __init setup_arch(char **cmdline_p) reserve_brk(); + /* + * Find and reserve possible boot-time SMP configuration: + */ + find_smp_config(); + + reserve_trampoline_memory(); + #ifdef CONFIG_ACPI_SLEEP /* * Reserve low memory region for sleep support. @@ -921,11 +929,6 @@ void __init setup_arch(char **cmdline_p) early_acpi_boot_init(); - /* - * Find and reserve possible boot-time SMP configuration: - */ - find_smp_config(); - #ifdef CONFIG_ACPI_NUMA /* * Parse SRAT to discover nodes. diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index cd022121cab6..c652ef62742d 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c @@ -12,21 +12,19 @@ #endif /* ready for x86_64 and x86 */ -unsigned char *__trampinitdata trampoline_base = __va(TRAMPOLINE_BASE); +unsigned char *__trampinitdata trampoline_base; void __init reserve_trampoline_memory(void) { -#ifdef CONFIG_X86_32 - /* - * But first pinch a few for the stack/trampoline stuff - * FIXME: Don't need the extra page at 4K, but need to fix - * trampoline before removing it. (see the GDT stuff) - */ - reserve_early(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE"); -#endif + unsigned long mem; + /* Has to be in very low memory so we can execute real-mode AP code. */ - reserve_early(TRAMPOLINE_BASE, TRAMPOLINE_BASE + TRAMPOLINE_SIZE, - "TRAMPOLINE"); + mem = find_e820_area(0, 1<<20, TRAMPOLINE_SIZE, PAGE_SIZE); + if (mem == -1L) + panic("Cannot allocate trampoline\n"); + + trampoline_base = __va(mem); + reserve_early(mem, mem + TRAMPOLINE_SIZE, "TRAMPOLINE"); } /* -- cgit v1.2.3 From f8b7256096a20436f6d0926747e3ac3d64c81d24 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 30 Nov 2009 17:37:04 -0500 Subject: Unify sys_mmap* New helper - sys_mmap_pgoff(); switch syscalls to using it. Acked-by: David S. Miller Signed-off-by: Al Viro --- arch/alpha/kernel/osf_sys.c | 19 +++---- arch/arm/kernel/entry-common.S | 4 +- arch/arm/kernel/sys_arm.c | 30 +---------- arch/avr32/include/asm/syscalls.h | 4 -- arch/avr32/kernel/sys_avr32.c | 31 ------------ arch/avr32/kernel/syscall-stubs.S | 2 +- arch/blackfin/kernel/sys_bfin.c | 33 ------------ arch/blackfin/mach-common/entry.S | 2 +- arch/cris/kernel/sys_cris.c | 30 ++--------- arch/frv/kernel/sys_frv.c | 66 +----------------------- arch/h8300/kernel/sys_h8300.c | 83 +------------------------------ arch/h8300/kernel/syscalls.S | 2 +- arch/ia64/kernel/sys_ia64.c | 37 +------------- arch/m32r/kernel/sys_m32r.c | 24 --------- arch/m32r/kernel/syscall_table.S | 2 +- arch/m68k/kernel/sys_m68k.c | 83 +++---------------------------- arch/m68knommu/kernel/sys_m68k.c | 38 +------------- arch/m68knommu/kernel/syscalltable.S | 2 +- arch/microblaze/kernel/sys_microblaze.c | 38 ++------------ arch/microblaze/kernel/syscall_table.S | 2 +- arch/mips/kernel/linux32.c | 19 +------ arch/mips/kernel/syscall.c | 29 +---------- arch/mn10300/kernel/entry.S | 2 +- arch/mn10300/kernel/sys_mn10300.c | 31 +----------- arch/parisc/kernel/sys_parisc.c | 30 ++--------- arch/powerpc/kernel/syscalls.c | 15 +----- arch/s390/kernel/compat_linux.c | 32 ++---------- arch/s390/kernel/sys_s390.c | 30 +---------- arch/score/kernel/sys_score.c | 25 ++-------- arch/sh/kernel/sys_sh.c | 28 +---------- arch/sparc/kernel/sys_sparc_32.c | 31 ++---------- arch/sparc/kernel/sys_sparc_64.c | 22 +++----- arch/um/kernel/syscall.c | 28 +---------- arch/um/sys-i386/shared/sysdep/syscalls.h | 4 -- arch/x86/ia32/ia32entry.S | 2 +- arch/x86/ia32/sys_ia32.c | 43 +--------------- arch/x86/include/asm/sys_ia32.h | 3 -- arch/x86/include/asm/syscalls.h | 2 - arch/x86/kernel/sys_i386_32.c | 27 +--------- arch/x86/kernel/sys_x86_64.c | 17 +------ arch/x86/kernel/syscall_table_32.S | 2 +- arch/xtensa/include/asm/syscall.h | 2 - arch/xtensa/include/asm/unistd.h | 2 +- arch/xtensa/kernel/syscall.c | 25 ---------- include/linux/syscalls.h | 4 ++ mm/util.c | 29 +++++++++++ 46 files changed, 109 insertions(+), 907 deletions(-) (limited to 'arch/x86') diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 9a3334ae282e..62619f25132f 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -178,25 +178,18 @@ SYSCALL_DEFINE6(osf_mmap, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, unsigned long, fd, unsigned long, off) { - struct file *file = NULL; - unsigned long ret = -EBADF; + unsigned long ret = -EINVAL; #if 0 if (flags & (_MAP_HASSEMAPHORE | _MAP_INHERIT | _MAP_UNALIGNED)) printk("%s: unimplemented OSF mmap flags %04lx\n", current->comm, flags); #endif - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - down_write(¤t->mm->mmap_sem); - ret = do_mmap(file, addr, len, prot, flags, off); - up_write(¤t->mm->mmap_sem); - if (file) - fput(file); + if ((off + PAGE_ALIGN(len)) < off) + goto out; + if (off & ~PAGE_MASK) + goto out; + ret = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); out: return ret; } diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index f0fe95b7085d..2c1db77d7848 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -416,12 +416,12 @@ sys_mmap2: tst r5, #PGOFF_MASK moveq r5, r5, lsr #PAGE_SHIFT - 12 streq r5, [sp, #4] - beq do_mmap2 + beq sys_mmap_pgoff mov r0, #-EINVAL mov pc, lr #else str r5, [sp, #4] - b do_mmap2 + b sys_mmap_pgoff #endif ENDPROC(sys_mmap2) diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c index 3b897444a9bd..ae4027bd01bd 100644 --- a/arch/arm/kernel/sys_arm.c +++ b/arch/arm/kernel/sys_arm.c @@ -28,34 +28,6 @@ #include #include -/* common code for old and new mmaps */ -inline long do_mmap2( - unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - int error = -EINVAL; - struct file * file = NULL; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - - error = -EBADF; - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); - - if (file) - fput(file); -out: - return error; -} - struct mmap_arg_struct { unsigned long addr; unsigned long len; @@ -77,7 +49,7 @@ asmlinkage int old_mmap(struct mmap_arg_struct __user *arg) if (a.offset & ~PAGE_MASK) goto out; - error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); + error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); out: return error; } diff --git a/arch/avr32/include/asm/syscalls.h b/arch/avr32/include/asm/syscalls.h index 483d666c27c0..66a197266637 100644 --- a/arch/avr32/include/asm/syscalls.h +++ b/arch/avr32/include/asm/syscalls.h @@ -29,10 +29,6 @@ asmlinkage int sys_sigaltstack(const stack_t __user *, stack_t __user *, struct pt_regs *); asmlinkage int sys_rt_sigreturn(struct pt_regs *); -/* kernel/sys_avr32.c */ -asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, - unsigned long, unsigned long, off_t); - /* mm/cache.c */ asmlinkage int sys_cacheflush(int, void __user *, size_t); diff --git a/arch/avr32/kernel/sys_avr32.c b/arch/avr32/kernel/sys_avr32.c index 5d2daeaf356f..459349b5ed5a 100644 --- a/arch/avr32/kernel/sys_avr32.c +++ b/arch/avr32/kernel/sys_avr32.c @@ -5,39 +5,8 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#include -#include -#include -#include #include -#include -#include -#include - -asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, off_t offset) -{ - int error = -EBADF; - struct file *file = NULL; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - return error; - } - - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, offset); - up_write(¤t->mm->mmap_sem); - - if (file) - fput(file); - return error; -} - int kernel_execve(const char *file, char **argv, char **envp) { register long scno asm("r8") = __NR_execve; diff --git a/arch/avr32/kernel/syscall-stubs.S b/arch/avr32/kernel/syscall-stubs.S index f7244cd02fbb..0447a3e2ba64 100644 --- a/arch/avr32/kernel/syscall-stubs.S +++ b/arch/avr32/kernel/syscall-stubs.S @@ -61,7 +61,7 @@ __sys_execve: __sys_mmap2: pushm lr st.w --sp, ARG6 - call sys_mmap2 + call sys_mmap_pgoff sub sp, -4 popm pc diff --git a/arch/blackfin/kernel/sys_bfin.c b/arch/blackfin/kernel/sys_bfin.c index afcef129d4e8..2e7f8e10bf87 100644 --- a/arch/blackfin/kernel/sys_bfin.c +++ b/arch/blackfin/kernel/sys_bfin.c @@ -22,39 +22,6 @@ #include #include -/* common code for old and new mmaps */ -static inline long -do_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - int error = -EBADF; - struct file *file = NULL; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); - - if (file) - fput(file); - out: - return error; -} - -asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - return do_mmap2(addr, len, prot, flags, fd, pgoff); -} - asmlinkage void *sys_sram_alloc(size_t size, unsigned long flags) { return sram_alloc_with_lsl(size, flags); diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S index a50637a8b9bd..f3f8bb46b517 100644 --- a/arch/blackfin/mach-common/entry.S +++ b/arch/blackfin/mach-common/entry.S @@ -1422,7 +1422,7 @@ ENTRY(_sys_call_table) .long _sys_ni_syscall /* streams2 */ .long _sys_vfork /* 190 */ .long _sys_getrlimit - .long _sys_mmap2 + .long _sys_mmap_pgoff .long _sys_truncate64 .long _sys_ftruncate64 .long _sys_stat64 /* 195 */ diff --git a/arch/cris/kernel/sys_cris.c b/arch/cris/kernel/sys_cris.c index 2ad962c7e88e..c2bbb1ac98a9 100644 --- a/arch/cris/kernel/sys_cris.c +++ b/arch/cris/kernel/sys_cris.c @@ -26,31 +26,6 @@ #include #include -/* common code for old and new mmaps */ -static inline long -do_mmap2(unsigned long addr, unsigned long len, unsigned long prot, - unsigned long flags, unsigned long fd, unsigned long pgoff) -{ - int error = -EBADF; - struct file * file = NULL; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); - - if (file) - fput(file); -out: - return error; -} - asmlinkage unsigned long old_mmap(unsigned long __user *args) { unsigned long buffer[6]; @@ -63,7 +38,7 @@ asmlinkage unsigned long old_mmap(unsigned long __user *args) if (buffer[5] & ~PAGE_MASK) /* verify that offset is on page boundary */ goto out; - err = do_mmap2(buffer[0], buffer[1], buffer[2], buffer[3], + err = sys_mmap_pgoff(buffer[0], buffer[1], buffer[2], buffer[3], buffer[4], buffer[5] >> PAGE_SHIFT); out: return err; @@ -73,7 +48,8 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) { - return do_mmap2(addr, len, prot, flags, fd, pgoff); + /* bug(?): 8Kb pages here */ + return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); } /* diff --git a/arch/frv/kernel/sys_frv.c b/arch/frv/kernel/sys_frv.c index 2b6b5289cdcc..1d3d4c9e2521 100644 --- a/arch/frv/kernel/sys_frv.c +++ b/arch/frv/kernel/sys_frv.c @@ -31,9 +31,6 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) { - int error = -EBADF; - struct file * file = NULL; - /* As with sparc32, make sure the shift for mmap2 is constant (12), no matter what PAGE_SIZE we have.... */ @@ -41,69 +38,10 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, trying to map something we can't */ if (pgoff & ((1 << (PAGE_SHIFT - 12)) - 1)) return -EINVAL; - pgoff >>= PAGE_SHIFT - 12; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); - - if (file) - fput(file); -out: - return error; -} - -#if 0 /* DAVIDM - do we want this */ -struct mmap_arg_struct64 { - __u32 addr; - __u32 len; - __u32 prot; - __u32 flags; - __u64 offset; /* 64 bits */ - __u32 fd; -}; - -asmlinkage long sys_mmap64(struct mmap_arg_struct64 *arg) -{ - int error = -EFAULT; - struct file * file = NULL; - struct mmap_arg_struct64 a; - unsigned long pgoff; - - if (copy_from_user(&a, arg, sizeof(a))) - return -EFAULT; - - if ((long)a.offset & ~PAGE_MASK) - return -EINVAL; - - pgoff = a.offset >> PAGE_SHIFT; - if ((a.offset >> PAGE_SHIFT) != pgoff) - return -EINVAL; - - if (!(a.flags & MAP_ANONYMOUS)) { - error = -EBADF; - file = fget(a.fd); - if (!file) - goto out; - } - a.flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags, pgoff); - up_write(¤t->mm->mmap_sem); - if (file) - fput(file); -out: - return error; + return sys_mmap_pgoff(addr, len, prot, flags, fd, + pgoff >> (PAGE_SHIFT - 12)); } -#endif /* * sys_ipc() is the de-multiplexer for the SysV IPC calls.. diff --git a/arch/h8300/kernel/sys_h8300.c b/arch/h8300/kernel/sys_h8300.c index 8cb5d73a0e35..b5969db0ca10 100644 --- a/arch/h8300/kernel/sys_h8300.c +++ b/arch/h8300/kernel/sys_h8300.c @@ -26,39 +26,6 @@ #include #include -/* common code for old and new mmaps */ -static inline long do_mmap2( - unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - int error = -EBADF; - struct file * file = NULL; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); - - if (file) - fput(file); -out: - return error; -} - -asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - return do_mmap2(addr, len, prot, flags, fd, pgoff); -} - /* * Perform the select(nd, in, out, ex, tv) and mmap() system * calls. Linux/m68k cloned Linux/i386, which didn't use to be able to @@ -87,57 +54,11 @@ asmlinkage int old_mmap(struct mmap_arg_struct *arg) if (a.offset & ~PAGE_MASK) goto out; - a.flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - - error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); -out: - return error; -} - -#if 0 /* DAVIDM - do we want this */ -struct mmap_arg_struct64 { - __u32 addr; - __u32 len; - __u32 prot; - __u32 flags; - __u64 offset; /* 64 bits */ - __u32 fd; -}; - -asmlinkage long sys_mmap64(struct mmap_arg_struct64 *arg) -{ - int error = -EFAULT; - struct file * file = NULL; - struct mmap_arg_struct64 a; - unsigned long pgoff; - - if (copy_from_user(&a, arg, sizeof(a))) - return -EFAULT; - - if ((long)a.offset & ~PAGE_MASK) - return -EINVAL; - - pgoff = a.offset >> PAGE_SHIFT; - if ((a.offset >> PAGE_SHIFT) != pgoff) - return -EINVAL; - - if (!(a.flags & MAP_ANONYMOUS)) { - error = -EBADF; - file = fget(a.fd); - if (!file) - goto out; - } - a.flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags, pgoff); - up_write(¤t->mm->mmap_sem); - if (file) - fput(file); + error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, + a.offset >> PAGE_SHIFT); out: return error; } -#endif struct sel_arg_struct { unsigned long n; diff --git a/arch/h8300/kernel/syscalls.S b/arch/h8300/kernel/syscalls.S index 4eb67faac633..2d69881eda6a 100644 --- a/arch/h8300/kernel/syscalls.S +++ b/arch/h8300/kernel/syscalls.S @@ -206,7 +206,7 @@ SYMBOL_NAME_LABEL(sys_call_table) .long SYMBOL_NAME(sys_ni_syscall) /* streams2 */ .long SYMBOL_NAME(sys_vfork) /* 190 */ .long SYMBOL_NAME(sys_getrlimit) - .long SYMBOL_NAME(sys_mmap2) + .long SYMBOL_NAME(sys_mmap_pgoff) .long SYMBOL_NAME(sys_truncate64) .long SYMBOL_NAME(sys_ftruncate64) .long SYMBOL_NAME(sys_stat64) /* 195 */ diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index 92ed83f34036..ae384a2974c2 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -185,39 +185,6 @@ int ia64_mmap_check(unsigned long addr, unsigned long len, return 0; } -static inline unsigned long -do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, unsigned long pgoff) -{ - struct file *file = NULL; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - return -EBADF; - - if (!file->f_op || !file->f_op->mmap) { - addr = -ENODEV; - goto out; - } - } - - /* Careful about overflows.. */ - len = PAGE_ALIGN(len); - if (!len || len > TASK_SIZE) { - addr = -EINVAL; - goto out; - } - - down_write(¤t->mm->mmap_sem); - addr = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); - -out: if (file) - fput(file); - return addr; -} - /* * mmap2() is like mmap() except that the offset is expressed in units * of PAGE_SIZE (instead of bytes). This allows to mmap2() (pieces @@ -226,7 +193,7 @@ out: if (file) asmlinkage unsigned long sys_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, long pgoff) { - addr = do_mmap2(addr, len, prot, flags, fd, pgoff); + addr = sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); if (!IS_ERR((void *) addr)) force_successful_syscall_return(); return addr; @@ -238,7 +205,7 @@ sys_mmap (unsigned long addr, unsigned long len, int prot, int flags, int fd, lo if (offset_in_page(off) != 0) return -EINVAL; - addr = do_mmap2(addr, len, prot, flags, fd, off >> PAGE_SHIFT); + addr = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); if (!IS_ERR((void *) addr)) force_successful_syscall_return(); return addr; diff --git a/arch/m32r/kernel/sys_m32r.c b/arch/m32r/kernel/sys_m32r.c index 305ac852bbed..d3c865c5a6ba 100644 --- a/arch/m32r/kernel/sys_m32r.c +++ b/arch/m32r/kernel/sys_m32r.c @@ -76,30 +76,6 @@ asmlinkage int sys_tas(int __user *addr) return oldval; } -asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - int error = -EBADF; - struct file *file = NULL; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); - - if (file) - fput(file); -out: - return error; -} - /* * sys_ipc() is the de-multiplexer for the SysV IPC calls.. * diff --git a/arch/m32r/kernel/syscall_table.S b/arch/m32r/kernel/syscall_table.S index aa3bf4cfab37..60536e271233 100644 --- a/arch/m32r/kernel/syscall_table.S +++ b/arch/m32r/kernel/syscall_table.S @@ -191,7 +191,7 @@ ENTRY(sys_call_table) .long sys_ni_syscall /* streams2 */ .long sys_vfork /* 190 */ .long sys_getrlimit - .long sys_mmap2 + .long sys_mmap_pgoff .long sys_truncate64 .long sys_ftruncate64 .long sys_stat64 /* 195 */ diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c index 7deb402bfc75..218f441de667 100644 --- a/arch/m68k/kernel/sys_m68k.c +++ b/arch/m68k/kernel/sys_m68k.c @@ -29,37 +29,16 @@ #include #include -/* common code for old and new mmaps */ -static inline long do_mmap2( - unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - int error = -EBADF; - struct file * file = NULL; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); - - if (file) - fput(file); -out: - return error; -} - asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) { - return do_mmap2(addr, len, prot, flags, fd, pgoff); + /* + * This is wrong for sun3 - there PAGE_SIZE is 8Kb, + * so we need to shift the argument down by 1; m68k mmap64(3) + * (in libc) expects the last argument of mmap2 in 4Kb units. + */ + return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); } /* @@ -90,57 +69,11 @@ asmlinkage int old_mmap(struct mmap_arg_struct __user *arg) if (a.offset & ~PAGE_MASK) goto out; - a.flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - - error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); -out: - return error; -} - -#if 0 -struct mmap_arg_struct64 { - __u32 addr; - __u32 len; - __u32 prot; - __u32 flags; - __u64 offset; /* 64 bits */ - __u32 fd; -}; - -asmlinkage long sys_mmap64(struct mmap_arg_struct64 *arg) -{ - int error = -EFAULT; - struct file * file = NULL; - struct mmap_arg_struct64 a; - unsigned long pgoff; - - if (copy_from_user(&a, arg, sizeof(a))) - return -EFAULT; - - if ((long)a.offset & ~PAGE_MASK) - return -EINVAL; - - pgoff = a.offset >> PAGE_SHIFT; - if ((a.offset >> PAGE_SHIFT) != pgoff) - return -EINVAL; - - if (!(a.flags & MAP_ANONYMOUS)) { - error = -EBADF; - file = fget(a.fd); - if (!file) - goto out; - } - a.flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags, pgoff); - up_write(¤t->mm->mmap_sem); - if (file) - fput(file); + error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, + a.offset >> PAGE_SHIFT); out: return error; } -#endif struct sel_arg_struct { unsigned long n; diff --git a/arch/m68knommu/kernel/sys_m68k.c b/arch/m68knommu/kernel/sys_m68k.c index efdd090778a3..b67cbc735a9b 100644 --- a/arch/m68knommu/kernel/sys_m68k.c +++ b/arch/m68knommu/kernel/sys_m68k.c @@ -27,39 +27,6 @@ #include #include -/* common code for old and new mmaps */ -static inline long do_mmap2( - unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - int error = -EBADF; - struct file * file = NULL; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); - - if (file) - fput(file); -out: - return error; -} - -asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - return do_mmap2(addr, len, prot, flags, fd, pgoff); -} - /* * Perform the select(nd, in, out, ex, tv) and mmap() system * calls. Linux/m68k cloned Linux/i386, which didn't use to be able to @@ -88,9 +55,8 @@ asmlinkage int old_mmap(struct mmap_arg_struct *arg) if (a.offset & ~PAGE_MASK) goto out; - a.flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - - error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); + error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, + a.offset >> PAGE_SHIFT); out: return error; } diff --git a/arch/m68knommu/kernel/syscalltable.S b/arch/m68knommu/kernel/syscalltable.S index 23535cc415ae..486837efa3d7 100644 --- a/arch/m68knommu/kernel/syscalltable.S +++ b/arch/m68knommu/kernel/syscalltable.S @@ -210,7 +210,7 @@ ENTRY(sys_call_table) .long sys_ni_syscall /* streams2 */ .long sys_vfork /* 190 */ .long sys_getrlimit - .long sys_mmap2 + .long sys_mmap_pgoff .long sys_truncate64 .long sys_ftruncate64 .long sys_stat64 /* 195 */ diff --git a/arch/microblaze/kernel/sys_microblaze.c b/arch/microblaze/kernel/sys_microblaze.c index 07cabed4b947..9f3c205fb75b 100644 --- a/arch/microblaze/kernel/sys_microblaze.c +++ b/arch/microblaze/kernel/sys_microblaze.c @@ -62,46 +62,14 @@ out: return error; } -asmlinkage long -sys_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - struct file *file = NULL; - int ret = -EBADF; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) { - printk(KERN_INFO "no fd in mmap\r\n"); - goto out; - } - } - - down_write(¤t->mm->mmap_sem); - ret = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); - if (file) - fput(file); -out: - return ret; -} - asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, off_t pgoff) { - int err = -EINVAL; - - if (pgoff & ~PAGE_MASK) { - printk(KERN_INFO "no pagemask in mmap\r\n"); - goto out; - } + if (pgoff & ~PAGE_MASK) + return -EINVAL; - err = sys_mmap2(addr, len, prot, flags, fd, pgoff >> PAGE_SHIFT); -out: - return err; + return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff >> PAGE_SHIFT); } /* diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S index c1ab1dc10898..b96f365ea6b1 100644 --- a/arch/microblaze/kernel/syscall_table.S +++ b/arch/microblaze/kernel/syscall_table.S @@ -196,7 +196,7 @@ ENTRY(sys_call_table) .long sys_ni_syscall /* reserved for streams2 */ .long sys_vfork /* 190 */ .long sys_getrlimit - .long sys_mmap2 /* mmap2 */ + .long sys_mmap_pgoff /* mmap2 */ .long sys_truncate64 .long sys_ftruncate64 .long sys_stat64 /* 195 */ diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index 1a2793efdc4e..f042563c924f 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -67,28 +67,13 @@ SYSCALL_DEFINE6(32_mmap2, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, unsigned long, fd, unsigned long, pgoff) { - struct file * file = NULL; unsigned long error; error = -EINVAL; if (pgoff & (~PAGE_MASK >> 12)) goto out; - pgoff >>= PAGE_SHIFT-12; - - if (!(flags & MAP_ANONYMOUS)) { - error = -EBADF; - file = fget(fd); - if (!file) - goto out; - } - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); - if (file) - fput(file); - + error = sys_mmap_pgoff(addr, len, prot, flags, fd, + pgoff >> (PAGE_SHIFT-12)); out: return error; } diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index fe0d79805603..c25b2e7dcb7b 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -129,31 +129,6 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, } } -/* common code for old and new mmaps */ -static inline unsigned long -do_mmap2(unsigned long addr, unsigned long len, unsigned long prot, - unsigned long flags, unsigned long fd, unsigned long pgoff) -{ - unsigned long error = -EBADF; - struct file * file = NULL; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); - - if (file) - fput(file); -out: - return error; -} - SYSCALL_DEFINE6(mips_mmap, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, unsigned long, fd, off_t, offset) @@ -164,7 +139,7 @@ SYSCALL_DEFINE6(mips_mmap, unsigned long, addr, unsigned long, len, if (offset & ~PAGE_MASK) goto out; - result = do_mmap2(addr, len, prot, flags, fd, offset >> PAGE_SHIFT); + result = sys_mmap_pgoff(addr, len, prot, flags, fd, offset >> PAGE_SHIFT); out: return result; @@ -177,7 +152,7 @@ SYSCALL_DEFINE6(mips_mmap2, unsigned long, addr, unsigned long, len, if (pgoff & (~PAGE_MASK >> 12)) return -EINVAL; - return do_mmap2(addr, len, prot, flags, fd, pgoff >> (PAGE_SHIFT-12)); + return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff >> (PAGE_SHIFT-12)); } save_static_function(sys_fork); diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S index a94e7ea3faa6..c9ee6c009d79 100644 --- a/arch/mn10300/kernel/entry.S +++ b/arch/mn10300/kernel/entry.S @@ -578,7 +578,7 @@ ENTRY(sys_call_table) .long sys_ni_syscall /* reserved for streams2 */ .long sys_vfork /* 190 */ .long sys_getrlimit - .long sys_mmap2 + .long sys_mmap_pgoff .long sys_truncate64 .long sys_ftruncate64 .long sys_stat64 /* 195 */ diff --git a/arch/mn10300/kernel/sys_mn10300.c b/arch/mn10300/kernel/sys_mn10300.c index ec4100dfcb7d..17cc6ce04e84 100644 --- a/arch/mn10300/kernel/sys_mn10300.c +++ b/arch/mn10300/kernel/sys_mn10300.c @@ -23,42 +23,13 @@ #include -/* - * memory mapping syscall - */ -asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - struct file *file = NULL; - long error = -EINVAL; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - - error = -EBADF; - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); - - if (file) - fput(file); -out: - return error; -} - asmlinkage long old_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long offset) { if (offset & ~PAGE_MASK) return -EINVAL; - return sys_mmap2(addr, len, prot, flags, fd, offset >> PAGE_SHIFT); + return sys_mmap_pgoff(addr, len, prot, flags, fd, offset >> PAGE_SHIFT); } struct sel_arg_struct { diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index 71b31957c8f1..9147391afb03 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -110,37 +110,14 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, return addr; } -static unsigned long do_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, unsigned long fd, - unsigned long pgoff) -{ - struct file * file = NULL; - unsigned long error = -EBADF; - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); - - if (file != NULL) - fput(file); -out: - return error; -} - asmlinkage unsigned long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) { /* Make sure the shift for mmap2 is constant (12), no matter what PAGE_SIZE we have. */ - return do_mmap2(addr, len, prot, flags, fd, pgoff >> (PAGE_SHIFT - 12)); + return sys_mmap_pgoff(addr, len, prot, flags, fd, + pgoff >> (PAGE_SHIFT - 12)); } asmlinkage unsigned long sys_mmap(unsigned long addr, unsigned long len, @@ -148,7 +125,8 @@ asmlinkage unsigned long sys_mmap(unsigned long addr, unsigned long len, unsigned long offset) { if (!(offset & ~PAGE_MASK)) { - return do_mmap2(addr, len, prot, flags, fd, offset >> PAGE_SHIFT); + return sys_mmap_pgoff(addr, len, prot, flags, fd, + offset >> PAGE_SHIFT); } else { return -EINVAL; } diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index c04832c4a02e..3370e62e43d4 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -140,7 +140,6 @@ static inline unsigned long do_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long off, int shift) { - struct file * file = NULL; unsigned long ret = -EINVAL; if (!arch_validate_prot(prot)) @@ -151,20 +150,8 @@ static inline unsigned long do_mmap2(unsigned long addr, size_t len, goto out; off >>= shift; } - - ret = -EBADF; - if (!(flags & MAP_ANONYMOUS)) { - if (!(file = fget(fd))) - goto out; - } - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - down_write(¤t->mm->mmap_sem); - ret = do_mmap_pgoff(file, addr, len, prot, flags, off); - up_write(¤t->mm->mmap_sem); - if (file) - fput(file); + ret = sys_mmap_pgoff(addr, len, prot, flags, fd, off); out: return ret; } diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 11556aa6bf17..22c9e557bb22 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -624,33 +624,6 @@ struct mmap_arg_struct_emu31 { u32 offset; }; -/* common code for old and new mmaps */ -static inline long do_mmap2( - unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - struct file * file = NULL; - unsigned long error = -EBADF; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); - - if (file) - fput(file); -out: - return error; -} - - asmlinkage unsigned long old32_mmap(struct mmap_arg_struct_emu31 __user *arg) { @@ -664,7 +637,8 @@ old32_mmap(struct mmap_arg_struct_emu31 __user *arg) if (a.offset & ~PAGE_MASK) goto out; - error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); + error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, + a.offset >> PAGE_SHIFT); out: return error; } @@ -677,7 +651,7 @@ sys32_mmap2(struct mmap_arg_struct_emu31 __user *arg) if (copy_from_user(&a, arg, sizeof(a))) goto out; - error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); + error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); out: return error; } diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c index e9d94f61d500..86a74c9c9e63 100644 --- a/arch/s390/kernel/sys_s390.c +++ b/arch/s390/kernel/sys_s390.c @@ -32,32 +32,6 @@ #include #include "entry.h" -/* common code for old and new mmaps */ -static inline long do_mmap2( - unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - long error = -EBADF; - struct file * file = NULL; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); - - if (file) - fput(file); -out: - return error; -} - /* * Perform the select(nd, in, out, ex, tv) and mmap() system * calls. Linux for S/390 isn't able to handle more than 5 @@ -81,7 +55,7 @@ SYSCALL_DEFINE1(mmap2, struct mmap_arg_struct __user *, arg) if (copy_from_user(&a, arg, sizeof(a))) goto out; - error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); + error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); out: return error; } @@ -98,7 +72,7 @@ SYSCALL_DEFINE1(s390_old_mmap, struct mmap_arg_struct __user *, arg) if (a.offset & ~PAGE_MASK) goto out; - error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); + error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); out: return error; } diff --git a/arch/score/kernel/sys_score.c b/arch/score/kernel/sys_score.c index 001249469866..3d6a67dd628c 100644 --- a/arch/score/kernel/sys_score.c +++ b/arch/score/kernel/sys_score.c @@ -36,34 +36,15 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) { - int error = -EBADF; - struct file *file = NULL; - - if (pgoff & (~PAGE_MASK >> 12)) - return -EINVAL; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - return error; - } - - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); - - if (file) - fput(file); - - return error; + return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); } asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, off_t pgoff) { - return sys_mmap2(addr, len, prot, flags, fd, pgoff >> PAGE_SHIFT); + /* where's the alignment check? */ + return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff >> PAGE_SHIFT); } asmlinkage long diff --git a/arch/sh/kernel/sys_sh.c b/arch/sh/kernel/sys_sh.c index 8aa5d1ceaf14..71399cde03b5 100644 --- a/arch/sh/kernel/sys_sh.c +++ b/arch/sh/kernel/sys_sh.c @@ -28,37 +28,13 @@ #include #include -static inline long -do_mmap2(unsigned long addr, unsigned long len, unsigned long prot, - unsigned long flags, int fd, unsigned long pgoff) -{ - int error = -EBADF; - struct file *file = NULL; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); - - if (file) - fput(file); -out: - return error; -} - asmlinkage int old_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, int fd, unsigned long off) { if (off & ~PAGE_MASK) return -EINVAL; - return do_mmap2(addr, len, prot, flags, fd, off>>PAGE_SHIFT); + return sys_mmap_pgoff(addr, len, prot, flags, fd, off>>PAGE_SHIFT); } asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, @@ -74,7 +50,7 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, pgoff >>= PAGE_SHIFT - 12; - return do_mmap2(addr, len, prot, flags, fd, pgoff); + return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); } /* diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index 10c43bea32c7..36f6f26d9cec 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -234,31 +234,6 @@ int sparc_mmap_check(unsigned long addr, unsigned long len) } /* Linux version of mmap */ -static unsigned long do_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, unsigned long fd, - unsigned long pgoff) -{ - struct file * file = NULL; - unsigned long retval = -EBADF; - - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - - len = PAGE_ALIGN(len); - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - - down_write(¤t->mm->mmap_sem); - retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); - - if (file) - fput(file); -out: - return retval; -} asmlinkage unsigned long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, @@ -266,14 +241,16 @@ asmlinkage unsigned long sys_mmap2(unsigned long addr, unsigned long len, { /* Make sure the shift for mmap2 is constant (12), no matter what PAGE_SIZE we have. */ - return do_mmap2(addr, len, prot, flags, fd, pgoff >> (PAGE_SHIFT - 12)); + return sys_mmap_pgoff(addr, len, prot, flags, fd, + pgoff >> (PAGE_SHIFT - 12)); } asmlinkage unsigned long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long off) { - return do_mmap2(addr, len, prot, flags, fd, off >> PAGE_SHIFT); + /* no alignment check? */ + return sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); } long sparc_remap_file_pages(unsigned long start, unsigned long size, diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index d498b32c75f6..8f9cd58497de 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -572,23 +572,13 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, unsigned long, fd, unsigned long, off) { - struct file * file = NULL; - unsigned long retval = -EBADF; + unsigned long retval = -EINVAL; - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - len = PAGE_ALIGN(len); - - down_write(¤t->mm->mmap_sem); - retval = do_mmap(file, addr, len, prot, flags, off); - up_write(¤t->mm->mmap_sem); - - if (file) - fput(file); + if ((off + PAGE_ALIGN(len)) < off) + goto out; + if (off & ~PAGE_MASK) + goto out; + retval = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); out: return retval; } diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c index a4625c7b2bf9..cccab850c27e 100644 --- a/arch/um/kernel/syscall.c +++ b/arch/um/kernel/syscall.c @@ -8,6 +8,7 @@ #include "linux/mm.h" #include "linux/sched.h" #include "linux/utsname.h" +#include "linux/syscalls.h" #include "asm/current.h" #include "asm/mman.h" #include "asm/uaccess.h" @@ -37,31 +38,6 @@ long sys_vfork(void) return ret; } -/* common code for old and new mmaps */ -long sys_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - long error = -EBADF; - struct file * file = NULL; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); - - if (file) - fput(file); - out: - return error; -} - long old_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long offset) @@ -70,7 +46,7 @@ long old_mmap(unsigned long addr, unsigned long len, if (offset & ~PAGE_MASK) goto out; - err = sys_mmap2(addr, len, prot, flags, fd, offset >> PAGE_SHIFT); + err = sys_mmap_pgoff(addr, len, prot, flags, fd, offset >> PAGE_SHIFT); out: return err; } diff --git a/arch/um/sys-i386/shared/sysdep/syscalls.h b/arch/um/sys-i386/shared/sysdep/syscalls.h index 905698197e35..e7787679e317 100644 --- a/arch/um/sys-i386/shared/sysdep/syscalls.h +++ b/arch/um/sys-i386/shared/sysdep/syscalls.h @@ -20,7 +20,3 @@ extern syscall_handler_t *sys_call_table[]; #define EXECUTE_SYSCALL(syscall, regs) \ ((long (*)(struct syscall_args)) \ (*sys_call_table[syscall]))(SYSCALL_ARGS(®s->regs)) - -extern long sys_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff); diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 4eefdca9832b..53147ad85b96 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -696,7 +696,7 @@ ia32_sys_call_table: .quad quiet_ni_syscall /* streams2 */ .quad stub32_vfork /* 190 */ .quad compat_sys_getrlimit - .quad sys32_mmap2 + .quad sys_mmap_pgoff .quad sys32_truncate64 .quad sys32_ftruncate64 .quad sys32_stat64 /* 195 */ diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index df82c0e48ded..422572c77923 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -155,9 +155,6 @@ struct mmap_arg_struct { asmlinkage long sys32_mmap(struct mmap_arg_struct __user *arg) { struct mmap_arg_struct a; - struct file *file = NULL; - unsigned long retval; - struct mm_struct *mm ; if (copy_from_user(&a, arg, sizeof(a))) return -EFAULT; @@ -165,22 +162,8 @@ asmlinkage long sys32_mmap(struct mmap_arg_struct __user *arg) if (a.offset & ~PAGE_MASK) return -EINVAL; - if (!(a.flags & MAP_ANONYMOUS)) { - file = fget(a.fd); - if (!file) - return -EBADF; - } - - mm = current->mm; - down_write(&mm->mmap_sem); - retval = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags, + return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset>>PAGE_SHIFT); - if (file) - fput(file); - - up_write(&mm->mmap_sem); - - return retval; } asmlinkage long sys32_mprotect(unsigned long start, size_t len, @@ -483,30 +466,6 @@ asmlinkage long sys32_sendfile(int out_fd, int in_fd, return ret; } -asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - struct mm_struct *mm = current->mm; - unsigned long error; - struct file *file = NULL; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - return -EBADF; - } - - down_write(&mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(&mm->mmap_sem); - - if (file) - fput(file); - return error; -} - asmlinkage long sys32_olduname(struct oldold_utsname __user *name) { char *arch = "x86_64"; diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 9af9decb38c3..4a5a089e1c62 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -57,9 +57,6 @@ asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32); asmlinkage long sys32_personality(unsigned long); asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32); -asmlinkage long sys32_mmap2(unsigned long, unsigned long, unsigned long, - unsigned long, unsigned long, unsigned long); - struct oldold_utsname; struct old_utsname; asmlinkage long sys32_olduname(struct oldold_utsname __user *); diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 372b76edd63f..1bb6e395881c 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -55,8 +55,6 @@ struct sel_arg_struct; struct oldold_utsname; struct old_utsname; -asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, - unsigned long, unsigned long, unsigned long); asmlinkage int old_mmap(struct mmap_arg_struct __user *); asmlinkage int old_select(struct sel_arg_struct __user *); asmlinkage int sys_ipc(uint, int, int, int, void __user *, long); diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index 1884a8d12bfa..dee1ff7cba58 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -24,31 +24,6 @@ #include -asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - int error = -EBADF; - struct file *file = NULL; - struct mm_struct *mm = current->mm; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - - down_write(&mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(&mm->mmap_sem); - - if (file) - fput(file); -out: - return error; -} - /* * Perform the select(nd, in, out, ex, tv) and mmap() system * calls. Linux/i386 didn't use to be able to handle more than @@ -77,7 +52,7 @@ asmlinkage int old_mmap(struct mmap_arg_struct __user *arg) if (a.offset & ~PAGE_MASK) goto out; - err = sys_mmap2(a.addr, a.len, a.prot, a.flags, + err = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); out: return err; diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 45e00eb09c3a..8aa2057efd12 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -23,26 +23,11 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, fd, unsigned long, off) { long error; - struct file *file; - error = -EINVAL; if (off & ~PAGE_MASK) goto out; - error = -EBADF; - file = NULL; - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, off >> PAGE_SHIFT); - up_write(¤t->mm->mmap_sem); - - if (file) - fput(file); + error = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); out: return error; } diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 70c2125d55b9..15228b5d3eb7 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -191,7 +191,7 @@ ENTRY(sys_call_table) .long sys_ni_syscall /* reserved for streams2 */ .long ptregs_vfork /* 190 */ .long sys_getrlimit - .long sys_mmap2 + .long sys_mmap_pgoff .long sys_truncate64 .long sys_ftruncate64 .long sys_stat64 /* 195 */ diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h index 05cebf8f62b1..4352dbe1186a 100644 --- a/arch/xtensa/include/asm/syscall.h +++ b/arch/xtensa/include/asm/syscall.h @@ -13,8 +13,6 @@ struct sigaction; asmlinkage long xtensa_execve(char*, char**, char**, struct pt_regs*); asmlinkage long xtensa_clone(unsigned long, unsigned long, struct pt_regs*); asmlinkage long xtensa_pipe(int __user *); -asmlinkage long xtensa_mmap2(unsigned long, unsigned long, unsigned long, - unsigned long, unsigned long, unsigned long); asmlinkage long xtensa_ptrace(long, long, long, long); asmlinkage long xtensa_sigreturn(struct pt_regs*); asmlinkage long xtensa_rt_sigreturn(struct pt_regs*); diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h index 4e55dc763021..fbf318b3af3e 100644 --- a/arch/xtensa/include/asm/unistd.h +++ b/arch/xtensa/include/asm/unistd.h @@ -189,7 +189,7 @@ __SYSCALL( 79, sys_fremovexattr, 2) /* File Map / Shared Memory Operations */ #define __NR_mmap2 80 -__SYSCALL( 80, xtensa_mmap2, 6) +__SYSCALL( 80, sys_mmap_pgoff, 6) #define __NR_munmap 81 __SYSCALL( 81, sys_munmap, 2) #define __NR_mprotect 82 diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c index ac15ecbdf919..1e67bab775c1 100644 --- a/arch/xtensa/kernel/syscall.c +++ b/arch/xtensa/kernel/syscall.c @@ -57,31 +57,6 @@ asmlinkage long xtensa_pipe(int __user *userfds) return error; } - -asmlinkage long xtensa_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - int error = -EBADF; - struct file * file = NULL; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); - - if (file) - fput(file); -out: - return error; -} - asmlinkage long xtensa_shmat(int shmid, char __user *shmaddr, int shmflg) { unsigned long ret; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index bc70c5810fec..939a61507ac5 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -834,4 +834,8 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[]); asmlinkage long sys_perf_event_open( struct perf_event_attr __user *attr_uptr, pid_t pid, int cpu, int group_fd, unsigned long flags); + +asmlinkage long sys_mmap_pgoff(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff); #endif diff --git a/mm/util.c b/mm/util.c index 7c35ad95f927..3bf81b294ae8 100644 --- a/mm/util.c +++ b/mm/util.c @@ -4,6 +4,10 @@ #include #include #include +#include +#include +#include +#include #include #define CREATE_TRACE_POINTS @@ -268,6 +272,31 @@ int __attribute__((weak)) get_user_pages_fast(unsigned long start, } EXPORT_SYMBOL_GPL(get_user_pages_fast); +SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, + unsigned long, prot, unsigned long, flags, + unsigned long, fd, unsigned long, pgoff) +{ + struct file * file = NULL; + unsigned long retval = -EBADF; + + if (!(flags & MAP_ANONYMOUS)) { + file = fget(fd); + if (!file) + goto out; + } + + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); + + down_write(¤t->mm->mmap_sem); + retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + up_write(¤t->mm->mmap_sem); + + if (file) + fput(file); +out: + return retval; +} + /* Tracepoints definitions. */ EXPORT_TRACEPOINT_SYMBOL(kmalloc); EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); -- cgit v1.2.3 From a5d09d68335bb8422d5e7050c9f03f99ba6cfebd Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Fri, 11 Dec 2009 08:43:12 -0600 Subject: kgdb,x86: remove redundant test The for loop starts with a breakno of 0, and ends when it's 4. so this test is always true. Signed-off-by: Roel Kluin Signed-off-by: Andrew Morton Signed-off-by: Jason Wessel --- arch/x86/kernel/kgdb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 20a5b3689463..f93d015753ce 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -220,8 +220,7 @@ static void kgdb_correct_hw_break(void) dr7 |= ((breakinfo[breakno].len << 2) | breakinfo[breakno].type) << ((breakno << 2) + 16); - if (breakno >= 0 && breakno <= 3) - set_debugreg(breakinfo[breakno].addr, breakno); + set_debugreg(breakinfo[breakno].addr, breakno); } else { if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { -- cgit v1.2.3 From cf6f196d112a6f6757b1ca3cce0b576f7abee479 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Fri, 11 Dec 2009 08:43:16 -0600 Subject: kgdb,i386: Fix corner case access to ss with NMI watch dog exception It is possible for the user_mode_vm(regs) check to return true on the i368 arch for a non master kgdb cpu or when the master kgdb cpu handles the NMI watch dog exception. The solution is simply to select the correct gdb_ss location based on the check to user_mode_vm(regs). CC: Ingo Molnar Acked-by: H. Peter Anvin Signed-off-by: Jason Wessel --- arch/x86/kernel/kgdb.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index f93d015753ce..aefae46aa646 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -86,9 +86,15 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) gdb_regs[GDB_DS] = regs->ds; gdb_regs[GDB_ES] = regs->es; gdb_regs[GDB_CS] = regs->cs; - gdb_regs[GDB_SS] = __KERNEL_DS; gdb_regs[GDB_FS] = 0xFFFF; gdb_regs[GDB_GS] = 0xFFFF; + if (user_mode_vm(regs)) { + gdb_regs[GDB_SS] = regs->ss; + gdb_regs[GDB_SP] = regs->sp; + } else { + gdb_regs[GDB_SS] = __KERNEL_DS; + gdb_regs[GDB_SP] = kernel_stack_pointer(regs); + } #else gdb_regs[GDB_R8] = regs->r8; gdb_regs[GDB_R9] = regs->r9; @@ -101,8 +107,8 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) gdb_regs32[GDB_PS] = regs->flags; gdb_regs32[GDB_CS] = regs->cs; gdb_regs32[GDB_SS] = regs->ss; -#endif gdb_regs[GDB_SP] = kernel_stack_pointer(regs); +#endif } /** -- cgit v1.2.3 From 8097551d9ab9b9e3630694ad1bc6e12c597c515e Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Fri, 11 Dec 2009 08:43:18 -0600 Subject: kgdb,x86: do not set kgdb_single_step on x86 On an SMP system the kgdb_single_step flag has the possibility to indefinitely hang the system in the case. Consider the case where, CPU 1 has the schedule lock and CPU 0 is set to single step, there is no way for CPU 0 to run another task. The easy way to observe the problem is to make 2 cpus busy, and run the kgdb test suite. You will see that it hangs the system very quickly. while [ 1 ] ; do find /proc > /dev/null 2>&1 ; done & while [ 1 ] ; do find /proc > /dev/null 2>&1 ; done & echo V1 > /sys/module/kgdbts/parameters/kgdbts The side effect of this patch is that there is the possibility to miss a breakpoint in the case that a single step operation was executed to step over a breakpoint in common code. The trade off of the missed breakpoint is preferred to hanging the kernel. This can be fixed in the future by using kprobes or another strategy to step over planted breakpoints with out of line execution. CC: Ingo Molnar Signed-off-by: Jason Wessel --- arch/x86/kernel/kgdb.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index aefae46aa646..dd74fe7273b1 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -400,7 +400,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, /* set the trace bit if we're stepping */ if (remcomInBuffer[0] == 's') { linux_regs->flags |= X86_EFLAGS_TF; - kgdb_single_step = 1; atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id()); } -- cgit v1.2.3 From 505422517d3f126bb939439e9d15dece94e11d2c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 11 Dec 2009 18:14:40 +0100 Subject: x86, msr: Add support for non-contiguous cpumasks The current rd/wrmsr_on_cpus helpers assume that the supplied cpumasks are contiguous. However, there are machines out there like some K8 multinode Opterons which have a non-contiguous core enumeration on each node (e.g. cores 0,2 on node 0 instead of 0,1), see http://www.gossamer-threads.com/lists/linux/kernel/1160268. This patch fixes out-of-bounds writes (see URL above) by adding per-CPU msr structs which are used on the respective cores. Additionally, two helpers, msrs_{alloc,free}, are provided for use by the callers of the MSR accessors. Cc: H. Peter Anvin Cc: Mauro Carvalho Chehab Cc: Aristeu Rozanski Cc: Randy Dunlap Cc: Doug Thompson Signed-off-by: Borislav Petkov LKML-Reference: <20091211171440.GD31998@aftab> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/msr.h | 3 +++ arch/x86/lib/msr.c | 26 ++++++++++++++++++++++---- drivers/edac/amd64_edac.c | 46 +++++++++++++++++----------------------------- 3 files changed, 42 insertions(+), 33 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 5bef931f8b14..2d228fc9b4b7 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -244,6 +244,9 @@ do { \ #define write_rdtscp_aux(val) wrmsr(0xc0000103, (val), 0) +struct msr *msrs_alloc(void); +void msrs_free(struct msr *msrs); + #ifdef CONFIG_SMP int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index 41628b104b9e..872834177937 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c @@ -7,7 +7,6 @@ struct msr_info { u32 msr_no; struct msr reg; struct msr *msrs; - int off; int err; }; @@ -18,7 +17,7 @@ static void __rdmsr_on_cpu(void *info) int this_cpu = raw_smp_processor_id(); if (rv->msrs) - reg = &rv->msrs[this_cpu - rv->off]; + reg = per_cpu_ptr(rv->msrs, this_cpu); else reg = &rv->reg; @@ -32,7 +31,7 @@ static void __wrmsr_on_cpu(void *info) int this_cpu = raw_smp_processor_id(); if (rv->msrs) - reg = &rv->msrs[this_cpu - rv->off]; + reg = per_cpu_ptr(rv->msrs, this_cpu); else reg = &rv->reg; @@ -80,7 +79,6 @@ static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no, memset(&rv, 0, sizeof(rv)); - rv.off = cpumask_first(mask); rv.msrs = msrs; rv.msr_no = msr_no; @@ -120,6 +118,26 @@ void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) } EXPORT_SYMBOL(wrmsr_on_cpus); +struct msr *msrs_alloc(void) +{ + struct msr *msrs = NULL; + + msrs = alloc_percpu(struct msr); + if (!msrs) { + pr_warning("%s: error allocating msrs\n", __func__); + return NULL; + } + + return msrs; +} +EXPORT_SYMBOL(msrs_alloc); + +void msrs_free(struct msr *msrs) +{ + free_percpu(msrs); +} +EXPORT_SYMBOL(msrs_free); + /* These "safe" variants are slower and should be used when the target MSR may not actually exist. */ static void __rdmsr_safe_on_cpu(void *info) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 5fdd6daa40ea..df5b68433f34 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -13,6 +13,8 @@ module_param(report_gart_errors, int, 0644); static int ecc_enable_override; module_param(ecc_enable_override, int, 0644); +static struct msr *msrs; + /* Lookup table for all possible MC control instances */ struct amd64_pvt; static struct mem_ctl_info *mci_lookup[EDAC_MAX_NUMNODES]; @@ -2495,8 +2497,7 @@ static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, int nid) static bool amd64_nb_mce_bank_enabled_on_node(int nid) { cpumask_var_t mask; - struct msr *msrs; - int cpu, nbe, idx = 0; + int cpu, nbe; bool ret = false; if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { @@ -2507,32 +2508,22 @@ static bool amd64_nb_mce_bank_enabled_on_node(int nid) get_cpus_on_this_dct_cpumask(mask, nid); - msrs = kzalloc(sizeof(struct msr) * cpumask_weight(mask), GFP_KERNEL); - if (!msrs) { - amd64_printk(KERN_WARNING, "%s: error allocating msrs\n", - __func__); - free_cpumask_var(mask); - return false; - } - rdmsr_on_cpus(mask, MSR_IA32_MCG_CTL, msrs); for_each_cpu(cpu, mask) { - nbe = msrs[idx].l & K8_MSR_MCGCTL_NBE; + struct msr *reg = per_cpu_ptr(msrs, cpu); + nbe = reg->l & K8_MSR_MCGCTL_NBE; debugf0("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n", - cpu, msrs[idx].q, + cpu, reg->q, (nbe ? "enabled" : "disabled")); if (!nbe) goto out; - - idx++; } ret = true; out: - kfree(msrs); free_cpumask_var(mask); return ret; } @@ -2540,8 +2531,7 @@ out: static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on) { cpumask_var_t cmask; - struct msr *msrs = NULL; - int cpu, idx = 0; + int cpu; if (!zalloc_cpumask_var(&cmask, GFP_KERNEL)) { amd64_printk(KERN_WARNING, "%s: error allocating mask\n", @@ -2551,34 +2541,27 @@ static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on) get_cpus_on_this_dct_cpumask(cmask, pvt->mc_node_id); - msrs = kzalloc(sizeof(struct msr) * cpumask_weight(cmask), GFP_KERNEL); - if (!msrs) { - amd64_printk(KERN_WARNING, "%s: error allocating msrs\n", - __func__); - return -ENOMEM; - } - rdmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs); for_each_cpu(cpu, cmask) { + struct msr *reg = per_cpu_ptr(msrs, cpu); + if (on) { - if (msrs[idx].l & K8_MSR_MCGCTL_NBE) + if (reg->l & K8_MSR_MCGCTL_NBE) pvt->flags.ecc_report = 1; - msrs[idx].l |= K8_MSR_MCGCTL_NBE; + reg->l |= K8_MSR_MCGCTL_NBE; } else { /* * Turn off ECC reporting only when it was off before */ if (!pvt->flags.ecc_report) - msrs[idx].l &= ~K8_MSR_MCGCTL_NBE; + reg->l &= ~K8_MSR_MCGCTL_NBE; } - idx++; } wrmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs); - kfree(msrs); free_cpumask_var(cmask); return 0; @@ -3036,6 +3019,8 @@ static int __init amd64_edac_init(void) if (cache_k8_northbridges() < 0) return err; + msrs = msrs_alloc(); + err = pci_register_driver(&amd64_pci_driver); if (err) return err; @@ -3071,6 +3056,9 @@ static void __exit amd64_edac_exit(void) edac_pci_release_generic_ctl(amd64_ctl_pci); pci_unregister_driver(&amd64_pci_driver); + + msrs_free(msrs); + msrs = NULL; } module_init(amd64_edac_init); -- cgit v1.2.3 From 450b1e8dd10f41b5adad73f48ce8f6707d17c5c4 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 11 Dec 2009 08:08:50 -0800 Subject: x86: Remove enabling x2apic message for every CPU Print only once that the system is supporting x2apic mode. Signed-off-by: Mike Travis Acked-by: Cyrill Gorcunov LKML-Reference: <4B226E92.5080904@sgi.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index efb2b9cd132c..aa57c079c98f 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1341,7 +1341,7 @@ void enable_x2apic(void) rdmsr(MSR_IA32_APICBASE, msr, msr2); if (!(msr & X2APIC_ENABLE)) { - pr_info("Enabling x2apic\n"); + printk_once(KERN_INFO "Enabling x2apic\n"); wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); } } -- cgit v1.2.3 From 2eaad1fddd7450a48ad464229775f97fbfe8af36 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Thu, 10 Dec 2009 17:19:36 -0800 Subject: x86: Limit the number of processor bootup messages When there are a large number of processors in a system, there is an excessive amount of messages sent to the system console. It's estimated that with 4096 processors in a system, and the console baudrate set to 56K, the startup messages will take about 84 minutes to clear the serial port. This set of patches limits the number of repetitious messages which contain no additional information. Much of this information is obtainable from the /proc and /sysfs. Some of the messages are also sent to the kernel log buffer as KERN_DEBUG messages so dmesg can be used to examine more closely any details specific to a problem. The new cpu bootup sequence for system_state == SYSTEM_BOOTING: Booting Node 0, Processors #1 #2 #3 #4 #5 #6 #7 Ok. Booting Node 1, Processors #8 #9 #10 #11 #12 #13 #14 #15 Ok. ... Booting Node 3, Processors #56 #57 #58 #59 #60 #61 #62 #63 Ok. Brought up 64 CPUs After the system is running, a single line boot message is displayed when CPU's are hotplugged on: Booting Node %d Processor %d APIC 0x%x Status of the following lines: CPU: Physical Processor ID: printed once (for boot cpu) CPU: Processor Core ID: printed once (for boot cpu) CPU: Hyper-Threading is disabled printed once (for boot cpu) CPU: Thermal monitoring enabled printed once (for boot cpu) CPU %d/0x%x -> Node %d: removed CPU %d is now offline: only if system_state == RUNNING Initializing CPU#%d: KERN_DEBUG Signed-off-by: Mike Travis LKML-Reference: <4B219E28.8080601@sgi.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/addon_cpuid_features.c | 15 ++++++---- arch/x86/kernel/cpu/amd.c | 2 -- arch/x86/kernel/cpu/common.c | 8 ++++-- arch/x86/kernel/cpu/intel.c | 2 -- arch/x86/kernel/cpu/mcheck/therm_throt.c | 4 +-- arch/x86/kernel/smpboot.c | 45 ++++++++++++++++++++---------- 6 files changed, 47 insertions(+), 29 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index c965e5212714..468489b57aae 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -74,6 +74,7 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) unsigned int eax, ebx, ecx, edx, sub_index; unsigned int ht_mask_width, core_plus_mask_width; unsigned int core_select_mask, core_level_siblings; + static bool printed; if (c->cpuid_level < 0xb) return; @@ -127,12 +128,14 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) c->x86_max_cores = (core_level_siblings / smp_num_siblings); - - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - c->phys_proc_id); - if (c->x86_max_cores > 1) - printk(KERN_INFO "CPU: Processor Core ID: %d\n", - c->cpu_core_id); + if (!printed) { + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + c->phys_proc_id); + if (c->x86_max_cores > 1) + printk(KERN_INFO "CPU: Processor Core ID: %d\n", + c->cpu_core_id); + printed = 1; + } return; #endif } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 7128b3799cec..8dc3ea145c97 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -375,8 +375,6 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) node = nearby_node(apicid); } numa_set_node(cpu, node); - - printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node); #endif } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c1afa990a6c8..0ee9a3254eec 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -427,6 +427,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_HT u32 eax, ebx, ecx, edx; int index_msb, core_bits; + static bool printed; if (!cpu_has(c, X86_FEATURE_HT)) return; @@ -442,7 +443,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) smp_num_siblings = (ebx & 0xff0000) >> 16; if (smp_num_siblings == 1) { - printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); + printk_once(KERN_INFO "CPU0: Hyper-Threading is disabled\n"); goto out; } @@ -469,11 +470,12 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) ((1 << core_bits) - 1); out: - if ((c->x86_max_cores * smp_num_siblings) > 1) { + if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) { printk(KERN_INFO "CPU: Physical Processor ID: %d\n", c->phys_proc_id); printk(KERN_INFO "CPU: Processor Core ID: %d\n", c->cpu_core_id); + printed = 1; } #endif } @@ -1115,7 +1117,7 @@ void __cpuinit cpu_init(void) if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) panic("CPU#%d already initialized!\n", cpu); - printk(KERN_INFO "Initializing CPU#%d\n", cpu); + pr_debug("Initializing CPU#%d\n", cpu); clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index c900b73f9224..9c31e8b09d2c 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -270,8 +270,6 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) node = cpu_to_node(cpu); } numa_set_node(cpu, node); - - printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node); #endif } diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 4fef985fc221..1003ed4bbce4 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -339,8 +339,8 @@ void intel_init_thermal(struct cpuinfo_x86 *c) l = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); - printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", - cpu, tm2 ? "TM2" : "TM1"); + printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n", + tm2 ? "TM2" : "TM1"); /* enable thermal throttle processing */ atomic_set(&therm_throt_en, 1); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 29e6744f51e3..678d0b8c26f3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -671,6 +671,26 @@ static void __cpuinit do_fork_idle(struct work_struct *work) complete(&c_idle->done); } +/* reduce the number of lines printed when booting a large cpu count system */ +static void __cpuinit announce_cpu(int cpu, int apicid) +{ + static int current_node = -1; + int node = cpu_to_node(cpu); + + if (system_state == SYSTEM_BOOTING) { + if (node != current_node) { + if (current_node > (-1)) + pr_cont(" Ok.\n"); + current_node = node; + pr_info("Booting Node %3d, Processors ", node); + } + pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " Ok.\n" : ""); + return; + } else + pr_info("Booting Node %d Processor %d APIC 0x%x\n", + node, cpu, apicid); +} + /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad * (ie clustered apic addressing mode), this is a LOGICAL apic ID. @@ -737,9 +757,8 @@ do_rest: /* start_ip had better be page-aligned! */ start_ip = setup_trampoline(); - /* So we see what's up */ - printk(KERN_INFO "Booting processor %d APIC 0x%x ip 0x%lx\n", - cpu, apicid, start_ip); + /* So we see what's up */ + announce_cpu(cpu, apicid); /* * This grunge runs the startup process for @@ -788,21 +807,17 @@ do_rest: udelay(100); } - if (cpumask_test_cpu(cpu, cpu_callin_mask)) { - /* number CPUs logically, starting from 1 (BSP is 0) */ - pr_debug("OK.\n"); - printk(KERN_INFO "CPU%d: ", cpu); - print_cpu_info(&cpu_data(cpu)); - pr_debug("CPU has booted.\n"); - } else { + if (cpumask_test_cpu(cpu, cpu_callin_mask)) + pr_debug("CPU%d: has booted.\n", cpu); + else { boot_error = 1; if (*((volatile unsigned char *)trampoline_base) == 0xA5) /* trampoline started but...? */ - printk(KERN_ERR "Stuck ??\n"); + pr_err("CPU%d: Stuck ??\n", cpu); else /* trampoline code not run */ - printk(KERN_ERR "Not responding.\n"); + pr_err("CPU%d: Not responding.\n", cpu); if (apic->inquire_remote_apic) apic->inquire_remote_apic(apicid); } @@ -1293,14 +1308,16 @@ void native_cpu_die(unsigned int cpu) for (i = 0; i < 10; i++) { /* They ack this in play_dead by setting CPU_DEAD */ if (per_cpu(cpu_state, cpu) == CPU_DEAD) { - printk(KERN_INFO "CPU %d is now offline\n", cpu); + if (system_state == SYSTEM_RUNNING) + pr_info("CPU %u is now offline\n", cpu); + if (1 == num_online_cpus()) alternatives_smp_switch(0); return; } msleep(100); } - printk(KERN_ERR "CPU %u didn't die...\n", cpu); + pr_err("CPU %u didn't die...\n", cpu); } void play_dead_common(void) -- cgit v1.2.3 From 559df2e0210352f83926d178c40c51142292a18c Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sun, 19 Apr 2009 22:35:10 +0200 Subject: kbuild: move asm-offsets.h to include/generated The simplest method was to add an extra asm-offsets.h file in arch/$ARCH/include/asm that references the generated file. We can now migrate the architectures one-by-one to reference the generated file direct - and when done we can delete the temporary arch/$ARCH/include/asm/asm-offsets.h file. Signed-off-by: Sam Ravnborg Cc: Al Viro Signed-off-by: Michal Marek --- .gitignore | 1 - Kbuild | 2 +- Makefile | 1 - arch/alpha/include/asm/asm-offsets.h | 1 + arch/arm/include/asm/asm-offsets.h | 1 + arch/avr32/include/asm/asm-offsets.h | 1 + arch/blackfin/include/asm/asm-offsets.h | 1 + arch/cris/include/asm/asm-offsets.h | 1 + arch/frv/include/asm/asm-offsets.h | 1 + arch/h8300/include/asm/asm-offsets.h | 1 + arch/ia64/include/asm/asm-offsets.h | 1 + arch/m68k/include/asm/asm-offsets.h | 1 + arch/microblaze/include/asm/asm-offsets.h | 1 + arch/mips/include/asm/asm-offsets.h | 1 + arch/mn10300/include/asm/asm-offsets.h | 1 + arch/parisc/include/asm/asm-offsets.h | 1 + arch/powerpc/include/asm/asm-offsets.h | 1 + arch/s390/include/asm/asm-offsets.h | 1 + arch/sh/include/asm/asm-offsets.h | 1 + arch/sparc/include/asm/asm-offsets.h | 1 + arch/um/Makefile | 2 +- arch/um/include/asm/asm-offsets.h | 1 + arch/x86/include/asm/asm-offsets.h | 1 + arch/xtensa/include/asm/asm-offsets.h | 1 + 24 files changed, 22 insertions(+), 4 deletions(-) create mode 100644 arch/alpha/include/asm/asm-offsets.h create mode 100644 arch/arm/include/asm/asm-offsets.h create mode 100644 arch/avr32/include/asm/asm-offsets.h create mode 100644 arch/blackfin/include/asm/asm-offsets.h create mode 100644 arch/cris/include/asm/asm-offsets.h create mode 100644 arch/frv/include/asm/asm-offsets.h create mode 100644 arch/h8300/include/asm/asm-offsets.h create mode 100644 arch/ia64/include/asm/asm-offsets.h create mode 100644 arch/m68k/include/asm/asm-offsets.h create mode 100644 arch/microblaze/include/asm/asm-offsets.h create mode 100644 arch/mips/include/asm/asm-offsets.h create mode 100644 arch/mn10300/include/asm/asm-offsets.h create mode 100644 arch/parisc/include/asm/asm-offsets.h create mode 100644 arch/powerpc/include/asm/asm-offsets.h create mode 100644 arch/s390/include/asm/asm-offsets.h create mode 100644 arch/sh/include/asm/asm-offsets.h create mode 100644 arch/sparc/include/asm/asm-offsets.h create mode 100644 arch/um/include/asm/asm-offsets.h create mode 100644 arch/x86/include/asm/asm-offsets.h create mode 100644 arch/xtensa/include/asm/asm-offsets.h (limited to 'arch/x86') diff --git a/.gitignore b/.gitignore index 36d9cd6d4281..3582f422813b 100644 --- a/.gitignore +++ b/.gitignore @@ -46,7 +46,6 @@ Module.symvers # Generated include files # include/asm -include/asm-*/asm-offsets.h include/config include/linux/autoconf.h include/linux/compile.h diff --git a/Kbuild b/Kbuild index 1165d7a5ca4a..e3737ad72b5a 100644 --- a/Kbuild +++ b/Kbuild @@ -43,7 +43,7 @@ $(obj)/$(bounds-file): kernel/bounds.s Kbuild # 2) Generate asm-offsets.h # -offsets-file := include/asm/asm-offsets.h +offsets-file := include/generated/asm-offsets.h always += $(offsets-file) targets += $(offsets-file) diff --git a/Makefile b/Makefile index b58e9312ce30..eb43b9fa30b5 100644 --- a/Makefile +++ b/Makefile @@ -1197,7 +1197,6 @@ MRPROPER_DIRS += include/config include2 usr/include include/generated MRPROPER_FILES += .config .config.old include/asm .version .old_version \ include/linux/autoconf.h include/linux/version.h \ include/linux/utsrelease.h \ - include/asm*/asm-offsets.h \ Module.symvers Module.markers tags TAGS cscope* # clean - Delete most, but leave enough to build external modules diff --git a/arch/alpha/include/asm/asm-offsets.h b/arch/alpha/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/alpha/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/arm/include/asm/asm-offsets.h b/arch/arm/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/arm/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/avr32/include/asm/asm-offsets.h b/arch/avr32/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/avr32/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/blackfin/include/asm/asm-offsets.h b/arch/blackfin/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/blackfin/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/cris/include/asm/asm-offsets.h b/arch/cris/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/cris/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/frv/include/asm/asm-offsets.h b/arch/frv/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/frv/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/h8300/include/asm/asm-offsets.h b/arch/h8300/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/h8300/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/ia64/include/asm/asm-offsets.h b/arch/ia64/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/ia64/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/m68k/include/asm/asm-offsets.h b/arch/m68k/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/m68k/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/microblaze/include/asm/asm-offsets.h b/arch/microblaze/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/microblaze/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/mips/include/asm/asm-offsets.h b/arch/mips/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/mips/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/mn10300/include/asm/asm-offsets.h b/arch/mn10300/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/mn10300/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/parisc/include/asm/asm-offsets.h b/arch/parisc/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/parisc/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/powerpc/include/asm/asm-offsets.h b/arch/powerpc/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/powerpc/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/s390/include/asm/asm-offsets.h b/arch/s390/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/s390/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/sh/include/asm/asm-offsets.h b/arch/sh/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/sh/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/sparc/include/asm/asm-offsets.h b/arch/sparc/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/sparc/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/um/Makefile b/arch/um/Makefile index fc633dbacf84..fab8121d2b32 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -149,6 +149,6 @@ $(SHARED_HEADERS)/user_constants.h: $(ARCH_DIR)/sys-$(SUBARCH)/user-offsets.s $(SHARED_HEADERS)/kern_constants.h: $(Q)mkdir -p $(dir $@) - $(Q)echo '#include "../../../../include/asm/asm-offsets.h"' >$@ + $(Q)echo '#include "../../../../include/generated/asm-offsets.h"' >$@ export SUBARCH USER_CFLAGS CFLAGS_NO_HARDENING OS HEADER_ARCH DEV_NULL_PATH diff --git a/arch/um/include/asm/asm-offsets.h b/arch/um/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/um/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/x86/include/asm/asm-offsets.h b/arch/x86/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/x86/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/xtensa/include/asm/asm-offsets.h b/arch/xtensa/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/xtensa/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include -- cgit v1.2.3 From 92045954058671fdd0ccf031ca06611ce1d929d1 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sun, 18 Oct 2009 00:36:47 +0200 Subject: kbuild: move compile.h to include/generated Signed-off-by: Sam Ravnborg Signed-off-by: Michal Marek --- .gitignore | 1 - arch/x86/boot/version.c | 2 +- init/Makefile | 8 ++------ init/version.c | 2 +- 4 files changed, 4 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/.gitignore b/.gitignore index e074c1cc5794..c06b4c9aeb72 100644 --- a/.gitignore +++ b/.gitignore @@ -47,7 +47,6 @@ Module.symvers # include/config include/linux/autoconf.h -include/linux/compile.h include/linux/version.h include/linux/utsrelease.h include/generated diff --git a/arch/x86/boot/version.c b/arch/x86/boot/version.c index 2723d9b5ce43..4d88763e39cb 100644 --- a/arch/x86/boot/version.c +++ b/arch/x86/boot/version.c @@ -14,7 +14,7 @@ #include "boot.h" #include -#include +#include const char kernel_version[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") " diff --git a/init/Makefile b/init/Makefile index 4a243df426f7..0bf677aa0872 100644 --- a/init/Makefile +++ b/init/Makefile @@ -15,12 +15,8 @@ mounts-$(CONFIG_BLK_DEV_RAM) += do_mounts_rd.o mounts-$(CONFIG_BLK_DEV_INITRD) += do_mounts_initrd.o mounts-$(CONFIG_BLK_DEV_MD) += do_mounts_md.o -# files to be removed upon make clean -clean-files := ../include/linux/compile.h - # dependencies on generated files need to be listed explicitly - -$(obj)/version.o: include/linux/compile.h +$(obj)/version.o: include/generated/compile.h # compile.h changes depending on hostname, generation number, etc, # so we regenerate it always. @@ -30,7 +26,7 @@ $(obj)/version.o: include/linux/compile.h chk_compile.h = : quiet_chk_compile.h = echo ' CHK $@' silent_chk_compile.h = : -include/linux/compile.h: FORCE +include/generated/compile.h: FORCE @$($(quiet)chk_compile.h) $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \ "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)" diff --git a/init/version.c b/init/version.c index 52a8b98642b8..82328aaca1ef 100644 --- a/init/version.c +++ b/init/version.c @@ -6,7 +6,7 @@ * May be freely distributed as part of Linux. */ -#include +#include #include #include #include -- cgit v1.2.3 From 273b281fa22c293963ee3e6eec418f5dda2dbc83 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sun, 18 Oct 2009 00:52:28 +0200 Subject: kbuild: move utsrelease.h to include/generated Fix up all users of utsrelease.h Signed-off-by: Sam Ravnborg Signed-off-by: Michal Marek --- .gitignore | 1 - Makefile | 5 ++--- arch/alpha/boot/bootp.c | 2 +- arch/alpha/boot/bootpz.c | 2 +- arch/alpha/boot/main.c | 2 +- arch/frv/kernel/setup.c | 2 +- arch/powerpc/platforms/52xx/efika.c | 2 +- arch/powerpc/platforms/amigaone/setup.c | 2 +- arch/powerpc/platforms/chrp/setup.c | 2 +- arch/powerpc/platforms/powermac/bootx_init.c | 2 +- arch/x86/boot/header.S | 2 +- arch/x86/boot/version.c | 2 +- drivers/staging/panel/panel.c | 2 +- include/linux/vermagic.h | 2 +- init/version.c | 2 +- kernel/kexec.c | 2 +- kernel/trace/trace.c | 2 +- 17 files changed, 17 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/.gitignore b/.gitignore index c6c19ea6ea96..002d5304968b 100644 --- a/.gitignore +++ b/.gitignore @@ -47,7 +47,6 @@ Module.symvers # include/config include/linux/version.h -include/linux/utsrelease.h include/generated # stgit generated dirs diff --git a/Makefile b/Makefile index 3bdd932e3d88..860224d7cbcf 100644 --- a/Makefile +++ b/Makefile @@ -968,7 +968,7 @@ endif # prepare2 creates a makefile if using a separate output directory prepare2: prepare3 outputmakefile -prepare1: prepare2 include/linux/version.h include/linux/utsrelease.h \ +prepare1: prepare2 include/linux/version.h include/generated/utsrelease.h \ include/config/auto.conf $(cmd_crmodverdir) @@ -1005,7 +1005,7 @@ endef include/linux/version.h: $(srctree)/Makefile FORCE $(call filechk,version.h) -include/linux/utsrelease.h: include/config/kernel.release FORCE +include/generated/utsrelease.h: include/config/kernel.release FORCE $(call filechk,utsrelease.h) PHONY += headerdep @@ -1151,7 +1151,6 @@ CLEAN_FILES += vmlinux System.map \ MRPROPER_DIRS += include/config usr/include include/generated MRPROPER_FILES += .config .config.old .version .old_version \ include/linux/version.h \ - include/linux/utsrelease.h \ Module.symvers Module.markers tags TAGS cscope* # clean - Delete most, but leave enough to build external modules diff --git a/arch/alpha/boot/bootp.c b/arch/alpha/boot/bootp.c index 3af21c789339..3c8d1b25c661 100644 --- a/arch/alpha/boot/bootp.c +++ b/arch/alpha/boot/bootp.c @@ -9,7 +9,7 @@ */ #include #include -#include +#include #include #include diff --git a/arch/alpha/boot/bootpz.c b/arch/alpha/boot/bootpz.c index 1036b515e20c..ade3f129dc27 100644 --- a/arch/alpha/boot/bootpz.c +++ b/arch/alpha/boot/bootpz.c @@ -11,7 +11,7 @@ */ #include #include -#include +#include #include #include diff --git a/arch/alpha/boot/main.c b/arch/alpha/boot/main.c index 89f3be071ae5..644b7db55438 100644 --- a/arch/alpha/boot/main.c +++ b/arch/alpha/boot/main.c @@ -7,7 +7,7 @@ */ #include #include -#include +#include #include #include diff --git a/arch/frv/kernel/setup.c b/arch/frv/kernel/setup.c index 55e4fab7c0bc..75cf7f4b2fa8 100644 --- a/arch/frv/kernel/setup.c +++ b/arch/frv/kernel/setup.c @@ -10,7 +10,7 @@ * 2 of the License, or (at your option) any later version. */ -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c index bcc69e1f77c1..45c0cb9b67e6 100644 --- a/arch/powerpc/platforms/52xx/efika.c +++ b/arch/powerpc/platforms/52xx/efika.c @@ -10,7 +10,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c index 9290a7a442d0..fb4eb0df054c 100644 --- a/arch/powerpc/platforms/amigaone/setup.c +++ b/arch/powerpc/platforms/amigaone/setup.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index cd4ad9aea760..0a6f5ab8aab3 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c index cf660916ae0b..9dd789a7370d 100644 --- a/arch/powerpc/platforms/powermac/bootx_init.c +++ b/arch/powerpc/platforms/powermac/bootx_init.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index b31cc54b4641..93e689f4bd86 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -16,7 +16,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/x86/boot/version.c b/arch/x86/boot/version.c index 4d88763e39cb..2b15aa488ffb 100644 --- a/arch/x86/boot/version.c +++ b/arch/x86/boot/version.c @@ -13,7 +13,7 @@ */ #include "boot.h" -#include +#include #include const char kernel_version[] = diff --git a/drivers/staging/panel/panel.c b/drivers/staging/panel/panel.c index 4ce399b6d237..f98a52448eae 100644 --- a/drivers/staging/panel/panel.c +++ b/drivers/staging/panel/panel.c @@ -55,7 +55,7 @@ #include #include #include -#include +#include #include #include diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h index 79b9837d9ca0..cf97b5b9d1fe 100644 --- a/include/linux/vermagic.h +++ b/include/linux/vermagic.h @@ -1,4 +1,4 @@ -#include +#include #include /* Simply sanity version stamp for modules. */ diff --git a/init/version.c b/init/version.c index 82328aaca1ef..adff586401a5 100644 --- a/init/version.c +++ b/init/version.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #ifndef CONFIG_KALLSYMS diff --git a/kernel/kexec.c b/kernel/kexec.c index f336e2107f98..83f54e2a6eed 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 88bd9ae2a9ed..bfb1b64bfa9d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -12,7 +12,7 @@ * Copyright (C) 2004 William Lee Irwin III */ #include -#include +#include #include #include #include -- cgit v1.2.3 From 1d865fb728bd6bbcdfbd6ec1e2b8ade3b4805641 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Fri, 11 Dec 2009 11:36:18 -0600 Subject: x86: Fix duplicated UV BAU interrupt vector Interrupt vector 0xec has been doubly defined in irq_vectors.h It seems arbitrary whether LOCAL_PENDING_VECTOR or UV_BAU_MESSAGE is the higher number. As long as they are unique. If they are not unique we'll hit a BUG in alloc_system_vector(). Signed-off-by: Cliff Wickman Cc: LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 6a635bd39867..4611f085cd43 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -113,7 +113,7 @@ */ #define LOCAL_PENDING_VECTOR 0xec -#define UV_BAU_MESSAGE 0xec +#define UV_BAU_MESSAGE 0xea /* * Self IPI vector for machine checks -- cgit v1.2.3 From eba11d6da7a983cedb0acf32a38e4d0daa8b5d0e Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 13 Dec 2009 23:24:03 -0800 Subject: x86: Fix build warning in arch/x86/mm/mmio-mod.c Stephen Rothwell reported these warnings: arch/x86/mm/mmio-mod.c: In function 'print_pte': arch/x86/mm/mmio-mod.c:100: warning: too many arguments for format arch/x86/mm/mmio-mod.c:106: warning: too many arguments for format The 'fmt' was left out accidentally. Reported-by: Stephen Rothwell Signed-off-by: Joe Perches Cc: Peter Zijlstra Cc: Linus LKML-Reference: <1260775443.18538.16.camel@Joe-Laptop.home> Signed-off-by: Ingo Molnar --- arch/x86/mm/mmio-mod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 4c765e9c4664..34a3291ca103 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -20,7 +20,7 @@ * Derived from the read-mod example from relay-examples by Tom Zanussi. */ -#define pr_fmt(fmt) "mmiotrace: " +#define pr_fmt(fmt) "mmiotrace: " fmt #define DEBUG 1 -- cgit v1.2.3 From f4780ca005404166cc40af77ef0e86132ab98a81 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 14 Dec 2009 11:52:14 +0900 Subject: x86: Move swiotlb initialization before dma32_free_bootmem The commit 75f1cdf1dda92cae037ec848ae63690d91913eac introduced a bug that we initialize SWIOTLB right after dma32_free_bootmem so we wrongly steal memory area allocated for GART with broken BIOS earlier. This moves swiotlb initialization before dma32_free_bootmem(). Signed-off-by: FUJITA Tomonori Cc: yinghai@kernel.org LKML-Reference: <1260759135-6450-2-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index afcc58b69c7c..fcc2f2bfa39c 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -120,11 +120,14 @@ static void __init dma32_free_bootmem(void) void __init pci_iommu_alloc(void) { + int use_swiotlb; + + use_swiotlb = pci_swiotlb_init(); #ifdef CONFIG_X86_64 /* free the range so iommu could get some range less than 4G */ dma32_free_bootmem(); #endif - if (pci_swiotlb_init()) + if (use_swiotlb) return; gart_iommu_hole_init(); -- cgit v1.2.3 From f3eee54276dfd1117fd94259f2b4a38388264724 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 14 Dec 2009 11:52:15 +0900 Subject: x86: Gart: fix breakage due to IOMMU initialization cleanup This fixes the following breakage of the commit 75f1cdf1dda92cae037ec848ae63690d91913eac: - GART systems that don't AGP with broken BIOS and more than 4GB memory are forced to use swiotlb. They can allocate aperture by hand and use GART. - GART systems without GAP must disable GART on shutdown. - swiotlb usage is forced by the boot option, gart_iommu_hole_init() is not called, so we disable GART early_gart_iommu_check(). Signed-off-by: Yinghai Lu Signed-off-by: FUJITA Tomonori LKML-Reference: <1260759135-6450-3-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/kernel/aperture_64.c | 11 ++++++----- arch/x86/kernel/pci-gart_64.c | 3 ++- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index e0dfb6856aa2..3704997e8b25 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -280,7 +280,8 @@ void __init early_gart_iommu_check(void) * or BIOS forget to put that in reserved. * try to update e820 to make that region as reserved. */ - int i, fix, slot; + u32 agp_aper_base = 0, agp_aper_order = 0; + int i, fix, slot, valid_agp = 0; u32 ctl; u32 aper_size = 0, aper_order = 0, last_aper_order = 0; u64 aper_base = 0, last_aper_base = 0; @@ -290,6 +291,8 @@ void __init early_gart_iommu_check(void) return; /* This is mostly duplicate of iommu_hole_init */ + agp_aper_base = search_agp_bridge(&agp_aper_order, &valid_agp); + fix = 0; for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { int bus; @@ -342,10 +345,10 @@ void __init early_gart_iommu_check(void) } } - if (!fix) + if (valid_agp) return; - /* different nodes have different setting, disable them all at first*/ + /* disable them all at first */ for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { int bus; int dev_base, dev_limit; @@ -458,8 +461,6 @@ out: if (aper_alloc) { /* Got the aperture from the AGP bridge */ - } else if (!valid_agp) { - /* Do nothing */ } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) || force_iommu || valid_agp || diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index e6a0d402f171..56c0e730d3fe 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -710,7 +710,8 @@ static void gart_iommu_shutdown(void) struct pci_dev *dev; int i; - if (no_agp) + /* don't shutdown it if there is AGP installed */ + if (!no_agp) return; for (i = 0; i < num_k8_northbridges; i++) { -- cgit v1.2.3 From 485a2e1973fd9f98c2c6776e66ac4721882b69e0 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 14 Dec 2009 17:56:34 +0900 Subject: x86, mce: Thermal monitoring depends on APIC being enabled Add check if APIC is not disabled since thermal monitoring depends on it. As only apic gets disabled we should not try to install "thermal monitor" vector, print out that thermal monitoring is enabled and etc... Note that "Intel Correct Machine Check Interrupts" already has such a check. Also I decided to not add cpu_has_apic check into mcheck_intel_therm_init since even if it'll call apic_read on disabled apic -- it's safe here and allow us to save a few code bytes. Reported-by: Thomas Gleixner Signed-off-by: Cyrill Gorcunov Signed-off-by: Hidetoshi Seto LKML-Reference: <4B25FDC2.3020401@jp.fujitsu.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/therm_throt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 1003ed4bbce4..0a9b57702be4 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -274,8 +274,9 @@ void intel_init_thermal(struct cpuinfo_x86 *c) int tm2 = 0; u32 l, h; - /* Thermal monitoring depends on ACPI and clock modulation*/ - if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) + /* Thermal monitoring depends on APIC, ACPI and clock modulation */ + if (!cpu_has_apic || !cpu_has(c, X86_FEATURE_ACPI) || + !cpu_has(c, X86_FEATURE_ACC)) return; /* -- cgit v1.2.3 From 70fe440718d9f42bf963c2cffe12008eb5556165 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 14 Dec 2009 17:57:00 +0900 Subject: x86, mce: Clean up thermal init by introducing intel_thermal_supported() It looks better to have a common function. No change in functionality. Signed-off-by: Hidetoshi Seto Cc: Cyrill Gorcunov LKML-Reference: <4B25FDDC.407@jp.fujitsu.com> Signed-off-by: Ingo Molnar Cc: Cyrill Gorcunov --- arch/x86/kernel/cpu/mcheck/therm_throt.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 0a9b57702be4..81c499eceb21 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -256,6 +256,16 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) ack_APIC_irq(); } +/* Thermal monitoring depends on APIC, ACPI and clock modulation */ +static int intel_thermal_supported(struct cpuinfo_x86 *c) +{ + if (!cpu_has_apic) + return 0; + if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) + return 0; + return 1; +} + void __init mcheck_intel_therm_init(void) { /* @@ -263,8 +273,7 @@ void __init mcheck_intel_therm_init(void) * LVT value on BSP and use that value to restore APs' thermal LVT * entry BIOS programmed later */ - if (cpu_has(&boot_cpu_data, X86_FEATURE_ACPI) && - cpu_has(&boot_cpu_data, X86_FEATURE_ACC)) + if (intel_thermal_supported(&boot_cpu_data)) lvtthmr_init = apic_read(APIC_LVTTHMR); } @@ -274,9 +283,7 @@ void intel_init_thermal(struct cpuinfo_x86 *c) int tm2 = 0; u32 l, h; - /* Thermal monitoring depends on APIC, ACPI and clock modulation */ - if (!cpu_has_apic || !cpu_has(c, X86_FEATURE_ACPI) || - !cpu_has(c, X86_FEATURE_ACC)) + if (!intel_thermal_supported(c)) return; /* -- cgit v1.2.3 From 494c2ebfb287eb10b229415063099e3700639028 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 14 Dec 2009 10:02:18 -0800 Subject: x86, msr: Remove incorrect, duplicated code in the MSR driver The MSR driver would compute the values for cpu and c at declaration, and then again in the body of the function. This isn't merely redundant, but unsafe, since cpu might not refer to a valid CPU at that point. Remove the unnecessary and dangerous references in the declarations. This code now matches the equivalent code in the CPUID driver. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/msr.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 553449951b84..572b07eee3f4 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -172,11 +172,10 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg) static int msr_open(struct inode *inode, struct file *file) { - unsigned int cpu = iminor(file->f_path.dentry->d_inode); - struct cpuinfo_x86 *c = &cpu_data(cpu); + unsigned int cpu; + struct cpuinfo_x86 *c; cpu = iminor(file->f_path.dentry->d_inode); - if (cpu >= nr_cpu_ids || !cpu_online(cpu)) return -ENXIO; /* No such CPU */ -- cgit v1.2.3 From 873b5271f878a11729fb4602c6ce967d0ff81119 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 14 Dec 2009 13:55:20 -0800 Subject: x86: Regex support and known-movable symbols for relocs, fix _end This adds a new category of symbols to the relocs program: symbols which are known to be relative, even though the linker emits them as absolute; this is the case for symbols that live in the linker script, which currently applies to _end. Unfortunately the previous workaround of putting _end in its own empty section was defeated by newer binutils, which remove empty sections completely. This patch also changes the symbol matching to use regular expressions instead of hardcoded C for specific patterns. This is a decidedly non-minimal patch: a modified version of the relocs program is used as part of the Syslinux build, and this is basically a backport to Linux of some of those changes; they have thus been well tested. Signed-off-by: H. Peter Anvin LKML-Reference: <4AF86211.3070103@zytor.com> Acked-by: Michal Marek Tested-by: Sedat Dilek --- arch/x86/boot/compressed/relocs.c | 87 ++++++++++++++++++++++++++------------- arch/x86/kernel/vmlinux.lds.S | 4 +- 2 files changed, 60 insertions(+), 31 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c index bbeb0c3fbd90..89bbf4e4d05d 100644 --- a/arch/x86/boot/compressed/relocs.c +++ b/arch/x86/boot/compressed/relocs.c @@ -9,6 +9,9 @@ #include #define USE_BSD #include +#include + +static void die(char *fmt, ...); #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) static Elf32_Ehdr ehdr; @@ -30,25 +33,47 @@ static struct section *secs; * the address for which it has been compiled. Don't warn user about * absolute relocations present w.r.t these symbols. */ -static const char* safe_abs_relocs[] = { - "xen_irq_disable_direct_reloc", - "xen_save_fl_direct_reloc", -}; +static const char abs_sym_regex[] = + "^(xen_irq_disable_direct_reloc$|" + "xen_save_fl_direct_reloc$|" + "VDSO|" + "__crc_)"; +static regex_t abs_sym_regex_c; +static int is_abs_reloc(const char *sym_name) +{ + return !regexec(&abs_sym_regex_c, sym_name, 0, NULL, 0); +} -static int is_safe_abs_reloc(const char* sym_name) +/* + * These symbols are known to be relative, even if the linker marks them + * as absolute (typically defined outside any section in the linker script.) + */ +static const char rel_sym_regex[] = + "^_end$"; +static regex_t rel_sym_regex_c; +static int is_rel_reloc(const char *sym_name) { - int i; + return !regexec(&rel_sym_regex_c, sym_name, 0, NULL, 0); +} - for (i = 0; i < ARRAY_SIZE(safe_abs_relocs); i++) { - if (!strcmp(sym_name, safe_abs_relocs[i])) - /* Match found */ - return 1; - } - if (strncmp(sym_name, "VDSO", 4) == 0) - return 1; - if (strncmp(sym_name, "__crc_", 6) == 0) - return 1; - return 0; +static void regex_init(void) +{ + char errbuf[128]; + int err; + + err = regcomp(&abs_sym_regex_c, abs_sym_regex, + REG_EXTENDED|REG_NOSUB); + if (err) { + regerror(err, &abs_sym_regex_c, errbuf, sizeof errbuf); + die("%s", errbuf); + } + + err = regcomp(&rel_sym_regex_c, rel_sym_regex, + REG_EXTENDED|REG_NOSUB); + if (err) { + regerror(err, &rel_sym_regex_c, errbuf, sizeof errbuf); + die("%s", errbuf); + } } static void die(char *fmt, ...) @@ -131,7 +156,7 @@ static const char *rel_type(unsigned type) #undef REL_TYPE }; const char *name = "unknown type rel type name"; - if (type < ARRAY_SIZE(type_name)) { + if (type < ARRAY_SIZE(type_name) && type_name[type]) { name = type_name[type]; } return name; @@ -448,7 +473,7 @@ static void print_absolute_relocs(void) * Before warning check if this absolute symbol * relocation is harmless. */ - if (is_safe_abs_reloc(name)) + if (is_abs_reloc(name) || is_rel_reloc(name)) continue; if (!printed) { @@ -501,21 +526,26 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym)) sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; r_type = ELF32_R_TYPE(rel->r_info); /* Don't visit relocations to absolute symbols */ - if (sym->st_shndx == SHN_ABS) { + if (sym->st_shndx == SHN_ABS && + !is_rel_reloc(sym_name(sym_strtab, sym))) { continue; } - if (r_type == R_386_NONE || r_type == R_386_PC32) { + switch (r_type) { + case R_386_NONE: + case R_386_PC32: /* * NONE can be ignored and and PC relative * relocations don't need to be adjusted. */ - } - else if (r_type == R_386_32) { + break; + case R_386_32: /* Visit relocations that need to be adjusted */ visit(rel, sym); - } - else { - die("Unsupported relocation type: %d\n", r_type); + break; + default: + die("Unsupported relocation type: %s (%d)\n", + rel_type(r_type), r_type); + break; } } } @@ -571,16 +601,15 @@ static void emit_relocs(int as_text) } else { unsigned char buf[4]; - buf[0] = buf[1] = buf[2] = buf[3] = 0; /* Print a stop */ - printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]); + fwrite("\0\0\0\0", 4, 1, stdout); /* Now print each relocation */ for (i = 0; i < reloc_count; i++) { buf[0] = (relocs[i] >> 0) & 0xff; buf[1] = (relocs[i] >> 8) & 0xff; buf[2] = (relocs[i] >> 16) & 0xff; buf[3] = (relocs[i] >> 24) & 0xff; - printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]); + fwrite(buf, 4, 1, stdout); } } } @@ -598,6 +627,8 @@ int main(int argc, char **argv) FILE *fp; int i; + regex_init(); + show_absolute_syms = 0; show_absolute_relocs = 0; as_text = 0; diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index f3f2104408d9..f92a0da608cb 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -319,9 +319,7 @@ SECTIONS __brk_limit = .; } - .end : AT(ADDR(.end) - LOAD_OFFSET) { - _end = .; - } + _end = .; STABS_DEBUG DWARF_DEBUG -- cgit v1.2.3 From 445c89514be242b1b0080056d50bdc1b72adeb5c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 2 Dec 2009 19:49:50 +0100 Subject: locking: Convert raw_spinlock to arch_spinlock The raw_spin* namespace was taken by lockdep for the architecture specific implementations. raw_spin_* would be the ideal name space for the spinlocks which are not converted to sleeping locks in preempt-rt. Linus suggested to convert the raw_ to arch_ locks and cleanup the name space instead of using an artifical name like core_spin, atomic_spin or whatever No functional change. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Acked-by: David S. Miller Acked-by: Ingo Molnar Cc: linux-arch@vger.kernel.org --- arch/alpha/include/asm/spinlock.h | 6 +++--- arch/alpha/include/asm/spinlock_types.h | 2 +- arch/arm/include/asm/spinlock.h | 6 +++--- arch/arm/include/asm/spinlock_types.h | 2 +- arch/blackfin/include/asm/spinlock.h | 10 +++++----- arch/blackfin/include/asm/spinlock_types.h | 2 +- arch/cris/include/arch-v32/arch/spinlock.h | 12 ++++++------ arch/ia64/include/asm/spinlock.h | 26 +++++++++++++------------- arch/ia64/include/asm/spinlock_types.h | 2 +- arch/m32r/include/asm/spinlock.h | 6 +++--- arch/m32r/include/asm/spinlock_types.h | 2 +- arch/mips/include/asm/spinlock.h | 10 +++++----- arch/mips/include/asm/spinlock_types.h | 2 +- arch/parisc/include/asm/atomic.h | 6 +++--- arch/parisc/include/asm/spinlock.h | 8 ++++---- arch/parisc/include/asm/spinlock_types.h | 4 ++-- arch/parisc/lib/bitops.c | 2 +- arch/powerpc/include/asm/rtas.h | 2 +- arch/powerpc/include/asm/spinlock.h | 14 +++++++------- arch/powerpc/include/asm/spinlock_types.h | 2 +- arch/powerpc/kernel/rtas.c | 2 +- arch/powerpc/lib/locks.c | 4 ++-- arch/powerpc/platforms/pasemi/setup.c | 2 +- arch/s390/include/asm/spinlock.h | 16 ++++++++-------- arch/s390/include/asm/spinlock_types.h | 2 +- arch/s390/lib/spinlock.c | 8 ++++---- arch/sh/include/asm/spinlock.h | 6 +++--- arch/sh/include/asm/spinlock_types.h | 2 +- arch/sparc/include/asm/spinlock_32.h | 6 +++--- arch/sparc/include/asm/spinlock_64.h | 8 ++++---- arch/sparc/include/asm/spinlock_types.h | 2 +- arch/x86/include/asm/paravirt.h | 12 ++++++------ arch/x86/include/asm/paravirt_types.h | 14 +++++++------- arch/x86/include/asm/spinlock.h | 30 +++++++++++++++--------------- arch/x86/include/asm/spinlock_types.h | 4 ++-- arch/x86/kernel/dumpstack.c | 2 +- arch/x86/kernel/paravirt-spinlocks.c | 2 +- arch/x86/kernel/tsc_sync.c | 2 +- arch/x86/xen/spinlock.c | 16 ++++++++-------- include/asm-generic/bitops/atomic.h | 6 +++--- include/linux/spinlock.h | 4 ++-- include/linux/spinlock_types.h | 2 +- include/linux/spinlock_types_up.h | 4 ++-- include/linux/spinlock_up.h | 8 ++++---- kernel/lockdep.c | 2 +- kernel/trace/ring_buffer.c | 4 ++-- kernel/trace/trace.c | 18 +++++++++--------- kernel/trace/trace_clock.c | 4 ++-- kernel/trace/trace_sched_wakeup.c | 4 ++-- kernel/trace/trace_stack.c | 4 ++-- lib/spinlock_debug.c | 2 +- 51 files changed, 164 insertions(+), 164 deletions(-) (limited to 'arch/x86') diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h index e38fb95cb335..bdb26a1940b4 100644 --- a/arch/alpha/include/asm/spinlock.h +++ b/arch/alpha/include/asm/spinlock.h @@ -17,13 +17,13 @@ #define __raw_spin_unlock_wait(x) \ do { cpu_relax(); } while ((x)->lock) -static inline void __raw_spin_unlock(raw_spinlock_t * lock) +static inline void __raw_spin_unlock(arch_spinlock_t * lock) { mb(); lock->lock = 0; } -static inline void __raw_spin_lock(raw_spinlock_t * lock) +static inline void __raw_spin_lock(arch_spinlock_t * lock) { long tmp; @@ -43,7 +43,7 @@ static inline void __raw_spin_lock(raw_spinlock_t * lock) : "m"(lock->lock) : "memory"); } -static inline int __raw_spin_trylock(raw_spinlock_t *lock) +static inline int __raw_spin_trylock(arch_spinlock_t *lock) { return !test_and_set_bit(0, &lock->lock); } diff --git a/arch/alpha/include/asm/spinlock_types.h b/arch/alpha/include/asm/spinlock_types.h index 8141eb5ebf0d..bb94a51e53d2 100644 --- a/arch/alpha/include/asm/spinlock_types.h +++ b/arch/alpha/include/asm/spinlock_types.h @@ -7,7 +7,7 @@ typedef struct { volatile unsigned int lock; -} raw_spinlock_t; +} arch_spinlock_t; #define __RAW_SPIN_LOCK_UNLOCKED { 0 } diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index c13681ac1ede..4e7712ee9394 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -23,7 +23,7 @@ #define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) -static inline void __raw_spin_lock(raw_spinlock_t *lock) +static inline void __raw_spin_lock(arch_spinlock_t *lock) { unsigned long tmp; @@ -43,7 +43,7 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock) smp_mb(); } -static inline int __raw_spin_trylock(raw_spinlock_t *lock) +static inline int __raw_spin_trylock(arch_spinlock_t *lock) { unsigned long tmp; @@ -63,7 +63,7 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock) } } -static inline void __raw_spin_unlock(raw_spinlock_t *lock) +static inline void __raw_spin_unlock(arch_spinlock_t *lock) { smp_mb(); diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h index 43e83f6d2ee5..5e9d3eadd167 100644 --- a/arch/arm/include/asm/spinlock_types.h +++ b/arch/arm/include/asm/spinlock_types.h @@ -7,7 +7,7 @@ typedef struct { volatile unsigned int lock; -} raw_spinlock_t; +} arch_spinlock_t; #define __RAW_SPIN_LOCK_UNLOCKED { 0 } diff --git a/arch/blackfin/include/asm/spinlock.h b/arch/blackfin/include/asm/spinlock.h index b0c7f0ee4b03..fc16b4c5309b 100644 --- a/arch/blackfin/include/asm/spinlock.h +++ b/arch/blackfin/include/asm/spinlock.h @@ -24,29 +24,29 @@ asmlinkage void __raw_write_lock_asm(volatile int *ptr); asmlinkage int __raw_write_trylock_asm(volatile int *ptr); asmlinkage void __raw_write_unlock_asm(volatile int *ptr); -static inline int __raw_spin_is_locked(raw_spinlock_t *lock) +static inline int __raw_spin_is_locked(arch_spinlock_t *lock) { return __raw_spin_is_locked_asm(&lock->lock); } -static inline void __raw_spin_lock(raw_spinlock_t *lock) +static inline void __raw_spin_lock(arch_spinlock_t *lock) { __raw_spin_lock_asm(&lock->lock); } #define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) -static inline int __raw_spin_trylock(raw_spinlock_t *lock) +static inline int __raw_spin_trylock(arch_spinlock_t *lock) { return __raw_spin_trylock_asm(&lock->lock); } -static inline void __raw_spin_unlock(raw_spinlock_t *lock) +static inline void __raw_spin_unlock(arch_spinlock_t *lock) { __raw_spin_unlock_asm(&lock->lock); } -static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) +static inline void __raw_spin_unlock_wait(arch_spinlock_t *lock) { while (__raw_spin_is_locked(lock)) cpu_relax(); diff --git a/arch/blackfin/include/asm/spinlock_types.h b/arch/blackfin/include/asm/spinlock_types.h index be75762c0610..03b377abf5c0 100644 --- a/arch/blackfin/include/asm/spinlock_types.h +++ b/arch/blackfin/include/asm/spinlock_types.h @@ -15,7 +15,7 @@ typedef struct { volatile unsigned int lock; -} raw_spinlock_t; +} arch_spinlock_t; #define __RAW_SPIN_LOCK_UNLOCKED { 0 } diff --git a/arch/cris/include/arch-v32/arch/spinlock.h b/arch/cris/include/arch-v32/arch/spinlock.h index 367a53ea10c5..e253457765f2 100644 --- a/arch/cris/include/arch-v32/arch/spinlock.h +++ b/arch/cris/include/arch-v32/arch/spinlock.h @@ -9,12 +9,12 @@ extern void cris_spin_unlock(void *l, int val); extern void cris_spin_lock(void *l); extern int cris_spin_trylock(void *l); -static inline int __raw_spin_is_locked(raw_spinlock_t *x) +static inline int __raw_spin_is_locked(arch_spinlock_t *x) { return *(volatile signed char *)(&(x)->slock) <= 0; } -static inline void __raw_spin_unlock(raw_spinlock_t *lock) +static inline void __raw_spin_unlock(arch_spinlock_t *lock) { __asm__ volatile ("move.d %1,%0" \ : "=m" (lock->slock) \ @@ -22,24 +22,24 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock) : "memory"); } -static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) +static inline void __raw_spin_unlock_wait(arch_spinlock_t *lock) { while (__raw_spin_is_locked(lock)) cpu_relax(); } -static inline int __raw_spin_trylock(raw_spinlock_t *lock) +static inline int __raw_spin_trylock(arch_spinlock_t *lock) { return cris_spin_trylock((void *)&lock->slock); } -static inline void __raw_spin_lock(raw_spinlock_t *lock) +static inline void __raw_spin_lock(arch_spinlock_t *lock) { cris_spin_lock((void *)&lock->slock); } static inline void -__raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) +__raw_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) { __raw_spin_lock(lock); } diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h index 239ecdc9516d..9fbdf7e61087 100644 --- a/arch/ia64/include/asm/spinlock.h +++ b/arch/ia64/include/asm/spinlock.h @@ -38,7 +38,7 @@ #define TICKET_BITS 15 #define TICKET_MASK ((1 << TICKET_BITS) - 1) -static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) +static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) { int *p = (int *)&lock->lock, ticket, serve; @@ -58,7 +58,7 @@ static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) } } -static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) +static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) { int tmp = ACCESS_ONCE(lock->lock); @@ -67,7 +67,7 @@ static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) return 0; } -static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock) +static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) { unsigned short *p = (unsigned short *)&lock->lock + 1, tmp; @@ -75,7 +75,7 @@ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock) ACCESS_ONCE(*p) = (tmp + 2) & ~1; } -static __always_inline void __ticket_spin_unlock_wait(raw_spinlock_t *lock) +static __always_inline void __ticket_spin_unlock_wait(arch_spinlock_t *lock) { int *p = (int *)&lock->lock, ticket; @@ -89,53 +89,53 @@ static __always_inline void __ticket_spin_unlock_wait(raw_spinlock_t *lock) } } -static inline int __ticket_spin_is_locked(raw_spinlock_t *lock) +static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) { long tmp = ACCESS_ONCE(lock->lock); return !!(((tmp >> TICKET_SHIFT) ^ tmp) & TICKET_MASK); } -static inline int __ticket_spin_is_contended(raw_spinlock_t *lock) +static inline int __ticket_spin_is_contended(arch_spinlock_t *lock) { long tmp = ACCESS_ONCE(lock->lock); return ((tmp - (tmp >> TICKET_SHIFT)) & TICKET_MASK) > 1; } -static inline int __raw_spin_is_locked(raw_spinlock_t *lock) +static inline int __raw_spin_is_locked(arch_spinlock_t *lock) { return __ticket_spin_is_locked(lock); } -static inline int __raw_spin_is_contended(raw_spinlock_t *lock) +static inline int __raw_spin_is_contended(arch_spinlock_t *lock) { return __ticket_spin_is_contended(lock); } #define __raw_spin_is_contended __raw_spin_is_contended -static __always_inline void __raw_spin_lock(raw_spinlock_t *lock) +static __always_inline void __raw_spin_lock(arch_spinlock_t *lock) { __ticket_spin_lock(lock); } -static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock) +static __always_inline int __raw_spin_trylock(arch_spinlock_t *lock) { return __ticket_spin_trylock(lock); } -static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock) +static __always_inline void __raw_spin_unlock(arch_spinlock_t *lock) { __ticket_spin_unlock(lock); } -static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock, +static __always_inline void __raw_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) { __raw_spin_lock(lock); } -static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) +static inline void __raw_spin_unlock_wait(arch_spinlock_t *lock) { __ticket_spin_unlock_wait(lock); } diff --git a/arch/ia64/include/asm/spinlock_types.h b/arch/ia64/include/asm/spinlock_types.h index 474e46f1ab4a..447ccc6ca7a8 100644 --- a/arch/ia64/include/asm/spinlock_types.h +++ b/arch/ia64/include/asm/spinlock_types.h @@ -7,7 +7,7 @@ typedef struct { volatile unsigned int lock; -} raw_spinlock_t; +} arch_spinlock_t; #define __RAW_SPIN_LOCK_UNLOCKED { 0 } diff --git a/arch/m32r/include/asm/spinlock.h b/arch/m32r/include/asm/spinlock.h index dded923883b2..0c0164225bc0 100644 --- a/arch/m32r/include/asm/spinlock.h +++ b/arch/m32r/include/asm/spinlock.h @@ -36,7 +36,7 @@ * __raw_spin_trylock() tries to get the lock and returns a result. * On the m32r, the result value is 1 (= Success) or 0 (= Failure). */ -static inline int __raw_spin_trylock(raw_spinlock_t *lock) +static inline int __raw_spin_trylock(arch_spinlock_t *lock) { int oldval; unsigned long tmp1, tmp2; @@ -69,7 +69,7 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock) return (oldval > 0); } -static inline void __raw_spin_lock(raw_spinlock_t *lock) +static inline void __raw_spin_lock(arch_spinlock_t *lock) { unsigned long tmp0, tmp1; @@ -111,7 +111,7 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock) ); } -static inline void __raw_spin_unlock(raw_spinlock_t *lock) +static inline void __raw_spin_unlock(arch_spinlock_t *lock) { mb(); lock->slock = 1; diff --git a/arch/m32r/include/asm/spinlock_types.h b/arch/m32r/include/asm/spinlock_types.h index 83f52105c0e4..17d15bd6322d 100644 --- a/arch/m32r/include/asm/spinlock_types.h +++ b/arch/m32r/include/asm/spinlock_types.h @@ -7,7 +7,7 @@ typedef struct { volatile int slock; -} raw_spinlock_t; +} arch_spinlock_t; #define __RAW_SPIN_LOCK_UNLOCKED { 1 } diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h index 5b60a09a0f08..0f16d0673b4a 100644 --- a/arch/mips/include/asm/spinlock.h +++ b/arch/mips/include/asm/spinlock.h @@ -34,7 +34,7 @@ * becomes equal to the the initial value of the tail. */ -static inline int __raw_spin_is_locked(raw_spinlock_t *lock) +static inline int __raw_spin_is_locked(arch_spinlock_t *lock) { unsigned int counters = ACCESS_ONCE(lock->lock); @@ -45,7 +45,7 @@ static inline int __raw_spin_is_locked(raw_spinlock_t *lock) #define __raw_spin_unlock_wait(x) \ while (__raw_spin_is_locked(x)) { cpu_relax(); } -static inline int __raw_spin_is_contended(raw_spinlock_t *lock) +static inline int __raw_spin_is_contended(arch_spinlock_t *lock) { unsigned int counters = ACCESS_ONCE(lock->lock); @@ -53,7 +53,7 @@ static inline int __raw_spin_is_contended(raw_spinlock_t *lock) } #define __raw_spin_is_contended __raw_spin_is_contended -static inline void __raw_spin_lock(raw_spinlock_t *lock) +static inline void __raw_spin_lock(arch_spinlock_t *lock) { int my_ticket; int tmp; @@ -134,7 +134,7 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock) smp_llsc_mb(); } -static inline void __raw_spin_unlock(raw_spinlock_t *lock) +static inline void __raw_spin_unlock(arch_spinlock_t *lock) { int tmp; @@ -174,7 +174,7 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock) } } -static inline unsigned int __raw_spin_trylock(raw_spinlock_t *lock) +static inline unsigned int __raw_spin_trylock(arch_spinlock_t *lock) { int tmp, tmp2, tmp3; diff --git a/arch/mips/include/asm/spinlock_types.h b/arch/mips/include/asm/spinlock_types.h index adeedaa116c1..2e1060892d3b 100644 --- a/arch/mips/include/asm/spinlock_types.h +++ b/arch/mips/include/asm/spinlock_types.h @@ -12,7 +12,7 @@ typedef struct { * bits 15..28: ticket */ unsigned int lock; -} raw_spinlock_t; +} arch_spinlock_t; #define __RAW_SPIN_LOCK_UNLOCKED { 0 } diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 8bc9e96699b2..3a4ea778d4b6 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -27,18 +27,18 @@ # define ATOMIC_HASH_SIZE 4 # define ATOMIC_HASH(a) (&(__atomic_hash[ (((unsigned long) (a))/L1_CACHE_BYTES) & (ATOMIC_HASH_SIZE-1) ])) -extern raw_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned; +extern arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned; /* Can't use raw_spin_lock_irq because of #include problems, so * this is the substitute */ #define _atomic_spin_lock_irqsave(l,f) do { \ - raw_spinlock_t *s = ATOMIC_HASH(l); \ + arch_spinlock_t *s = ATOMIC_HASH(l); \ local_irq_save(f); \ __raw_spin_lock(s); \ } while(0) #define _atomic_spin_unlock_irqrestore(l,f) do { \ - raw_spinlock_t *s = ATOMIC_HASH(l); \ + arch_spinlock_t *s = ATOMIC_HASH(l); \ __raw_spin_unlock(s); \ local_irq_restore(f); \ } while(0) diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h index fae03e136fa8..69e8dca26744 100644 --- a/arch/parisc/include/asm/spinlock.h +++ b/arch/parisc/include/asm/spinlock.h @@ -5,7 +5,7 @@ #include #include -static inline int __raw_spin_is_locked(raw_spinlock_t *x) +static inline int __raw_spin_is_locked(arch_spinlock_t *x) { volatile unsigned int *a = __ldcw_align(x); return *a == 0; @@ -15,7 +15,7 @@ static inline int __raw_spin_is_locked(raw_spinlock_t *x) #define __raw_spin_unlock_wait(x) \ do { cpu_relax(); } while (__raw_spin_is_locked(x)) -static inline void __raw_spin_lock_flags(raw_spinlock_t *x, +static inline void __raw_spin_lock_flags(arch_spinlock_t *x, unsigned long flags) { volatile unsigned int *a; @@ -33,7 +33,7 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *x, mb(); } -static inline void __raw_spin_unlock(raw_spinlock_t *x) +static inline void __raw_spin_unlock(arch_spinlock_t *x) { volatile unsigned int *a; mb(); @@ -42,7 +42,7 @@ static inline void __raw_spin_unlock(raw_spinlock_t *x) mb(); } -static inline int __raw_spin_trylock(raw_spinlock_t *x) +static inline int __raw_spin_trylock(arch_spinlock_t *x) { volatile unsigned int *a; int ret; diff --git a/arch/parisc/include/asm/spinlock_types.h b/arch/parisc/include/asm/spinlock_types.h index 3f72f47cf4b2..735caafb81f5 100644 --- a/arch/parisc/include/asm/spinlock_types.h +++ b/arch/parisc/include/asm/spinlock_types.h @@ -9,10 +9,10 @@ typedef struct { volatile unsigned int lock[4]; # define __RAW_SPIN_LOCK_UNLOCKED { { 1, 1, 1, 1 } } #endif -} raw_spinlock_t; +} arch_spinlock_t; typedef struct { - raw_spinlock_t lock; + arch_spinlock_t lock; volatile int counter; } raw_rwlock_t; diff --git a/arch/parisc/lib/bitops.c b/arch/parisc/lib/bitops.c index e3eb739fab19..fdd7f583de54 100644 --- a/arch/parisc/lib/bitops.c +++ b/arch/parisc/lib/bitops.c @@ -12,7 +12,7 @@ #include #ifdef CONFIG_SMP -raw_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned = { +arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned = { [0 ... (ATOMIC_HASH_SIZE-1)] = __RAW_SPIN_LOCK_UNLOCKED }; #endif diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 168fce726201..20de73c36682 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -58,7 +58,7 @@ struct rtas_t { unsigned long entry; /* physical address pointer */ unsigned long base; /* physical address pointer */ unsigned long size; - raw_spinlock_t lock; + arch_spinlock_t lock; struct rtas_args args; struct device_node *dev; /* virtual address pointer */ }; diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index 198266cf9e2d..c0d44c92ff0e 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -54,7 +54,7 @@ * This returns the old value in the lock, so we succeeded * in getting the lock if the return value is 0. */ -static inline unsigned long arch_spin_trylock(raw_spinlock_t *lock) +static inline unsigned long arch_spin_trylock(arch_spinlock_t *lock) { unsigned long tmp, token; @@ -73,7 +73,7 @@ static inline unsigned long arch_spin_trylock(raw_spinlock_t *lock) return tmp; } -static inline int __raw_spin_trylock(raw_spinlock_t *lock) +static inline int __raw_spin_trylock(arch_spinlock_t *lock) { CLEAR_IO_SYNC; return arch_spin_trylock(lock) == 0; @@ -96,7 +96,7 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock) #if defined(CONFIG_PPC_SPLPAR) || defined(CONFIG_PPC_ISERIES) /* We only yield to the hypervisor if we are in shared processor mode */ #define SHARED_PROCESSOR (get_lppaca()->shared_proc) -extern void __spin_yield(raw_spinlock_t *lock); +extern void __spin_yield(arch_spinlock_t *lock); extern void __rw_yield(raw_rwlock_t *lock); #else /* SPLPAR || ISERIES */ #define __spin_yield(x) barrier() @@ -104,7 +104,7 @@ extern void __rw_yield(raw_rwlock_t *lock); #define SHARED_PROCESSOR 0 #endif -static inline void __raw_spin_lock(raw_spinlock_t *lock) +static inline void __raw_spin_lock(arch_spinlock_t *lock) { CLEAR_IO_SYNC; while (1) { @@ -120,7 +120,7 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock) } static inline -void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) +void __raw_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) { unsigned long flags_dis; @@ -140,7 +140,7 @@ void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) } } -static inline void __raw_spin_unlock(raw_spinlock_t *lock) +static inline void __raw_spin_unlock(arch_spinlock_t *lock) { SYNC_IO; __asm__ __volatile__("# __raw_spin_unlock\n\t" @@ -149,7 +149,7 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock) } #ifdef CONFIG_PPC64 -extern void __raw_spin_unlock_wait(raw_spinlock_t *lock); +extern void __raw_spin_unlock_wait(arch_spinlock_t *lock); #else #define __raw_spin_unlock_wait(lock) \ do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0) diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h index 74236c9f05b1..4312e5baaf88 100644 --- a/arch/powerpc/include/asm/spinlock_types.h +++ b/arch/powerpc/include/asm/spinlock_types.h @@ -7,7 +7,7 @@ typedef struct { volatile unsigned int slock; -} raw_spinlock_t; +} arch_spinlock_t; #define __RAW_SPIN_LOCK_UNLOCKED { 0 } diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index bf90361bb70f..579069c12152 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -978,7 +978,7 @@ int __init early_init_dt_scan_rtas(unsigned long node, return 1; } -static raw_spinlock_t timebase_lock; +static arch_spinlock_t timebase_lock; static u64 timebase = 0; void __cpuinit rtas_give_timebase(void) diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index 79d0fa3a470d..b06294cde499 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -25,7 +25,7 @@ #include #include -void __spin_yield(raw_spinlock_t *lock) +void __spin_yield(arch_spinlock_t *lock) { unsigned int lock_value, holder_cpu, yield_count; @@ -82,7 +82,7 @@ void __rw_yield(raw_rwlock_t *rw) } #endif -void __raw_spin_unlock_wait(raw_spinlock_t *lock) +void __raw_spin_unlock_wait(arch_spinlock_t *lock) { while (lock->slock) { HMT_low(); diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index a4619347aa7e..be36fece41d7 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -71,7 +71,7 @@ static void pas_restart(char *cmd) } #ifdef CONFIG_SMP -static raw_spinlock_t timebase_lock; +static arch_spinlock_t timebase_lock; static unsigned long timebase; static void __devinit pas_give_timebase(void) diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index c9af0d19c7ab..6121fa4b83d9 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -57,12 +57,12 @@ _raw_compare_and_swap(volatile unsigned int *lock, do { while (__raw_spin_is_locked(lock)) \ _raw_spin_relax(lock); } while (0) -extern void _raw_spin_lock_wait(raw_spinlock_t *); -extern void _raw_spin_lock_wait_flags(raw_spinlock_t *, unsigned long flags); -extern int _raw_spin_trylock_retry(raw_spinlock_t *); -extern void _raw_spin_relax(raw_spinlock_t *lock); +extern void _raw_spin_lock_wait(arch_spinlock_t *); +extern void _raw_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags); +extern int _raw_spin_trylock_retry(arch_spinlock_t *); +extern void _raw_spin_relax(arch_spinlock_t *lock); -static inline void __raw_spin_lock(raw_spinlock_t *lp) +static inline void __raw_spin_lock(arch_spinlock_t *lp) { int old; @@ -72,7 +72,7 @@ static inline void __raw_spin_lock(raw_spinlock_t *lp) _raw_spin_lock_wait(lp); } -static inline void __raw_spin_lock_flags(raw_spinlock_t *lp, +static inline void __raw_spin_lock_flags(arch_spinlock_t *lp, unsigned long flags) { int old; @@ -83,7 +83,7 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *lp, _raw_spin_lock_wait_flags(lp, flags); } -static inline int __raw_spin_trylock(raw_spinlock_t *lp) +static inline int __raw_spin_trylock(arch_spinlock_t *lp) { int old; @@ -93,7 +93,7 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lp) return _raw_spin_trylock_retry(lp); } -static inline void __raw_spin_unlock(raw_spinlock_t *lp) +static inline void __raw_spin_unlock(arch_spinlock_t *lp) { _raw_compare_and_swap(&lp->owner_cpu, lp->owner_cpu, 0); } diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h index 654abc40de04..a93638eee3f7 100644 --- a/arch/s390/include/asm/spinlock_types.h +++ b/arch/s390/include/asm/spinlock_types.h @@ -7,7 +7,7 @@ typedef struct { volatile unsigned int owner_cpu; -} __attribute__ ((aligned (4))) raw_spinlock_t; +} __attribute__ ((aligned (4))) arch_spinlock_t; #define __RAW_SPIN_LOCK_UNLOCKED { 0 } diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index f7e0d30250b7..d4cbf71a6077 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -39,7 +39,7 @@ static inline void _raw_yield_cpu(int cpu) _raw_yield(); } -void _raw_spin_lock_wait(raw_spinlock_t *lp) +void _raw_spin_lock_wait(arch_spinlock_t *lp) { int count = spin_retry; unsigned int cpu = ~smp_processor_id(); @@ -59,7 +59,7 @@ void _raw_spin_lock_wait(raw_spinlock_t *lp) } EXPORT_SYMBOL(_raw_spin_lock_wait); -void _raw_spin_lock_wait_flags(raw_spinlock_t *lp, unsigned long flags) +void _raw_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) { int count = spin_retry; unsigned int cpu = ~smp_processor_id(); @@ -82,7 +82,7 @@ void _raw_spin_lock_wait_flags(raw_spinlock_t *lp, unsigned long flags) } EXPORT_SYMBOL(_raw_spin_lock_wait_flags); -int _raw_spin_trylock_retry(raw_spinlock_t *lp) +int _raw_spin_trylock_retry(arch_spinlock_t *lp) { unsigned int cpu = ~smp_processor_id(); int count; @@ -97,7 +97,7 @@ int _raw_spin_trylock_retry(raw_spinlock_t *lp) } EXPORT_SYMBOL(_raw_spin_trylock_retry); -void _raw_spin_relax(raw_spinlock_t *lock) +void _raw_spin_relax(arch_spinlock_t *lock) { unsigned int cpu = lock->owner_cpu; if (cpu != 0) diff --git a/arch/sh/include/asm/spinlock.h b/arch/sh/include/asm/spinlock.h index a28c9f0053fd..5a05b3fcefbe 100644 --- a/arch/sh/include/asm/spinlock.h +++ b/arch/sh/include/asm/spinlock.h @@ -34,7 +34,7 @@ * * We make no fairness assumptions. They have a cost. */ -static inline void __raw_spin_lock(raw_spinlock_t *lock) +static inline void __raw_spin_lock(arch_spinlock_t *lock) { unsigned long tmp; unsigned long oldval; @@ -54,7 +54,7 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock) ); } -static inline void __raw_spin_unlock(raw_spinlock_t *lock) +static inline void __raw_spin_unlock(arch_spinlock_t *lock) { unsigned long tmp; @@ -67,7 +67,7 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock) ); } -static inline int __raw_spin_trylock(raw_spinlock_t *lock) +static inline int __raw_spin_trylock(arch_spinlock_t *lock) { unsigned long tmp, oldval; diff --git a/arch/sh/include/asm/spinlock_types.h b/arch/sh/include/asm/spinlock_types.h index b4d244e7b60c..37712c32ba99 100644 --- a/arch/sh/include/asm/spinlock_types.h +++ b/arch/sh/include/asm/spinlock_types.h @@ -7,7 +7,7 @@ typedef struct { volatile unsigned int lock; -} raw_spinlock_t; +} arch_spinlock_t; #define __RAW_SPIN_LOCK_UNLOCKED { 1 } diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h index 857630cff636..b2d8a67f727e 100644 --- a/arch/sparc/include/asm/spinlock_32.h +++ b/arch/sparc/include/asm/spinlock_32.h @@ -15,7 +15,7 @@ #define __raw_spin_unlock_wait(lock) \ do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0) -static inline void __raw_spin_lock(raw_spinlock_t *lock) +static inline void __raw_spin_lock(arch_spinlock_t *lock) { __asm__ __volatile__( "\n1:\n\t" @@ -35,7 +35,7 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock) : "g2", "memory", "cc"); } -static inline int __raw_spin_trylock(raw_spinlock_t *lock) +static inline int __raw_spin_trylock(arch_spinlock_t *lock) { unsigned int result; __asm__ __volatile__("ldstub [%1], %0" @@ -45,7 +45,7 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock) return (result == 0); } -static inline void __raw_spin_unlock(raw_spinlock_t *lock) +static inline void __raw_spin_unlock(arch_spinlock_t *lock) { __asm__ __volatile__("stb %%g0, [%0]" : : "r" (lock) : "memory"); } diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h index 43e514783582..38e16c40efc4 100644 --- a/arch/sparc/include/asm/spinlock_64.h +++ b/arch/sparc/include/asm/spinlock_64.h @@ -27,7 +27,7 @@ do { rmb(); \ } while((lp)->lock) -static inline void __raw_spin_lock(raw_spinlock_t *lock) +static inline void __raw_spin_lock(arch_spinlock_t *lock) { unsigned long tmp; @@ -46,7 +46,7 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock) : "memory"); } -static inline int __raw_spin_trylock(raw_spinlock_t *lock) +static inline int __raw_spin_trylock(arch_spinlock_t *lock) { unsigned long result; @@ -59,7 +59,7 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock) return (result == 0UL); } -static inline void __raw_spin_unlock(raw_spinlock_t *lock) +static inline void __raw_spin_unlock(arch_spinlock_t *lock) { __asm__ __volatile__( " stb %%g0, [%0]" @@ -68,7 +68,7 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock) : "memory"); } -static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) +static inline void __raw_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) { unsigned long tmp1, tmp2; diff --git a/arch/sparc/include/asm/spinlock_types.h b/arch/sparc/include/asm/spinlock_types.h index 37cbe01c585b..41d9a8fec13d 100644 --- a/arch/sparc/include/asm/spinlock_types.h +++ b/arch/sparc/include/asm/spinlock_types.h @@ -7,7 +7,7 @@ typedef struct { volatile unsigned char lock; -} raw_spinlock_t; +} arch_spinlock_t; #define __RAW_SPIN_LOCK_UNLOCKED { 0 } diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index efb38994859c..5655f75f10b7 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -731,34 +731,34 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) -static inline int __raw_spin_is_locked(struct raw_spinlock *lock) +static inline int __raw_spin_is_locked(struct arch_spinlock *lock) { return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock); } -static inline int __raw_spin_is_contended(struct raw_spinlock *lock) +static inline int __raw_spin_is_contended(struct arch_spinlock *lock) { return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock); } #define __raw_spin_is_contended __raw_spin_is_contended -static __always_inline void __raw_spin_lock(struct raw_spinlock *lock) +static __always_inline void __raw_spin_lock(struct arch_spinlock *lock) { PVOP_VCALL1(pv_lock_ops.spin_lock, lock); } -static __always_inline void __raw_spin_lock_flags(struct raw_spinlock *lock, +static __always_inline void __raw_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags) { PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags); } -static __always_inline int __raw_spin_trylock(struct raw_spinlock *lock) +static __always_inline int __raw_spin_trylock(struct arch_spinlock *lock) { return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock); } -static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock) +static __always_inline void __raw_spin_unlock(struct arch_spinlock *lock) { PVOP_VCALL1(pv_lock_ops.spin_unlock, lock); } diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 9357473c8da0..b1e70d51e40c 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -318,14 +318,14 @@ struct pv_mmu_ops { phys_addr_t phys, pgprot_t flags); }; -struct raw_spinlock; +struct arch_spinlock; struct pv_lock_ops { - int (*spin_is_locked)(struct raw_spinlock *lock); - int (*spin_is_contended)(struct raw_spinlock *lock); - void (*spin_lock)(struct raw_spinlock *lock); - void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags); - int (*spin_trylock)(struct raw_spinlock *lock); - void (*spin_unlock)(struct raw_spinlock *lock); + int (*spin_is_locked)(struct arch_spinlock *lock); + int (*spin_is_contended)(struct arch_spinlock *lock); + void (*spin_lock)(struct arch_spinlock *lock); + void (*spin_lock_flags)(struct arch_spinlock *lock, unsigned long flags); + int (*spin_trylock)(struct arch_spinlock *lock); + void (*spin_unlock)(struct arch_spinlock *lock); }; /* This contains all the paravirt structures: we get a convenient diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 4e77853321db..204b524fcf57 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -58,7 +58,7 @@ #if (NR_CPUS < 256) #define TICKET_SHIFT 8 -static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) +static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) { short inc = 0x0100; @@ -77,7 +77,7 @@ static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) : "memory", "cc"); } -static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) +static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) { int tmp, new; @@ -96,7 +96,7 @@ static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) return tmp; } -static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock) +static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) { asm volatile(UNLOCK_LOCK_PREFIX "incb %0" : "+m" (lock->slock) @@ -106,7 +106,7 @@ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock) #else #define TICKET_SHIFT 16 -static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) +static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) { int inc = 0x00010000; int tmp; @@ -127,7 +127,7 @@ static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) : "memory", "cc"); } -static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) +static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) { int tmp; int new; @@ -149,7 +149,7 @@ static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) return tmp; } -static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock) +static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) { asm volatile(UNLOCK_LOCK_PREFIX "incw %0" : "+m" (lock->slock) @@ -158,14 +158,14 @@ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock) } #endif -static inline int __ticket_spin_is_locked(raw_spinlock_t *lock) +static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) { int tmp = ACCESS_ONCE(lock->slock); return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1)); } -static inline int __ticket_spin_is_contended(raw_spinlock_t *lock) +static inline int __ticket_spin_is_contended(arch_spinlock_t *lock) { int tmp = ACCESS_ONCE(lock->slock); @@ -174,33 +174,33 @@ static inline int __ticket_spin_is_contended(raw_spinlock_t *lock) #ifndef CONFIG_PARAVIRT_SPINLOCKS -static inline int __raw_spin_is_locked(raw_spinlock_t *lock) +static inline int __raw_spin_is_locked(arch_spinlock_t *lock) { return __ticket_spin_is_locked(lock); } -static inline int __raw_spin_is_contended(raw_spinlock_t *lock) +static inline int __raw_spin_is_contended(arch_spinlock_t *lock) { return __ticket_spin_is_contended(lock); } #define __raw_spin_is_contended __raw_spin_is_contended -static __always_inline void __raw_spin_lock(raw_spinlock_t *lock) +static __always_inline void __raw_spin_lock(arch_spinlock_t *lock) { __ticket_spin_lock(lock); } -static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock) +static __always_inline int __raw_spin_trylock(arch_spinlock_t *lock) { return __ticket_spin_trylock(lock); } -static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock) +static __always_inline void __raw_spin_unlock(arch_spinlock_t *lock) { __ticket_spin_unlock(lock); } -static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock, +static __always_inline void __raw_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) { __raw_spin_lock(lock); @@ -208,7 +208,7 @@ static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock, #endif /* CONFIG_PARAVIRT_SPINLOCKS */ -static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) +static inline void __raw_spin_unlock_wait(arch_spinlock_t *lock) { while (__raw_spin_is_locked(lock)) cpu_relax(); diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h index 845f81c87091..2ae7637ed524 100644 --- a/arch/x86/include/asm/spinlock_types.h +++ b/arch/x86/include/asm/spinlock_types.h @@ -5,9 +5,9 @@ # error "please don't include this file directly" #endif -typedef struct raw_spinlock { +typedef struct arch_spinlock { unsigned int slock; -} raw_spinlock_t; +} arch_spinlock_t; #define __RAW_SPIN_LOCK_UNLOCKED { 0 } diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index b8ce165dde5d..0862d9d89c92 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -188,7 +188,7 @@ void dump_stack(void) } EXPORT_SYMBOL(dump_stack); -static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; +static arch_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; static int die_owner = -1; static unsigned int die_nest_count; diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 3a7c5a44082e..a0f39e090684 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -8,7 +8,7 @@ #include static inline void -default_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) +default_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) { __raw_spin_lock(lock); } diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index eed156851f5d..9f908b9d1abe 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -33,7 +33,7 @@ static __cpuinitdata atomic_t stop_count; * we want to have the fastest, inlined, non-debug version * of a critical section, to be able to prove TSC time-warps: */ -static __cpuinitdata raw_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED; +static __cpuinitdata arch_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED; static __cpuinitdata cycles_t last_tsc; static __cpuinitdata cycles_t max_warp; diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 36a5141108df..24ded31b5aec 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -120,14 +120,14 @@ struct xen_spinlock { unsigned short spinners; /* count of waiting cpus */ }; -static int xen_spin_is_locked(struct raw_spinlock *lock) +static int xen_spin_is_locked(struct arch_spinlock *lock) { struct xen_spinlock *xl = (struct xen_spinlock *)lock; return xl->lock != 0; } -static int xen_spin_is_contended(struct raw_spinlock *lock) +static int xen_spin_is_contended(struct arch_spinlock *lock) { struct xen_spinlock *xl = (struct xen_spinlock *)lock; @@ -136,7 +136,7 @@ static int xen_spin_is_contended(struct raw_spinlock *lock) return xl->spinners != 0; } -static int xen_spin_trylock(struct raw_spinlock *lock) +static int xen_spin_trylock(struct arch_spinlock *lock) { struct xen_spinlock *xl = (struct xen_spinlock *)lock; u8 old = 1; @@ -181,7 +181,7 @@ static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock __get_cpu_var(lock_spinners) = prev; } -static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enable) +static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable) { struct xen_spinlock *xl = (struct xen_spinlock *)lock; struct xen_spinlock *prev; @@ -254,7 +254,7 @@ out: return ret; } -static inline void __xen_spin_lock(struct raw_spinlock *lock, bool irq_enable) +static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable) { struct xen_spinlock *xl = (struct xen_spinlock *)lock; unsigned timeout; @@ -291,12 +291,12 @@ static inline void __xen_spin_lock(struct raw_spinlock *lock, bool irq_enable) spin_time_accum_total(start_spin); } -static void xen_spin_lock(struct raw_spinlock *lock) +static void xen_spin_lock(struct arch_spinlock *lock) { __xen_spin_lock(lock, false); } -static void xen_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags) +static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags) { __xen_spin_lock(lock, !raw_irqs_disabled_flags(flags)); } @@ -317,7 +317,7 @@ static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl) } } -static void xen_spin_unlock(struct raw_spinlock *lock) +static void xen_spin_unlock(struct arch_spinlock *lock) { struct xen_spinlock *xl = (struct xen_spinlock *)lock; diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h index c8946465e63a..dcf0afad4a7f 100644 --- a/include/asm-generic/bitops/atomic.h +++ b/include/asm-generic/bitops/atomic.h @@ -15,18 +15,18 @@ # define ATOMIC_HASH_SIZE 4 # define ATOMIC_HASH(a) (&(__atomic_hash[ (((unsigned long) a)/L1_CACHE_BYTES) & (ATOMIC_HASH_SIZE-1) ])) -extern raw_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned; +extern arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned; /* Can't use raw_spin_lock_irq because of #include problems, so * this is the substitute */ #define _atomic_spin_lock_irqsave(l,f) do { \ - raw_spinlock_t *s = ATOMIC_HASH(l); \ + arch_spinlock_t *s = ATOMIC_HASH(l); \ local_irq_save(f); \ __raw_spin_lock(s); \ } while(0) #define _atomic_spin_unlock_irqrestore(l,f) do { \ - raw_spinlock_t *s = ATOMIC_HASH(l); \ + arch_spinlock_t *s = ATOMIC_HASH(l); \ __raw_spin_unlock(s); \ local_irq_restore(f); \ } while(0) diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index a9aaa709fb93..5ef7a4c060b5 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -8,7 +8,7 @@ * * on SMP builds: * - * asm/spinlock_types.h: contains the raw_spinlock_t/raw_rwlock_t and the + * asm/spinlock_types.h: contains the arch_spinlock_t/raw_rwlock_t and the * initializers * * linux/spinlock_types.h: @@ -75,7 +75,7 @@ #define __lockfunc __attribute__((section(".spinlock.text"))) /* - * Pull the raw_spinlock_t and raw_rwlock_t definitions: + * Pull the arch_spinlock_t and raw_rwlock_t definitions: */ #include diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h index f979d5d8a160..d4af2d7a86ea 100644 --- a/include/linux/spinlock_types.h +++ b/include/linux/spinlock_types.h @@ -18,7 +18,7 @@ #include typedef struct { - raw_spinlock_t raw_lock; + arch_spinlock_t raw_lock; #ifdef CONFIG_GENERIC_LOCKBREAK unsigned int break_lock; #endif diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h index 04135b0e198e..34d36691c4ec 100644 --- a/include/linux/spinlock_types_up.h +++ b/include/linux/spinlock_types_up.h @@ -16,13 +16,13 @@ typedef struct { volatile unsigned int slock; -} raw_spinlock_t; +} arch_spinlock_t; #define __RAW_SPIN_LOCK_UNLOCKED { 1 } #else -typedef struct { } raw_spinlock_t; +typedef struct { } arch_spinlock_t; #define __RAW_SPIN_LOCK_UNLOCKED { } diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index d4841ed8215b..8ee2ac1bf636 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -20,19 +20,19 @@ #ifdef CONFIG_DEBUG_SPINLOCK #define __raw_spin_is_locked(x) ((x)->slock == 0) -static inline void __raw_spin_lock(raw_spinlock_t *lock) +static inline void __raw_spin_lock(arch_spinlock_t *lock) { lock->slock = 0; } static inline void -__raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) +__raw_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) { local_irq_save(flags); lock->slock = 0; } -static inline int __raw_spin_trylock(raw_spinlock_t *lock) +static inline int __raw_spin_trylock(arch_spinlock_t *lock) { char oldval = lock->slock; @@ -41,7 +41,7 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock) return oldval > 0; } -static inline void __raw_spin_unlock(raw_spinlock_t *lock) +static inline void __raw_spin_unlock(arch_spinlock_t *lock) { lock->slock = 1; } diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 429540c70d3f..7cc50c62af59 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -73,7 +73,7 @@ module_param(lock_stat, int, 0644); * to use a raw spinlock - we really dont want the spinlock * code to recurse back into the lockdep code... */ -static raw_spinlock_t lockdep_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; +static arch_spinlock_t lockdep_lock = (arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; static int graph_lock(void) { diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a1ca4956ab5e..5ac8ee0a9e35 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -423,7 +423,7 @@ struct ring_buffer_per_cpu { int cpu; struct ring_buffer *buffer; spinlock_t reader_lock; /* serialize readers */ - raw_spinlock_t lock; + arch_spinlock_t lock; struct lock_class_key lock_key; struct list_head *pages; struct buffer_page *head_page; /* read from head */ @@ -998,7 +998,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) cpu_buffer->buffer = buffer; spin_lock_init(&cpu_buffer->reader_lock); lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); - cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + cpu_buffer->lock = (arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), GFP_KERNEL, cpu_to_node(cpu)); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c82dfd92fdfd..7d56cecc2c6e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -493,15 +493,15 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) * protected by per_cpu spinlocks. But the action of the swap * needs its own lock. * - * This is defined as a raw_spinlock_t in order to help + * This is defined as a arch_spinlock_t in order to help * with performance when lockdep debugging is enabled. * * It is also used in other places outside the update_max_tr * so it needs to be defined outside of the * CONFIG_TRACER_MAX_TRACE. */ -static raw_spinlock_t ftrace_max_lock = - (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; +static arch_spinlock_t ftrace_max_lock = + (arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; #ifdef CONFIG_TRACER_MAX_TRACE unsigned long __read_mostly tracing_max_latency; @@ -802,7 +802,7 @@ static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; static int cmdline_idx; -static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED; +static arch_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED; /* temporary disable recording */ static atomic_t trace_record_cmdline_disabled __read_mostly; @@ -1251,8 +1251,8 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) */ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) { - static raw_spinlock_t trace_buf_lock = - (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + static arch_spinlock_t trace_buf_lock = + (arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; static u32 trace_buf[TRACE_BUF_SIZE]; struct ftrace_event_call *call = &event_bprint; @@ -1334,7 +1334,7 @@ int trace_array_printk(struct trace_array *tr, int trace_array_vprintk(struct trace_array *tr, unsigned long ip, const char *fmt, va_list args) { - static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; + static arch_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; static char trace_buf[TRACE_BUF_SIZE]; struct ftrace_event_call *call = &event_print; @@ -4307,8 +4307,8 @@ trace_printk_seq(struct trace_seq *s) static void __ftrace_dump(bool disable_tracing) { - static raw_spinlock_t ftrace_dump_lock = - (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + static arch_spinlock_t ftrace_dump_lock = + (arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; /* use static because iter can be a bit big for the stack */ static struct trace_iterator iter; unsigned int old_userobj; diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 878c03f386ba..206ec3d4b3c2 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -71,10 +71,10 @@ u64 notrace trace_clock(void) /* keep prev_time and lock in the same cacheline. */ static struct { u64 prev_time; - raw_spinlock_t lock; + arch_spinlock_t lock; } trace_clock_struct ____cacheline_aligned_in_smp = { - .lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED, + .lock = (arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED, }; u64 notrace trace_clock_global(void) diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 26185d727676..4cf7e83ec235 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -28,8 +28,8 @@ static int wakeup_current_cpu; static unsigned wakeup_prio = -1; static int wakeup_rt; -static raw_spinlock_t wakeup_lock = - (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; +static arch_spinlock_t wakeup_lock = + (arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; static void __wakeup_reset(struct trace_array *tr); diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 8504ac71e4e8..9a82d568fdec 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -27,8 +27,8 @@ static struct stack_trace max_stack_trace = { }; static unsigned long max_stack_size; -static raw_spinlock_t max_stack_lock = - (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; +static arch_spinlock_t max_stack_lock = + (arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; static int stack_trace_disabled __read_mostly; static DEFINE_PER_CPU(int, trace_active); diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c index 9c4b0256490b..2acd501b3826 100644 --- a/lib/spinlock_debug.c +++ b/lib/spinlock_debug.c @@ -23,7 +23,7 @@ void __spin_lock_init(spinlock_t *lock, const char *name, debug_check_no_locks_freed((void *)lock, sizeof(*lock)); lockdep_init_map(&lock->dep_map, name, key, 0); #endif - lock->raw_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + lock->raw_lock = (arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; lock->magic = SPINLOCK_MAGIC; lock->owner = SPINLOCK_OWNER_INIT; lock->owner_cpu = -1; -- cgit v1.2.3 From edc35bd72e2079b25f99c5da7d7a65dbbffc4a26 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 3 Dec 2009 12:38:57 +0100 Subject: locking: Rename __RAW_SPIN_LOCK_UNLOCKED to __ARCH_SPIN_LOCK_UNLOCKED Further name space cleanup. No functional change Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Acked-by: David S. Miller Acked-by: Ingo Molnar Cc: linux-arch@vger.kernel.org --- arch/alpha/include/asm/spinlock_types.h | 2 +- arch/arm/include/asm/spinlock_types.h | 2 +- arch/blackfin/include/asm/spinlock_types.h | 2 +- arch/ia64/include/asm/spinlock_types.h | 2 +- arch/m32r/include/asm/spinlock_types.h | 2 +- arch/mips/include/asm/spinlock_types.h | 2 +- arch/parisc/include/asm/spinlock_types.h | 6 +++--- arch/parisc/lib/bitops.c | 2 +- arch/powerpc/include/asm/spinlock_types.h | 2 +- arch/powerpc/kernel/rtas.c | 2 +- arch/s390/include/asm/spinlock_types.h | 2 +- arch/sh/include/asm/spinlock_types.h | 2 +- arch/sparc/include/asm/spinlock_types.h | 2 +- arch/x86/include/asm/spinlock_types.h | 2 +- arch/x86/kernel/dumpstack.c | 2 +- arch/x86/kernel/tsc_sync.c | 2 +- include/linux/spinlock_types.h | 4 ++-- include/linux/spinlock_types_up.h | 4 ++-- kernel/lockdep.c | 2 +- kernel/trace/ring_buffer.c | 2 +- kernel/trace/trace.c | 10 +++++----- kernel/trace/trace_clock.c | 2 +- kernel/trace/trace_sched_wakeup.c | 2 +- kernel/trace/trace_stack.c | 2 +- lib/spinlock_debug.c | 2 +- 25 files changed, 33 insertions(+), 33 deletions(-) (limited to 'arch/x86') diff --git a/arch/alpha/include/asm/spinlock_types.h b/arch/alpha/include/asm/spinlock_types.h index bb94a51e53d2..08975ee0a100 100644 --- a/arch/alpha/include/asm/spinlock_types.h +++ b/arch/alpha/include/asm/spinlock_types.h @@ -9,7 +9,7 @@ typedef struct { volatile unsigned int lock; } arch_spinlock_t; -#define __RAW_SPIN_LOCK_UNLOCKED { 0 } +#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } typedef struct { volatile unsigned int lock; diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h index 5e9d3eadd167..9622e126a8de 100644 --- a/arch/arm/include/asm/spinlock_types.h +++ b/arch/arm/include/asm/spinlock_types.h @@ -9,7 +9,7 @@ typedef struct { volatile unsigned int lock; } arch_spinlock_t; -#define __RAW_SPIN_LOCK_UNLOCKED { 0 } +#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } typedef struct { volatile unsigned int lock; diff --git a/arch/blackfin/include/asm/spinlock_types.h b/arch/blackfin/include/asm/spinlock_types.h index 03b377abf5c0..c8a3928a58c5 100644 --- a/arch/blackfin/include/asm/spinlock_types.h +++ b/arch/blackfin/include/asm/spinlock_types.h @@ -17,7 +17,7 @@ typedef struct { volatile unsigned int lock; } arch_spinlock_t; -#define __RAW_SPIN_LOCK_UNLOCKED { 0 } +#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } typedef struct { volatile unsigned int lock; diff --git a/arch/ia64/include/asm/spinlock_types.h b/arch/ia64/include/asm/spinlock_types.h index 447ccc6ca7a8..6a11b65fa66d 100644 --- a/arch/ia64/include/asm/spinlock_types.h +++ b/arch/ia64/include/asm/spinlock_types.h @@ -9,7 +9,7 @@ typedef struct { volatile unsigned int lock; } arch_spinlock_t; -#define __RAW_SPIN_LOCK_UNLOCKED { 0 } +#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } typedef struct { volatile unsigned int read_counter : 31; diff --git a/arch/m32r/include/asm/spinlock_types.h b/arch/m32r/include/asm/spinlock_types.h index 17d15bd6322d..5873a8701107 100644 --- a/arch/m32r/include/asm/spinlock_types.h +++ b/arch/m32r/include/asm/spinlock_types.h @@ -9,7 +9,7 @@ typedef struct { volatile int slock; } arch_spinlock_t; -#define __RAW_SPIN_LOCK_UNLOCKED { 1 } +#define __ARCH_SPIN_LOCK_UNLOCKED { 1 } typedef struct { volatile int lock; diff --git a/arch/mips/include/asm/spinlock_types.h b/arch/mips/include/asm/spinlock_types.h index 2e1060892d3b..b4c5efaadb9c 100644 --- a/arch/mips/include/asm/spinlock_types.h +++ b/arch/mips/include/asm/spinlock_types.h @@ -14,7 +14,7 @@ typedef struct { unsigned int lock; } arch_spinlock_t; -#define __RAW_SPIN_LOCK_UNLOCKED { 0 } +#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } typedef struct { volatile unsigned int lock; diff --git a/arch/parisc/include/asm/spinlock_types.h b/arch/parisc/include/asm/spinlock_types.h index 735caafb81f5..396d2746ca57 100644 --- a/arch/parisc/include/asm/spinlock_types.h +++ b/arch/parisc/include/asm/spinlock_types.h @@ -4,10 +4,10 @@ typedef struct { #ifdef CONFIG_PA20 volatile unsigned int slock; -# define __RAW_SPIN_LOCK_UNLOCKED { 1 } +# define __ARCH_SPIN_LOCK_UNLOCKED { 1 } #else volatile unsigned int lock[4]; -# define __RAW_SPIN_LOCK_UNLOCKED { { 1, 1, 1, 1 } } +# define __ARCH_SPIN_LOCK_UNLOCKED { { 1, 1, 1, 1 } } #endif } arch_spinlock_t; @@ -16,6 +16,6 @@ typedef struct { volatile int counter; } raw_rwlock_t; -#define __RAW_RW_LOCK_UNLOCKED { __RAW_SPIN_LOCK_UNLOCKED, 0 } +#define __RAW_RW_LOCK_UNLOCKED { __ARCH_SPIN_LOCK_UNLOCKED, 0 } #endif diff --git a/arch/parisc/lib/bitops.c b/arch/parisc/lib/bitops.c index fdd7f583de54..353963d42059 100644 --- a/arch/parisc/lib/bitops.c +++ b/arch/parisc/lib/bitops.c @@ -13,7 +13,7 @@ #ifdef CONFIG_SMP arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned = { - [0 ... (ATOMIC_HASH_SIZE-1)] = __RAW_SPIN_LOCK_UNLOCKED + [0 ... (ATOMIC_HASH_SIZE-1)] = __ARCH_SPIN_LOCK_UNLOCKED }; #endif diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h index 4312e5baaf88..f5f39d82711f 100644 --- a/arch/powerpc/include/asm/spinlock_types.h +++ b/arch/powerpc/include/asm/spinlock_types.h @@ -9,7 +9,7 @@ typedef struct { volatile unsigned int slock; } arch_spinlock_t; -#define __RAW_SPIN_LOCK_UNLOCKED { 0 } +#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } typedef struct { volatile signed int lock; diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 579069c12152..57dfa414cfb8 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -42,7 +42,7 @@ #include struct rtas_t rtas = { - .lock = __RAW_SPIN_LOCK_UNLOCKED + .lock = __ARCH_SPIN_LOCK_UNLOCKED }; EXPORT_SYMBOL(rtas); diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h index a93638eee3f7..e25c0370f6cd 100644 --- a/arch/s390/include/asm/spinlock_types.h +++ b/arch/s390/include/asm/spinlock_types.h @@ -9,7 +9,7 @@ typedef struct { volatile unsigned int owner_cpu; } __attribute__ ((aligned (4))) arch_spinlock_t; -#define __RAW_SPIN_LOCK_UNLOCKED { 0 } +#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } typedef struct { volatile unsigned int lock; diff --git a/arch/sh/include/asm/spinlock_types.h b/arch/sh/include/asm/spinlock_types.h index 37712c32ba99..a3be2db960ed 100644 --- a/arch/sh/include/asm/spinlock_types.h +++ b/arch/sh/include/asm/spinlock_types.h @@ -9,7 +9,7 @@ typedef struct { volatile unsigned int lock; } arch_spinlock_t; -#define __RAW_SPIN_LOCK_UNLOCKED { 1 } +#define __ARCH_SPIN_LOCK_UNLOCKED { 1 } typedef struct { volatile unsigned int lock; diff --git a/arch/sparc/include/asm/spinlock_types.h b/arch/sparc/include/asm/spinlock_types.h index 41d9a8fec13d..c145e63a5d66 100644 --- a/arch/sparc/include/asm/spinlock_types.h +++ b/arch/sparc/include/asm/spinlock_types.h @@ -9,7 +9,7 @@ typedef struct { volatile unsigned char lock; } arch_spinlock_t; -#define __RAW_SPIN_LOCK_UNLOCKED { 0 } +#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } typedef struct { volatile unsigned int lock; diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h index 2ae7637ed524..696f8364a4f3 100644 --- a/arch/x86/include/asm/spinlock_types.h +++ b/arch/x86/include/asm/spinlock_types.h @@ -9,7 +9,7 @@ typedef struct arch_spinlock { unsigned int slock; } arch_spinlock_t; -#define __RAW_SPIN_LOCK_UNLOCKED { 0 } +#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } typedef struct { unsigned int lock; diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 0862d9d89c92..5b75afac8a38 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -188,7 +188,7 @@ void dump_stack(void) } EXPORT_SYMBOL(dump_stack); -static arch_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; +static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; static int die_owner = -1; static unsigned int die_nest_count; diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 9f908b9d1abe..f1714697a09a 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -33,7 +33,7 @@ static __cpuinitdata atomic_t stop_count; * we want to have the fastest, inlined, non-debug version * of a critical section, to be able to prove TSC time-warps: */ -static __cpuinitdata arch_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED; +static __cpuinitdata arch_spinlock_t sync_lock = __ARCH_SPIN_LOCK_UNLOCKED; static __cpuinitdata cycles_t last_tsc; static __cpuinitdata cycles_t max_warp; diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h index d4af2d7a86ea..7dadce303ebf 100644 --- a/include/linux/spinlock_types.h +++ b/include/linux/spinlock_types.h @@ -43,14 +43,14 @@ typedef struct { #ifdef CONFIG_DEBUG_SPINLOCK # define __SPIN_LOCK_UNLOCKED(lockname) \ - (spinlock_t) { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED, \ + (spinlock_t) { .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ .magic = SPINLOCK_MAGIC, \ .owner = SPINLOCK_OWNER_INIT, \ .owner_cpu = -1, \ SPIN_DEP_MAP_INIT(lockname) } #else # define __SPIN_LOCK_UNLOCKED(lockname) \ - (spinlock_t) { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED, \ + (spinlock_t) { .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ SPIN_DEP_MAP_INIT(lockname) } #endif diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h index 34d36691c4ec..10db021f4875 100644 --- a/include/linux/spinlock_types_up.h +++ b/include/linux/spinlock_types_up.h @@ -18,13 +18,13 @@ typedef struct { volatile unsigned int slock; } arch_spinlock_t; -#define __RAW_SPIN_LOCK_UNLOCKED { 1 } +#define __ARCH_SPIN_LOCK_UNLOCKED { 1 } #else typedef struct { } arch_spinlock_t; -#define __RAW_SPIN_LOCK_UNLOCKED { } +#define __ARCH_SPIN_LOCK_UNLOCKED { } #endif diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 7cc50c62af59..2389e3f85cf6 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -73,7 +73,7 @@ module_param(lock_stat, int, 0644); * to use a raw spinlock - we really dont want the spinlock * code to recurse back into the lockdep code... */ -static arch_spinlock_t lockdep_lock = (arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; +static arch_spinlock_t lockdep_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; static int graph_lock(void) { diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 5ac8ee0a9e35..fb7a0fa508b9 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -998,7 +998,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) cpu_buffer->buffer = buffer; spin_lock_init(&cpu_buffer->reader_lock); lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); - cpu_buffer->lock = (arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), GFP_KERNEL, cpu_to_node(cpu)); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 7d56cecc2c6e..63bc1cc38219 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -501,7 +501,7 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) * CONFIG_TRACER_MAX_TRACE. */ static arch_spinlock_t ftrace_max_lock = - (arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; #ifdef CONFIG_TRACER_MAX_TRACE unsigned long __read_mostly tracing_max_latency; @@ -802,7 +802,7 @@ static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; static int cmdline_idx; -static arch_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED; +static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED; /* temporary disable recording */ static atomic_t trace_record_cmdline_disabled __read_mostly; @@ -1252,7 +1252,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) { static arch_spinlock_t trace_buf_lock = - (arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; static u32 trace_buf[TRACE_BUF_SIZE]; struct ftrace_event_call *call = &event_bprint; @@ -1334,7 +1334,7 @@ int trace_array_printk(struct trace_array *tr, int trace_array_vprintk(struct trace_array *tr, unsigned long ip, const char *fmt, va_list args) { - static arch_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; + static arch_spinlock_t trace_buf_lock = __ARCH_SPIN_LOCK_UNLOCKED; static char trace_buf[TRACE_BUF_SIZE]; struct ftrace_event_call *call = &event_print; @@ -4308,7 +4308,7 @@ trace_printk_seq(struct trace_seq *s) static void __ftrace_dump(bool disable_tracing) { static arch_spinlock_t ftrace_dump_lock = - (arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; /* use static because iter can be a bit big for the stack */ static struct trace_iterator iter; unsigned int old_userobj; diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 206ec3d4b3c2..433e2eda2d01 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -74,7 +74,7 @@ static struct { arch_spinlock_t lock; } trace_clock_struct ____cacheline_aligned_in_smp = { - .lock = (arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED, + .lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED, }; u64 notrace trace_clock_global(void) diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 4cf7e83ec235..e347853564e9 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -29,7 +29,7 @@ static unsigned wakeup_prio = -1; static int wakeup_rt; static arch_spinlock_t wakeup_lock = - (arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; static void __wakeup_reset(struct trace_array *tr); diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 9a82d568fdec..728c35221483 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -28,7 +28,7 @@ static struct stack_trace max_stack_trace = { static unsigned long max_stack_size; static arch_spinlock_t max_stack_lock = - (arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; static int stack_trace_disabled __read_mostly; static DEFINE_PER_CPU(int, trace_active); diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c index 2acd501b3826..f73004137141 100644 --- a/lib/spinlock_debug.c +++ b/lib/spinlock_debug.c @@ -23,7 +23,7 @@ void __spin_lock_init(spinlock_t *lock, const char *name, debug_check_no_locks_freed((void *)lock, sizeof(*lock)); lockdep_init_map(&lock->dep_map, name, key, 0); #endif - lock->raw_lock = (arch_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + lock->raw_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; lock->magic = SPINLOCK_MAGIC; lock->owner = SPINLOCK_OWNER_INIT; lock->owner_cpu = -1; -- cgit v1.2.3 From 0199c4e68d1f02894bdefe4b5d9e9ee4aedd8d62 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 2 Dec 2009 20:01:25 +0100 Subject: locking: Convert __raw_spin* functions to arch_spin* Name space cleanup. No functional change. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Acked-by: David S. Miller Acked-by: Ingo Molnar Cc: linux-arch@vger.kernel.org --- arch/alpha/include/asm/spinlock.h | 18 ++++++------ arch/arm/include/asm/spinlock.h | 20 ++++++------- arch/blackfin/include/asm/spinlock.h | 20 ++++++------- arch/cris/include/arch-v32/arch/spinlock.h | 46 +++++++++++++++--------------- arch/ia64/include/asm/bitops.h | 2 +- arch/ia64/include/asm/spinlock.h | 26 ++++++++--------- arch/m32r/include/asm/spinlock.h | 28 +++++++++--------- arch/mips/include/asm/spinlock.h | 36 +++++++++++------------ arch/parisc/include/asm/atomic.h | 4 +-- arch/parisc/include/asm/spinlock.h | 44 ++++++++++++++-------------- arch/powerpc/include/asm/spinlock.h | 32 ++++++++++----------- arch/powerpc/kernel/rtas.c | 12 ++++---- arch/powerpc/lib/locks.c | 4 +-- arch/powerpc/platforms/pasemi/setup.c | 8 +++--- arch/s390/include/asm/spinlock.h | 34 +++++++++++----------- arch/s390/lib/spinlock.c | 22 +++++++------- arch/sh/include/asm/spinlock.h | 26 ++++++++--------- arch/sparc/include/asm/spinlock_32.h | 20 ++++++------- arch/sparc/include/asm/spinlock_64.h | 18 ++++++------ arch/x86/include/asm/paravirt.h | 14 ++++----- arch/x86/include/asm/spinlock.h | 26 ++++++++--------- arch/x86/kernel/dumpstack.c | 6 ++-- arch/x86/kernel/paravirt-spinlocks.c | 2 +- arch/x86/kernel/tsc_sync.c | 8 +++--- include/asm-generic/bitops/atomic.h | 4 +-- include/linux/spinlock.h | 22 +++++++------- include/linux/spinlock_up.h | 26 ++++++++--------- kernel/lockdep.c | 18 ++++++------ kernel/mutex-debug.h | 4 +-- kernel/spinlock.c | 4 +-- kernel/trace/ring_buffer.c | 12 ++++---- kernel/trace/trace.c | 32 ++++++++++----------- kernel/trace/trace_clock.c | 4 +-- kernel/trace/trace_sched_wakeup.c | 12 ++++---- kernel/trace/trace_selftest.c | 4 +-- kernel/trace/trace_stack.c | 12 ++++---- lib/spinlock_debug.c | 8 +++--- 37 files changed, 319 insertions(+), 319 deletions(-) (limited to 'arch/x86') diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h index bdb26a1940b4..4dac79f504c3 100644 --- a/arch/alpha/include/asm/spinlock.h +++ b/arch/alpha/include/asm/spinlock.h @@ -12,18 +12,18 @@ * We make no fairness assumptions. They have a cost. */ -#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) -#define __raw_spin_is_locked(x) ((x)->lock != 0) -#define __raw_spin_unlock_wait(x) \ +#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) +#define arch_spin_is_locked(x) ((x)->lock != 0) +#define arch_spin_unlock_wait(x) \ do { cpu_relax(); } while ((x)->lock) -static inline void __raw_spin_unlock(arch_spinlock_t * lock) +static inline void arch_spin_unlock(arch_spinlock_t * lock) { mb(); lock->lock = 0; } -static inline void __raw_spin_lock(arch_spinlock_t * lock) +static inline void arch_spin_lock(arch_spinlock_t * lock) { long tmp; @@ -43,7 +43,7 @@ static inline void __raw_spin_lock(arch_spinlock_t * lock) : "m"(lock->lock) : "memory"); } -static inline int __raw_spin_trylock(arch_spinlock_t *lock) +static inline int arch_spin_trylock(arch_spinlock_t *lock) { return !test_and_set_bit(0, &lock->lock); } @@ -169,8 +169,8 @@ static inline void __raw_write_unlock(raw_rwlock_t * lock) #define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock) #define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock) -#define _raw_spin_relax(lock) cpu_relax() -#define _raw_read_relax(lock) cpu_relax() -#define _raw_write_relax(lock) cpu_relax() +#define arch_spin_relax(lock) cpu_relax() +#define arch_read_relax(lock) cpu_relax() +#define arch_write_relax(lock) cpu_relax() #endif /* _ALPHA_SPINLOCK_H */ diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index 4e7712ee9394..de62eb098f68 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -17,13 +17,13 @@ * Locked value: 1 */ -#define __raw_spin_is_locked(x) ((x)->lock != 0) -#define __raw_spin_unlock_wait(lock) \ - do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0) +#define arch_spin_is_locked(x) ((x)->lock != 0) +#define arch_spin_unlock_wait(lock) \ + do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) -#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) +#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -static inline void __raw_spin_lock(arch_spinlock_t *lock) +static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned long tmp; @@ -43,7 +43,7 @@ static inline void __raw_spin_lock(arch_spinlock_t *lock) smp_mb(); } -static inline int __raw_spin_trylock(arch_spinlock_t *lock) +static inline int arch_spin_trylock(arch_spinlock_t *lock) { unsigned long tmp; @@ -63,7 +63,7 @@ static inline int __raw_spin_trylock(arch_spinlock_t *lock) } } -static inline void __raw_spin_unlock(arch_spinlock_t *lock) +static inline void arch_spin_unlock(arch_spinlock_t *lock) { smp_mb(); @@ -220,8 +220,8 @@ static inline int __raw_read_trylock(raw_rwlock_t *rw) #define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock) #define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock) -#define _raw_spin_relax(lock) cpu_relax() -#define _raw_read_relax(lock) cpu_relax() -#define _raw_write_relax(lock) cpu_relax() +#define arch_spin_relax(lock) cpu_relax() +#define arch_read_relax(lock) cpu_relax() +#define arch_write_relax(lock) cpu_relax() #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/blackfin/include/asm/spinlock.h b/arch/blackfin/include/asm/spinlock.h index fc16b4c5309b..62d49540e02b 100644 --- a/arch/blackfin/include/asm/spinlock.h +++ b/arch/blackfin/include/asm/spinlock.h @@ -24,31 +24,31 @@ asmlinkage void __raw_write_lock_asm(volatile int *ptr); asmlinkage int __raw_write_trylock_asm(volatile int *ptr); asmlinkage void __raw_write_unlock_asm(volatile int *ptr); -static inline int __raw_spin_is_locked(arch_spinlock_t *lock) +static inline int arch_spin_is_locked(arch_spinlock_t *lock) { return __raw_spin_is_locked_asm(&lock->lock); } -static inline void __raw_spin_lock(arch_spinlock_t *lock) +static inline void arch_spin_lock(arch_spinlock_t *lock) { __raw_spin_lock_asm(&lock->lock); } -#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) +#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -static inline int __raw_spin_trylock(arch_spinlock_t *lock) +static inline int arch_spin_trylock(arch_spinlock_t *lock) { return __raw_spin_trylock_asm(&lock->lock); } -static inline void __raw_spin_unlock(arch_spinlock_t *lock) +static inline void arch_spin_unlock(arch_spinlock_t *lock) { __raw_spin_unlock_asm(&lock->lock); } -static inline void __raw_spin_unlock_wait(arch_spinlock_t *lock) +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) { - while (__raw_spin_is_locked(lock)) + while (arch_spin_is_locked(lock)) cpu_relax(); } @@ -92,9 +92,9 @@ static inline void __raw_write_unlock(raw_rwlock_t *rw) __raw_write_unlock_asm(&rw->lock); } -#define _raw_spin_relax(lock) cpu_relax() -#define _raw_read_relax(lock) cpu_relax() -#define _raw_write_relax(lock) cpu_relax() +#define arch_spin_relax(lock) cpu_relax() +#define arch_read_relax(lock) cpu_relax() +#define arch_write_relax(lock) cpu_relax() #endif diff --git a/arch/cris/include/arch-v32/arch/spinlock.h b/arch/cris/include/arch-v32/arch/spinlock.h index e253457765f2..a2e8a394d555 100644 --- a/arch/cris/include/arch-v32/arch/spinlock.h +++ b/arch/cris/include/arch-v32/arch/spinlock.h @@ -9,12 +9,12 @@ extern void cris_spin_unlock(void *l, int val); extern void cris_spin_lock(void *l); extern int cris_spin_trylock(void *l); -static inline int __raw_spin_is_locked(arch_spinlock_t *x) +static inline int arch_spin_is_locked(arch_spinlock_t *x) { return *(volatile signed char *)(&(x)->slock) <= 0; } -static inline void __raw_spin_unlock(arch_spinlock_t *lock) +static inline void arch_spin_unlock(arch_spinlock_t *lock) { __asm__ volatile ("move.d %1,%0" \ : "=m" (lock->slock) \ @@ -22,26 +22,26 @@ static inline void __raw_spin_unlock(arch_spinlock_t *lock) : "memory"); } -static inline void __raw_spin_unlock_wait(arch_spinlock_t *lock) +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) { - while (__raw_spin_is_locked(lock)) + while (arch_spin_is_locked(lock)) cpu_relax(); } -static inline int __raw_spin_trylock(arch_spinlock_t *lock) +static inline int arch_spin_trylock(arch_spinlock_t *lock) { return cris_spin_trylock((void *)&lock->slock); } -static inline void __raw_spin_lock(arch_spinlock_t *lock) +static inline void arch_spin_lock(arch_spinlock_t *lock) { cris_spin_lock((void *)&lock->slock); } static inline void -__raw_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) +arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) { - __raw_spin_lock(lock); + arch_spin_lock(lock); } /* @@ -68,64 +68,64 @@ static inline int __raw_write_can_lock(raw_rwlock_t *x) static inline void __raw_read_lock(raw_rwlock_t *rw) { - __raw_spin_lock(&rw->slock); + arch_spin_lock(&rw->slock); while (rw->lock == 0); rw->lock--; - __raw_spin_unlock(&rw->slock); + arch_spin_unlock(&rw->slock); } static inline void __raw_write_lock(raw_rwlock_t *rw) { - __raw_spin_lock(&rw->slock); + arch_spin_lock(&rw->slock); while (rw->lock != RW_LOCK_BIAS); rw->lock = 0; - __raw_spin_unlock(&rw->slock); + arch_spin_unlock(&rw->slock); } static inline void __raw_read_unlock(raw_rwlock_t *rw) { - __raw_spin_lock(&rw->slock); + arch_spin_lock(&rw->slock); rw->lock++; - __raw_spin_unlock(&rw->slock); + arch_spin_unlock(&rw->slock); } static inline void __raw_write_unlock(raw_rwlock_t *rw) { - __raw_spin_lock(&rw->slock); + arch_spin_lock(&rw->slock); while (rw->lock != RW_LOCK_BIAS); rw->lock = RW_LOCK_BIAS; - __raw_spin_unlock(&rw->slock); + arch_spin_unlock(&rw->slock); } static inline int __raw_read_trylock(raw_rwlock_t *rw) { int ret = 0; - __raw_spin_lock(&rw->slock); + arch_spin_lock(&rw->slock); if (rw->lock != 0) { rw->lock--; ret = 1; } - __raw_spin_unlock(&rw->slock); + arch_spin_unlock(&rw->slock); return ret; } static inline int __raw_write_trylock(raw_rwlock_t *rw) { int ret = 0; - __raw_spin_lock(&rw->slock); + arch_spin_lock(&rw->slock); if (rw->lock == RW_LOCK_BIAS) { rw->lock = 0; ret = 1; } - __raw_spin_unlock(&rw->slock); + arch_spin_unlock(&rw->slock); return 1; } #define _raw_read_lock_flags(lock, flags) _raw_read_lock(lock) #define _raw_write_lock_flags(lock, flags) _raw_write_lock(lock) -#define _raw_spin_relax(lock) cpu_relax() -#define _raw_read_relax(lock) cpu_relax() -#define _raw_write_relax(lock) cpu_relax() +#define arch_spin_relax(lock) cpu_relax() +#define arch_read_relax(lock) cpu_relax() +#define arch_write_relax(lock) cpu_relax() #endif /* __ASM_ARCH_SPINLOCK_H */ diff --git a/arch/ia64/include/asm/bitops.h b/arch/ia64/include/asm/bitops.h index 57a2787bc9fb..6ebc229a1c51 100644 --- a/arch/ia64/include/asm/bitops.h +++ b/arch/ia64/include/asm/bitops.h @@ -127,7 +127,7 @@ clear_bit_unlock (int nr, volatile void *addr) * @addr: Address to start counting from * * Similarly to clear_bit_unlock, the implementation uses a store - * with release semantics. See also __raw_spin_unlock(). + * with release semantics. See also arch_spin_unlock(). */ static __inline__ void __clear_bit_unlock(int nr, void *addr) diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h index 9fbdf7e61087..b06165f6352f 100644 --- a/arch/ia64/include/asm/spinlock.h +++ b/arch/ia64/include/asm/spinlock.h @@ -17,7 +17,7 @@ #include #include -#define __raw_spin_lock_init(x) ((x)->lock = 0) +#define arch_spin_lock_init(x) ((x)->lock = 0) /* * Ticket locks are conceptually two parts, one indicating the current head of @@ -103,39 +103,39 @@ static inline int __ticket_spin_is_contended(arch_spinlock_t *lock) return ((tmp - (tmp >> TICKET_SHIFT)) & TICKET_MASK) > 1; } -static inline int __raw_spin_is_locked(arch_spinlock_t *lock) +static inline int arch_spin_is_locked(arch_spinlock_t *lock) { return __ticket_spin_is_locked(lock); } -static inline int __raw_spin_is_contended(arch_spinlock_t *lock) +static inline int arch_spin_is_contended(arch_spinlock_t *lock) { return __ticket_spin_is_contended(lock); } -#define __raw_spin_is_contended __raw_spin_is_contended +#define arch_spin_is_contended arch_spin_is_contended -static __always_inline void __raw_spin_lock(arch_spinlock_t *lock) +static __always_inline void arch_spin_lock(arch_spinlock_t *lock) { __ticket_spin_lock(lock); } -static __always_inline int __raw_spin_trylock(arch_spinlock_t *lock) +static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) { return __ticket_spin_trylock(lock); } -static __always_inline void __raw_spin_unlock(arch_spinlock_t *lock) +static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) { __ticket_spin_unlock(lock); } -static __always_inline void __raw_spin_lock_flags(arch_spinlock_t *lock, +static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) { - __raw_spin_lock(lock); + arch_spin_lock(lock); } -static inline void __raw_spin_unlock_wait(arch_spinlock_t *lock) +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) { __ticket_spin_unlock_wait(lock); } @@ -285,8 +285,8 @@ static inline int __raw_read_trylock(raw_rwlock_t *x) return (u32)ia64_cmpxchg4_acq((__u32 *)(x), new.word, old.word) == old.word; } -#define _raw_spin_relax(lock) cpu_relax() -#define _raw_read_relax(lock) cpu_relax() -#define _raw_write_relax(lock) cpu_relax() +#define arch_spin_relax(lock) cpu_relax() +#define arch_read_relax(lock) cpu_relax() +#define arch_write_relax(lock) cpu_relax() #endif /* _ASM_IA64_SPINLOCK_H */ diff --git a/arch/m32r/include/asm/spinlock.h b/arch/m32r/include/asm/spinlock.h index 0c0164225bc0..8acac950a43c 100644 --- a/arch/m32r/include/asm/spinlock.h +++ b/arch/m32r/include/asm/spinlock.h @@ -24,19 +24,19 @@ * We make no fairness assumptions. They have a cost. */ -#define __raw_spin_is_locked(x) (*(volatile int *)(&(x)->slock) <= 0) -#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) -#define __raw_spin_unlock_wait(x) \ - do { cpu_relax(); } while (__raw_spin_is_locked(x)) +#define arch_spin_is_locked(x) (*(volatile int *)(&(x)->slock) <= 0) +#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) +#define arch_spin_unlock_wait(x) \ + do { cpu_relax(); } while (arch_spin_is_locked(x)) /** - * __raw_spin_trylock - Try spin lock and return a result + * arch_spin_trylock - Try spin lock and return a result * @lock: Pointer to the lock variable * - * __raw_spin_trylock() tries to get the lock and returns a result. + * arch_spin_trylock() tries to get the lock and returns a result. * On the m32r, the result value is 1 (= Success) or 0 (= Failure). */ -static inline int __raw_spin_trylock(arch_spinlock_t *lock) +static inline int arch_spin_trylock(arch_spinlock_t *lock) { int oldval; unsigned long tmp1, tmp2; @@ -50,7 +50,7 @@ static inline int __raw_spin_trylock(arch_spinlock_t *lock) * } */ __asm__ __volatile__ ( - "# __raw_spin_trylock \n\t" + "# arch_spin_trylock \n\t" "ldi %1, #0; \n\t" "mvfc %2, psw; \n\t" "clrpsw #0x40 -> nop; \n\t" @@ -69,7 +69,7 @@ static inline int __raw_spin_trylock(arch_spinlock_t *lock) return (oldval > 0); } -static inline void __raw_spin_lock(arch_spinlock_t *lock) +static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned long tmp0, tmp1; @@ -84,7 +84,7 @@ static inline void __raw_spin_lock(arch_spinlock_t *lock) * } */ __asm__ __volatile__ ( - "# __raw_spin_lock \n\t" + "# arch_spin_lock \n\t" ".fillinsn \n" "1: \n\t" "mvfc %1, psw; \n\t" @@ -111,7 +111,7 @@ static inline void __raw_spin_lock(arch_spinlock_t *lock) ); } -static inline void __raw_spin_unlock(arch_spinlock_t *lock) +static inline void arch_spin_unlock(arch_spinlock_t *lock) { mb(); lock->slock = 1; @@ -319,8 +319,8 @@ static inline int __raw_write_trylock(raw_rwlock_t *lock) #define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock) #define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock) -#define _raw_spin_relax(lock) cpu_relax() -#define _raw_read_relax(lock) cpu_relax() -#define _raw_write_relax(lock) cpu_relax() +#define arch_spin_relax(lock) cpu_relax() +#define arch_read_relax(lock) cpu_relax() +#define arch_write_relax(lock) cpu_relax() #endif /* _ASM_M32R_SPINLOCK_H */ diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h index 0f16d0673b4a..95edebaaf22a 100644 --- a/arch/mips/include/asm/spinlock.h +++ b/arch/mips/include/asm/spinlock.h @@ -34,33 +34,33 @@ * becomes equal to the the initial value of the tail. */ -static inline int __raw_spin_is_locked(arch_spinlock_t *lock) +static inline int arch_spin_is_locked(arch_spinlock_t *lock) { unsigned int counters = ACCESS_ONCE(lock->lock); return ((counters >> 14) ^ counters) & 0x1fff; } -#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) -#define __raw_spin_unlock_wait(x) \ - while (__raw_spin_is_locked(x)) { cpu_relax(); } +#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) +#define arch_spin_unlock_wait(x) \ + while (arch_spin_is_locked(x)) { cpu_relax(); } -static inline int __raw_spin_is_contended(arch_spinlock_t *lock) +static inline int arch_spin_is_contended(arch_spinlock_t *lock) { unsigned int counters = ACCESS_ONCE(lock->lock); return (((counters >> 14) - counters) & 0x1fff) > 1; } -#define __raw_spin_is_contended __raw_spin_is_contended +#define arch_spin_is_contended arch_spin_is_contended -static inline void __raw_spin_lock(arch_spinlock_t *lock) +static inline void arch_spin_lock(arch_spinlock_t *lock) { int my_ticket; int tmp; if (R10000_LLSC_WAR) { __asm__ __volatile__ ( - " .set push # __raw_spin_lock \n" + " .set push # arch_spin_lock \n" " .set noreorder \n" " \n" "1: ll %[ticket], %[ticket_ptr] \n" @@ -94,7 +94,7 @@ static inline void __raw_spin_lock(arch_spinlock_t *lock) [my_ticket] "=&r" (my_ticket)); } else { __asm__ __volatile__ ( - " .set push # __raw_spin_lock \n" + " .set push # arch_spin_lock \n" " .set noreorder \n" " \n" " ll %[ticket], %[ticket_ptr] \n" @@ -134,7 +134,7 @@ static inline void __raw_spin_lock(arch_spinlock_t *lock) smp_llsc_mb(); } -static inline void __raw_spin_unlock(arch_spinlock_t *lock) +static inline void arch_spin_unlock(arch_spinlock_t *lock) { int tmp; @@ -142,7 +142,7 @@ static inline void __raw_spin_unlock(arch_spinlock_t *lock) if (R10000_LLSC_WAR) { __asm__ __volatile__ ( - " # __raw_spin_unlock \n" + " # arch_spin_unlock \n" "1: ll %[ticket], %[ticket_ptr] \n" " addiu %[ticket], %[ticket], 1 \n" " ori %[ticket], %[ticket], 0x2000 \n" @@ -153,7 +153,7 @@ static inline void __raw_spin_unlock(arch_spinlock_t *lock) [ticket] "=&r" (tmp)); } else { __asm__ __volatile__ ( - " .set push # __raw_spin_unlock \n" + " .set push # arch_spin_unlock \n" " .set noreorder \n" " \n" " ll %[ticket], %[ticket_ptr] \n" @@ -174,13 +174,13 @@ static inline void __raw_spin_unlock(arch_spinlock_t *lock) } } -static inline unsigned int __raw_spin_trylock(arch_spinlock_t *lock) +static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) { int tmp, tmp2, tmp3; if (R10000_LLSC_WAR) { __asm__ __volatile__ ( - " .set push # __raw_spin_trylock \n" + " .set push # arch_spin_trylock \n" " .set noreorder \n" " \n" "1: ll %[ticket], %[ticket_ptr] \n" @@ -204,7 +204,7 @@ static inline unsigned int __raw_spin_trylock(arch_spinlock_t *lock) [now_serving] "=&r" (tmp3)); } else { __asm__ __volatile__ ( - " .set push # __raw_spin_trylock \n" + " .set push # arch_spin_trylock \n" " .set noreorder \n" " \n" " ll %[ticket], %[ticket_ptr] \n" @@ -483,8 +483,8 @@ static inline int __raw_write_trylock(raw_rwlock_t *rw) #define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock) #define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock) -#define _raw_spin_relax(lock) cpu_relax() -#define _raw_read_relax(lock) cpu_relax() -#define _raw_write_relax(lock) cpu_relax() +#define arch_spin_relax(lock) cpu_relax() +#define arch_read_relax(lock) cpu_relax() +#define arch_write_relax(lock) cpu_relax() #endif /* _ASM_SPINLOCK_H */ diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 3a4ea778d4b6..716634d1f546 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -34,12 +34,12 @@ extern arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned; #define _atomic_spin_lock_irqsave(l,f) do { \ arch_spinlock_t *s = ATOMIC_HASH(l); \ local_irq_save(f); \ - __raw_spin_lock(s); \ + arch_spin_lock(s); \ } while(0) #define _atomic_spin_unlock_irqrestore(l,f) do { \ arch_spinlock_t *s = ATOMIC_HASH(l); \ - __raw_spin_unlock(s); \ + arch_spin_unlock(s); \ local_irq_restore(f); \ } while(0) diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h index 69e8dca26744..235e7e386e2a 100644 --- a/arch/parisc/include/asm/spinlock.h +++ b/arch/parisc/include/asm/spinlock.h @@ -5,17 +5,17 @@ #include #include -static inline int __raw_spin_is_locked(arch_spinlock_t *x) +static inline int arch_spin_is_locked(arch_spinlock_t *x) { volatile unsigned int *a = __ldcw_align(x); return *a == 0; } -#define __raw_spin_lock(lock) __raw_spin_lock_flags(lock, 0) -#define __raw_spin_unlock_wait(x) \ - do { cpu_relax(); } while (__raw_spin_is_locked(x)) +#define arch_spin_lock(lock) arch_spin_lock_flags(lock, 0) +#define arch_spin_unlock_wait(x) \ + do { cpu_relax(); } while (arch_spin_is_locked(x)) -static inline void __raw_spin_lock_flags(arch_spinlock_t *x, +static inline void arch_spin_lock_flags(arch_spinlock_t *x, unsigned long flags) { volatile unsigned int *a; @@ -33,7 +33,7 @@ static inline void __raw_spin_lock_flags(arch_spinlock_t *x, mb(); } -static inline void __raw_spin_unlock(arch_spinlock_t *x) +static inline void arch_spin_unlock(arch_spinlock_t *x) { volatile unsigned int *a; mb(); @@ -42,7 +42,7 @@ static inline void __raw_spin_unlock(arch_spinlock_t *x) mb(); } -static inline int __raw_spin_trylock(arch_spinlock_t *x) +static inline int arch_spin_trylock(arch_spinlock_t *x) { volatile unsigned int *a; int ret; @@ -73,9 +73,9 @@ static __inline__ void __raw_read_lock(raw_rwlock_t *rw) { unsigned long flags; local_irq_save(flags); - __raw_spin_lock_flags(&rw->lock, flags); + arch_spin_lock_flags(&rw->lock, flags); rw->counter++; - __raw_spin_unlock(&rw->lock); + arch_spin_unlock(&rw->lock); local_irq_restore(flags); } @@ -85,9 +85,9 @@ static __inline__ void __raw_read_unlock(raw_rwlock_t *rw) { unsigned long flags; local_irq_save(flags); - __raw_spin_lock_flags(&rw->lock, flags); + arch_spin_lock_flags(&rw->lock, flags); rw->counter--; - __raw_spin_unlock(&rw->lock); + arch_spin_unlock(&rw->lock); local_irq_restore(flags); } @@ -98,9 +98,9 @@ static __inline__ int __raw_read_trylock(raw_rwlock_t *rw) unsigned long flags; retry: local_irq_save(flags); - if (__raw_spin_trylock(&rw->lock)) { + if (arch_spin_trylock(&rw->lock)) { rw->counter++; - __raw_spin_unlock(&rw->lock); + arch_spin_unlock(&rw->lock); local_irq_restore(flags); return 1; } @@ -111,7 +111,7 @@ static __inline__ int __raw_read_trylock(raw_rwlock_t *rw) return 0; /* Wait until we have a realistic chance at the lock */ - while (__raw_spin_is_locked(&rw->lock) && rw->counter >= 0) + while (arch_spin_is_locked(&rw->lock) && rw->counter >= 0) cpu_relax(); goto retry; @@ -124,10 +124,10 @@ static __inline__ void __raw_write_lock(raw_rwlock_t *rw) unsigned long flags; retry: local_irq_save(flags); - __raw_spin_lock_flags(&rw->lock, flags); + arch_spin_lock_flags(&rw->lock, flags); if (rw->counter != 0) { - __raw_spin_unlock(&rw->lock); + arch_spin_unlock(&rw->lock); local_irq_restore(flags); while (rw->counter != 0) @@ -144,7 +144,7 @@ retry: static __inline__ void __raw_write_unlock(raw_rwlock_t *rw) { rw->counter = 0; - __raw_spin_unlock(&rw->lock); + arch_spin_unlock(&rw->lock); } /* Note that we have to ensure interrupts are disabled in case we're @@ -155,13 +155,13 @@ static __inline__ int __raw_write_trylock(raw_rwlock_t *rw) int result = 0; local_irq_save(flags); - if (__raw_spin_trylock(&rw->lock)) { + if (arch_spin_trylock(&rw->lock)) { if (rw->counter == 0) { rw->counter = -1; result = 1; } else { /* Read-locked. Oh well. */ - __raw_spin_unlock(&rw->lock); + arch_spin_unlock(&rw->lock); } } local_irq_restore(flags); @@ -190,8 +190,8 @@ static __inline__ int __raw_write_can_lock(raw_rwlock_t *rw) #define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock) #define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock) -#define _raw_spin_relax(lock) cpu_relax() -#define _raw_read_relax(lock) cpu_relax() -#define _raw_write_relax(lock) cpu_relax() +#define arch_spin_relax(lock) cpu_relax() +#define arch_read_relax(lock) cpu_relax() +#define arch_write_relax(lock) cpu_relax() #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index c0d44c92ff0e..cdcaf6b97087 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -28,7 +28,7 @@ #include #include -#define __raw_spin_is_locked(x) ((x)->slock != 0) +#define arch_spin_is_locked(x) ((x)->slock != 0) #ifdef CONFIG_PPC64 /* use 0x800000yy when locked, where yy == CPU number */ @@ -54,7 +54,7 @@ * This returns the old value in the lock, so we succeeded * in getting the lock if the return value is 0. */ -static inline unsigned long arch_spin_trylock(arch_spinlock_t *lock) +static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock) { unsigned long tmp, token; @@ -73,10 +73,10 @@ static inline unsigned long arch_spin_trylock(arch_spinlock_t *lock) return tmp; } -static inline int __raw_spin_trylock(arch_spinlock_t *lock) +static inline int arch_spin_trylock(arch_spinlock_t *lock) { CLEAR_IO_SYNC; - return arch_spin_trylock(lock) == 0; + return __arch_spin_trylock(lock) == 0; } /* @@ -104,11 +104,11 @@ extern void __rw_yield(raw_rwlock_t *lock); #define SHARED_PROCESSOR 0 #endif -static inline void __raw_spin_lock(arch_spinlock_t *lock) +static inline void arch_spin_lock(arch_spinlock_t *lock) { CLEAR_IO_SYNC; while (1) { - if (likely(arch_spin_trylock(lock) == 0)) + if (likely(__arch_spin_trylock(lock) == 0)) break; do { HMT_low(); @@ -120,13 +120,13 @@ static inline void __raw_spin_lock(arch_spinlock_t *lock) } static inline -void __raw_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) +void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) { unsigned long flags_dis; CLEAR_IO_SYNC; while (1) { - if (likely(arch_spin_trylock(lock) == 0)) + if (likely(__arch_spin_trylock(lock) == 0)) break; local_save_flags(flags_dis); local_irq_restore(flags); @@ -140,19 +140,19 @@ void __raw_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) } } -static inline void __raw_spin_unlock(arch_spinlock_t *lock) +static inline void arch_spin_unlock(arch_spinlock_t *lock) { SYNC_IO; - __asm__ __volatile__("# __raw_spin_unlock\n\t" + __asm__ __volatile__("# arch_spin_unlock\n\t" LWSYNC_ON_SMP: : :"memory"); lock->slock = 0; } #ifdef CONFIG_PPC64 -extern void __raw_spin_unlock_wait(arch_spinlock_t *lock); +extern void arch_spin_unlock_wait(arch_spinlock_t *lock); #else -#define __raw_spin_unlock_wait(lock) \ - do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0) +#define arch_spin_unlock_wait(lock) \ + do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) #endif /* @@ -290,9 +290,9 @@ static inline void __raw_write_unlock(raw_rwlock_t *rw) #define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock) #define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock) -#define _raw_spin_relax(lock) __spin_yield(lock) -#define _raw_read_relax(lock) __rw_yield(lock) -#define _raw_write_relax(lock) __rw_yield(lock) +#define arch_spin_relax(lock) __spin_yield(lock) +#define arch_read_relax(lock) __rw_yield(lock) +#define arch_write_relax(lock) __rw_yield(lock) #endif /* __KERNEL__ */ #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 57dfa414cfb8..fd0d29493fd6 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -80,13 +80,13 @@ static unsigned long lock_rtas(void) local_irq_save(flags); preempt_disable(); - __raw_spin_lock_flags(&rtas.lock, flags); + arch_spin_lock_flags(&rtas.lock, flags); return flags; } static void unlock_rtas(unsigned long flags) { - __raw_spin_unlock(&rtas.lock); + arch_spin_unlock(&rtas.lock); local_irq_restore(flags); preempt_enable(); } @@ -987,10 +987,10 @@ void __cpuinit rtas_give_timebase(void) local_irq_save(flags); hard_irq_disable(); - __raw_spin_lock(&timebase_lock); + arch_spin_lock(&timebase_lock); rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL); timebase = get_tb(); - __raw_spin_unlock(&timebase_lock); + arch_spin_unlock(&timebase_lock); while (timebase) barrier(); @@ -1002,8 +1002,8 @@ void __cpuinit rtas_take_timebase(void) { while (!timebase) barrier(); - __raw_spin_lock(&timebase_lock); + arch_spin_lock(&timebase_lock); set_tb(timebase >> 32, timebase & 0xffffffff); timebase = 0; - __raw_spin_unlock(&timebase_lock); + arch_spin_unlock(&timebase_lock); } diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index b06294cde499..ee395e392115 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -82,7 +82,7 @@ void __rw_yield(raw_rwlock_t *rw) } #endif -void __raw_spin_unlock_wait(arch_spinlock_t *lock) +void arch_spin_unlock_wait(arch_spinlock_t *lock) { while (lock->slock) { HMT_low(); @@ -92,4 +92,4 @@ void __raw_spin_unlock_wait(arch_spinlock_t *lock) HMT_medium(); } -EXPORT_SYMBOL(__raw_spin_unlock_wait); +EXPORT_SYMBOL(arch_spin_unlock_wait); diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index be36fece41d7..242f8095c2df 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -80,11 +80,11 @@ static void __devinit pas_give_timebase(void) local_irq_save(flags); hard_irq_disable(); - __raw_spin_lock(&timebase_lock); + arch_spin_lock(&timebase_lock); mtspr(SPRN_TBCTL, TBCTL_FREEZE); isync(); timebase = get_tb(); - __raw_spin_unlock(&timebase_lock); + arch_spin_unlock(&timebase_lock); while (timebase) barrier(); @@ -97,10 +97,10 @@ static void __devinit pas_take_timebase(void) while (!timebase) smp_rmb(); - __raw_spin_lock(&timebase_lock); + arch_spin_lock(&timebase_lock); set_tb(timebase >> 32, timebase & 0xffffffff); timebase = 0; - __raw_spin_unlock(&timebase_lock); + arch_spin_unlock(&timebase_lock); } struct smp_ops_t pas_smp_ops = { diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 6121fa4b83d9..a94c146657a9 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -52,27 +52,27 @@ _raw_compare_and_swap(volatile unsigned int *lock, * (the type definitions are in asm/spinlock_types.h) */ -#define __raw_spin_is_locked(x) ((x)->owner_cpu != 0) -#define __raw_spin_unlock_wait(lock) \ - do { while (__raw_spin_is_locked(lock)) \ - _raw_spin_relax(lock); } while (0) +#define arch_spin_is_locked(x) ((x)->owner_cpu != 0) +#define arch_spin_unlock_wait(lock) \ + do { while (arch_spin_is_locked(lock)) \ + arch_spin_relax(lock); } while (0) -extern void _raw_spin_lock_wait(arch_spinlock_t *); -extern void _raw_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags); -extern int _raw_spin_trylock_retry(arch_spinlock_t *); -extern void _raw_spin_relax(arch_spinlock_t *lock); +extern void arch_spin_lock_wait(arch_spinlock_t *); +extern void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags); +extern int arch_spin_trylock_retry(arch_spinlock_t *); +extern void arch_spin_relax(arch_spinlock_t *lock); -static inline void __raw_spin_lock(arch_spinlock_t *lp) +static inline void arch_spin_lock(arch_spinlock_t *lp) { int old; old = _raw_compare_and_swap(&lp->owner_cpu, 0, ~smp_processor_id()); if (likely(old == 0)) return; - _raw_spin_lock_wait(lp); + arch_spin_lock_wait(lp); } -static inline void __raw_spin_lock_flags(arch_spinlock_t *lp, +static inline void arch_spin_lock_flags(arch_spinlock_t *lp, unsigned long flags) { int old; @@ -80,20 +80,20 @@ static inline void __raw_spin_lock_flags(arch_spinlock_t *lp, old = _raw_compare_and_swap(&lp->owner_cpu, 0, ~smp_processor_id()); if (likely(old == 0)) return; - _raw_spin_lock_wait_flags(lp, flags); + arch_spin_lock_wait_flags(lp, flags); } -static inline int __raw_spin_trylock(arch_spinlock_t *lp) +static inline int arch_spin_trylock(arch_spinlock_t *lp) { int old; old = _raw_compare_and_swap(&lp->owner_cpu, 0, ~smp_processor_id()); if (likely(old == 0)) return 1; - return _raw_spin_trylock_retry(lp); + return arch_spin_trylock_retry(lp); } -static inline void __raw_spin_unlock(arch_spinlock_t *lp) +static inline void arch_spin_unlock(arch_spinlock_t *lp) { _raw_compare_and_swap(&lp->owner_cpu, lp->owner_cpu, 0); } @@ -188,7 +188,7 @@ static inline int __raw_write_trylock(raw_rwlock_t *rw) return _raw_write_trylock_retry(rw); } -#define _raw_read_relax(lock) cpu_relax() -#define _raw_write_relax(lock) cpu_relax() +#define arch_read_relax(lock) cpu_relax() +#define arch_write_relax(lock) cpu_relax() #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index d4cbf71a6077..f4596452f072 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -39,7 +39,7 @@ static inline void _raw_yield_cpu(int cpu) _raw_yield(); } -void _raw_spin_lock_wait(arch_spinlock_t *lp) +void arch_spin_lock_wait(arch_spinlock_t *lp) { int count = spin_retry; unsigned int cpu = ~smp_processor_id(); @@ -51,15 +51,15 @@ void _raw_spin_lock_wait(arch_spinlock_t *lp) _raw_yield_cpu(~owner); count = spin_retry; } - if (__raw_spin_is_locked(lp)) + if (arch_spin_is_locked(lp)) continue; if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0) return; } } -EXPORT_SYMBOL(_raw_spin_lock_wait); +EXPORT_SYMBOL(arch_spin_lock_wait); -void _raw_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) +void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) { int count = spin_retry; unsigned int cpu = ~smp_processor_id(); @@ -72,7 +72,7 @@ void _raw_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) _raw_yield_cpu(~owner); count = spin_retry; } - if (__raw_spin_is_locked(lp)) + if (arch_spin_is_locked(lp)) continue; local_irq_disable(); if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0) @@ -80,30 +80,30 @@ void _raw_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) local_irq_restore(flags); } } -EXPORT_SYMBOL(_raw_spin_lock_wait_flags); +EXPORT_SYMBOL(arch_spin_lock_wait_flags); -int _raw_spin_trylock_retry(arch_spinlock_t *lp) +int arch_spin_trylock_retry(arch_spinlock_t *lp) { unsigned int cpu = ~smp_processor_id(); int count; for (count = spin_retry; count > 0; count--) { - if (__raw_spin_is_locked(lp)) + if (arch_spin_is_locked(lp)) continue; if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0) return 1; } return 0; } -EXPORT_SYMBOL(_raw_spin_trylock_retry); +EXPORT_SYMBOL(arch_spin_trylock_retry); -void _raw_spin_relax(arch_spinlock_t *lock) +void arch_spin_relax(arch_spinlock_t *lock) { unsigned int cpu = lock->owner_cpu; if (cpu != 0) _raw_yield_cpu(~cpu); } -EXPORT_SYMBOL(_raw_spin_relax); +EXPORT_SYMBOL(arch_spin_relax); void _raw_read_lock_wait(raw_rwlock_t *rw) { diff --git a/arch/sh/include/asm/spinlock.h b/arch/sh/include/asm/spinlock.h index 5a05b3fcefbe..da1c6491ed4b 100644 --- a/arch/sh/include/asm/spinlock.h +++ b/arch/sh/include/asm/spinlock.h @@ -23,10 +23,10 @@ * Your basic SMP spinlocks, allowing only a single CPU anywhere */ -#define __raw_spin_is_locked(x) ((x)->lock <= 0) -#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) -#define __raw_spin_unlock_wait(x) \ - do { while (__raw_spin_is_locked(x)) cpu_relax(); } while (0) +#define arch_spin_is_locked(x) ((x)->lock <= 0) +#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) +#define arch_spin_unlock_wait(x) \ + do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0) /* * Simple spin lock operations. There are two variants, one clears IRQ's @@ -34,14 +34,14 @@ * * We make no fairness assumptions. They have a cost. */ -static inline void __raw_spin_lock(arch_spinlock_t *lock) +static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned long tmp; unsigned long oldval; __asm__ __volatile__ ( "1: \n\t" - "movli.l @%2, %0 ! __raw_spin_lock \n\t" + "movli.l @%2, %0 ! arch_spin_lock \n\t" "mov %0, %1 \n\t" "mov #0, %0 \n\t" "movco.l %0, @%2 \n\t" @@ -54,12 +54,12 @@ static inline void __raw_spin_lock(arch_spinlock_t *lock) ); } -static inline void __raw_spin_unlock(arch_spinlock_t *lock) +static inline void arch_spin_unlock(arch_spinlock_t *lock) { unsigned long tmp; __asm__ __volatile__ ( - "mov #1, %0 ! __raw_spin_unlock \n\t" + "mov #1, %0 ! arch_spin_unlock \n\t" "mov.l %0, @%1 \n\t" : "=&z" (tmp) : "r" (&lock->lock) @@ -67,13 +67,13 @@ static inline void __raw_spin_unlock(arch_spinlock_t *lock) ); } -static inline int __raw_spin_trylock(arch_spinlock_t *lock) +static inline int arch_spin_trylock(arch_spinlock_t *lock) { unsigned long tmp, oldval; __asm__ __volatile__ ( "1: \n\t" - "movli.l @%2, %0 ! __raw_spin_trylock \n\t" + "movli.l @%2, %0 ! arch_spin_trylock \n\t" "mov %0, %1 \n\t" "mov #0, %0 \n\t" "movco.l %0, @%2 \n\t" @@ -219,8 +219,8 @@ static inline int __raw_write_trylock(raw_rwlock_t *rw) #define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock) #define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock) -#define _raw_spin_relax(lock) cpu_relax() -#define _raw_read_relax(lock) cpu_relax() -#define _raw_write_relax(lock) cpu_relax() +#define arch_spin_relax(lock) cpu_relax() +#define arch_read_relax(lock) cpu_relax() +#define arch_write_relax(lock) cpu_relax() #endif /* __ASM_SH_SPINLOCK_H */ diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h index b2d8a67f727e..9b0f2f53c81c 100644 --- a/arch/sparc/include/asm/spinlock_32.h +++ b/arch/sparc/include/asm/spinlock_32.h @@ -10,12 +10,12 @@ #include -#define __raw_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0) +#define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0) -#define __raw_spin_unlock_wait(lock) \ - do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0) +#define arch_spin_unlock_wait(lock) \ + do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) -static inline void __raw_spin_lock(arch_spinlock_t *lock) +static inline void arch_spin_lock(arch_spinlock_t *lock) { __asm__ __volatile__( "\n1:\n\t" @@ -35,7 +35,7 @@ static inline void __raw_spin_lock(arch_spinlock_t *lock) : "g2", "memory", "cc"); } -static inline int __raw_spin_trylock(arch_spinlock_t *lock) +static inline int arch_spin_trylock(arch_spinlock_t *lock) { unsigned int result; __asm__ __volatile__("ldstub [%1], %0" @@ -45,7 +45,7 @@ static inline int __raw_spin_trylock(arch_spinlock_t *lock) return (result == 0); } -static inline void __raw_spin_unlock(arch_spinlock_t *lock) +static inline void arch_spin_unlock(arch_spinlock_t *lock) { __asm__ __volatile__("stb %%g0, [%0]" : : "r" (lock) : "memory"); } @@ -176,13 +176,13 @@ static inline int arch_read_trylock(raw_rwlock_t *rw) #define __raw_write_unlock(rw) do { (rw)->lock = 0; } while(0) -#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) +#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) #define __raw_read_lock_flags(rw, flags) __raw_read_lock(rw) #define __raw_write_lock_flags(rw, flags) __raw_write_lock(rw) -#define _raw_spin_relax(lock) cpu_relax() -#define _raw_read_relax(lock) cpu_relax() -#define _raw_write_relax(lock) cpu_relax() +#define arch_spin_relax(lock) cpu_relax() +#define arch_read_relax(lock) cpu_relax() +#define arch_write_relax(lock) cpu_relax() #define __raw_read_can_lock(rw) (!((rw)->lock & 0xff)) #define __raw_write_can_lock(rw) (!(rw)->lock) diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h index 38e16c40efc4..7cf58a2fcda4 100644 --- a/arch/sparc/include/asm/spinlock_64.h +++ b/arch/sparc/include/asm/spinlock_64.h @@ -21,13 +21,13 @@ * the spinner sections must be pre-V9 branches. */ -#define __raw_spin_is_locked(lp) ((lp)->lock != 0) +#define arch_spin_is_locked(lp) ((lp)->lock != 0) -#define __raw_spin_unlock_wait(lp) \ +#define arch_spin_unlock_wait(lp) \ do { rmb(); \ } while((lp)->lock) -static inline void __raw_spin_lock(arch_spinlock_t *lock) +static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned long tmp; @@ -46,7 +46,7 @@ static inline void __raw_spin_lock(arch_spinlock_t *lock) : "memory"); } -static inline int __raw_spin_trylock(arch_spinlock_t *lock) +static inline int arch_spin_trylock(arch_spinlock_t *lock) { unsigned long result; @@ -59,7 +59,7 @@ static inline int __raw_spin_trylock(arch_spinlock_t *lock) return (result == 0UL); } -static inline void __raw_spin_unlock(arch_spinlock_t *lock) +static inline void arch_spin_unlock(arch_spinlock_t *lock) { __asm__ __volatile__( " stb %%g0, [%0]" @@ -68,7 +68,7 @@ static inline void __raw_spin_unlock(arch_spinlock_t *lock) : "memory"); } -static inline void __raw_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) +static inline void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) { unsigned long tmp1, tmp2; @@ -222,9 +222,9 @@ static int inline arch_write_trylock(raw_rwlock_t *lock) #define __raw_read_can_lock(rw) (!((rw)->lock & 0x80000000UL)) #define __raw_write_can_lock(rw) (!(rw)->lock) -#define _raw_spin_relax(lock) cpu_relax() -#define _raw_read_relax(lock) cpu_relax() -#define _raw_write_relax(lock) cpu_relax() +#define arch_spin_relax(lock) cpu_relax() +#define arch_read_relax(lock) cpu_relax() +#define arch_write_relax(lock) cpu_relax() #endif /* !(__ASSEMBLY__) */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 5655f75f10b7..dd59a85a918f 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -731,34 +731,34 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) -static inline int __raw_spin_is_locked(struct arch_spinlock *lock) +static inline int arch_spin_is_locked(struct arch_spinlock *lock) { return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock); } -static inline int __raw_spin_is_contended(struct arch_spinlock *lock) +static inline int arch_spin_is_contended(struct arch_spinlock *lock) { return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock); } -#define __raw_spin_is_contended __raw_spin_is_contended +#define arch_spin_is_contended arch_spin_is_contended -static __always_inline void __raw_spin_lock(struct arch_spinlock *lock) +static __always_inline void arch_spin_lock(struct arch_spinlock *lock) { PVOP_VCALL1(pv_lock_ops.spin_lock, lock); } -static __always_inline void __raw_spin_lock_flags(struct arch_spinlock *lock, +static __always_inline void arch_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags) { PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags); } -static __always_inline int __raw_spin_trylock(struct arch_spinlock *lock) +static __always_inline int arch_spin_trylock(struct arch_spinlock *lock) { return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock); } -static __always_inline void __raw_spin_unlock(struct arch_spinlock *lock) +static __always_inline void arch_spin_unlock(struct arch_spinlock *lock) { PVOP_VCALL1(pv_lock_ops.spin_unlock, lock); } diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 204b524fcf57..ab9055fd57d9 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -174,43 +174,43 @@ static inline int __ticket_spin_is_contended(arch_spinlock_t *lock) #ifndef CONFIG_PARAVIRT_SPINLOCKS -static inline int __raw_spin_is_locked(arch_spinlock_t *lock) +static inline int arch_spin_is_locked(arch_spinlock_t *lock) { return __ticket_spin_is_locked(lock); } -static inline int __raw_spin_is_contended(arch_spinlock_t *lock) +static inline int arch_spin_is_contended(arch_spinlock_t *lock) { return __ticket_spin_is_contended(lock); } -#define __raw_spin_is_contended __raw_spin_is_contended +#define arch_spin_is_contended arch_spin_is_contended -static __always_inline void __raw_spin_lock(arch_spinlock_t *lock) +static __always_inline void arch_spin_lock(arch_spinlock_t *lock) { __ticket_spin_lock(lock); } -static __always_inline int __raw_spin_trylock(arch_spinlock_t *lock) +static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) { return __ticket_spin_trylock(lock); } -static __always_inline void __raw_spin_unlock(arch_spinlock_t *lock) +static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) { __ticket_spin_unlock(lock); } -static __always_inline void __raw_spin_lock_flags(arch_spinlock_t *lock, +static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) { - __raw_spin_lock(lock); + arch_spin_lock(lock); } #endif /* CONFIG_PARAVIRT_SPINLOCKS */ -static inline void __raw_spin_unlock_wait(arch_spinlock_t *lock) +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) { - while (__raw_spin_is_locked(lock)) + while (arch_spin_is_locked(lock)) cpu_relax(); } @@ -298,9 +298,9 @@ static inline void __raw_write_unlock(raw_rwlock_t *rw) #define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock) #define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock) -#define _raw_spin_relax(lock) cpu_relax() -#define _raw_read_relax(lock) cpu_relax() -#define _raw_write_relax(lock) cpu_relax() +#define arch_spin_relax(lock) cpu_relax() +#define arch_read_relax(lock) cpu_relax() +#define arch_write_relax(lock) cpu_relax() /* The {read|write|spin}_lock() on x86 are full memory barriers. */ static inline void smp_mb__after_lock(void) { } diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 5b75afac8a38..0a0aa1cec8f1 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -207,11 +207,11 @@ unsigned __kprobes long oops_begin(void) /* racy, but better than risking deadlock. */ raw_local_irq_save(flags); cpu = smp_processor_id(); - if (!__raw_spin_trylock(&die_lock)) { + if (!arch_spin_trylock(&die_lock)) { if (cpu == die_owner) /* nested oops. should stop eventually */; else - __raw_spin_lock(&die_lock); + arch_spin_lock(&die_lock); } die_nest_count++; die_owner = cpu; @@ -231,7 +231,7 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) die_nest_count--; if (!die_nest_count) /* Nest count reaches zero, release the lock. */ - __raw_spin_unlock(&die_lock); + arch_spin_unlock(&die_lock); raw_local_irq_restore(flags); oops_exit(); diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index a0f39e090684..676b8c77a976 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -10,7 +10,7 @@ static inline void default_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) { - __raw_spin_lock(lock); + arch_spin_lock(lock); } struct pv_lock_ops pv_lock_ops = { diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index f1714697a09a..0aa5fed8b9e6 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -62,13 +62,13 @@ static __cpuinit void check_tsc_warp(void) * previous TSC that was measured (possibly on * another CPU) and update the previous TSC timestamp. */ - __raw_spin_lock(&sync_lock); + arch_spin_lock(&sync_lock); prev = last_tsc; rdtsc_barrier(); now = get_cycles(); rdtsc_barrier(); last_tsc = now; - __raw_spin_unlock(&sync_lock); + arch_spin_unlock(&sync_lock); /* * Be nice every now and then (and also check whether @@ -87,10 +87,10 @@ static __cpuinit void check_tsc_warp(void) * we saw a time-warp of the TSC going backwards: */ if (unlikely(prev > now)) { - __raw_spin_lock(&sync_lock); + arch_spin_lock(&sync_lock); max_warp = max(max_warp, prev - now); nr_warps++; - __raw_spin_unlock(&sync_lock); + arch_spin_unlock(&sync_lock); } } WARN(!(now-start), diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h index dcf0afad4a7f..ecc44a8e2b44 100644 --- a/include/asm-generic/bitops/atomic.h +++ b/include/asm-generic/bitops/atomic.h @@ -22,12 +22,12 @@ extern arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned; #define _atomic_spin_lock_irqsave(l,f) do { \ arch_spinlock_t *s = ATOMIC_HASH(l); \ local_irq_save(f); \ - __raw_spin_lock(s); \ + arch_spin_lock(s); \ } while(0) #define _atomic_spin_unlock_irqrestore(l,f) do { \ arch_spinlock_t *s = ATOMIC_HASH(l); \ - __raw_spin_unlock(s); \ + arch_spin_unlock(s); \ local_irq_restore(f); \ } while(0) diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 5ef7a4c060b5..de3a022489c6 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -14,7 +14,7 @@ * linux/spinlock_types.h: * defines the generic type and initializers * - * asm/spinlock.h: contains the __raw_spin_*()/etc. lowlevel + * asm/spinlock.h: contains the arch_spin_*()/etc. lowlevel * implementations, mostly inline assembly code * * (also included on UP-debug builds:) @@ -34,7 +34,7 @@ * defines the generic type and initializers * * linux/spinlock_up.h: - * contains the __raw_spin_*()/etc. version of UP + * contains the arch_spin_*()/etc. version of UP * builds. (which are NOPs on non-debug, non-preempt * builds) * @@ -103,17 +103,17 @@ do { \ do { *(lock) = __SPIN_LOCK_UNLOCKED(lock); } while (0) #endif -#define spin_is_locked(lock) __raw_spin_is_locked(&(lock)->raw_lock) +#define spin_is_locked(lock) arch_spin_is_locked(&(lock)->raw_lock) #ifdef CONFIG_GENERIC_LOCKBREAK #define spin_is_contended(lock) ((lock)->break_lock) #else -#ifdef __raw_spin_is_contended -#define spin_is_contended(lock) __raw_spin_is_contended(&(lock)->raw_lock) +#ifdef arch_spin_is_contended +#define spin_is_contended(lock) arch_spin_is_contended(&(lock)->raw_lock) #else #define spin_is_contended(lock) (((void)(lock), 0)) -#endif /*__raw_spin_is_contended*/ +#endif /*arch_spin_is_contended*/ #endif /* The lock does not imply full memory barrier. */ @@ -125,7 +125,7 @@ static inline void smp_mb__after_lock(void) { smp_mb(); } * spin_unlock_wait - wait until the spinlock gets unlocked * @lock: the spinlock in question. */ -#define spin_unlock_wait(lock) __raw_spin_unlock_wait(&(lock)->raw_lock) +#define spin_unlock_wait(lock) arch_spin_unlock_wait(&(lock)->raw_lock) #ifdef CONFIG_DEBUG_SPINLOCK extern void _raw_spin_lock(spinlock_t *lock); @@ -133,11 +133,11 @@ static inline void smp_mb__after_lock(void) { smp_mb(); } extern int _raw_spin_trylock(spinlock_t *lock); extern void _raw_spin_unlock(spinlock_t *lock); #else -# define _raw_spin_lock(lock) __raw_spin_lock(&(lock)->raw_lock) +# define _raw_spin_lock(lock) arch_spin_lock(&(lock)->raw_lock) # define _raw_spin_lock_flags(lock, flags) \ - __raw_spin_lock_flags(&(lock)->raw_lock, *(flags)) -# define _raw_spin_trylock(lock) __raw_spin_trylock(&(lock)->raw_lock) -# define _raw_spin_unlock(lock) __raw_spin_unlock(&(lock)->raw_lock) + arch_spin_lock_flags(&(lock)->raw_lock, *(flags)) +# define _raw_spin_trylock(lock) arch_spin_trylock(&(lock)->raw_lock) +# define _raw_spin_unlock(lock) arch_spin_unlock(&(lock)->raw_lock) #endif /* diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index 8ee2ac1bf636..1d3bcc3cf7c6 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -18,21 +18,21 @@ */ #ifdef CONFIG_DEBUG_SPINLOCK -#define __raw_spin_is_locked(x) ((x)->slock == 0) +#define arch_spin_is_locked(x) ((x)->slock == 0) -static inline void __raw_spin_lock(arch_spinlock_t *lock) +static inline void arch_spin_lock(arch_spinlock_t *lock) { lock->slock = 0; } static inline void -__raw_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) +arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) { local_irq_save(flags); lock->slock = 0; } -static inline int __raw_spin_trylock(arch_spinlock_t *lock) +static inline int arch_spin_trylock(arch_spinlock_t *lock) { char oldval = lock->slock; @@ -41,7 +41,7 @@ static inline int __raw_spin_trylock(arch_spinlock_t *lock) return oldval > 0; } -static inline void __raw_spin_unlock(arch_spinlock_t *lock) +static inline void arch_spin_unlock(arch_spinlock_t *lock) { lock->slock = 1; } @@ -57,20 +57,20 @@ static inline void __raw_spin_unlock(arch_spinlock_t *lock) #define __raw_write_unlock(lock) do { (void)(lock); } while (0) #else /* DEBUG_SPINLOCK */ -#define __raw_spin_is_locked(lock) ((void)(lock), 0) +#define arch_spin_is_locked(lock) ((void)(lock), 0) /* for sched.c and kernel_lock.c: */ -# define __raw_spin_lock(lock) do { (void)(lock); } while (0) -# define __raw_spin_lock_flags(lock, flags) do { (void)(lock); } while (0) -# define __raw_spin_unlock(lock) do { (void)(lock); } while (0) -# define __raw_spin_trylock(lock) ({ (void)(lock); 1; }) +# define arch_spin_lock(lock) do { (void)(lock); } while (0) +# define arch_spin_lock_flags(lock, flags) do { (void)(lock); } while (0) +# define arch_spin_unlock(lock) do { (void)(lock); } while (0) +# define arch_spin_trylock(lock) ({ (void)(lock); 1; }) #endif /* DEBUG_SPINLOCK */ -#define __raw_spin_is_contended(lock) (((void)(lock), 0)) +#define arch_spin_is_contended(lock) (((void)(lock), 0)) #define __raw_read_can_lock(lock) (((void)(lock), 1)) #define __raw_write_can_lock(lock) (((void)(lock), 1)) -#define __raw_spin_unlock_wait(lock) \ - do { cpu_relax(); } while (__raw_spin_is_locked(lock)) +#define arch_spin_unlock_wait(lock) \ + do { cpu_relax(); } while (arch_spin_is_locked(lock)) #endif /* __LINUX_SPINLOCK_UP_H */ diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 2389e3f85cf6..5feaddcdbe49 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -77,7 +77,7 @@ static arch_spinlock_t lockdep_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED static int graph_lock(void) { - __raw_spin_lock(&lockdep_lock); + arch_spin_lock(&lockdep_lock); /* * Make sure that if another CPU detected a bug while * walking the graph we dont change it (while the other @@ -85,7 +85,7 @@ static int graph_lock(void) * dropped already) */ if (!debug_locks) { - __raw_spin_unlock(&lockdep_lock); + arch_spin_unlock(&lockdep_lock); return 0; } /* prevent any recursions within lockdep from causing deadlocks */ @@ -95,11 +95,11 @@ static int graph_lock(void) static inline int graph_unlock(void) { - if (debug_locks && !__raw_spin_is_locked(&lockdep_lock)) + if (debug_locks && !arch_spin_is_locked(&lockdep_lock)) return DEBUG_LOCKS_WARN_ON(1); current->lockdep_recursion--; - __raw_spin_unlock(&lockdep_lock); + arch_spin_unlock(&lockdep_lock); return 0; } @@ -111,7 +111,7 @@ static inline int debug_locks_off_graph_unlock(void) { int ret = debug_locks_off(); - __raw_spin_unlock(&lockdep_lock); + arch_spin_unlock(&lockdep_lock); return ret; } @@ -1170,9 +1170,9 @@ unsigned long lockdep_count_forward_deps(struct lock_class *class) this.class = class; local_irq_save(flags); - __raw_spin_lock(&lockdep_lock); + arch_spin_lock(&lockdep_lock); ret = __lockdep_count_forward_deps(&this); - __raw_spin_unlock(&lockdep_lock); + arch_spin_unlock(&lockdep_lock); local_irq_restore(flags); return ret; @@ -1197,9 +1197,9 @@ unsigned long lockdep_count_backward_deps(struct lock_class *class) this.class = class; local_irq_save(flags); - __raw_spin_lock(&lockdep_lock); + arch_spin_lock(&lockdep_lock); ret = __lockdep_count_backward_deps(&this); - __raw_spin_unlock(&lockdep_lock); + arch_spin_unlock(&lockdep_lock); local_irq_restore(flags); return ret; diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h index 6b2d735846a5..7bebbd15b342 100644 --- a/kernel/mutex-debug.h +++ b/kernel/mutex-debug.h @@ -43,13 +43,13 @@ static inline void mutex_clear_owner(struct mutex *lock) \ DEBUG_LOCKS_WARN_ON(in_interrupt()); \ local_irq_save(flags); \ - __raw_spin_lock(&(lock)->raw_lock); \ + arch_spin_lock(&(lock)->raw_lock); \ DEBUG_LOCKS_WARN_ON(l->magic != l); \ } while (0) #define spin_unlock_mutex(lock, flags) \ do { \ - __raw_spin_unlock(&(lock)->raw_lock); \ + arch_spin_unlock(&(lock)->raw_lock); \ local_irq_restore(flags); \ preempt_check_resched(); \ } while (0) diff --git a/kernel/spinlock.c b/kernel/spinlock.c index e6e136318437..fbb5f8b78357 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -53,7 +53,7 @@ void __lockfunc __##op##_lock(locktype##_t *lock) \ if (!(lock)->break_lock) \ (lock)->break_lock = 1; \ while (!op##_can_lock(lock) && (lock)->break_lock) \ - _raw_##op##_relax(&lock->raw_lock); \ + arch_##op##_relax(&lock->raw_lock); \ } \ (lock)->break_lock = 0; \ } \ @@ -73,7 +73,7 @@ unsigned long __lockfunc __##op##_lock_irqsave(locktype##_t *lock) \ if (!(lock)->break_lock) \ (lock)->break_lock = 1; \ while (!op##_can_lock(lock) && (lock)->break_lock) \ - _raw_##op##_relax(&lock->raw_lock); \ + arch_##op##_relax(&lock->raw_lock); \ } \ (lock)->break_lock = 0; \ return flags; \ diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index fb7a0fa508b9..f58c9ad15830 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2834,7 +2834,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) int ret; local_irq_save(flags); - __raw_spin_lock(&cpu_buffer->lock); + arch_spin_lock(&cpu_buffer->lock); again: /* @@ -2923,7 +2923,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) goto again; out: - __raw_spin_unlock(&cpu_buffer->lock); + arch_spin_unlock(&cpu_buffer->lock); local_irq_restore(flags); return reader; @@ -3286,9 +3286,9 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu) synchronize_sched(); spin_lock_irqsave(&cpu_buffer->reader_lock, flags); - __raw_spin_lock(&cpu_buffer->lock); + arch_spin_lock(&cpu_buffer->lock); rb_iter_reset(iter); - __raw_spin_unlock(&cpu_buffer->lock); + arch_spin_unlock(&cpu_buffer->lock); spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); return iter; @@ -3408,11 +3408,11 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing))) goto out; - __raw_spin_lock(&cpu_buffer->lock); + arch_spin_lock(&cpu_buffer->lock); rb_reset_cpu(cpu_buffer); - __raw_spin_unlock(&cpu_buffer->lock); + arch_spin_unlock(&cpu_buffer->lock); out: spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 63bc1cc38219..bb6b5e7fa2a2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -555,13 +555,13 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) return; WARN_ON_ONCE(!irqs_disabled()); - __raw_spin_lock(&ftrace_max_lock); + arch_spin_lock(&ftrace_max_lock); tr->buffer = max_tr.buffer; max_tr.buffer = buf; __update_max_tr(tr, tsk, cpu); - __raw_spin_unlock(&ftrace_max_lock); + arch_spin_unlock(&ftrace_max_lock); } /** @@ -581,7 +581,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) return; WARN_ON_ONCE(!irqs_disabled()); - __raw_spin_lock(&ftrace_max_lock); + arch_spin_lock(&ftrace_max_lock); ftrace_disable_cpu(); @@ -603,7 +603,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY); __update_max_tr(tr, tsk, cpu); - __raw_spin_unlock(&ftrace_max_lock); + arch_spin_unlock(&ftrace_max_lock); } #endif /* CONFIG_TRACER_MAX_TRACE */ @@ -915,7 +915,7 @@ static void trace_save_cmdline(struct task_struct *tsk) * nor do we want to disable interrupts, * so if we miss here, then better luck next time. */ - if (!__raw_spin_trylock(&trace_cmdline_lock)) + if (!arch_spin_trylock(&trace_cmdline_lock)) return; idx = map_pid_to_cmdline[tsk->pid]; @@ -940,7 +940,7 @@ static void trace_save_cmdline(struct task_struct *tsk) memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN); - __raw_spin_unlock(&trace_cmdline_lock); + arch_spin_unlock(&trace_cmdline_lock); } void trace_find_cmdline(int pid, char comm[]) @@ -958,14 +958,14 @@ void trace_find_cmdline(int pid, char comm[]) } preempt_disable(); - __raw_spin_lock(&trace_cmdline_lock); + arch_spin_lock(&trace_cmdline_lock); map = map_pid_to_cmdline[pid]; if (map != NO_CMDLINE_MAP) strcpy(comm, saved_cmdlines[map]); else strcpy(comm, "<...>"); - __raw_spin_unlock(&trace_cmdline_lock); + arch_spin_unlock(&trace_cmdline_lock); preempt_enable(); } @@ -1283,7 +1283,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) /* Lockdep uses trace_printk for lock tracing */ local_irq_save(flags); - __raw_spin_lock(&trace_buf_lock); + arch_spin_lock(&trace_buf_lock); len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args); if (len > TRACE_BUF_SIZE || len < 0) @@ -1304,7 +1304,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) ring_buffer_unlock_commit(buffer, event); out_unlock: - __raw_spin_unlock(&trace_buf_lock); + arch_spin_unlock(&trace_buf_lock); local_irq_restore(flags); out: @@ -1360,7 +1360,7 @@ int trace_array_vprintk(struct trace_array *tr, pause_graph_tracing(); raw_local_irq_save(irq_flags); - __raw_spin_lock(&trace_buf_lock); + arch_spin_lock(&trace_buf_lock); len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); size = sizeof(*entry) + len + 1; @@ -1378,7 +1378,7 @@ int trace_array_vprintk(struct trace_array *tr, ring_buffer_unlock_commit(buffer, event); out_unlock: - __raw_spin_unlock(&trace_buf_lock); + arch_spin_unlock(&trace_buf_lock); raw_local_irq_restore(irq_flags); unpause_graph_tracing(); out: @@ -2279,7 +2279,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, mutex_lock(&tracing_cpumask_update_lock); local_irq_disable(); - __raw_spin_lock(&ftrace_max_lock); + arch_spin_lock(&ftrace_max_lock); for_each_tracing_cpu(cpu) { /* * Increase/decrease the disabled counter if we are @@ -2294,7 +2294,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, atomic_dec(&global_trace.data[cpu]->disabled); } } - __raw_spin_unlock(&ftrace_max_lock); + arch_spin_unlock(&ftrace_max_lock); local_irq_enable(); cpumask_copy(tracing_cpumask, tracing_cpumask_new); @@ -4318,7 +4318,7 @@ static void __ftrace_dump(bool disable_tracing) /* only one dump */ local_irq_save(flags); - __raw_spin_lock(&ftrace_dump_lock); + arch_spin_lock(&ftrace_dump_lock); if (dump_ran) goto out; @@ -4393,7 +4393,7 @@ static void __ftrace_dump(bool disable_tracing) } out: - __raw_spin_unlock(&ftrace_dump_lock); + arch_spin_unlock(&ftrace_dump_lock); local_irq_restore(flags); } diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 433e2eda2d01..84a3a7ba072a 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -94,7 +94,7 @@ u64 notrace trace_clock_global(void) if (unlikely(in_nmi())) goto out; - __raw_spin_lock(&trace_clock_struct.lock); + arch_spin_lock(&trace_clock_struct.lock); /* * TODO: if this happens often then maybe we should reset @@ -106,7 +106,7 @@ u64 notrace trace_clock_global(void) trace_clock_struct.prev_time = now; - __raw_spin_unlock(&trace_clock_struct.lock); + arch_spin_unlock(&trace_clock_struct.lock); out: raw_local_irq_restore(flags); diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index e347853564e9..0271742abb8d 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -143,7 +143,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, goto out; local_irq_save(flags); - __raw_spin_lock(&wakeup_lock); + arch_spin_lock(&wakeup_lock); /* We could race with grabbing wakeup_lock */ if (unlikely(!tracer_enabled || next != wakeup_task)) @@ -169,7 +169,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, out_unlock: __wakeup_reset(wakeup_trace); - __raw_spin_unlock(&wakeup_lock); + arch_spin_unlock(&wakeup_lock); local_irq_restore(flags); out: atomic_dec(&wakeup_trace->data[cpu]->disabled); @@ -193,9 +193,9 @@ static void wakeup_reset(struct trace_array *tr) tracing_reset_online_cpus(tr); local_irq_save(flags); - __raw_spin_lock(&wakeup_lock); + arch_spin_lock(&wakeup_lock); __wakeup_reset(tr); - __raw_spin_unlock(&wakeup_lock); + arch_spin_unlock(&wakeup_lock); local_irq_restore(flags); } @@ -225,7 +225,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success) goto out; /* interrupts should be off from try_to_wake_up */ - __raw_spin_lock(&wakeup_lock); + arch_spin_lock(&wakeup_lock); /* check for races. */ if (!tracer_enabled || p->prio >= wakeup_prio) @@ -255,7 +255,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success) trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); out_locked: - __raw_spin_unlock(&wakeup_lock); + arch_spin_unlock(&wakeup_lock); out: atomic_dec(&wakeup_trace->data[cpu]->disabled); } diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index dc98309e839a..280fea470d67 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -67,7 +67,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) /* Don't allow flipping of max traces now */ local_irq_save(flags); - __raw_spin_lock(&ftrace_max_lock); + arch_spin_lock(&ftrace_max_lock); cnt = ring_buffer_entries(tr->buffer); @@ -85,7 +85,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) break; } tracing_on(); - __raw_spin_unlock(&ftrace_max_lock); + arch_spin_unlock(&ftrace_max_lock); local_irq_restore(flags); if (count) diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 728c35221483..678a5120ee30 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -54,7 +54,7 @@ static inline void check_stack(void) return; local_irq_save(flags); - __raw_spin_lock(&max_stack_lock); + arch_spin_lock(&max_stack_lock); /* a race could have already updated it */ if (this_size <= max_stack_size) @@ -103,7 +103,7 @@ static inline void check_stack(void) } out: - __raw_spin_unlock(&max_stack_lock); + arch_spin_unlock(&max_stack_lock); local_irq_restore(flags); } @@ -171,9 +171,9 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, return ret; local_irq_save(flags); - __raw_spin_lock(&max_stack_lock); + arch_spin_lock(&max_stack_lock); *ptr = val; - __raw_spin_unlock(&max_stack_lock); + arch_spin_unlock(&max_stack_lock); local_irq_restore(flags); return count; @@ -207,7 +207,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos) static void *t_start(struct seq_file *m, loff_t *pos) { local_irq_disable(); - __raw_spin_lock(&max_stack_lock); + arch_spin_lock(&max_stack_lock); if (*pos == 0) return SEQ_START_TOKEN; @@ -217,7 +217,7 @@ static void *t_start(struct seq_file *m, loff_t *pos) static void t_stop(struct seq_file *m, void *p) { - __raw_spin_unlock(&max_stack_lock); + arch_spin_unlock(&max_stack_lock); local_irq_enable(); } diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c index f73004137141..1304fe094546 100644 --- a/lib/spinlock_debug.c +++ b/lib/spinlock_debug.c @@ -106,7 +106,7 @@ static void __spin_lock_debug(spinlock_t *lock) for (;;) { for (i = 0; i < loops; i++) { - if (__raw_spin_trylock(&lock->raw_lock)) + if (arch_spin_trylock(&lock->raw_lock)) return; __delay(1); } @@ -128,14 +128,14 @@ static void __spin_lock_debug(spinlock_t *lock) void _raw_spin_lock(spinlock_t *lock) { debug_spin_lock_before(lock); - if (unlikely(!__raw_spin_trylock(&lock->raw_lock))) + if (unlikely(!arch_spin_trylock(&lock->raw_lock))) __spin_lock_debug(lock); debug_spin_lock_after(lock); } int _raw_spin_trylock(spinlock_t *lock) { - int ret = __raw_spin_trylock(&lock->raw_lock); + int ret = arch_spin_trylock(&lock->raw_lock); if (ret) debug_spin_lock_after(lock); @@ -151,7 +151,7 @@ int _raw_spin_trylock(spinlock_t *lock) void _raw_spin_unlock(spinlock_t *lock) { debug_spin_unlock(lock); - __raw_spin_unlock(&lock->raw_lock); + arch_spin_unlock(&lock->raw_lock); } static void rwlock_bug(rwlock_t *lock, const char *msg) -- cgit v1.2.3 From fb3a6bbc912b12347614e5742c7c61416cdb0ca0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 3 Dec 2009 20:01:19 +0100 Subject: locking: Convert raw_rwlock to arch_rwlock Not strictly necessary for -rt as -rt does not have non sleeping rwlocks, but it's odd to not have a consistent naming convention. No functional change. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Acked-by: David S. Miller Acked-by: Ingo Molnar Cc: linux-arch@vger.kernel.org --- arch/alpha/include/asm/spinlock.h | 16 ++++++++-------- arch/alpha/include/asm/spinlock_types.h | 4 ++-- arch/arm/include/asm/spinlock.h | 12 ++++++------ arch/arm/include/asm/spinlock_types.h | 4 ++-- arch/blackfin/include/asm/spinlock.h | 16 ++++++++-------- arch/blackfin/include/asm/spinlock_types.h | 4 ++-- arch/cris/include/arch-v32/arch/spinlock.h | 16 ++++++++-------- arch/ia64/include/asm/spinlock.h | 16 ++++++++-------- arch/ia64/include/asm/spinlock_types.h | 4 ++-- arch/m32r/include/asm/spinlock.h | 12 ++++++------ arch/m32r/include/asm/spinlock_types.h | 4 ++-- arch/mips/include/asm/spinlock.h | 12 ++++++------ arch/mips/include/asm/spinlock_types.h | 4 ++-- arch/parisc/include/asm/spinlock.h | 16 ++++++++-------- arch/parisc/include/asm/spinlock_types.h | 4 ++-- arch/powerpc/include/asm/spinlock.h | 18 +++++++++--------- arch/powerpc/include/asm/spinlock_types.h | 4 ++-- arch/powerpc/lib/locks.c | 2 +- arch/s390/include/asm/spinlock.h | 30 +++++++++++++++--------------- arch/s390/include/asm/spinlock_types.h | 4 ++-- arch/s390/lib/spinlock.c | 12 ++++++------ arch/sh/include/asm/spinlock.h | 12 ++++++------ arch/sh/include/asm/spinlock_types.h | 4 ++-- arch/sparc/include/asm/spinlock_32.h | 20 ++++++++++---------- arch/sparc/include/asm/spinlock_64.h | 12 ++++++------ arch/sparc/include/asm/spinlock_types.h | 4 ++-- arch/x86/include/asm/spinlock.h | 16 ++++++++-------- arch/x86/include/asm/spinlock_types.h | 4 ++-- include/linux/rwlock_types.h | 6 +++--- include/linux/spinlock.h | 4 ++-- include/linux/spinlock_types_up.h | 4 ++-- lib/spinlock_debug.c | 2 +- 32 files changed, 151 insertions(+), 151 deletions(-) (limited to 'arch/x86') diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h index 4dac79f504c3..e8b2970f037b 100644 --- a/arch/alpha/include/asm/spinlock.h +++ b/arch/alpha/include/asm/spinlock.h @@ -50,17 +50,17 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) /***********************************************************/ -static inline int __raw_read_can_lock(raw_rwlock_t *lock) +static inline int __raw_read_can_lock(arch_rwlock_t *lock) { return (lock->lock & 1) == 0; } -static inline int __raw_write_can_lock(raw_rwlock_t *lock) +static inline int __raw_write_can_lock(arch_rwlock_t *lock) { return lock->lock == 0; } -static inline void __raw_read_lock(raw_rwlock_t *lock) +static inline void __raw_read_lock(arch_rwlock_t *lock) { long regx; @@ -80,7 +80,7 @@ static inline void __raw_read_lock(raw_rwlock_t *lock) : "m" (*lock) : "memory"); } -static inline void __raw_write_lock(raw_rwlock_t *lock) +static inline void __raw_write_lock(arch_rwlock_t *lock) { long regx; @@ -100,7 +100,7 @@ static inline void __raw_write_lock(raw_rwlock_t *lock) : "m" (*lock) : "memory"); } -static inline int __raw_read_trylock(raw_rwlock_t * lock) +static inline int __raw_read_trylock(arch_rwlock_t * lock) { long regx; int success; @@ -122,7 +122,7 @@ static inline int __raw_read_trylock(raw_rwlock_t * lock) return success; } -static inline int __raw_write_trylock(raw_rwlock_t * lock) +static inline int __raw_write_trylock(arch_rwlock_t * lock) { long regx; int success; @@ -144,7 +144,7 @@ static inline int __raw_write_trylock(raw_rwlock_t * lock) return success; } -static inline void __raw_read_unlock(raw_rwlock_t * lock) +static inline void __raw_read_unlock(arch_rwlock_t * lock) { long regx; __asm__ __volatile__( @@ -160,7 +160,7 @@ static inline void __raw_read_unlock(raw_rwlock_t * lock) : "m" (*lock) : "memory"); } -static inline void __raw_write_unlock(raw_rwlock_t * lock) +static inline void __raw_write_unlock(arch_rwlock_t * lock) { mb(); lock->lock = 0; diff --git a/arch/alpha/include/asm/spinlock_types.h b/arch/alpha/include/asm/spinlock_types.h index 08975ee0a100..54c2afce0a1d 100644 --- a/arch/alpha/include/asm/spinlock_types.h +++ b/arch/alpha/include/asm/spinlock_types.h @@ -13,8 +13,8 @@ typedef struct { typedef struct { volatile unsigned int lock; -} raw_rwlock_t; +} arch_rwlock_t; -#define __RAW_RW_LOCK_UNLOCKED { 0 } +#define __ARCH_RW_LOCK_UNLOCKED { 0 } #endif diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index de62eb098f68..a8671d8bc7d4 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -86,7 +86,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) * just write zero since the lock is exclusively held. */ -static inline void __raw_write_lock(raw_rwlock_t *rw) +static inline void __raw_write_lock(arch_rwlock_t *rw) { unsigned long tmp; @@ -106,7 +106,7 @@ static inline void __raw_write_lock(raw_rwlock_t *rw) smp_mb(); } -static inline int __raw_write_trylock(raw_rwlock_t *rw) +static inline int __raw_write_trylock(arch_rwlock_t *rw) { unsigned long tmp; @@ -126,7 +126,7 @@ static inline int __raw_write_trylock(raw_rwlock_t *rw) } } -static inline void __raw_write_unlock(raw_rwlock_t *rw) +static inline void __raw_write_unlock(arch_rwlock_t *rw) { smp_mb(); @@ -156,7 +156,7 @@ static inline void __raw_write_unlock(raw_rwlock_t *rw) * currently active. However, we know we won't have any write * locks. */ -static inline void __raw_read_lock(raw_rwlock_t *rw) +static inline void __raw_read_lock(arch_rwlock_t *rw) { unsigned long tmp, tmp2; @@ -176,7 +176,7 @@ static inline void __raw_read_lock(raw_rwlock_t *rw) smp_mb(); } -static inline void __raw_read_unlock(raw_rwlock_t *rw) +static inline void __raw_read_unlock(arch_rwlock_t *rw) { unsigned long tmp, tmp2; @@ -198,7 +198,7 @@ static inline void __raw_read_unlock(raw_rwlock_t *rw) : "cc"); } -static inline int __raw_read_trylock(raw_rwlock_t *rw) +static inline int __raw_read_trylock(arch_rwlock_t *rw) { unsigned long tmp, tmp2 = 1; diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h index 9622e126a8de..d14d197ae04a 100644 --- a/arch/arm/include/asm/spinlock_types.h +++ b/arch/arm/include/asm/spinlock_types.h @@ -13,8 +13,8 @@ typedef struct { typedef struct { volatile unsigned int lock; -} raw_rwlock_t; +} arch_rwlock_t; -#define __RAW_RW_LOCK_UNLOCKED { 0 } +#define __ARCH_RW_LOCK_UNLOCKED { 0 } #endif diff --git a/arch/blackfin/include/asm/spinlock.h b/arch/blackfin/include/asm/spinlock.h index 62d49540e02b..7e1c56b0a571 100644 --- a/arch/blackfin/include/asm/spinlock.h +++ b/arch/blackfin/include/asm/spinlock.h @@ -52,42 +52,42 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) cpu_relax(); } -static inline int __raw_read_can_lock(raw_rwlock_t *rw) +static inline int __raw_read_can_lock(arch_rwlock_t *rw) { return __raw_uncached_fetch_asm(&rw->lock) > 0; } -static inline int __raw_write_can_lock(raw_rwlock_t *rw) +static inline int __raw_write_can_lock(arch_rwlock_t *rw) { return __raw_uncached_fetch_asm(&rw->lock) == RW_LOCK_BIAS; } -static inline void __raw_read_lock(raw_rwlock_t *rw) +static inline void __raw_read_lock(arch_rwlock_t *rw) { __raw_read_lock_asm(&rw->lock); } -static inline int __raw_read_trylock(raw_rwlock_t *rw) +static inline int __raw_read_trylock(arch_rwlock_t *rw) { return __raw_read_trylock_asm(&rw->lock); } -static inline void __raw_read_unlock(raw_rwlock_t *rw) +static inline void __raw_read_unlock(arch_rwlock_t *rw) { __raw_read_unlock_asm(&rw->lock); } -static inline void __raw_write_lock(raw_rwlock_t *rw) +static inline void __raw_write_lock(arch_rwlock_t *rw) { __raw_write_lock_asm(&rw->lock); } -static inline int __raw_write_trylock(raw_rwlock_t *rw) +static inline int __raw_write_trylock(arch_rwlock_t *rw) { return __raw_write_trylock_asm(&rw->lock); } -static inline void __raw_write_unlock(raw_rwlock_t *rw) +static inline void __raw_write_unlock(arch_rwlock_t *rw) { __raw_write_unlock_asm(&rw->lock); } diff --git a/arch/blackfin/include/asm/spinlock_types.h b/arch/blackfin/include/asm/spinlock_types.h index c8a3928a58c5..1a33608c958b 100644 --- a/arch/blackfin/include/asm/spinlock_types.h +++ b/arch/blackfin/include/asm/spinlock_types.h @@ -21,8 +21,8 @@ typedef struct { typedef struct { volatile unsigned int lock; -} raw_rwlock_t; +} arch_rwlock_t; -#define __RAW_RW_LOCK_UNLOCKED { RW_LOCK_BIAS } +#define __ARCH_RW_LOCK_UNLOCKED { RW_LOCK_BIAS } #endif diff --git a/arch/cris/include/arch-v32/arch/spinlock.h b/arch/cris/include/arch-v32/arch/spinlock.h index a2e8a394d555..1d7d3a8046cb 100644 --- a/arch/cris/include/arch-v32/arch/spinlock.h +++ b/arch/cris/include/arch-v32/arch/spinlock.h @@ -56,17 +56,17 @@ arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) * */ -static inline int __raw_read_can_lock(raw_rwlock_t *x) +static inline int __raw_read_can_lock(arch_rwlock_t *x) { return (int)(x)->lock > 0; } -static inline int __raw_write_can_lock(raw_rwlock_t *x) +static inline int __raw_write_can_lock(arch_rwlock_t *x) { return (x)->lock == RW_LOCK_BIAS; } -static inline void __raw_read_lock(raw_rwlock_t *rw) +static inline void __raw_read_lock(arch_rwlock_t *rw) { arch_spin_lock(&rw->slock); while (rw->lock == 0); @@ -74,7 +74,7 @@ static inline void __raw_read_lock(raw_rwlock_t *rw) arch_spin_unlock(&rw->slock); } -static inline void __raw_write_lock(raw_rwlock_t *rw) +static inline void __raw_write_lock(arch_rwlock_t *rw) { arch_spin_lock(&rw->slock); while (rw->lock != RW_LOCK_BIAS); @@ -82,14 +82,14 @@ static inline void __raw_write_lock(raw_rwlock_t *rw) arch_spin_unlock(&rw->slock); } -static inline void __raw_read_unlock(raw_rwlock_t *rw) +static inline void __raw_read_unlock(arch_rwlock_t *rw) { arch_spin_lock(&rw->slock); rw->lock++; arch_spin_unlock(&rw->slock); } -static inline void __raw_write_unlock(raw_rwlock_t *rw) +static inline void __raw_write_unlock(arch_rwlock_t *rw) { arch_spin_lock(&rw->slock); while (rw->lock != RW_LOCK_BIAS); @@ -97,7 +97,7 @@ static inline void __raw_write_unlock(raw_rwlock_t *rw) arch_spin_unlock(&rw->slock); } -static inline int __raw_read_trylock(raw_rwlock_t *rw) +static inline int __raw_read_trylock(arch_rwlock_t *rw) { int ret = 0; arch_spin_lock(&rw->slock); @@ -109,7 +109,7 @@ static inline int __raw_read_trylock(raw_rwlock_t *rw) return ret; } -static inline int __raw_write_trylock(raw_rwlock_t *rw) +static inline int __raw_write_trylock(arch_rwlock_t *rw) { int ret = 0; arch_spin_lock(&rw->slock); diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h index b06165f6352f..6715b6a8ebc3 100644 --- a/arch/ia64/include/asm/spinlock.h +++ b/arch/ia64/include/asm/spinlock.h @@ -146,7 +146,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) #ifdef ASM_SUPPORTED static __always_inline void -__raw_read_lock_flags(raw_rwlock_t *lock, unsigned long flags) +__raw_read_lock_flags(arch_rwlock_t *lock, unsigned long flags) { __asm__ __volatile__ ( "tbit.nz p6, p0 = %1,%2\n" @@ -177,7 +177,7 @@ __raw_read_lock_flags(raw_rwlock_t *lock, unsigned long flags) #define __raw_read_lock(rw) \ do { \ - raw_rwlock_t *__read_lock_ptr = (rw); \ + arch_rwlock_t *__read_lock_ptr = (rw); \ \ while (unlikely(ia64_fetchadd(1, (int *) __read_lock_ptr, acq) < 0)) { \ ia64_fetchadd(-1, (int *) __read_lock_ptr, rel); \ @@ -190,14 +190,14 @@ do { \ #define __raw_read_unlock(rw) \ do { \ - raw_rwlock_t *__read_lock_ptr = (rw); \ + arch_rwlock_t *__read_lock_ptr = (rw); \ ia64_fetchadd(-1, (int *) __read_lock_ptr, rel); \ } while (0) #ifdef ASM_SUPPORTED static __always_inline void -__raw_write_lock_flags(raw_rwlock_t *lock, unsigned long flags) +__raw_write_lock_flags(arch_rwlock_t *lock, unsigned long flags) { __asm__ __volatile__ ( "tbit.nz p6, p0 = %1, %2\n" @@ -235,7 +235,7 @@ __raw_write_lock_flags(raw_rwlock_t *lock, unsigned long flags) (result == 0); \ }) -static inline void __raw_write_unlock(raw_rwlock_t *x) +static inline void __raw_write_unlock(arch_rwlock_t *x) { u8 *y = (u8 *)x; barrier(); @@ -265,7 +265,7 @@ static inline void __raw_write_unlock(raw_rwlock_t *x) (ia64_val == 0); \ }) -static inline void __raw_write_unlock(raw_rwlock_t *x) +static inline void __raw_write_unlock(arch_rwlock_t *x) { barrier(); x->write_lock = 0; @@ -273,10 +273,10 @@ static inline void __raw_write_unlock(raw_rwlock_t *x) #endif /* !ASM_SUPPORTED */ -static inline int __raw_read_trylock(raw_rwlock_t *x) +static inline int __raw_read_trylock(arch_rwlock_t *x) { union { - raw_rwlock_t lock; + arch_rwlock_t lock; __u32 word; } old, new; old.lock = new.lock = *x; diff --git a/arch/ia64/include/asm/spinlock_types.h b/arch/ia64/include/asm/spinlock_types.h index 6a11b65fa66d..e2b42a52a6d3 100644 --- a/arch/ia64/include/asm/spinlock_types.h +++ b/arch/ia64/include/asm/spinlock_types.h @@ -14,8 +14,8 @@ typedef struct { typedef struct { volatile unsigned int read_counter : 31; volatile unsigned int write_lock : 1; -} raw_rwlock_t; +} arch_rwlock_t; -#define __RAW_RW_LOCK_UNLOCKED { 0, 0 } +#define __ARCH_RW_LOCK_UNLOCKED { 0, 0 } #endif diff --git a/arch/m32r/include/asm/spinlock.h b/arch/m32r/include/asm/spinlock.h index 8acac950a43c..1c76af8c8e1b 100644 --- a/arch/m32r/include/asm/spinlock.h +++ b/arch/m32r/include/asm/spinlock.h @@ -148,7 +148,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) */ #define __raw_write_can_lock(x) ((x)->lock == RW_LOCK_BIAS) -static inline void __raw_read_lock(raw_rwlock_t *rw) +static inline void __raw_read_lock(arch_rwlock_t *rw) { unsigned long tmp0, tmp1; @@ -199,7 +199,7 @@ static inline void __raw_read_lock(raw_rwlock_t *rw) ); } -static inline void __raw_write_lock(raw_rwlock_t *rw) +static inline void __raw_write_lock(arch_rwlock_t *rw) { unsigned long tmp0, tmp1, tmp2; @@ -252,7 +252,7 @@ static inline void __raw_write_lock(raw_rwlock_t *rw) ); } -static inline void __raw_read_unlock(raw_rwlock_t *rw) +static inline void __raw_read_unlock(arch_rwlock_t *rw) { unsigned long tmp0, tmp1; @@ -274,7 +274,7 @@ static inline void __raw_read_unlock(raw_rwlock_t *rw) ); } -static inline void __raw_write_unlock(raw_rwlock_t *rw) +static inline void __raw_write_unlock(arch_rwlock_t *rw) { unsigned long tmp0, tmp1, tmp2; @@ -298,7 +298,7 @@ static inline void __raw_write_unlock(raw_rwlock_t *rw) ); } -static inline int __raw_read_trylock(raw_rwlock_t *lock) +static inline int __raw_read_trylock(arch_rwlock_t *lock) { atomic_t *count = (atomic_t*)lock; if (atomic_dec_return(count) >= 0) @@ -307,7 +307,7 @@ static inline int __raw_read_trylock(raw_rwlock_t *lock) return 0; } -static inline int __raw_write_trylock(raw_rwlock_t *lock) +static inline int __raw_write_trylock(arch_rwlock_t *lock) { atomic_t *count = (atomic_t *)lock; if (atomic_sub_and_test(RW_LOCK_BIAS, count)) diff --git a/arch/m32r/include/asm/spinlock_types.h b/arch/m32r/include/asm/spinlock_types.h index 5873a8701107..92e27672661f 100644 --- a/arch/m32r/include/asm/spinlock_types.h +++ b/arch/m32r/include/asm/spinlock_types.h @@ -13,11 +13,11 @@ typedef struct { typedef struct { volatile int lock; -} raw_rwlock_t; +} arch_rwlock_t; #define RW_LOCK_BIAS 0x01000000 #define RW_LOCK_BIAS_STR "0x01000000" -#define __RAW_RW_LOCK_UNLOCKED { RW_LOCK_BIAS } +#define __ARCH_RW_LOCK_UNLOCKED { RW_LOCK_BIAS } #endif /* _ASM_M32R_SPINLOCK_TYPES_H */ diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h index 95edebaaf22a..7bf27c8a3364 100644 --- a/arch/mips/include/asm/spinlock.h +++ b/arch/mips/include/asm/spinlock.h @@ -256,7 +256,7 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) */ #define __raw_write_can_lock(rw) (!(rw)->lock) -static inline void __raw_read_lock(raw_rwlock_t *rw) +static inline void __raw_read_lock(arch_rwlock_t *rw) { unsigned int tmp; @@ -301,7 +301,7 @@ static inline void __raw_read_lock(raw_rwlock_t *rw) /* Note the use of sub, not subu which will make the kernel die with an overflow exception if we ever try to unlock an rwlock that is already unlocked or is being held by a writer. */ -static inline void __raw_read_unlock(raw_rwlock_t *rw) +static inline void __raw_read_unlock(arch_rwlock_t *rw) { unsigned int tmp; @@ -335,7 +335,7 @@ static inline void __raw_read_unlock(raw_rwlock_t *rw) } } -static inline void __raw_write_lock(raw_rwlock_t *rw) +static inline void __raw_write_lock(arch_rwlock_t *rw) { unsigned int tmp; @@ -377,7 +377,7 @@ static inline void __raw_write_lock(raw_rwlock_t *rw) smp_llsc_mb(); } -static inline void __raw_write_unlock(raw_rwlock_t *rw) +static inline void __raw_write_unlock(arch_rwlock_t *rw) { smp_mb(); @@ -389,7 +389,7 @@ static inline void __raw_write_unlock(raw_rwlock_t *rw) : "memory"); } -static inline int __raw_read_trylock(raw_rwlock_t *rw) +static inline int __raw_read_trylock(arch_rwlock_t *rw) { unsigned int tmp; int ret; @@ -433,7 +433,7 @@ static inline int __raw_read_trylock(raw_rwlock_t *rw) return ret; } -static inline int __raw_write_trylock(raw_rwlock_t *rw) +static inline int __raw_write_trylock(arch_rwlock_t *rw) { unsigned int tmp; int ret; diff --git a/arch/mips/include/asm/spinlock_types.h b/arch/mips/include/asm/spinlock_types.h index b4c5efaadb9c..ee197c2f9c98 100644 --- a/arch/mips/include/asm/spinlock_types.h +++ b/arch/mips/include/asm/spinlock_types.h @@ -18,8 +18,8 @@ typedef struct { typedef struct { volatile unsigned int lock; -} raw_rwlock_t; +} arch_rwlock_t; -#define __RAW_RW_LOCK_UNLOCKED { 0 } +#define __ARCH_RW_LOCK_UNLOCKED { 0 } #endif diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h index 235e7e386e2a..1ff3a0a94a43 100644 --- a/arch/parisc/include/asm/spinlock.h +++ b/arch/parisc/include/asm/spinlock.h @@ -69,7 +69,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *x) /* Note that we have to ensure interrupts are disabled in case we're * interrupted by some other code that wants to grab the same read lock */ -static __inline__ void __raw_read_lock(raw_rwlock_t *rw) +static __inline__ void __raw_read_lock(arch_rwlock_t *rw) { unsigned long flags; local_irq_save(flags); @@ -81,7 +81,7 @@ static __inline__ void __raw_read_lock(raw_rwlock_t *rw) /* Note that we have to ensure interrupts are disabled in case we're * interrupted by some other code that wants to grab the same read lock */ -static __inline__ void __raw_read_unlock(raw_rwlock_t *rw) +static __inline__ void __raw_read_unlock(arch_rwlock_t *rw) { unsigned long flags; local_irq_save(flags); @@ -93,7 +93,7 @@ static __inline__ void __raw_read_unlock(raw_rwlock_t *rw) /* Note that we have to ensure interrupts are disabled in case we're * interrupted by some other code that wants to grab the same read lock */ -static __inline__ int __raw_read_trylock(raw_rwlock_t *rw) +static __inline__ int __raw_read_trylock(arch_rwlock_t *rw) { unsigned long flags; retry: @@ -119,7 +119,7 @@ static __inline__ int __raw_read_trylock(raw_rwlock_t *rw) /* Note that we have to ensure interrupts are disabled in case we're * interrupted by some other code that wants to read_trylock() this lock */ -static __inline__ void __raw_write_lock(raw_rwlock_t *rw) +static __inline__ void __raw_write_lock(arch_rwlock_t *rw) { unsigned long flags; retry: @@ -141,7 +141,7 @@ retry: local_irq_restore(flags); } -static __inline__ void __raw_write_unlock(raw_rwlock_t *rw) +static __inline__ void __raw_write_unlock(arch_rwlock_t *rw) { rw->counter = 0; arch_spin_unlock(&rw->lock); @@ -149,7 +149,7 @@ static __inline__ void __raw_write_unlock(raw_rwlock_t *rw) /* Note that we have to ensure interrupts are disabled in case we're * interrupted by some other code that wants to read_trylock() this lock */ -static __inline__ int __raw_write_trylock(raw_rwlock_t *rw) +static __inline__ int __raw_write_trylock(arch_rwlock_t *rw) { unsigned long flags; int result = 0; @@ -173,7 +173,7 @@ static __inline__ int __raw_write_trylock(raw_rwlock_t *rw) * read_can_lock - would read_trylock() succeed? * @lock: the rwlock in question. */ -static __inline__ int __raw_read_can_lock(raw_rwlock_t *rw) +static __inline__ int __raw_read_can_lock(arch_rwlock_t *rw) { return rw->counter >= 0; } @@ -182,7 +182,7 @@ static __inline__ int __raw_read_can_lock(raw_rwlock_t *rw) * write_can_lock - would write_trylock() succeed? * @lock: the rwlock in question. */ -static __inline__ int __raw_write_can_lock(raw_rwlock_t *rw) +static __inline__ int __raw_write_can_lock(arch_rwlock_t *rw) { return !rw->counter; } diff --git a/arch/parisc/include/asm/spinlock_types.h b/arch/parisc/include/asm/spinlock_types.h index 396d2746ca57..8c373aa28a86 100644 --- a/arch/parisc/include/asm/spinlock_types.h +++ b/arch/parisc/include/asm/spinlock_types.h @@ -14,8 +14,8 @@ typedef struct { typedef struct { arch_spinlock_t lock; volatile int counter; -} raw_rwlock_t; +} arch_rwlock_t; -#define __RAW_RW_LOCK_UNLOCKED { __ARCH_SPIN_LOCK_UNLOCKED, 0 } +#define __ARCH_RW_LOCK_UNLOCKED { __ARCH_SPIN_LOCK_UNLOCKED, 0 } #endif diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index cdcaf6b97087..2fad2c07c593 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -97,7 +97,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) /* We only yield to the hypervisor if we are in shared processor mode */ #define SHARED_PROCESSOR (get_lppaca()->shared_proc) extern void __spin_yield(arch_spinlock_t *lock); -extern void __rw_yield(raw_rwlock_t *lock); +extern void __rw_yield(arch_rwlock_t *lock); #else /* SPLPAR || ISERIES */ #define __spin_yield(x) barrier() #define __rw_yield(x) barrier() @@ -181,7 +181,7 @@ extern void arch_spin_unlock_wait(arch_spinlock_t *lock); * This returns the old value in the lock + 1, * so we got a read lock if the return value is > 0. */ -static inline long arch_read_trylock(raw_rwlock_t *rw) +static inline long arch_read_trylock(arch_rwlock_t *rw) { long tmp; @@ -205,7 +205,7 @@ static inline long arch_read_trylock(raw_rwlock_t *rw) * This returns the old value in the lock, * so we got the write lock if the return value is 0. */ -static inline long arch_write_trylock(raw_rwlock_t *rw) +static inline long arch_write_trylock(arch_rwlock_t *rw) { long tmp, token; @@ -225,7 +225,7 @@ static inline long arch_write_trylock(raw_rwlock_t *rw) return tmp; } -static inline void __raw_read_lock(raw_rwlock_t *rw) +static inline void __raw_read_lock(arch_rwlock_t *rw) { while (1) { if (likely(arch_read_trylock(rw) > 0)) @@ -239,7 +239,7 @@ static inline void __raw_read_lock(raw_rwlock_t *rw) } } -static inline void __raw_write_lock(raw_rwlock_t *rw) +static inline void __raw_write_lock(arch_rwlock_t *rw) { while (1) { if (likely(arch_write_trylock(rw) == 0)) @@ -253,17 +253,17 @@ static inline void __raw_write_lock(raw_rwlock_t *rw) } } -static inline int __raw_read_trylock(raw_rwlock_t *rw) +static inline int __raw_read_trylock(arch_rwlock_t *rw) { return arch_read_trylock(rw) > 0; } -static inline int __raw_write_trylock(raw_rwlock_t *rw) +static inline int __raw_write_trylock(arch_rwlock_t *rw) { return arch_write_trylock(rw) == 0; } -static inline void __raw_read_unlock(raw_rwlock_t *rw) +static inline void __raw_read_unlock(arch_rwlock_t *rw) { long tmp; @@ -280,7 +280,7 @@ static inline void __raw_read_unlock(raw_rwlock_t *rw) : "cr0", "xer", "memory"); } -static inline void __raw_write_unlock(raw_rwlock_t *rw) +static inline void __raw_write_unlock(arch_rwlock_t *rw) { __asm__ __volatile__("# write_unlock\n\t" LWSYNC_ON_SMP: : :"memory"); diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h index f5f39d82711f..2351adc4fdc4 100644 --- a/arch/powerpc/include/asm/spinlock_types.h +++ b/arch/powerpc/include/asm/spinlock_types.h @@ -13,8 +13,8 @@ typedef struct { typedef struct { volatile signed int lock; -} raw_rwlock_t; +} arch_rwlock_t; -#define __RAW_RW_LOCK_UNLOCKED { 0 } +#define __ARCH_RW_LOCK_UNLOCKED { 0 } #endif diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index ee395e392115..58e14fba11b1 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -55,7 +55,7 @@ void __spin_yield(arch_spinlock_t *lock) * This turns out to be the same for read and write locks, since * we only know the holder if it is write-locked. */ -void __rw_yield(raw_rwlock_t *rw) +void __rw_yield(arch_rwlock_t *rw) { int lock_value; unsigned int holder_cpu, yield_count; diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index a94c146657a9..7f98f0e48acb 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -121,14 +121,14 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp) */ #define __raw_write_can_lock(x) ((x)->lock == 0) -extern void _raw_read_lock_wait(raw_rwlock_t *lp); -extern void _raw_read_lock_wait_flags(raw_rwlock_t *lp, unsigned long flags); -extern int _raw_read_trylock_retry(raw_rwlock_t *lp); -extern void _raw_write_lock_wait(raw_rwlock_t *lp); -extern void _raw_write_lock_wait_flags(raw_rwlock_t *lp, unsigned long flags); -extern int _raw_write_trylock_retry(raw_rwlock_t *lp); - -static inline void __raw_read_lock(raw_rwlock_t *rw) +extern void _raw_read_lock_wait(arch_rwlock_t *lp); +extern void _raw_read_lock_wait_flags(arch_rwlock_t *lp, unsigned long flags); +extern int _raw_read_trylock_retry(arch_rwlock_t *lp); +extern void _raw_write_lock_wait(arch_rwlock_t *lp); +extern void _raw_write_lock_wait_flags(arch_rwlock_t *lp, unsigned long flags); +extern int _raw_write_trylock_retry(arch_rwlock_t *lp); + +static inline void __raw_read_lock(arch_rwlock_t *rw) { unsigned int old; old = rw->lock & 0x7fffffffU; @@ -136,7 +136,7 @@ static inline void __raw_read_lock(raw_rwlock_t *rw) _raw_read_lock_wait(rw); } -static inline void __raw_read_lock_flags(raw_rwlock_t *rw, unsigned long flags) +static inline void __raw_read_lock_flags(arch_rwlock_t *rw, unsigned long flags) { unsigned int old; old = rw->lock & 0x7fffffffU; @@ -144,7 +144,7 @@ static inline void __raw_read_lock_flags(raw_rwlock_t *rw, unsigned long flags) _raw_read_lock_wait_flags(rw, flags); } -static inline void __raw_read_unlock(raw_rwlock_t *rw) +static inline void __raw_read_unlock(arch_rwlock_t *rw) { unsigned int old, cmp; @@ -155,24 +155,24 @@ static inline void __raw_read_unlock(raw_rwlock_t *rw) } while (cmp != old); } -static inline void __raw_write_lock(raw_rwlock_t *rw) +static inline void __raw_write_lock(arch_rwlock_t *rw) { if (unlikely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) != 0)) _raw_write_lock_wait(rw); } -static inline void __raw_write_lock_flags(raw_rwlock_t *rw, unsigned long flags) +static inline void __raw_write_lock_flags(arch_rwlock_t *rw, unsigned long flags) { if (unlikely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) != 0)) _raw_write_lock_wait_flags(rw, flags); } -static inline void __raw_write_unlock(raw_rwlock_t *rw) +static inline void __raw_write_unlock(arch_rwlock_t *rw) { _raw_compare_and_swap(&rw->lock, 0x80000000, 0); } -static inline int __raw_read_trylock(raw_rwlock_t *rw) +static inline int __raw_read_trylock(arch_rwlock_t *rw) { unsigned int old; old = rw->lock & 0x7fffffffU; @@ -181,7 +181,7 @@ static inline int __raw_read_trylock(raw_rwlock_t *rw) return _raw_read_trylock_retry(rw); } -static inline int __raw_write_trylock(raw_rwlock_t *rw) +static inline int __raw_write_trylock(arch_rwlock_t *rw) { if (likely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0)) return 1; diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h index e25c0370f6cd..9c76656a0af0 100644 --- a/arch/s390/include/asm/spinlock_types.h +++ b/arch/s390/include/asm/spinlock_types.h @@ -13,8 +13,8 @@ typedef struct { typedef struct { volatile unsigned int lock; -} raw_rwlock_t; +} arch_rwlock_t; -#define __RAW_RW_LOCK_UNLOCKED { 0 } +#define __ARCH_RW_LOCK_UNLOCKED { 0 } #endif diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index f4596452f072..09fee9a1aa15 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -105,7 +105,7 @@ void arch_spin_relax(arch_spinlock_t *lock) } EXPORT_SYMBOL(arch_spin_relax); -void _raw_read_lock_wait(raw_rwlock_t *rw) +void _raw_read_lock_wait(arch_rwlock_t *rw) { unsigned int old; int count = spin_retry; @@ -124,7 +124,7 @@ void _raw_read_lock_wait(raw_rwlock_t *rw) } EXPORT_SYMBOL(_raw_read_lock_wait); -void _raw_read_lock_wait_flags(raw_rwlock_t *rw, unsigned long flags) +void _raw_read_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags) { unsigned int old; int count = spin_retry; @@ -145,7 +145,7 @@ void _raw_read_lock_wait_flags(raw_rwlock_t *rw, unsigned long flags) } EXPORT_SYMBOL(_raw_read_lock_wait_flags); -int _raw_read_trylock_retry(raw_rwlock_t *rw) +int _raw_read_trylock_retry(arch_rwlock_t *rw) { unsigned int old; int count = spin_retry; @@ -161,7 +161,7 @@ int _raw_read_trylock_retry(raw_rwlock_t *rw) } EXPORT_SYMBOL(_raw_read_trylock_retry); -void _raw_write_lock_wait(raw_rwlock_t *rw) +void _raw_write_lock_wait(arch_rwlock_t *rw) { int count = spin_retry; @@ -178,7 +178,7 @@ void _raw_write_lock_wait(raw_rwlock_t *rw) } EXPORT_SYMBOL(_raw_write_lock_wait); -void _raw_write_lock_wait_flags(raw_rwlock_t *rw, unsigned long flags) +void _raw_write_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags) { int count = spin_retry; @@ -197,7 +197,7 @@ void _raw_write_lock_wait_flags(raw_rwlock_t *rw, unsigned long flags) } EXPORT_SYMBOL(_raw_write_lock_wait_flags); -int _raw_write_trylock_retry(raw_rwlock_t *rw) +int _raw_write_trylock_retry(arch_rwlock_t *rw) { int count = spin_retry; diff --git a/arch/sh/include/asm/spinlock.h b/arch/sh/include/asm/spinlock.h index da1c6491ed4b..7f3626aac869 100644 --- a/arch/sh/include/asm/spinlock.h +++ b/arch/sh/include/asm/spinlock.h @@ -108,7 +108,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) */ #define __raw_write_can_lock(x) ((x)->lock == RW_LOCK_BIAS) -static inline void __raw_read_lock(raw_rwlock_t *rw) +static inline void __raw_read_lock(arch_rwlock_t *rw) { unsigned long tmp; @@ -126,7 +126,7 @@ static inline void __raw_read_lock(raw_rwlock_t *rw) ); } -static inline void __raw_read_unlock(raw_rwlock_t *rw) +static inline void __raw_read_unlock(arch_rwlock_t *rw) { unsigned long tmp; @@ -142,7 +142,7 @@ static inline void __raw_read_unlock(raw_rwlock_t *rw) ); } -static inline void __raw_write_lock(raw_rwlock_t *rw) +static inline void __raw_write_lock(arch_rwlock_t *rw) { unsigned long tmp; @@ -160,7 +160,7 @@ static inline void __raw_write_lock(raw_rwlock_t *rw) ); } -static inline void __raw_write_unlock(raw_rwlock_t *rw) +static inline void __raw_write_unlock(arch_rwlock_t *rw) { __asm__ __volatile__ ( "mov.l %1, @%0 ! __raw_write_unlock \n\t" @@ -170,7 +170,7 @@ static inline void __raw_write_unlock(raw_rwlock_t *rw) ); } -static inline int __raw_read_trylock(raw_rwlock_t *rw) +static inline int __raw_read_trylock(arch_rwlock_t *rw) { unsigned long tmp, oldval; @@ -193,7 +193,7 @@ static inline int __raw_read_trylock(raw_rwlock_t *rw) return (oldval > 0); } -static inline int __raw_write_trylock(raw_rwlock_t *rw) +static inline int __raw_write_trylock(arch_rwlock_t *rw) { unsigned long tmp, oldval; diff --git a/arch/sh/include/asm/spinlock_types.h b/arch/sh/include/asm/spinlock_types.h index a3be2db960ed..9b7560db06ca 100644 --- a/arch/sh/include/asm/spinlock_types.h +++ b/arch/sh/include/asm/spinlock_types.h @@ -13,9 +13,9 @@ typedef struct { typedef struct { volatile unsigned int lock; -} raw_rwlock_t; +} arch_rwlock_t; #define RW_LOCK_BIAS 0x01000000 -#define __RAW_RW_LOCK_UNLOCKED { RW_LOCK_BIAS } +#define __ARCH_RW_LOCK_UNLOCKED { RW_LOCK_BIAS } #endif diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h index 9b0f2f53c81c..06d37e588fde 100644 --- a/arch/sparc/include/asm/spinlock_32.h +++ b/arch/sparc/include/asm/spinlock_32.h @@ -65,7 +65,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) * Sort of like atomic_t's on Sparc, but even more clever. * * ------------------------------------ - * | 24-bit counter | wlock | raw_rwlock_t + * | 24-bit counter | wlock | arch_rwlock_t * ------------------------------------ * 31 8 7 0 * @@ -76,9 +76,9 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) * * Unfortunately this scheme limits us to ~16,000,000 cpus. */ -static inline void arch_read_lock(raw_rwlock_t *rw) +static inline void arch_read_lock(arch_rwlock_t *rw) { - register raw_rwlock_t *lp asm("g1"); + register arch_rwlock_t *lp asm("g1"); lp = rw; __asm__ __volatile__( "mov %%o7, %%g4\n\t" @@ -96,9 +96,9 @@ do { unsigned long flags; \ local_irq_restore(flags); \ } while(0) -static inline void arch_read_unlock(raw_rwlock_t *rw) +static inline void arch_read_unlock(arch_rwlock_t *rw) { - register raw_rwlock_t *lp asm("g1"); + register arch_rwlock_t *lp asm("g1"); lp = rw; __asm__ __volatile__( "mov %%o7, %%g4\n\t" @@ -116,9 +116,9 @@ do { unsigned long flags; \ local_irq_restore(flags); \ } while(0) -static inline void __raw_write_lock(raw_rwlock_t *rw) +static inline void __raw_write_lock(arch_rwlock_t *rw) { - register raw_rwlock_t *lp asm("g1"); + register arch_rwlock_t *lp asm("g1"); lp = rw; __asm__ __volatile__( "mov %%o7, %%g4\n\t" @@ -130,7 +130,7 @@ static inline void __raw_write_lock(raw_rwlock_t *rw) *(volatile __u32 *)&lp->lock = ~0U; } -static inline int __raw_write_trylock(raw_rwlock_t *rw) +static inline int __raw_write_trylock(arch_rwlock_t *rw) { unsigned int val; @@ -150,9 +150,9 @@ static inline int __raw_write_trylock(raw_rwlock_t *rw) return (val == 0); } -static inline int arch_read_trylock(raw_rwlock_t *rw) +static inline int arch_read_trylock(arch_rwlock_t *rw) { - register raw_rwlock_t *lp asm("g1"); + register arch_rwlock_t *lp asm("g1"); register int res asm("o0"); lp = rw; __asm__ __volatile__( diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h index 7cf58a2fcda4..2b22d7f2c2fb 100644 --- a/arch/sparc/include/asm/spinlock_64.h +++ b/arch/sparc/include/asm/spinlock_64.h @@ -92,7 +92,7 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long fla /* Multi-reader locks, these are much saner than the 32-bit Sparc ones... */ -static void inline arch_read_lock(raw_rwlock_t *lock) +static void inline arch_read_lock(arch_rwlock_t *lock) { unsigned long tmp1, tmp2; @@ -115,7 +115,7 @@ static void inline arch_read_lock(raw_rwlock_t *lock) : "memory"); } -static int inline arch_read_trylock(raw_rwlock_t *lock) +static int inline arch_read_trylock(arch_rwlock_t *lock) { int tmp1, tmp2; @@ -136,7 +136,7 @@ static int inline arch_read_trylock(raw_rwlock_t *lock) return tmp1; } -static void inline arch_read_unlock(raw_rwlock_t *lock) +static void inline arch_read_unlock(arch_rwlock_t *lock) { unsigned long tmp1, tmp2; @@ -152,7 +152,7 @@ static void inline arch_read_unlock(raw_rwlock_t *lock) : "memory"); } -static void inline arch_write_lock(raw_rwlock_t *lock) +static void inline arch_write_lock(arch_rwlock_t *lock) { unsigned long mask, tmp1, tmp2; @@ -177,7 +177,7 @@ static void inline arch_write_lock(raw_rwlock_t *lock) : "memory"); } -static void inline arch_write_unlock(raw_rwlock_t *lock) +static void inline arch_write_unlock(arch_rwlock_t *lock) { __asm__ __volatile__( " stw %%g0, [%0]" @@ -186,7 +186,7 @@ static void inline arch_write_unlock(raw_rwlock_t *lock) : "memory"); } -static int inline arch_write_trylock(raw_rwlock_t *lock) +static int inline arch_write_trylock(arch_rwlock_t *lock) { unsigned long mask, tmp1, tmp2, result; diff --git a/arch/sparc/include/asm/spinlock_types.h b/arch/sparc/include/asm/spinlock_types.h index c145e63a5d66..9c454fdeaad8 100644 --- a/arch/sparc/include/asm/spinlock_types.h +++ b/arch/sparc/include/asm/spinlock_types.h @@ -13,8 +13,8 @@ typedef struct { typedef struct { volatile unsigned int lock; -} raw_rwlock_t; +} arch_rwlock_t; -#define __RAW_RW_LOCK_UNLOCKED { 0 } +#define __ARCH_RW_LOCK_UNLOCKED { 0 } #endif diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index ab9055fd57d9..99cb86e843a0 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -232,7 +232,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) * read_can_lock - would read_trylock() succeed? * @lock: the rwlock in question. */ -static inline int __raw_read_can_lock(raw_rwlock_t *lock) +static inline int __raw_read_can_lock(arch_rwlock_t *lock) { return (int)(lock)->lock > 0; } @@ -241,12 +241,12 @@ static inline int __raw_read_can_lock(raw_rwlock_t *lock) * write_can_lock - would write_trylock() succeed? * @lock: the rwlock in question. */ -static inline int __raw_write_can_lock(raw_rwlock_t *lock) +static inline int __raw_write_can_lock(arch_rwlock_t *lock) { return (lock)->lock == RW_LOCK_BIAS; } -static inline void __raw_read_lock(raw_rwlock_t *rw) +static inline void __raw_read_lock(arch_rwlock_t *rw) { asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t" "jns 1f\n" @@ -255,7 +255,7 @@ static inline void __raw_read_lock(raw_rwlock_t *rw) ::LOCK_PTR_REG (rw) : "memory"); } -static inline void __raw_write_lock(raw_rwlock_t *rw) +static inline void __raw_write_lock(arch_rwlock_t *rw) { asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t" "jz 1f\n" @@ -264,7 +264,7 @@ static inline void __raw_write_lock(raw_rwlock_t *rw) ::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory"); } -static inline int __raw_read_trylock(raw_rwlock_t *lock) +static inline int __raw_read_trylock(arch_rwlock_t *lock) { atomic_t *count = (atomic_t *)lock; @@ -274,7 +274,7 @@ static inline int __raw_read_trylock(raw_rwlock_t *lock) return 0; } -static inline int __raw_write_trylock(raw_rwlock_t *lock) +static inline int __raw_write_trylock(arch_rwlock_t *lock) { atomic_t *count = (atomic_t *)lock; @@ -284,12 +284,12 @@ static inline int __raw_write_trylock(raw_rwlock_t *lock) return 0; } -static inline void __raw_read_unlock(raw_rwlock_t *rw) +static inline void __raw_read_unlock(arch_rwlock_t *rw) { asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory"); } -static inline void __raw_write_unlock(raw_rwlock_t *rw) +static inline void __raw_write_unlock(arch_rwlock_t *rw) { asm volatile(LOCK_PREFIX "addl %1, %0" : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory"); diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h index 696f8364a4f3..dcb48b2edc11 100644 --- a/arch/x86/include/asm/spinlock_types.h +++ b/arch/x86/include/asm/spinlock_types.h @@ -13,8 +13,8 @@ typedef struct arch_spinlock { typedef struct { unsigned int lock; -} raw_rwlock_t; +} arch_rwlock_t; -#define __RAW_RW_LOCK_UNLOCKED { RW_LOCK_BIAS } +#define __ARCH_RW_LOCK_UNLOCKED { RW_LOCK_BIAS } #endif /* _ASM_X86_SPINLOCK_TYPES_H */ diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h index f8c935206a41..bd31808c7d8e 100644 --- a/include/linux/rwlock_types.h +++ b/include/linux/rwlock_types.h @@ -9,7 +9,7 @@ * Released under the General Public License (GPL). */ typedef struct { - raw_rwlock_t raw_lock; + arch_rwlock_t raw_lock; #ifdef CONFIG_GENERIC_LOCKBREAK unsigned int break_lock; #endif @@ -32,14 +32,14 @@ typedef struct { #ifdef CONFIG_DEBUG_SPINLOCK #define __RW_LOCK_UNLOCKED(lockname) \ - (rwlock_t) { .raw_lock = __RAW_RW_LOCK_UNLOCKED, \ + (rwlock_t) { .raw_lock = __ARCH_RW_LOCK_UNLOCKED, \ .magic = RWLOCK_MAGIC, \ .owner = SPINLOCK_OWNER_INIT, \ .owner_cpu = -1, \ RW_DEP_MAP_INIT(lockname) } #else #define __RW_LOCK_UNLOCKED(lockname) \ - (rwlock_t) { .raw_lock = __RAW_RW_LOCK_UNLOCKED, \ + (rwlock_t) { .raw_lock = __ARCH_RW_LOCK_UNLOCKED, \ RW_DEP_MAP_INIT(lockname) } #endif diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index de3a022489c6..53bc2213b414 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -8,7 +8,7 @@ * * on SMP builds: * - * asm/spinlock_types.h: contains the arch_spinlock_t/raw_rwlock_t and the + * asm/spinlock_types.h: contains the arch_spinlock_t/arch_rwlock_t and the * initializers * * linux/spinlock_types.h: @@ -75,7 +75,7 @@ #define __lockfunc __attribute__((section(".spinlock.text"))) /* - * Pull the arch_spinlock_t and raw_rwlock_t definitions: + * Pull the arch_spinlock_t and arch_rwlock_t definitions: */ #include diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h index 10db021f4875..c09b6407ae1b 100644 --- a/include/linux/spinlock_types_up.h +++ b/include/linux/spinlock_types_up.h @@ -30,8 +30,8 @@ typedef struct { } arch_spinlock_t; typedef struct { /* no debug version on UP */ -} raw_rwlock_t; +} arch_rwlock_t; -#define __RAW_RW_LOCK_UNLOCKED { } +#define __ARCH_RW_LOCK_UNLOCKED { } #endif /* __LINUX_SPINLOCK_TYPES_UP_H */ diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c index 1304fe094546..3f72f10d9cb0 100644 --- a/lib/spinlock_debug.c +++ b/lib/spinlock_debug.c @@ -41,7 +41,7 @@ void __rwlock_init(rwlock_t *lock, const char *name, debug_check_no_locks_freed((void *)lock, sizeof(*lock)); lockdep_init_map(&lock->dep_map, name, key, 0); #endif - lock->raw_lock = (raw_rwlock_t) __RAW_RW_LOCK_UNLOCKED; + lock->raw_lock = (arch_rwlock_t) __ARCH_RW_LOCK_UNLOCKED; lock->magic = RWLOCK_MAGIC; lock->owner = SPINLOCK_OWNER_INIT; lock->owner_cpu = -1; -- cgit v1.2.3 From e5931943d02bf751b1ec849c0d2ade23d76a8d41 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 3 Dec 2009 20:08:46 +0100 Subject: locking: Convert raw_rwlock functions to arch_rwlock Name space cleanup for rwlock functions. No functional change. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Acked-by: David S. Miller Acked-by: Ingo Molnar Cc: linux-arch@vger.kernel.org --- arch/alpha/include/asm/spinlock.h | 20 +++++++------- arch/arm/include/asm/spinlock.h | 20 +++++++------- arch/blackfin/include/asm/spinlock.h | 40 ++++++++++++++-------------- arch/cris/include/arch-v32/arch/spinlock.h | 16 ++++++------ arch/ia64/include/asm/spinlock.h | 32 +++++++++++------------ arch/m32r/include/asm/spinlock.h | 20 +++++++------- arch/mips/include/asm/spinlock.h | 42 +++++++++++++++--------------- arch/parisc/include/asm/spinlock.h | 20 +++++++------- arch/powerpc/include/asm/spinlock.h | 32 +++++++++++------------ arch/s390/include/asm/spinlock.h | 20 +++++++------- arch/s390/lib/spinlock.c | 12 ++++----- arch/sh/include/asm/spinlock.h | 32 +++++++++++------------ arch/sparc/include/asm/spinlock_32.h | 32 +++++++++++------------ arch/sparc/include/asm/spinlock_64.h | 22 ++++++++-------- arch/x86/include/asm/spinlock.h | 20 +++++++------- include/linux/rwlock.h | 20 +++++++------- include/linux/spinlock_up.h | 16 ++++++------ lib/spinlock_debug.c | 16 ++++++------ 18 files changed, 216 insertions(+), 216 deletions(-) (limited to 'arch/x86') diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h index e8b2970f037b..d0faca1e992d 100644 --- a/arch/alpha/include/asm/spinlock.h +++ b/arch/alpha/include/asm/spinlock.h @@ -50,17 +50,17 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) /***********************************************************/ -static inline int __raw_read_can_lock(arch_rwlock_t *lock) +static inline int arch_read_can_lock(arch_rwlock_t *lock) { return (lock->lock & 1) == 0; } -static inline int __raw_write_can_lock(arch_rwlock_t *lock) +static inline int arch_write_can_lock(arch_rwlock_t *lock) { return lock->lock == 0; } -static inline void __raw_read_lock(arch_rwlock_t *lock) +static inline void arch_read_lock(arch_rwlock_t *lock) { long regx; @@ -80,7 +80,7 @@ static inline void __raw_read_lock(arch_rwlock_t *lock) : "m" (*lock) : "memory"); } -static inline void __raw_write_lock(arch_rwlock_t *lock) +static inline void arch_write_lock(arch_rwlock_t *lock) { long regx; @@ -100,7 +100,7 @@ static inline void __raw_write_lock(arch_rwlock_t *lock) : "m" (*lock) : "memory"); } -static inline int __raw_read_trylock(arch_rwlock_t * lock) +static inline int arch_read_trylock(arch_rwlock_t * lock) { long regx; int success; @@ -122,7 +122,7 @@ static inline int __raw_read_trylock(arch_rwlock_t * lock) return success; } -static inline int __raw_write_trylock(arch_rwlock_t * lock) +static inline int arch_write_trylock(arch_rwlock_t * lock) { long regx; int success; @@ -144,7 +144,7 @@ static inline int __raw_write_trylock(arch_rwlock_t * lock) return success; } -static inline void __raw_read_unlock(arch_rwlock_t * lock) +static inline void arch_read_unlock(arch_rwlock_t * lock) { long regx; __asm__ __volatile__( @@ -160,14 +160,14 @@ static inline void __raw_read_unlock(arch_rwlock_t * lock) : "m" (*lock) : "memory"); } -static inline void __raw_write_unlock(arch_rwlock_t * lock) +static inline void arch_write_unlock(arch_rwlock_t * lock) { mb(); lock->lock = 0; } -#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock) -#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock) +#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) +#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) #define arch_spin_relax(lock) cpu_relax() #define arch_read_relax(lock) cpu_relax() diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index a8671d8bc7d4..c91c64cab922 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -86,7 +86,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) * just write zero since the lock is exclusively held. */ -static inline void __raw_write_lock(arch_rwlock_t *rw) +static inline void arch_write_lock(arch_rwlock_t *rw) { unsigned long tmp; @@ -106,7 +106,7 @@ static inline void __raw_write_lock(arch_rwlock_t *rw) smp_mb(); } -static inline int __raw_write_trylock(arch_rwlock_t *rw) +static inline int arch_write_trylock(arch_rwlock_t *rw) { unsigned long tmp; @@ -126,7 +126,7 @@ static inline int __raw_write_trylock(arch_rwlock_t *rw) } } -static inline void __raw_write_unlock(arch_rwlock_t *rw) +static inline void arch_write_unlock(arch_rwlock_t *rw) { smp_mb(); @@ -142,7 +142,7 @@ static inline void __raw_write_unlock(arch_rwlock_t *rw) } /* write_can_lock - would write_trylock() succeed? */ -#define __raw_write_can_lock(x) ((x)->lock == 0) +#define arch_write_can_lock(x) ((x)->lock == 0) /* * Read locks are a bit more hairy: @@ -156,7 +156,7 @@ static inline void __raw_write_unlock(arch_rwlock_t *rw) * currently active. However, we know we won't have any write * locks. */ -static inline void __raw_read_lock(arch_rwlock_t *rw) +static inline void arch_read_lock(arch_rwlock_t *rw) { unsigned long tmp, tmp2; @@ -176,7 +176,7 @@ static inline void __raw_read_lock(arch_rwlock_t *rw) smp_mb(); } -static inline void __raw_read_unlock(arch_rwlock_t *rw) +static inline void arch_read_unlock(arch_rwlock_t *rw) { unsigned long tmp, tmp2; @@ -198,7 +198,7 @@ static inline void __raw_read_unlock(arch_rwlock_t *rw) : "cc"); } -static inline int __raw_read_trylock(arch_rwlock_t *rw) +static inline int arch_read_trylock(arch_rwlock_t *rw) { unsigned long tmp, tmp2 = 1; @@ -215,10 +215,10 @@ static inline int __raw_read_trylock(arch_rwlock_t *rw) } /* read_can_lock - would read_trylock() succeed? */ -#define __raw_read_can_lock(x) ((x)->lock < 0x80000000) +#define arch_read_can_lock(x) ((x)->lock < 0x80000000) -#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock) -#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock) +#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) +#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) #define arch_spin_relax(lock) cpu_relax() #define arch_read_relax(lock) cpu_relax() diff --git a/arch/blackfin/include/asm/spinlock.h b/arch/blackfin/include/asm/spinlock.h index 7e1c56b0a571..1942ccfedbe0 100644 --- a/arch/blackfin/include/asm/spinlock.h +++ b/arch/blackfin/include/asm/spinlock.h @@ -17,12 +17,12 @@ asmlinkage int __raw_spin_is_locked_asm(volatile int *ptr); asmlinkage void __raw_spin_lock_asm(volatile int *ptr); asmlinkage int __raw_spin_trylock_asm(volatile int *ptr); asmlinkage void __raw_spin_unlock_asm(volatile int *ptr); -asmlinkage void __raw_read_lock_asm(volatile int *ptr); -asmlinkage int __raw_read_trylock_asm(volatile int *ptr); -asmlinkage void __raw_read_unlock_asm(volatile int *ptr); -asmlinkage void __raw_write_lock_asm(volatile int *ptr); -asmlinkage int __raw_write_trylock_asm(volatile int *ptr); -asmlinkage void __raw_write_unlock_asm(volatile int *ptr); +asmlinkage void arch_read_lock_asm(volatile int *ptr); +asmlinkage int arch_read_trylock_asm(volatile int *ptr); +asmlinkage void arch_read_unlock_asm(volatile int *ptr); +asmlinkage void arch_write_lock_asm(volatile int *ptr); +asmlinkage int arch_write_trylock_asm(volatile int *ptr); +asmlinkage void arch_write_unlock_asm(volatile int *ptr); static inline int arch_spin_is_locked(arch_spinlock_t *lock) { @@ -52,44 +52,44 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) cpu_relax(); } -static inline int __raw_read_can_lock(arch_rwlock_t *rw) +static inline int arch_read_can_lock(arch_rwlock_t *rw) { return __raw_uncached_fetch_asm(&rw->lock) > 0; } -static inline int __raw_write_can_lock(arch_rwlock_t *rw) +static inline int arch_write_can_lock(arch_rwlock_t *rw) { return __raw_uncached_fetch_asm(&rw->lock) == RW_LOCK_BIAS; } -static inline void __raw_read_lock(arch_rwlock_t *rw) +static inline void arch_read_lock(arch_rwlock_t *rw) { - __raw_read_lock_asm(&rw->lock); + arch_read_lock_asm(&rw->lock); } -static inline int __raw_read_trylock(arch_rwlock_t *rw) +static inline int arch_read_trylock(arch_rwlock_t *rw) { - return __raw_read_trylock_asm(&rw->lock); + return arch_read_trylock_asm(&rw->lock); } -static inline void __raw_read_unlock(arch_rwlock_t *rw) +static inline void arch_read_unlock(arch_rwlock_t *rw) { - __raw_read_unlock_asm(&rw->lock); + arch_read_unlock_asm(&rw->lock); } -static inline void __raw_write_lock(arch_rwlock_t *rw) +static inline void arch_write_lock(arch_rwlock_t *rw) { - __raw_write_lock_asm(&rw->lock); + arch_write_lock_asm(&rw->lock); } -static inline int __raw_write_trylock(arch_rwlock_t *rw) +static inline int arch_write_trylock(arch_rwlock_t *rw) { - return __raw_write_trylock_asm(&rw->lock); + return arch_write_trylock_asm(&rw->lock); } -static inline void __raw_write_unlock(arch_rwlock_t *rw) +static inline void arch_write_unlock(arch_rwlock_t *rw) { - __raw_write_unlock_asm(&rw->lock); + arch_write_unlock_asm(&rw->lock); } #define arch_spin_relax(lock) cpu_relax() diff --git a/arch/cris/include/arch-v32/arch/spinlock.h b/arch/cris/include/arch-v32/arch/spinlock.h index 1d7d3a8046cb..f171a6600fbc 100644 --- a/arch/cris/include/arch-v32/arch/spinlock.h +++ b/arch/cris/include/arch-v32/arch/spinlock.h @@ -56,17 +56,17 @@ arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) * */ -static inline int __raw_read_can_lock(arch_rwlock_t *x) +static inline int arch_read_can_lock(arch_rwlock_t *x) { return (int)(x)->lock > 0; } -static inline int __raw_write_can_lock(arch_rwlock_t *x) +static inline int arch_write_can_lock(arch_rwlock_t *x) { return (x)->lock == RW_LOCK_BIAS; } -static inline void __raw_read_lock(arch_rwlock_t *rw) +static inline void arch_read_lock(arch_rwlock_t *rw) { arch_spin_lock(&rw->slock); while (rw->lock == 0); @@ -74,7 +74,7 @@ static inline void __raw_read_lock(arch_rwlock_t *rw) arch_spin_unlock(&rw->slock); } -static inline void __raw_write_lock(arch_rwlock_t *rw) +static inline void arch_write_lock(arch_rwlock_t *rw) { arch_spin_lock(&rw->slock); while (rw->lock != RW_LOCK_BIAS); @@ -82,14 +82,14 @@ static inline void __raw_write_lock(arch_rwlock_t *rw) arch_spin_unlock(&rw->slock); } -static inline void __raw_read_unlock(arch_rwlock_t *rw) +static inline void arch_read_unlock(arch_rwlock_t *rw) { arch_spin_lock(&rw->slock); rw->lock++; arch_spin_unlock(&rw->slock); } -static inline void __raw_write_unlock(arch_rwlock_t *rw) +static inline void arch_write_unlock(arch_rwlock_t *rw) { arch_spin_lock(&rw->slock); while (rw->lock != RW_LOCK_BIAS); @@ -97,7 +97,7 @@ static inline void __raw_write_unlock(arch_rwlock_t *rw) arch_spin_unlock(&rw->slock); } -static inline int __raw_read_trylock(arch_rwlock_t *rw) +static inline int arch_read_trylock(arch_rwlock_t *rw) { int ret = 0; arch_spin_lock(&rw->slock); @@ -109,7 +109,7 @@ static inline int __raw_read_trylock(arch_rwlock_t *rw) return ret; } -static inline int __raw_write_trylock(arch_rwlock_t *rw) +static inline int arch_write_trylock(arch_rwlock_t *rw) { int ret = 0; arch_spin_lock(&rw->slock); diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h index 6715b6a8ebc3..1a91c9121d17 100644 --- a/arch/ia64/include/asm/spinlock.h +++ b/arch/ia64/include/asm/spinlock.h @@ -140,13 +140,13 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) __ticket_spin_unlock_wait(lock); } -#define __raw_read_can_lock(rw) (*(volatile int *)(rw) >= 0) -#define __raw_write_can_lock(rw) (*(volatile int *)(rw) == 0) +#define arch_read_can_lock(rw) (*(volatile int *)(rw) >= 0) +#define arch_write_can_lock(rw) (*(volatile int *)(rw) == 0) #ifdef ASM_SUPPORTED static __always_inline void -__raw_read_lock_flags(arch_rwlock_t *lock, unsigned long flags) +arch_read_lock_flags(arch_rwlock_t *lock, unsigned long flags) { __asm__ __volatile__ ( "tbit.nz p6, p0 = %1,%2\n" @@ -169,13 +169,13 @@ __raw_read_lock_flags(arch_rwlock_t *lock, unsigned long flags) : "p6", "p7", "r2", "memory"); } -#define __raw_read_lock(lock) __raw_read_lock_flags(lock, 0) +#define arch_read_lock(lock) arch_read_lock_flags(lock, 0) #else /* !ASM_SUPPORTED */ -#define __raw_read_lock_flags(rw, flags) __raw_read_lock(rw) +#define arch_read_lock_flags(rw, flags) arch_read_lock(rw) -#define __raw_read_lock(rw) \ +#define arch_read_lock(rw) \ do { \ arch_rwlock_t *__read_lock_ptr = (rw); \ \ @@ -188,7 +188,7 @@ do { \ #endif /* !ASM_SUPPORTED */ -#define __raw_read_unlock(rw) \ +#define arch_read_unlock(rw) \ do { \ arch_rwlock_t *__read_lock_ptr = (rw); \ ia64_fetchadd(-1, (int *) __read_lock_ptr, rel); \ @@ -197,7 +197,7 @@ do { \ #ifdef ASM_SUPPORTED static __always_inline void -__raw_write_lock_flags(arch_rwlock_t *lock, unsigned long flags) +arch_write_lock_flags(arch_rwlock_t *lock, unsigned long flags) { __asm__ __volatile__ ( "tbit.nz p6, p0 = %1, %2\n" @@ -221,9 +221,9 @@ __raw_write_lock_flags(arch_rwlock_t *lock, unsigned long flags) : "ar.ccv", "p6", "p7", "r2", "r29", "memory"); } -#define __raw_write_lock(rw) __raw_write_lock_flags(rw, 0) +#define arch_write_lock(rw) arch_write_lock_flags(rw, 0) -#define __raw_write_trylock(rw) \ +#define arch_write_trylock(rw) \ ({ \ register long result; \ \ @@ -235,7 +235,7 @@ __raw_write_lock_flags(arch_rwlock_t *lock, unsigned long flags) (result == 0); \ }) -static inline void __raw_write_unlock(arch_rwlock_t *x) +static inline void arch_write_unlock(arch_rwlock_t *x) { u8 *y = (u8 *)x; barrier(); @@ -244,9 +244,9 @@ static inline void __raw_write_unlock(arch_rwlock_t *x) #else /* !ASM_SUPPORTED */ -#define __raw_write_lock_flags(l, flags) __raw_write_lock(l) +#define arch_write_lock_flags(l, flags) arch_write_lock(l) -#define __raw_write_lock(l) \ +#define arch_write_lock(l) \ ({ \ __u64 ia64_val, ia64_set_val = ia64_dep_mi(-1, 0, 31, 1); \ __u32 *ia64_write_lock_ptr = (__u32 *) (l); \ @@ -257,7 +257,7 @@ static inline void __raw_write_unlock(arch_rwlock_t *x) } while (ia64_val); \ }) -#define __raw_write_trylock(rw) \ +#define arch_write_trylock(rw) \ ({ \ __u64 ia64_val; \ __u64 ia64_set_val = ia64_dep_mi(-1, 0, 31,1); \ @@ -265,7 +265,7 @@ static inline void __raw_write_unlock(arch_rwlock_t *x) (ia64_val == 0); \ }) -static inline void __raw_write_unlock(arch_rwlock_t *x) +static inline void arch_write_unlock(arch_rwlock_t *x) { barrier(); x->write_lock = 0; @@ -273,7 +273,7 @@ static inline void __raw_write_unlock(arch_rwlock_t *x) #endif /* !ASM_SUPPORTED */ -static inline int __raw_read_trylock(arch_rwlock_t *x) +static inline int arch_read_trylock(arch_rwlock_t *x) { union { arch_rwlock_t lock; diff --git a/arch/m32r/include/asm/spinlock.h b/arch/m32r/include/asm/spinlock.h index 1c76af8c8e1b..179a06489b10 100644 --- a/arch/m32r/include/asm/spinlock.h +++ b/arch/m32r/include/asm/spinlock.h @@ -140,15 +140,15 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) * read_can_lock - would read_trylock() succeed? * @lock: the rwlock in question. */ -#define __raw_read_can_lock(x) ((int)(x)->lock > 0) +#define arch_read_can_lock(x) ((int)(x)->lock > 0) /** * write_can_lock - would write_trylock() succeed? * @lock: the rwlock in question. */ -#define __raw_write_can_lock(x) ((x)->lock == RW_LOCK_BIAS) +#define arch_write_can_lock(x) ((x)->lock == RW_LOCK_BIAS) -static inline void __raw_read_lock(arch_rwlock_t *rw) +static inline void arch_read_lock(arch_rwlock_t *rw) { unsigned long tmp0, tmp1; @@ -199,7 +199,7 @@ static inline void __raw_read_lock(arch_rwlock_t *rw) ); } -static inline void __raw_write_lock(arch_rwlock_t *rw) +static inline void arch_write_lock(arch_rwlock_t *rw) { unsigned long tmp0, tmp1, tmp2; @@ -252,7 +252,7 @@ static inline void __raw_write_lock(arch_rwlock_t *rw) ); } -static inline void __raw_read_unlock(arch_rwlock_t *rw) +static inline void arch_read_unlock(arch_rwlock_t *rw) { unsigned long tmp0, tmp1; @@ -274,7 +274,7 @@ static inline void __raw_read_unlock(arch_rwlock_t *rw) ); } -static inline void __raw_write_unlock(arch_rwlock_t *rw) +static inline void arch_write_unlock(arch_rwlock_t *rw) { unsigned long tmp0, tmp1, tmp2; @@ -298,7 +298,7 @@ static inline void __raw_write_unlock(arch_rwlock_t *rw) ); } -static inline int __raw_read_trylock(arch_rwlock_t *lock) +static inline int arch_read_trylock(arch_rwlock_t *lock) { atomic_t *count = (atomic_t*)lock; if (atomic_dec_return(count) >= 0) @@ -307,7 +307,7 @@ static inline int __raw_read_trylock(arch_rwlock_t *lock) return 0; } -static inline int __raw_write_trylock(arch_rwlock_t *lock) +static inline int arch_write_trylock(arch_rwlock_t *lock) { atomic_t *count = (atomic_t *)lock; if (atomic_sub_and_test(RW_LOCK_BIAS, count)) @@ -316,8 +316,8 @@ static inline int __raw_write_trylock(arch_rwlock_t *lock) return 0; } -#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock) -#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock) +#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) +#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) #define arch_spin_relax(lock) cpu_relax() #define arch_read_relax(lock) cpu_relax() diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h index 7bf27c8a3364..21ef9efbde43 100644 --- a/arch/mips/include/asm/spinlock.h +++ b/arch/mips/include/asm/spinlock.h @@ -248,21 +248,21 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) * read_can_lock - would read_trylock() succeed? * @lock: the rwlock in question. */ -#define __raw_read_can_lock(rw) ((rw)->lock >= 0) +#define arch_read_can_lock(rw) ((rw)->lock >= 0) /* * write_can_lock - would write_trylock() succeed? * @lock: the rwlock in question. */ -#define __raw_write_can_lock(rw) (!(rw)->lock) +#define arch_write_can_lock(rw) (!(rw)->lock) -static inline void __raw_read_lock(arch_rwlock_t *rw) +static inline void arch_read_lock(arch_rwlock_t *rw) { unsigned int tmp; if (R10000_LLSC_WAR) { __asm__ __volatile__( - " .set noreorder # __raw_read_lock \n" + " .set noreorder # arch_read_lock \n" "1: ll %1, %2 \n" " bltz %1, 1b \n" " addu %1, 1 \n" @@ -275,7 +275,7 @@ static inline void __raw_read_lock(arch_rwlock_t *rw) : "memory"); } else { __asm__ __volatile__( - " .set noreorder # __raw_read_lock \n" + " .set noreorder # arch_read_lock \n" "1: ll %1, %2 \n" " bltz %1, 2f \n" " addu %1, 1 \n" @@ -301,7 +301,7 @@ static inline void __raw_read_lock(arch_rwlock_t *rw) /* Note the use of sub, not subu which will make the kernel die with an overflow exception if we ever try to unlock an rwlock that is already unlocked or is being held by a writer. */ -static inline void __raw_read_unlock(arch_rwlock_t *rw) +static inline void arch_read_unlock(arch_rwlock_t *rw) { unsigned int tmp; @@ -309,7 +309,7 @@ static inline void __raw_read_unlock(arch_rwlock_t *rw) if (R10000_LLSC_WAR) { __asm__ __volatile__( - "1: ll %1, %2 # __raw_read_unlock \n" + "1: ll %1, %2 # arch_read_unlock \n" " sub %1, 1 \n" " sc %1, %0 \n" " beqzl %1, 1b \n" @@ -318,7 +318,7 @@ static inline void __raw_read_unlock(arch_rwlock_t *rw) : "memory"); } else { __asm__ __volatile__( - " .set noreorder # __raw_read_unlock \n" + " .set noreorder # arch_read_unlock \n" "1: ll %1, %2 \n" " sub %1, 1 \n" " sc %1, %0 \n" @@ -335,13 +335,13 @@ static inline void __raw_read_unlock(arch_rwlock_t *rw) } } -static inline void __raw_write_lock(arch_rwlock_t *rw) +static inline void arch_write_lock(arch_rwlock_t *rw) { unsigned int tmp; if (R10000_LLSC_WAR) { __asm__ __volatile__( - " .set noreorder # __raw_write_lock \n" + " .set noreorder # arch_write_lock \n" "1: ll %1, %2 \n" " bnez %1, 1b \n" " lui %1, 0x8000 \n" @@ -354,7 +354,7 @@ static inline void __raw_write_lock(arch_rwlock_t *rw) : "memory"); } else { __asm__ __volatile__( - " .set noreorder # __raw_write_lock \n" + " .set noreorder # arch_write_lock \n" "1: ll %1, %2 \n" " bnez %1, 2f \n" " lui %1, 0x8000 \n" @@ -377,26 +377,26 @@ static inline void __raw_write_lock(arch_rwlock_t *rw) smp_llsc_mb(); } -static inline void __raw_write_unlock(arch_rwlock_t *rw) +static inline void arch_write_unlock(arch_rwlock_t *rw) { smp_mb(); __asm__ __volatile__( - " # __raw_write_unlock \n" + " # arch_write_unlock \n" " sw $0, %0 \n" : "=m" (rw->lock) : "m" (rw->lock) : "memory"); } -static inline int __raw_read_trylock(arch_rwlock_t *rw) +static inline int arch_read_trylock(arch_rwlock_t *rw) { unsigned int tmp; int ret; if (R10000_LLSC_WAR) { __asm__ __volatile__( - " .set noreorder # __raw_read_trylock \n" + " .set noreorder # arch_read_trylock \n" " li %2, 0 \n" "1: ll %1, %3 \n" " bltz %1, 2f \n" @@ -413,7 +413,7 @@ static inline int __raw_read_trylock(arch_rwlock_t *rw) : "memory"); } else { __asm__ __volatile__( - " .set noreorder # __raw_read_trylock \n" + " .set noreorder # arch_read_trylock \n" " li %2, 0 \n" "1: ll %1, %3 \n" " bltz %1, 2f \n" @@ -433,14 +433,14 @@ static inline int __raw_read_trylock(arch_rwlock_t *rw) return ret; } -static inline int __raw_write_trylock(arch_rwlock_t *rw) +static inline int arch_write_trylock(arch_rwlock_t *rw) { unsigned int tmp; int ret; if (R10000_LLSC_WAR) { __asm__ __volatile__( - " .set noreorder # __raw_write_trylock \n" + " .set noreorder # arch_write_trylock \n" " li %2, 0 \n" "1: ll %1, %3 \n" " bnez %1, 2f \n" @@ -457,7 +457,7 @@ static inline int __raw_write_trylock(arch_rwlock_t *rw) : "memory"); } else { __asm__ __volatile__( - " .set noreorder # __raw_write_trylock \n" + " .set noreorder # arch_write_trylock \n" " li %2, 0 \n" "1: ll %1, %3 \n" " bnez %1, 2f \n" @@ -480,8 +480,8 @@ static inline int __raw_write_trylock(arch_rwlock_t *rw) return ret; } -#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock) -#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock) +#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) +#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) #define arch_spin_relax(lock) cpu_relax() #define arch_read_relax(lock) cpu_relax() diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h index 1ff3a0a94a43..74036f436a3b 100644 --- a/arch/parisc/include/asm/spinlock.h +++ b/arch/parisc/include/asm/spinlock.h @@ -69,7 +69,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *x) /* Note that we have to ensure interrupts are disabled in case we're * interrupted by some other code that wants to grab the same read lock */ -static __inline__ void __raw_read_lock(arch_rwlock_t *rw) +static __inline__ void arch_read_lock(arch_rwlock_t *rw) { unsigned long flags; local_irq_save(flags); @@ -81,7 +81,7 @@ static __inline__ void __raw_read_lock(arch_rwlock_t *rw) /* Note that we have to ensure interrupts are disabled in case we're * interrupted by some other code that wants to grab the same read lock */ -static __inline__ void __raw_read_unlock(arch_rwlock_t *rw) +static __inline__ void arch_read_unlock(arch_rwlock_t *rw) { unsigned long flags; local_irq_save(flags); @@ -93,7 +93,7 @@ static __inline__ void __raw_read_unlock(arch_rwlock_t *rw) /* Note that we have to ensure interrupts are disabled in case we're * interrupted by some other code that wants to grab the same read lock */ -static __inline__ int __raw_read_trylock(arch_rwlock_t *rw) +static __inline__ int arch_read_trylock(arch_rwlock_t *rw) { unsigned long flags; retry: @@ -119,7 +119,7 @@ static __inline__ int __raw_read_trylock(arch_rwlock_t *rw) /* Note that we have to ensure interrupts are disabled in case we're * interrupted by some other code that wants to read_trylock() this lock */ -static __inline__ void __raw_write_lock(arch_rwlock_t *rw) +static __inline__ void arch_write_lock(arch_rwlock_t *rw) { unsigned long flags; retry: @@ -141,7 +141,7 @@ retry: local_irq_restore(flags); } -static __inline__ void __raw_write_unlock(arch_rwlock_t *rw) +static __inline__ void arch_write_unlock(arch_rwlock_t *rw) { rw->counter = 0; arch_spin_unlock(&rw->lock); @@ -149,7 +149,7 @@ static __inline__ void __raw_write_unlock(arch_rwlock_t *rw) /* Note that we have to ensure interrupts are disabled in case we're * interrupted by some other code that wants to read_trylock() this lock */ -static __inline__ int __raw_write_trylock(arch_rwlock_t *rw) +static __inline__ int arch_write_trylock(arch_rwlock_t *rw) { unsigned long flags; int result = 0; @@ -173,7 +173,7 @@ static __inline__ int __raw_write_trylock(arch_rwlock_t *rw) * read_can_lock - would read_trylock() succeed? * @lock: the rwlock in question. */ -static __inline__ int __raw_read_can_lock(arch_rwlock_t *rw) +static __inline__ int arch_read_can_lock(arch_rwlock_t *rw) { return rw->counter >= 0; } @@ -182,13 +182,13 @@ static __inline__ int __raw_read_can_lock(arch_rwlock_t *rw) * write_can_lock - would write_trylock() succeed? * @lock: the rwlock in question. */ -static __inline__ int __raw_write_can_lock(arch_rwlock_t *rw) +static __inline__ int arch_write_can_lock(arch_rwlock_t *rw) { return !rw->counter; } -#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock) -#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock) +#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) +#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) #define arch_spin_relax(lock) cpu_relax() #define arch_read_relax(lock) cpu_relax() diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index 2fad2c07c593..764094cff681 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -166,8 +166,8 @@ extern void arch_spin_unlock_wait(arch_spinlock_t *lock); * read-locks. */ -#define __raw_read_can_lock(rw) ((rw)->lock >= 0) -#define __raw_write_can_lock(rw) (!(rw)->lock) +#define arch_read_can_lock(rw) ((rw)->lock >= 0) +#define arch_write_can_lock(rw) (!(rw)->lock) #ifdef CONFIG_PPC64 #define __DO_SIGN_EXTEND "extsw %0,%0\n" @@ -181,7 +181,7 @@ extern void arch_spin_unlock_wait(arch_spinlock_t *lock); * This returns the old value in the lock + 1, * so we got a read lock if the return value is > 0. */ -static inline long arch_read_trylock(arch_rwlock_t *rw) +static inline long __arch_read_trylock(arch_rwlock_t *rw) { long tmp; @@ -205,7 +205,7 @@ static inline long arch_read_trylock(arch_rwlock_t *rw) * This returns the old value in the lock, * so we got the write lock if the return value is 0. */ -static inline long arch_write_trylock(arch_rwlock_t *rw) +static inline long __arch_write_trylock(arch_rwlock_t *rw) { long tmp, token; @@ -225,10 +225,10 @@ static inline long arch_write_trylock(arch_rwlock_t *rw) return tmp; } -static inline void __raw_read_lock(arch_rwlock_t *rw) +static inline void arch_read_lock(arch_rwlock_t *rw) { while (1) { - if (likely(arch_read_trylock(rw) > 0)) + if (likely(__arch_read_trylock(rw) > 0)) break; do { HMT_low(); @@ -239,10 +239,10 @@ static inline void __raw_read_lock(arch_rwlock_t *rw) } } -static inline void __raw_write_lock(arch_rwlock_t *rw) +static inline void arch_write_lock(arch_rwlock_t *rw) { while (1) { - if (likely(arch_write_trylock(rw) == 0)) + if (likely(__arch_write_trylock(rw) == 0)) break; do { HMT_low(); @@ -253,17 +253,17 @@ static inline void __raw_write_lock(arch_rwlock_t *rw) } } -static inline int __raw_read_trylock(arch_rwlock_t *rw) +static inline int arch_read_trylock(arch_rwlock_t *rw) { - return arch_read_trylock(rw) > 0; + return __arch_read_trylock(rw) > 0; } -static inline int __raw_write_trylock(arch_rwlock_t *rw) +static inline int arch_write_trylock(arch_rwlock_t *rw) { - return arch_write_trylock(rw) == 0; + return __arch_write_trylock(rw) == 0; } -static inline void __raw_read_unlock(arch_rwlock_t *rw) +static inline void arch_read_unlock(arch_rwlock_t *rw) { long tmp; @@ -280,15 +280,15 @@ static inline void __raw_read_unlock(arch_rwlock_t *rw) : "cr0", "xer", "memory"); } -static inline void __raw_write_unlock(arch_rwlock_t *rw) +static inline void arch_write_unlock(arch_rwlock_t *rw) { __asm__ __volatile__("# write_unlock\n\t" LWSYNC_ON_SMP: : :"memory"); rw->lock = 0; } -#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock) -#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock) +#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) +#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) #define arch_spin_relax(lock) __spin_yield(lock) #define arch_read_relax(lock) __rw_yield(lock) diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 7f98f0e48acb..a587907d77f3 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -113,13 +113,13 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp) * read_can_lock - would read_trylock() succeed? * @lock: the rwlock in question. */ -#define __raw_read_can_lock(x) ((int)(x)->lock >= 0) +#define arch_read_can_lock(x) ((int)(x)->lock >= 0) /** * write_can_lock - would write_trylock() succeed? * @lock: the rwlock in question. */ -#define __raw_write_can_lock(x) ((x)->lock == 0) +#define arch_write_can_lock(x) ((x)->lock == 0) extern void _raw_read_lock_wait(arch_rwlock_t *lp); extern void _raw_read_lock_wait_flags(arch_rwlock_t *lp, unsigned long flags); @@ -128,7 +128,7 @@ extern void _raw_write_lock_wait(arch_rwlock_t *lp); extern void _raw_write_lock_wait_flags(arch_rwlock_t *lp, unsigned long flags); extern int _raw_write_trylock_retry(arch_rwlock_t *lp); -static inline void __raw_read_lock(arch_rwlock_t *rw) +static inline void arch_read_lock(arch_rwlock_t *rw) { unsigned int old; old = rw->lock & 0x7fffffffU; @@ -136,7 +136,7 @@ static inline void __raw_read_lock(arch_rwlock_t *rw) _raw_read_lock_wait(rw); } -static inline void __raw_read_lock_flags(arch_rwlock_t *rw, unsigned long flags) +static inline void arch_read_lock_flags(arch_rwlock_t *rw, unsigned long flags) { unsigned int old; old = rw->lock & 0x7fffffffU; @@ -144,7 +144,7 @@ static inline void __raw_read_lock_flags(arch_rwlock_t *rw, unsigned long flags) _raw_read_lock_wait_flags(rw, flags); } -static inline void __raw_read_unlock(arch_rwlock_t *rw) +static inline void arch_read_unlock(arch_rwlock_t *rw) { unsigned int old, cmp; @@ -155,24 +155,24 @@ static inline void __raw_read_unlock(arch_rwlock_t *rw) } while (cmp != old); } -static inline void __raw_write_lock(arch_rwlock_t *rw) +static inline void arch_write_lock(arch_rwlock_t *rw) { if (unlikely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) != 0)) _raw_write_lock_wait(rw); } -static inline void __raw_write_lock_flags(arch_rwlock_t *rw, unsigned long flags) +static inline void arch_write_lock_flags(arch_rwlock_t *rw, unsigned long flags) { if (unlikely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) != 0)) _raw_write_lock_wait_flags(rw, flags); } -static inline void __raw_write_unlock(arch_rwlock_t *rw) +static inline void arch_write_unlock(arch_rwlock_t *rw) { _raw_compare_and_swap(&rw->lock, 0x80000000, 0); } -static inline int __raw_read_trylock(arch_rwlock_t *rw) +static inline int arch_read_trylock(arch_rwlock_t *rw) { unsigned int old; old = rw->lock & 0x7fffffffU; @@ -181,7 +181,7 @@ static inline int __raw_read_trylock(arch_rwlock_t *rw) return _raw_read_trylock_retry(rw); } -static inline int __raw_write_trylock(arch_rwlock_t *rw) +static inline int arch_write_trylock(arch_rwlock_t *rw) { if (likely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0)) return 1; diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 09fee9a1aa15..10754a375668 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -115,7 +115,7 @@ void _raw_read_lock_wait(arch_rwlock_t *rw) _raw_yield(); count = spin_retry; } - if (!__raw_read_can_lock(rw)) + if (!arch_read_can_lock(rw)) continue; old = rw->lock & 0x7fffffffU; if (_raw_compare_and_swap(&rw->lock, old, old + 1) == old) @@ -135,7 +135,7 @@ void _raw_read_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags) _raw_yield(); count = spin_retry; } - if (!__raw_read_can_lock(rw)) + if (!arch_read_can_lock(rw)) continue; old = rw->lock & 0x7fffffffU; local_irq_disable(); @@ -151,7 +151,7 @@ int _raw_read_trylock_retry(arch_rwlock_t *rw) int count = spin_retry; while (count-- > 0) { - if (!__raw_read_can_lock(rw)) + if (!arch_read_can_lock(rw)) continue; old = rw->lock & 0x7fffffffU; if (_raw_compare_and_swap(&rw->lock, old, old + 1) == old) @@ -170,7 +170,7 @@ void _raw_write_lock_wait(arch_rwlock_t *rw) _raw_yield(); count = spin_retry; } - if (!__raw_write_can_lock(rw)) + if (!arch_write_can_lock(rw)) continue; if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0) return; @@ -188,7 +188,7 @@ void _raw_write_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags) _raw_yield(); count = spin_retry; } - if (!__raw_write_can_lock(rw)) + if (!arch_write_can_lock(rw)) continue; local_irq_disable(); if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0) @@ -202,7 +202,7 @@ int _raw_write_trylock_retry(arch_rwlock_t *rw) int count = spin_retry; while (count-- > 0) { - if (!__raw_write_can_lock(rw)) + if (!arch_write_can_lock(rw)) continue; if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0) return 1; diff --git a/arch/sh/include/asm/spinlock.h b/arch/sh/include/asm/spinlock.h index 7f3626aac869..bdc0f3b6c56a 100644 --- a/arch/sh/include/asm/spinlock.h +++ b/arch/sh/include/asm/spinlock.h @@ -100,21 +100,21 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) * read_can_lock - would read_trylock() succeed? * @lock: the rwlock in question. */ -#define __raw_read_can_lock(x) ((x)->lock > 0) +#define arch_read_can_lock(x) ((x)->lock > 0) /** * write_can_lock - would write_trylock() succeed? * @lock: the rwlock in question. */ -#define __raw_write_can_lock(x) ((x)->lock == RW_LOCK_BIAS) +#define arch_write_can_lock(x) ((x)->lock == RW_LOCK_BIAS) -static inline void __raw_read_lock(arch_rwlock_t *rw) +static inline void arch_read_lock(arch_rwlock_t *rw) { unsigned long tmp; __asm__ __volatile__ ( "1: \n\t" - "movli.l @%1, %0 ! __raw_read_lock \n\t" + "movli.l @%1, %0 ! arch_read_lock \n\t" "cmp/pl %0 \n\t" "bf 1b \n\t" "add #-1, %0 \n\t" @@ -126,13 +126,13 @@ static inline void __raw_read_lock(arch_rwlock_t *rw) ); } -static inline void __raw_read_unlock(arch_rwlock_t *rw) +static inline void arch_read_unlock(arch_rwlock_t *rw) { unsigned long tmp; __asm__ __volatile__ ( "1: \n\t" - "movli.l @%1, %0 ! __raw_read_unlock \n\t" + "movli.l @%1, %0 ! arch_read_unlock \n\t" "add #1, %0 \n\t" "movco.l %0, @%1 \n\t" "bf 1b \n\t" @@ -142,13 +142,13 @@ static inline void __raw_read_unlock(arch_rwlock_t *rw) ); } -static inline void __raw_write_lock(arch_rwlock_t *rw) +static inline void arch_write_lock(arch_rwlock_t *rw) { unsigned long tmp; __asm__ __volatile__ ( "1: \n\t" - "movli.l @%1, %0 ! __raw_write_lock \n\t" + "movli.l @%1, %0 ! arch_write_lock \n\t" "cmp/hs %2, %0 \n\t" "bf 1b \n\t" "sub %2, %0 \n\t" @@ -160,23 +160,23 @@ static inline void __raw_write_lock(arch_rwlock_t *rw) ); } -static inline void __raw_write_unlock(arch_rwlock_t *rw) +static inline void arch_write_unlock(arch_rwlock_t *rw) { __asm__ __volatile__ ( - "mov.l %1, @%0 ! __raw_write_unlock \n\t" + "mov.l %1, @%0 ! arch_write_unlock \n\t" : : "r" (&rw->lock), "r" (RW_LOCK_BIAS) : "t", "memory" ); } -static inline int __raw_read_trylock(arch_rwlock_t *rw) +static inline int arch_read_trylock(arch_rwlock_t *rw) { unsigned long tmp, oldval; __asm__ __volatile__ ( "1: \n\t" - "movli.l @%2, %0 ! __raw_read_trylock \n\t" + "movli.l @%2, %0 ! arch_read_trylock \n\t" "mov %0, %1 \n\t" "cmp/pl %0 \n\t" "bf 2f \n\t" @@ -193,13 +193,13 @@ static inline int __raw_read_trylock(arch_rwlock_t *rw) return (oldval > 0); } -static inline int __raw_write_trylock(arch_rwlock_t *rw) +static inline int arch_write_trylock(arch_rwlock_t *rw) { unsigned long tmp, oldval; __asm__ __volatile__ ( "1: \n\t" - "movli.l @%2, %0 ! __raw_write_trylock \n\t" + "movli.l @%2, %0 ! arch_write_trylock \n\t" "mov %0, %1 \n\t" "cmp/hs %3, %0 \n\t" "bf 2f \n\t" @@ -216,8 +216,8 @@ static inline int __raw_write_trylock(arch_rwlock_t *rw) return (oldval > (RW_LOCK_BIAS - 1)); } -#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock) -#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock) +#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) +#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) #define arch_spin_relax(lock) cpu_relax() #define arch_read_relax(lock) cpu_relax() diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h index 06d37e588fde..7f9b9dba38a6 100644 --- a/arch/sparc/include/asm/spinlock_32.h +++ b/arch/sparc/include/asm/spinlock_32.h @@ -76,7 +76,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) * * Unfortunately this scheme limits us to ~16,000,000 cpus. */ -static inline void arch_read_lock(arch_rwlock_t *rw) +static inline void __arch_read_lock(arch_rwlock_t *rw) { register arch_rwlock_t *lp asm("g1"); lp = rw; @@ -89,14 +89,14 @@ static inline void arch_read_lock(arch_rwlock_t *rw) : "g2", "g4", "memory", "cc"); } -#define __raw_read_lock(lock) \ +#define arch_read_lock(lock) \ do { unsigned long flags; \ local_irq_save(flags); \ - arch_read_lock(lock); \ + __arch_read_lock(lock); \ local_irq_restore(flags); \ } while(0) -static inline void arch_read_unlock(arch_rwlock_t *rw) +static inline void __arch_read_unlock(arch_rwlock_t *rw) { register arch_rwlock_t *lp asm("g1"); lp = rw; @@ -109,14 +109,14 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) : "g2", "g4", "memory", "cc"); } -#define __raw_read_unlock(lock) \ +#define arch_read_unlock(lock) \ do { unsigned long flags; \ local_irq_save(flags); \ - arch_read_unlock(lock); \ + __arch_read_unlock(lock); \ local_irq_restore(flags); \ } while(0) -static inline void __raw_write_lock(arch_rwlock_t *rw) +static inline void arch_write_lock(arch_rwlock_t *rw) { register arch_rwlock_t *lp asm("g1"); lp = rw; @@ -130,7 +130,7 @@ static inline void __raw_write_lock(arch_rwlock_t *rw) *(volatile __u32 *)&lp->lock = ~0U; } -static inline int __raw_write_trylock(arch_rwlock_t *rw) +static inline int arch_write_trylock(arch_rwlock_t *rw) { unsigned int val; @@ -150,7 +150,7 @@ static inline int __raw_write_trylock(arch_rwlock_t *rw) return (val == 0); } -static inline int arch_read_trylock(arch_rwlock_t *rw) +static inline int __arch_read_trylock(arch_rwlock_t *rw) { register arch_rwlock_t *lp asm("g1"); register int res asm("o0"); @@ -165,27 +165,27 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) return res; } -#define __raw_read_trylock(lock) \ +#define arch_read_trylock(lock) \ ({ unsigned long flags; \ int res; \ local_irq_save(flags); \ - res = arch_read_trylock(lock); \ + res = __arch_read_trylock(lock); \ local_irq_restore(flags); \ res; \ }) -#define __raw_write_unlock(rw) do { (rw)->lock = 0; } while(0) +#define arch_write_unlock(rw) do { (rw)->lock = 0; } while(0) #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define __raw_read_lock_flags(rw, flags) __raw_read_lock(rw) -#define __raw_write_lock_flags(rw, flags) __raw_write_lock(rw) +#define arch_read_lock_flags(rw, flags) arch_read_lock(rw) +#define arch_write_lock_flags(rw, flags) arch_write_lock(rw) #define arch_spin_relax(lock) cpu_relax() #define arch_read_relax(lock) cpu_relax() #define arch_write_relax(lock) cpu_relax() -#define __raw_read_can_lock(rw) (!((rw)->lock & 0xff)) -#define __raw_write_can_lock(rw) (!(rw)->lock) +#define arch_read_can_lock(rw) (!((rw)->lock & 0xff)) +#define arch_write_can_lock(rw) (!(rw)->lock) #endif /* !(__ASSEMBLY__) */ diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h index 2b22d7f2c2fb..073936a8b275 100644 --- a/arch/sparc/include/asm/spinlock_64.h +++ b/arch/sparc/include/asm/spinlock_64.h @@ -210,17 +210,17 @@ static int inline arch_write_trylock(arch_rwlock_t *lock) return result; } -#define __raw_read_lock(p) arch_read_lock(p) -#define __raw_read_lock_flags(p, f) arch_read_lock(p) -#define __raw_read_trylock(p) arch_read_trylock(p) -#define __raw_read_unlock(p) arch_read_unlock(p) -#define __raw_write_lock(p) arch_write_lock(p) -#define __raw_write_lock_flags(p, f) arch_write_lock(p) -#define __raw_write_unlock(p) arch_write_unlock(p) -#define __raw_write_trylock(p) arch_write_trylock(p) - -#define __raw_read_can_lock(rw) (!((rw)->lock & 0x80000000UL)) -#define __raw_write_can_lock(rw) (!(rw)->lock) +#define arch_read_lock(p) arch_read_lock(p) +#define arch_read_lock_flags(p, f) arch_read_lock(p) +#define arch_read_trylock(p) arch_read_trylock(p) +#define arch_read_unlock(p) arch_read_unlock(p) +#define arch_write_lock(p) arch_write_lock(p) +#define arch_write_lock_flags(p, f) arch_write_lock(p) +#define arch_write_unlock(p) arch_write_unlock(p) +#define arch_write_trylock(p) arch_write_trylock(p) + +#define arch_read_can_lock(rw) (!((rw)->lock & 0x80000000UL)) +#define arch_write_can_lock(rw) (!(rw)->lock) #define arch_spin_relax(lock) cpu_relax() #define arch_read_relax(lock) cpu_relax() diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 99cb86e843a0..3089f70c0c52 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -232,7 +232,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) * read_can_lock - would read_trylock() succeed? * @lock: the rwlock in question. */ -static inline int __raw_read_can_lock(arch_rwlock_t *lock) +static inline int arch_read_can_lock(arch_rwlock_t *lock) { return (int)(lock)->lock > 0; } @@ -241,12 +241,12 @@ static inline int __raw_read_can_lock(arch_rwlock_t *lock) * write_can_lock - would write_trylock() succeed? * @lock: the rwlock in question. */ -static inline int __raw_write_can_lock(arch_rwlock_t *lock) +static inline int arch_write_can_lock(arch_rwlock_t *lock) { return (lock)->lock == RW_LOCK_BIAS; } -static inline void __raw_read_lock(arch_rwlock_t *rw) +static inline void arch_read_lock(arch_rwlock_t *rw) { asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t" "jns 1f\n" @@ -255,7 +255,7 @@ static inline void __raw_read_lock(arch_rwlock_t *rw) ::LOCK_PTR_REG (rw) : "memory"); } -static inline void __raw_write_lock(arch_rwlock_t *rw) +static inline void arch_write_lock(arch_rwlock_t *rw) { asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t" "jz 1f\n" @@ -264,7 +264,7 @@ static inline void __raw_write_lock(arch_rwlock_t *rw) ::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory"); } -static inline int __raw_read_trylock(arch_rwlock_t *lock) +static inline int arch_read_trylock(arch_rwlock_t *lock) { atomic_t *count = (atomic_t *)lock; @@ -274,7 +274,7 @@ static inline int __raw_read_trylock(arch_rwlock_t *lock) return 0; } -static inline int __raw_write_trylock(arch_rwlock_t *lock) +static inline int arch_write_trylock(arch_rwlock_t *lock) { atomic_t *count = (atomic_t *)lock; @@ -284,19 +284,19 @@ static inline int __raw_write_trylock(arch_rwlock_t *lock) return 0; } -static inline void __raw_read_unlock(arch_rwlock_t *rw) +static inline void arch_read_unlock(arch_rwlock_t *rw) { asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory"); } -static inline void __raw_write_unlock(arch_rwlock_t *rw) +static inline void arch_write_unlock(arch_rwlock_t *rw) { asm volatile(LOCK_PREFIX "addl %1, %0" : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory"); } -#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock) -#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock) +#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) +#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) #define arch_spin_relax(lock) cpu_relax() #define arch_read_relax(lock) cpu_relax() diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h index 73785b0bd6b9..5725b034defe 100644 --- a/include/linux/rwlock.h +++ b/include/linux/rwlock.h @@ -38,20 +38,20 @@ do { \ extern int _raw_write_trylock(rwlock_t *lock); extern void _raw_write_unlock(rwlock_t *lock); #else -# define _raw_read_lock(rwlock) __raw_read_lock(&(rwlock)->raw_lock) +# define _raw_read_lock(rwlock) arch_read_lock(&(rwlock)->raw_lock) # define _raw_read_lock_flags(lock, flags) \ - __raw_read_lock_flags(&(lock)->raw_lock, *(flags)) -# define _raw_read_trylock(rwlock) __raw_read_trylock(&(rwlock)->raw_lock) -# define _raw_read_unlock(rwlock) __raw_read_unlock(&(rwlock)->raw_lock) -# define _raw_write_lock(rwlock) __raw_write_lock(&(rwlock)->raw_lock) + arch_read_lock_flags(&(lock)->raw_lock, *(flags)) +# define _raw_read_trylock(rwlock) arch_read_trylock(&(rwlock)->raw_lock) +# define _raw_read_unlock(rwlock) arch_read_unlock(&(rwlock)->raw_lock) +# define _raw_write_lock(rwlock) arch_write_lock(&(rwlock)->raw_lock) # define _raw_write_lock_flags(lock, flags) \ - __raw_write_lock_flags(&(lock)->raw_lock, *(flags)) -# define _raw_write_trylock(rwlock) __raw_write_trylock(&(rwlock)->raw_lock) -# define _raw_write_unlock(rwlock) __raw_write_unlock(&(rwlock)->raw_lock) + arch_write_lock_flags(&(lock)->raw_lock, *(flags)) +# define _raw_write_trylock(rwlock) arch_write_trylock(&(rwlock)->raw_lock) +# define _raw_write_unlock(rwlock) arch_write_unlock(&(rwlock)->raw_lock) #endif -#define read_can_lock(rwlock) __raw_read_can_lock(&(rwlock)->raw_lock) -#define write_can_lock(rwlock) __raw_write_can_lock(&(rwlock)->raw_lock) +#define read_can_lock(rwlock) arch_read_can_lock(&(rwlock)->raw_lock) +#define write_can_lock(rwlock) arch_write_can_lock(&(rwlock)->raw_lock) /* * Define the various rw_lock methods. Note we define these diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index 1d3bcc3cf7c6..b14f6a91e19f 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -49,12 +49,12 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) /* * Read-write spinlocks. No debug version. */ -#define __raw_read_lock(lock) do { (void)(lock); } while (0) -#define __raw_write_lock(lock) do { (void)(lock); } while (0) -#define __raw_read_trylock(lock) ({ (void)(lock); 1; }) -#define __raw_write_trylock(lock) ({ (void)(lock); 1; }) -#define __raw_read_unlock(lock) do { (void)(lock); } while (0) -#define __raw_write_unlock(lock) do { (void)(lock); } while (0) +#define arch_read_lock(lock) do { (void)(lock); } while (0) +#define arch_write_lock(lock) do { (void)(lock); } while (0) +#define arch_read_trylock(lock) ({ (void)(lock); 1; }) +#define arch_write_trylock(lock) ({ (void)(lock); 1; }) +#define arch_read_unlock(lock) do { (void)(lock); } while (0) +#define arch_write_unlock(lock) do { (void)(lock); } while (0) #else /* DEBUG_SPINLOCK */ #define arch_spin_is_locked(lock) ((void)(lock), 0) @@ -67,8 +67,8 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) #define arch_spin_is_contended(lock) (((void)(lock), 0)) -#define __raw_read_can_lock(lock) (((void)(lock), 1)) -#define __raw_write_can_lock(lock) (((void)(lock), 1)) +#define arch_read_can_lock(lock) (((void)(lock), 1)) +#define arch_write_can_lock(lock) (((void)(lock), 1)) #define arch_spin_unlock_wait(lock) \ do { cpu_relax(); } while (arch_spin_is_locked(lock)) diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c index 3f72f10d9cb0..0cea0bf6114e 100644 --- a/lib/spinlock_debug.c +++ b/lib/spinlock_debug.c @@ -176,7 +176,7 @@ static void __read_lock_debug(rwlock_t *lock) for (;;) { for (i = 0; i < loops; i++) { - if (__raw_read_trylock(&lock->raw_lock)) + if (arch_read_trylock(&lock->raw_lock)) return; __delay(1); } @@ -196,12 +196,12 @@ static void __read_lock_debug(rwlock_t *lock) void _raw_read_lock(rwlock_t *lock) { RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); - __raw_read_lock(&lock->raw_lock); + arch_read_lock(&lock->raw_lock); } int _raw_read_trylock(rwlock_t *lock) { - int ret = __raw_read_trylock(&lock->raw_lock); + int ret = arch_read_trylock(&lock->raw_lock); #ifndef CONFIG_SMP /* @@ -215,7 +215,7 @@ int _raw_read_trylock(rwlock_t *lock) void _raw_read_unlock(rwlock_t *lock) { RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); - __raw_read_unlock(&lock->raw_lock); + arch_read_unlock(&lock->raw_lock); } static inline void debug_write_lock_before(rwlock_t *lock) @@ -251,7 +251,7 @@ static void __write_lock_debug(rwlock_t *lock) for (;;) { for (i = 0; i < loops; i++) { - if (__raw_write_trylock(&lock->raw_lock)) + if (arch_write_trylock(&lock->raw_lock)) return; __delay(1); } @@ -271,13 +271,13 @@ static void __write_lock_debug(rwlock_t *lock) void _raw_write_lock(rwlock_t *lock) { debug_write_lock_before(lock); - __raw_write_lock(&lock->raw_lock); + arch_write_lock(&lock->raw_lock); debug_write_lock_after(lock); } int _raw_write_trylock(rwlock_t *lock) { - int ret = __raw_write_trylock(&lock->raw_lock); + int ret = arch_write_trylock(&lock->raw_lock); if (ret) debug_write_lock_after(lock); @@ -293,5 +293,5 @@ int _raw_write_trylock(rwlock_t *lock) void _raw_write_unlock(rwlock_t *lock) { debug_write_unlock(lock); - __raw_write_unlock(&lock->raw_lock); + arch_write_unlock(&lock->raw_lock); } -- cgit v1.2.3 From 239007b8440abff689632f50cdf0f2b9e895b534 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 17 Nov 2009 16:46:45 +0100 Subject: genirq: Convert irq_desc.lock to raw_spinlock Convert locks which cannot be sleeping locks in preempt-rt to raw_spinlocks. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Acked-by: Ingo Molnar --- arch/alpha/kernel/irq.c | 4 +- arch/arm/include/asm/mach/irq.h | 4 +- arch/arm/kernel/irq.c | 12 ++--- arch/arm/mach-ns9xxx/irq.c | 8 +-- arch/avr32/kernel/irq.c | 4 +- arch/blackfin/kernel/irqchip.c | 6 +-- arch/blackfin/kernel/traps.c | 4 +- arch/cris/kernel/irq.c | 4 +- arch/frv/kernel/irq.c | 4 +- arch/h8300/kernel/irq.c | 4 +- arch/ia64/kernel/iosapic.c | 6 +-- arch/ia64/kernel/irq.c | 4 +- arch/ia64/kernel/irq_ia64.c | 4 +- arch/m32r/kernel/irq.c | 4 +- arch/microblaze/kernel/irq.c | 4 +- arch/mips/kernel/irq.c | 4 +- arch/mips/vr41xx/common/icu.c | 92 ++++++++++++++++----------------- arch/mn10300/kernel/irq.c | 4 +- arch/parisc/kernel/irq.c | 4 +- arch/powerpc/kernel/irq.c | 8 +-- arch/powerpc/platforms/52xx/media5200.c | 8 +-- arch/powerpc/platforms/cell/interrupt.c | 8 +-- arch/powerpc/platforms/iseries/irq.c | 4 +- arch/powerpc/platforms/pseries/xics.c | 4 +- arch/powerpc/sysdev/fsl_msi.c | 4 +- arch/powerpc/sysdev/uic.c | 8 +-- arch/sh/kernel/irq.c | 4 +- arch/sparc/kernel/irq_64.c | 8 +-- arch/um/kernel/irq.c | 4 +- arch/x86/kernel/apic/io_apic.c | 4 +- arch/x86/kernel/irq.c | 14 ++--- arch/xtensa/kernel/irq.c | 4 +- include/linux/irq.h | 2 +- kernel/irq/autoprobe.c | 20 +++---- kernel/irq/chip.c | 86 +++++++++++++++--------------- kernel/irq/handle.c | 22 ++++---- kernel/irq/internals.h | 2 +- kernel/irq/manage.c | 50 +++++++++--------- kernel/irq/migration.c | 2 +- kernel/irq/numa_migrate.c | 8 +-- kernel/irq/pm.c | 8 +-- kernel/irq/proc.c | 4 +- kernel/irq/spurious.c | 14 ++--- 43 files changed, 240 insertions(+), 240 deletions(-) (limited to 'arch/x86') diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index c0de072b8305..5f2cf23c4648 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -81,7 +81,7 @@ show_interrupts(struct seq_file *p, void *v) #endif if (irq < ACTUAL_NR_IRQS) { - spin_lock_irqsave(&irq_desc[irq].lock, flags); + raw_spin_lock_irqsave(&irq_desc[irq].lock, flags); action = irq_desc[irq].action; if (!action) goto unlock; @@ -105,7 +105,7 @@ show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); unlock: - spin_unlock_irqrestore(&irq_desc[irq].lock, flags); + raw_spin_unlock_irqrestore(&irq_desc[irq].lock, flags); } else if (irq == ACTUAL_NR_IRQS) { #ifdef CONFIG_SMP seq_puts(p, "IPI: "); diff --git a/arch/arm/include/asm/mach/irq.h b/arch/arm/include/asm/mach/irq.h index acac5302e4ea..8920b2d6e3b8 100644 --- a/arch/arm/include/asm/mach/irq.h +++ b/arch/arm/include/asm/mach/irq.h @@ -26,9 +26,9 @@ extern int show_fiq_list(struct seq_file *, void *); */ #define do_bad_IRQ(irq,desc) \ do { \ - spin_lock(&desc->lock); \ + raw_spin_lock(&desc->lock); \ handle_bad_irq(irq, desc); \ - spin_unlock(&desc->lock); \ + raw_spin_unlock(&desc->lock); \ } while(0) #endif diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index c9a8619f3856..b7cb45bb91e8 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -69,7 +69,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + raw_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto unlock; @@ -84,7 +84,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); unlock: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { #ifdef CONFIG_FIQ show_fiq_list(p, v); @@ -139,7 +139,7 @@ void set_irq_flags(unsigned int irq, unsigned int iflags) } desc = irq_desc + irq; - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN; if (iflags & IRQF_VALID) desc->status &= ~IRQ_NOREQUEST; @@ -147,7 +147,7 @@ void set_irq_flags(unsigned int irq, unsigned int iflags) desc->status &= ~IRQ_NOPROBE; if (!(iflags & IRQF_NOAUTOEN)) desc->status &= ~IRQ_NOAUTOEN; - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } void __init init_IRQ(void) @@ -166,9 +166,9 @@ static void route_irq(struct irq_desc *desc, unsigned int irq, unsigned int cpu) { pr_debug("IRQ%u: moving from cpu%u to cpu%u\n", irq, desc->node, cpu); - spin_lock_irq(&desc->lock); + raw_spin_lock_irq(&desc->lock); desc->chip->set_affinity(irq, cpumask_of(cpu)); - spin_unlock_irq(&desc->lock); + raw_spin_unlock_irq(&desc->lock); } /* diff --git a/arch/arm/mach-ns9xxx/irq.c b/arch/arm/mach-ns9xxx/irq.c index feb0e54a91de..038f24d47023 100644 --- a/arch/arm/mach-ns9xxx/irq.c +++ b/arch/arm/mach-ns9xxx/irq.c @@ -66,7 +66,7 @@ static void handle_prio_irq(unsigned int irq, struct irq_desc *desc) struct irqaction *action; irqreturn_t action_ret; - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); BUG_ON(desc->status & IRQ_INPROGRESS); @@ -78,7 +78,7 @@ static void handle_prio_irq(unsigned int irq, struct irq_desc *desc) goto out_mask; desc->status |= IRQ_INPROGRESS; - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); action_ret = handle_IRQ_event(irq, action); @@ -87,7 +87,7 @@ static void handle_prio_irq(unsigned int irq, struct irq_desc *desc) * Maybe this function should go to kernel/irq/chip.c? */ note_interrupt(irq, desc, action_ret); - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); desc->status &= ~IRQ_INPROGRESS; if (desc->status & IRQ_DISABLED) @@ -97,7 +97,7 @@ out_mask: /* ack unconditionally to unmask lower prio irqs */ desc->chip->ack(irq); - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); } #define handle_irq handle_prio_irq #endif diff --git a/arch/avr32/kernel/irq.c b/arch/avr32/kernel/irq.c index 9f572229d318..c8ab63e3a675 100644 --- a/arch/avr32/kernel/irq.c +++ b/arch/avr32/kernel/irq.c @@ -51,7 +51,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + raw_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto unlock; @@ -66,7 +66,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); unlock: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } return 0; diff --git a/arch/blackfin/kernel/irqchip.c b/arch/blackfin/kernel/irqchip.c index db9f9c91f11f..64cff54a8a58 100644 --- a/arch/blackfin/kernel/irqchip.c +++ b/arch/blackfin/kernel/irqchip.c @@ -23,7 +23,7 @@ void ack_bad_irq(unsigned int irq) static struct irq_desc bad_irq_desc = { .handle_irq = handle_bad_irq, - .lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock), + .lock = __RAW_SPIN_LOCK_UNLOCKED(bad_irq_desc.lock), }; #ifdef CONFIG_CPUMASK_OFFSTACK @@ -39,7 +39,7 @@ int show_interrupts(struct seq_file *p, void *v) unsigned long flags; if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + raw_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -53,7 +53,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { seq_printf(p, "NMI: "); for_each_online_cpu(j) diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c index 78cb3d38f899..9636bace00e8 100644 --- a/arch/blackfin/kernel/traps.c +++ b/arch/blackfin/kernel/traps.c @@ -1140,7 +1140,7 @@ void show_regs(struct pt_regs *fp) if (fp->ipend & ~0x3F) { for (i = 0; i < (NR_IRQS - 1); i++) { if (!in_atomic) - spin_lock_irqsave(&irq_desc[i].lock, flags); + raw_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) @@ -1155,7 +1155,7 @@ void show_regs(struct pt_regs *fp) verbose_printk("\n"); unlock: if (!in_atomic) - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } } diff --git a/arch/cris/kernel/irq.c b/arch/cris/kernel/irq.c index 0ca7d9892cc6..b5ce0724a88f 100644 --- a/arch/cris/kernel/irq.c +++ b/arch/cris/kernel/irq.c @@ -52,7 +52,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + raw_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -71,7 +71,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } return 0; } diff --git a/arch/frv/kernel/irq.c b/arch/frv/kernel/irq.c index af3e824b91b3..62d1aba615dc 100644 --- a/arch/frv/kernel/irq.c +++ b/arch/frv/kernel/irq.c @@ -69,7 +69,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + raw_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (action) { seq_printf(p, "%3d: ", i); @@ -85,7 +85,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); } - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { seq_printf(p, "Err: %10u\n", atomic_read(&irq_err_count)); } diff --git a/arch/h8300/kernel/irq.c b/arch/h8300/kernel/irq.c index 5c913d472119..c25dc2c2b1da 100644 --- a/arch/h8300/kernel/irq.c +++ b/arch/h8300/kernel/irq.c @@ -186,7 +186,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_puts(p, " CPU0"); if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + raw_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto unlock; @@ -200,7 +200,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, ", %s", action->name); seq_putc(p, '\n'); unlock: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } return 0; } diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index dab4d393908c..95ac77aeae9b 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -793,12 +793,12 @@ iosapic_register_intr (unsigned int gsi, goto unlock_iosapic_lock; } - spin_lock(&irq_desc[irq].lock); + raw_spin_lock(&irq_desc[irq].lock); dest = get_target_cpu(gsi, irq); dmode = choose_dmode(); err = register_intr(gsi, irq, dmode, polarity, trigger); if (err < 0) { - spin_unlock(&irq_desc[irq].lock); + raw_spin_unlock(&irq_desc[irq].lock); irq = err; goto unlock_iosapic_lock; } @@ -817,7 +817,7 @@ iosapic_register_intr (unsigned int gsi, (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), cpu_logical_id(dest), dest, irq_to_vector(irq)); - spin_unlock(&irq_desc[irq].lock); + raw_spin_unlock(&irq_desc[irq].lock); unlock_iosapic_lock: spin_unlock_irqrestore(&iosapic_lock, flags); return irq; diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index 7d8951229e7c..94ee9d067cbd 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -71,7 +71,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + raw_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -91,7 +91,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); return 0; diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index dd9d7b54f1a1..70e4bad23432 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -345,7 +345,7 @@ static irqreturn_t smp_irq_move_cleanup_interrupt(int irq, void *dev_id) desc = irq_desc + irq; cfg = irq_cfg + irq; - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); if (!cfg->move_cleanup_count) goto unlock; @@ -358,7 +358,7 @@ static irqreturn_t smp_irq_move_cleanup_interrupt(int irq, void *dev_id) spin_unlock_irqrestore(&vector_lock, flags); cfg->move_cleanup_count--; unlock: - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); } return IRQ_HANDLED; } diff --git a/arch/m32r/kernel/irq.c b/arch/m32r/kernel/irq.c index 8dfd31e87c4c..3c71f776872c 100644 --- a/arch/m32r/kernel/irq.c +++ b/arch/m32r/kernel/irq.c @@ -40,7 +40,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + raw_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -59,7 +59,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } return 0; } diff --git a/arch/microblaze/kernel/irq.c b/arch/microblaze/kernel/irq.c index 7d5ddd62d4d2..0f06034d1fe0 100644 --- a/arch/microblaze/kernel/irq.c +++ b/arch/microblaze/kernel/irq.c @@ -68,7 +68,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < nr_irq) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + raw_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -89,7 +89,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } return 0; } diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c index 7b845ba9dff4..8b0b4181219f 100644 --- a/arch/mips/kernel/irq.c +++ b/arch/mips/kernel/irq.c @@ -99,7 +99,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + raw_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -118,7 +118,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { seq_putc(p, '\n'); seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); diff --git a/arch/mips/vr41xx/common/icu.c b/arch/mips/vr41xx/common/icu.c index 6d39e222b170..6153b6a05ccf 100644 --- a/arch/mips/vr41xx/common/icu.c +++ b/arch/mips/vr41xx/common/icu.c @@ -159,9 +159,9 @@ void vr41xx_enable_piuint(uint16_t mask) if (current_cpu_type() == CPU_VR4111 || current_cpu_type() == CPU_VR4121) { - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); icu1_set(MPIUINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -174,9 +174,9 @@ void vr41xx_disable_piuint(uint16_t mask) if (current_cpu_type() == CPU_VR4111 || current_cpu_type() == CPU_VR4121) { - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); icu1_clear(MPIUINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -189,9 +189,9 @@ void vr41xx_enable_aiuint(uint16_t mask) if (current_cpu_type() == CPU_VR4111 || current_cpu_type() == CPU_VR4121) { - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); icu1_set(MAIUINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -204,9 +204,9 @@ void vr41xx_disable_aiuint(uint16_t mask) if (current_cpu_type() == CPU_VR4111 || current_cpu_type() == CPU_VR4121) { - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); icu1_clear(MAIUINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -219,9 +219,9 @@ void vr41xx_enable_kiuint(uint16_t mask) if (current_cpu_type() == CPU_VR4111 || current_cpu_type() == CPU_VR4121) { - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); icu1_set(MKIUINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -234,9 +234,9 @@ void vr41xx_disable_kiuint(uint16_t mask) if (current_cpu_type() == CPU_VR4111 || current_cpu_type() == CPU_VR4121) { - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); icu1_clear(MKIUINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -247,9 +247,9 @@ void vr41xx_enable_macint(uint16_t mask) struct irq_desc *desc = irq_desc + ETHERNET_IRQ; unsigned long flags; - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); icu1_set(MMACINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL(vr41xx_enable_macint); @@ -259,9 +259,9 @@ void vr41xx_disable_macint(uint16_t mask) struct irq_desc *desc = irq_desc + ETHERNET_IRQ; unsigned long flags; - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); icu1_clear(MMACINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL(vr41xx_disable_macint); @@ -271,9 +271,9 @@ void vr41xx_enable_dsiuint(uint16_t mask) struct irq_desc *desc = irq_desc + DSIU_IRQ; unsigned long flags; - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); icu1_set(MDSIUINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL(vr41xx_enable_dsiuint); @@ -283,9 +283,9 @@ void vr41xx_disable_dsiuint(uint16_t mask) struct irq_desc *desc = irq_desc + DSIU_IRQ; unsigned long flags; - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); icu1_clear(MDSIUINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL(vr41xx_disable_dsiuint); @@ -295,9 +295,9 @@ void vr41xx_enable_firint(uint16_t mask) struct irq_desc *desc = irq_desc + FIR_IRQ; unsigned long flags; - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); icu2_set(MFIRINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL(vr41xx_enable_firint); @@ -307,9 +307,9 @@ void vr41xx_disable_firint(uint16_t mask) struct irq_desc *desc = irq_desc + FIR_IRQ; unsigned long flags; - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); icu2_clear(MFIRINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL(vr41xx_disable_firint); @@ -322,9 +322,9 @@ void vr41xx_enable_pciint(void) if (current_cpu_type() == CPU_VR4122 || current_cpu_type() == CPU_VR4131 || current_cpu_type() == CPU_VR4133) { - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); icu2_write(MPCIINTREG, PCIINT0); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -338,9 +338,9 @@ void vr41xx_disable_pciint(void) if (current_cpu_type() == CPU_VR4122 || current_cpu_type() == CPU_VR4131 || current_cpu_type() == CPU_VR4133) { - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); icu2_write(MPCIINTREG, 0); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -354,9 +354,9 @@ void vr41xx_enable_scuint(void) if (current_cpu_type() == CPU_VR4122 || current_cpu_type() == CPU_VR4131 || current_cpu_type() == CPU_VR4133) { - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); icu2_write(MSCUINTREG, SCUINT0); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -370,9 +370,9 @@ void vr41xx_disable_scuint(void) if (current_cpu_type() == CPU_VR4122 || current_cpu_type() == CPU_VR4131 || current_cpu_type() == CPU_VR4133) { - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); icu2_write(MSCUINTREG, 0); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -386,9 +386,9 @@ void vr41xx_enable_csiint(uint16_t mask) if (current_cpu_type() == CPU_VR4122 || current_cpu_type() == CPU_VR4131 || current_cpu_type() == CPU_VR4133) { - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); icu2_set(MCSIINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -402,9 +402,9 @@ void vr41xx_disable_csiint(uint16_t mask) if (current_cpu_type() == CPU_VR4122 || current_cpu_type() == CPU_VR4131 || current_cpu_type() == CPU_VR4133) { - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); icu2_clear(MCSIINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -418,9 +418,9 @@ void vr41xx_enable_bcuint(void) if (current_cpu_type() == CPU_VR4122 || current_cpu_type() == CPU_VR4131 || current_cpu_type() == CPU_VR4133) { - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); icu2_write(MBCUINTREG, BCUINTR); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -434,9 +434,9 @@ void vr41xx_disable_bcuint(void) if (current_cpu_type() == CPU_VR4122 || current_cpu_type() == CPU_VR4131 || current_cpu_type() == CPU_VR4133) { - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); icu2_write(MBCUINTREG, 0); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -486,7 +486,7 @@ static inline int set_sysint1_assign(unsigned int irq, unsigned char assign) pin = SYSINT1_IRQ_TO_PIN(irq); - spin_lock_irq(&desc->lock); + raw_spin_lock_irq(&desc->lock); intassign0 = icu1_read(INTASSIGN0); intassign1 = icu1_read(INTASSIGN1); @@ -525,7 +525,7 @@ static inline int set_sysint1_assign(unsigned int irq, unsigned char assign) intassign1 |= (uint16_t)assign << 9; break; default: - spin_unlock_irq(&desc->lock); + raw_spin_unlock_irq(&desc->lock); return -EINVAL; } @@ -533,7 +533,7 @@ static inline int set_sysint1_assign(unsigned int irq, unsigned char assign) icu1_write(INTASSIGN0, intassign0); icu1_write(INTASSIGN1, intassign1); - spin_unlock_irq(&desc->lock); + raw_spin_unlock_irq(&desc->lock); return 0; } @@ -546,7 +546,7 @@ static inline int set_sysint2_assign(unsigned int irq, unsigned char assign) pin = SYSINT2_IRQ_TO_PIN(irq); - spin_lock_irq(&desc->lock); + raw_spin_lock_irq(&desc->lock); intassign2 = icu1_read(INTASSIGN2); intassign3 = icu1_read(INTASSIGN3); @@ -593,7 +593,7 @@ static inline int set_sysint2_assign(unsigned int irq, unsigned char assign) intassign3 |= (uint16_t)assign << 12; break; default: - spin_unlock_irq(&desc->lock); + raw_spin_unlock_irq(&desc->lock); return -EINVAL; } @@ -601,7 +601,7 @@ static inline int set_sysint2_assign(unsigned int irq, unsigned char assign) icu1_write(INTASSIGN2, intassign2); icu1_write(INTASSIGN3, intassign3); - spin_unlock_irq(&desc->lock); + raw_spin_unlock_irq(&desc->lock); return 0; } diff --git a/arch/mn10300/kernel/irq.c b/arch/mn10300/kernel/irq.c index 4c3c58ef5cda..e2d5ed891f37 100644 --- a/arch/mn10300/kernel/irq.c +++ b/arch/mn10300/kernel/irq.c @@ -215,7 +215,7 @@ int show_interrupts(struct seq_file *p, void *v) /* display information rows, one per active CPU */ case 1 ... NR_IRQS - 1: - spin_lock_irqsave(&irq_desc[i].lock, flags); + raw_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (action) { @@ -235,7 +235,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); } - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags); break; /* polish off with NMI and error counters */ diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index 2e7610cb33d5..f47465e8d040 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -180,7 +180,7 @@ int show_interrupts(struct seq_file *p, void *v) if (i < NR_IRQS) { struct irqaction *action; - spin_lock_irqsave(&irq_desc[i].lock, flags); + raw_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -224,7 +224,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } return 0; diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index f6dca4f4b295..9040330b0530 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -210,7 +210,7 @@ int show_interrupts(struct seq_file *p, void *v) if (!desc) return 0; - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); action = desc->action; if (!action || !action->handler) @@ -237,7 +237,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); return 0; } @@ -1112,7 +1112,7 @@ static int virq_debug_show(struct seq_file *m, void *private) if (!desc) continue; - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); if (desc->action && desc->action->handler) { seq_printf(m, "%5d ", i); @@ -1131,7 +1131,7 @@ static int virq_debug_show(struct seq_file *m, void *private) seq_printf(m, "%s\n", p); } - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } return 0; diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c index cc0c854291d7..0bac3a3dbecf 100644 --- a/arch/powerpc/platforms/52xx/media5200.c +++ b/arch/powerpc/platforms/52xx/media5200.c @@ -86,9 +86,9 @@ void media5200_irq_cascade(unsigned int virq, struct irq_desc *desc) u32 status, enable; /* Mask off the cascaded IRQ */ - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); desc->chip->mask(virq); - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); /* Ask the FPGA for IRQ status. If 'val' is 0, then no irqs * are pending. 'ffs()' is 1 based */ @@ -104,11 +104,11 @@ void media5200_irq_cascade(unsigned int virq, struct irq_desc *desc) } /* Processing done; can reenable the cascade now */ - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); desc->chip->ack(virq); if (!(desc->status & IRQ_DISABLED)) desc->chip->unmask(virq); - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); } static int media5200_irq_map(struct irq_host *h, unsigned int virq, diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 7267effc8078..6829cf7e2bda 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -237,7 +237,7 @@ extern int noirqdebug; static void handle_iic_irq(unsigned int irq, struct irq_desc *desc) { - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); @@ -265,18 +265,18 @@ static void handle_iic_irq(unsigned int irq, struct irq_desc *desc) goto out_eoi; desc->status &= ~IRQ_PENDING; - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); action_ret = handle_IRQ_event(irq, action); if (!noirqdebug) note_interrupt(irq, desc, action_ret); - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); } while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING); desc->status &= ~IRQ_INPROGRESS; out_eoi: desc->chip->eoi(irq); - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); } static int iic_host_map(struct irq_host *h, unsigned int virq, diff --git a/arch/powerpc/platforms/iseries/irq.c b/arch/powerpc/platforms/iseries/irq.c index 07762259c60a..86c4b29eea89 100644 --- a/arch/powerpc/platforms/iseries/irq.c +++ b/arch/powerpc/platforms/iseries/irq.c @@ -217,9 +217,9 @@ void __init iSeries_activate_IRQs() struct irq_desc *desc = irq_to_desc(irq); if (desc && desc->chip && desc->chip->startup) { - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); desc->chip->startup(irq); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } } } diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index 7d01b58f3989..b9b9e11609ec 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -906,7 +906,7 @@ void xics_migrate_irqs_away(void) || desc->chip->set_affinity == NULL) continue; - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq); if (status) { @@ -930,7 +930,7 @@ void xics_migrate_irqs_away(void) cpumask_setall(irq_to_desc(virq)->affinity); desc->chip->set_affinity(virq, cpu_all_mask); unlock: - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } } #endif diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 62e50258cdef..c6e11b077108 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -173,7 +173,7 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc) u32 intr_index; u32 have_shift = 0; - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); if ((msi_data->feature & FSL_PIC_IP_MASK) == FSL_PIC_IP_IPIC) { if (desc->chip->mask_ack) desc->chip->mask_ack(irq); @@ -225,7 +225,7 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc) break; } unlock: - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); } static int __devinit fsl_of_msi_probe(struct of_device *dev, diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c index 7d10074b3304..6f220a913e42 100644 --- a/arch/powerpc/sysdev/uic.c +++ b/arch/powerpc/sysdev/uic.c @@ -225,12 +225,12 @@ void uic_irq_cascade(unsigned int virq, struct irq_desc *desc) int src; int subvirq; - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); if (desc->status & IRQ_LEVEL) desc->chip->mask(virq); else desc->chip->mask_ack(virq); - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); msr = mfdcr(uic->dcrbase + UIC_MSR); if (!msr) /* spurious interrupt */ @@ -242,12 +242,12 @@ void uic_irq_cascade(unsigned int virq, struct irq_desc *desc) generic_handle_irq(subvirq); uic_irq_ret: - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); if (desc->status & IRQ_LEVEL) desc->chip->ack(virq); if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask) desc->chip->unmask(virq); - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); } static struct uic * __init uic_init_one(struct device_node *node) diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c index e1913f28f418..d2d41d046657 100644 --- a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c @@ -76,7 +76,7 @@ int show_interrupts(struct seq_file *p, void *v) if (!desc) return 0; - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); for_each_online_cpu(j) any_count |= kstat_irqs_cpu(i, j); action = desc->action; @@ -97,7 +97,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); out: - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); return 0; } #endif diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index ce996f97855f..8d6882bb480a 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -176,7 +176,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + raw_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -195,7 +195,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { seq_printf(p, "NMI: "); for_each_online_cpu(j) @@ -785,14 +785,14 @@ void fixup_irqs(void) for (irq = 0; irq < NR_IRQS; irq++) { unsigned long flags; - spin_lock_irqsave(&irq_desc[irq].lock, flags); + raw_spin_lock_irqsave(&irq_desc[irq].lock, flags); if (irq_desc[irq].action && !(irq_desc[irq].status & IRQ_PER_CPU)) { if (irq_desc[irq].chip->set_affinity) irq_desc[irq].chip->set_affinity(irq, irq_desc[irq].affinity); } - spin_unlock_irqrestore(&irq_desc[irq].lock, flags); + raw_spin_unlock_irqrestore(&irq_desc[irq].lock, flags); } tick_ops->disable_irq(); diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 039270b9b73b..89474ba0741e 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -34,7 +34,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + raw_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -53,7 +53,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) seq_putc(p, '\n'); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index d5d498fbee4b..11a5851f1f50 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2431,7 +2431,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) continue; cfg = irq_cfg(irq); - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) goto unlock; @@ -2450,7 +2450,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) } __get_cpu_var(vector_irq)[vector] = -1; unlock: - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); } irq_exit(); diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 664bcb7384ac..91fd0c70a18a 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -149,7 +149,7 @@ int show_interrupts(struct seq_file *p, void *v) if (!desc) return 0; - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); for_each_online_cpu(j) any_count |= kstat_irqs_cpu(i, j); action = desc->action; @@ -170,7 +170,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); out: - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); return 0; } @@ -294,12 +294,12 @@ void fixup_irqs(void) continue; /* interrupt's are disabled at this point */ - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); affinity = desc->affinity; if (!irq_has_action(irq) || cpumask_equal(affinity, cpu_online_mask)) { - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); continue; } @@ -326,7 +326,7 @@ void fixup_irqs(void) if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask) desc->chip->unmask(irq); - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); if (break_affinity && set_affinity) printk("Broke affinity for irq %i\n", irq); @@ -356,10 +356,10 @@ void fixup_irqs(void) irq = __get_cpu_var(vector_irq)[vector]; desc = irq_to_desc(irq); - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); if (desc->chip->retrigger) desc->chip->retrigger(irq); - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); } } } diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c index a1badb32fcda..8cd38484e130 100644 --- a/arch/xtensa/kernel/irq.c +++ b/arch/xtensa/kernel/irq.c @@ -90,7 +90,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + raw_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -109,7 +109,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { seq_printf(p, "NMI: "); for_each_online_cpu(j) diff --git a/include/linux/irq.h b/include/linux/irq.h index a287cfc0b1a6..451481c082b5 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -192,7 +192,7 @@ struct irq_desc { unsigned int irq_count; /* For detecting broken IRQs */ unsigned long last_unhandled; /* Aging timer for unhandled count */ unsigned int irqs_unhandled; - spinlock_t lock; + raw_spinlock_t lock; #ifdef CONFIG_SMP cpumask_var_t affinity; unsigned int node; diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c index 1de9700f416e..2295a31ef110 100644 --- a/kernel/irq/autoprobe.c +++ b/kernel/irq/autoprobe.c @@ -45,7 +45,7 @@ unsigned long probe_irq_on(void) * flush such a longstanding irq before considering it as spurious. */ for_each_irq_desc_reverse(i, desc) { - spin_lock_irq(&desc->lock); + raw_spin_lock_irq(&desc->lock); if (!desc->action && !(desc->status & IRQ_NOPROBE)) { /* * An old-style architecture might still have @@ -61,7 +61,7 @@ unsigned long probe_irq_on(void) desc->chip->set_type(i, IRQ_TYPE_PROBE); desc->chip->startup(i); } - spin_unlock_irq(&desc->lock); + raw_spin_unlock_irq(&desc->lock); } /* Wait for longstanding interrupts to trigger. */ @@ -73,13 +73,13 @@ unsigned long probe_irq_on(void) * happened in the previous stage, it may have masked itself) */ for_each_irq_desc_reverse(i, desc) { - spin_lock_irq(&desc->lock); + raw_spin_lock_irq(&desc->lock); if (!desc->action && !(desc->status & IRQ_NOPROBE)) { desc->status |= IRQ_AUTODETECT | IRQ_WAITING; if (desc->chip->startup(i)) desc->status |= IRQ_PENDING; } - spin_unlock_irq(&desc->lock); + raw_spin_unlock_irq(&desc->lock); } /* @@ -91,7 +91,7 @@ unsigned long probe_irq_on(void) * Now filter out any obviously spurious interrupts */ for_each_irq_desc(i, desc) { - spin_lock_irq(&desc->lock); + raw_spin_lock_irq(&desc->lock); status = desc->status; if (status & IRQ_AUTODETECT) { @@ -103,7 +103,7 @@ unsigned long probe_irq_on(void) if (i < 32) mask |= 1 << i; } - spin_unlock_irq(&desc->lock); + raw_spin_unlock_irq(&desc->lock); } return mask; @@ -129,7 +129,7 @@ unsigned int probe_irq_mask(unsigned long val) int i; for_each_irq_desc(i, desc) { - spin_lock_irq(&desc->lock); + raw_spin_lock_irq(&desc->lock); status = desc->status; if (status & IRQ_AUTODETECT) { @@ -139,7 +139,7 @@ unsigned int probe_irq_mask(unsigned long val) desc->status = status & ~IRQ_AUTODETECT; desc->chip->shutdown(i); } - spin_unlock_irq(&desc->lock); + raw_spin_unlock_irq(&desc->lock); } mutex_unlock(&probing_active); @@ -171,7 +171,7 @@ int probe_irq_off(unsigned long val) unsigned int status; for_each_irq_desc(i, desc) { - spin_lock_irq(&desc->lock); + raw_spin_lock_irq(&desc->lock); status = desc->status; if (status & IRQ_AUTODETECT) { @@ -183,7 +183,7 @@ int probe_irq_off(unsigned long val) desc->status = status & ~IRQ_AUTODETECT; desc->chip->shutdown(i); } - spin_unlock_irq(&desc->lock); + raw_spin_unlock_irq(&desc->lock); } mutex_unlock(&probing_active); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index ba566c261adc..ecc3fa28f666 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -34,7 +34,7 @@ void dynamic_irq_init(unsigned int irq) } /* Ensure we don't have left over values from a previous use of this irq */ - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); desc->status = IRQ_DISABLED; desc->chip = &no_irq_chip; desc->handle_irq = handle_bad_irq; @@ -51,7 +51,7 @@ void dynamic_irq_init(unsigned int irq) cpumask_clear(desc->pending_mask); #endif #endif - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } /** @@ -68,9 +68,9 @@ void dynamic_irq_cleanup(unsigned int irq) return; } - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); if (desc->action) { - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); WARN(1, KERN_ERR "Destroying IRQ%d without calling free_irq\n", irq); return; @@ -82,7 +82,7 @@ void dynamic_irq_cleanup(unsigned int irq) desc->chip = &no_irq_chip; desc->name = NULL; clear_kstat_irqs(desc); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } @@ -104,10 +104,10 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip) if (!chip) chip = &no_irq_chip; - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); irq_chip_set_defaults(chip); desc->chip = chip; - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); return 0; } @@ -133,9 +133,9 @@ int set_irq_type(unsigned int irq, unsigned int type) if (type == IRQ_TYPE_NONE) return 0; - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); ret = __irq_set_trigger(desc, irq, type); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); return ret; } EXPORT_SYMBOL(set_irq_type); @@ -158,9 +158,9 @@ int set_irq_data(unsigned int irq, void *data) return -EINVAL; } - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); desc->handler_data = data; - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); return 0; } EXPORT_SYMBOL(set_irq_data); @@ -183,11 +183,11 @@ int set_irq_msi(unsigned int irq, struct msi_desc *entry) return -EINVAL; } - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); desc->msi_desc = entry; if (entry) entry->irq = irq; - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); return 0; } @@ -214,9 +214,9 @@ int set_irq_chip_data(unsigned int irq, void *data) return -EINVAL; } - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); desc->chip_data = data; - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); return 0; } @@ -241,12 +241,12 @@ void set_irq_nested_thread(unsigned int irq, int nest) if (!desc) return; - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); if (nest) desc->status |= IRQ_NESTED_THREAD; else desc->status &= ~IRQ_NESTED_THREAD; - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL_GPL(set_irq_nested_thread); @@ -343,7 +343,7 @@ void handle_nested_irq(unsigned int irq) might_sleep(); - spin_lock_irq(&desc->lock); + raw_spin_lock_irq(&desc->lock); kstat_incr_irqs_this_cpu(irq, desc); @@ -352,17 +352,17 @@ void handle_nested_irq(unsigned int irq) goto out_unlock; desc->status |= IRQ_INPROGRESS; - spin_unlock_irq(&desc->lock); + raw_spin_unlock_irq(&desc->lock); action_ret = action->thread_fn(action->irq, action->dev_id); if (!noirqdebug) note_interrupt(irq, desc, action_ret); - spin_lock_irq(&desc->lock); + raw_spin_lock_irq(&desc->lock); desc->status &= ~IRQ_INPROGRESS; out_unlock: - spin_unlock_irq(&desc->lock); + raw_spin_unlock_irq(&desc->lock); } EXPORT_SYMBOL_GPL(handle_nested_irq); @@ -384,7 +384,7 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc) struct irqaction *action; irqreturn_t action_ret; - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); if (unlikely(desc->status & IRQ_INPROGRESS)) goto out_unlock; @@ -396,16 +396,16 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc) goto out_unlock; desc->status |= IRQ_INPROGRESS; - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); action_ret = handle_IRQ_event(irq, action); if (!noirqdebug) note_interrupt(irq, desc, action_ret); - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); desc->status &= ~IRQ_INPROGRESS; out_unlock: - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); } /** @@ -424,7 +424,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) struct irqaction *action; irqreturn_t action_ret; - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); mask_ack_irq(desc, irq); if (unlikely(desc->status & IRQ_INPROGRESS)) @@ -441,13 +441,13 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) goto out_unlock; desc->status |= IRQ_INPROGRESS; - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); action_ret = handle_IRQ_event(irq, action); if (!noirqdebug) note_interrupt(irq, desc, action_ret); - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); desc->status &= ~IRQ_INPROGRESS; if (unlikely(desc->status & IRQ_ONESHOT)) @@ -455,7 +455,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) else if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask) desc->chip->unmask(irq); out_unlock: - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); } EXPORT_SYMBOL_GPL(handle_level_irq); @@ -475,7 +475,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) struct irqaction *action; irqreturn_t action_ret; - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); if (unlikely(desc->status & IRQ_INPROGRESS)) goto out; @@ -497,18 +497,18 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) desc->status |= IRQ_INPROGRESS; desc->status &= ~IRQ_PENDING; - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); action_ret = handle_IRQ_event(irq, action); if (!noirqdebug) note_interrupt(irq, desc, action_ret); - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); desc->status &= ~IRQ_INPROGRESS; out: desc->chip->eoi(irq); - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); } /** @@ -530,7 +530,7 @@ out: void handle_edge_irq(unsigned int irq, struct irq_desc *desc) { - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); @@ -576,17 +576,17 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) } desc->status &= ~IRQ_PENDING; - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); action_ret = handle_IRQ_event(irq, action); if (!noirqdebug) note_interrupt(irq, desc, action_ret); - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); } while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING); desc->status &= ~IRQ_INPROGRESS; out_unlock: - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); } /** @@ -643,7 +643,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, } chip_bus_lock(irq, desc); - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); /* Uninstall? */ if (handle == handle_bad_irq) { @@ -661,7 +661,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, desc->depth = 0; desc->chip->startup(irq); } - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); chip_bus_sync_unlock(irq, desc); } EXPORT_SYMBOL_GPL(__set_irq_handler); @@ -692,9 +692,9 @@ void __init set_irq_noprobe(unsigned int irq) return; } - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); desc->status |= IRQ_NOPROBE; - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } void __init set_irq_probe(unsigned int irq) @@ -707,7 +707,7 @@ void __init set_irq_probe(unsigned int irq) return; } - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); desc->status &= ~IRQ_NOPROBE; - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 17c71bb565c6..814940e7f485 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -80,7 +80,7 @@ static struct irq_desc irq_desc_init = { .chip = &no_irq_chip, .handle_irq = handle_bad_irq, .depth = 1, - .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), + .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc_init.lock), }; void __ref init_kstat_irqs(struct irq_desc *desc, int node, int nr) @@ -108,7 +108,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int node) { memcpy(desc, &irq_desc_init, sizeof(struct irq_desc)); - spin_lock_init(&desc->lock); + raw_spin_lock_init(&desc->lock); desc->irq = irq; #ifdef CONFIG_SMP desc->node = node; @@ -130,7 +130,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int node) /* * Protect the sparse_irqs: */ -DEFINE_SPINLOCK(sparse_irq_lock); +DEFINE_RAW_SPINLOCK(sparse_irq_lock); struct irq_desc **irq_desc_ptrs __read_mostly; @@ -141,7 +141,7 @@ static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_sm .chip = &no_irq_chip, .handle_irq = handle_bad_irq, .depth = 1, - .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), + .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc_init.lock), } }; @@ -212,7 +212,7 @@ struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node) if (desc) return desc; - spin_lock_irqsave(&sparse_irq_lock, flags); + raw_spin_lock_irqsave(&sparse_irq_lock, flags); /* We have to check it to avoid races with another CPU */ desc = irq_desc_ptrs[irq]; @@ -234,7 +234,7 @@ struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node) irq_desc_ptrs[irq] = desc; out_unlock: - spin_unlock_irqrestore(&sparse_irq_lock, flags); + raw_spin_unlock_irqrestore(&sparse_irq_lock, flags); return desc; } @@ -247,7 +247,7 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { .chip = &no_irq_chip, .handle_irq = handle_bad_irq, .depth = 1, - .lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock), + .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock), } }; @@ -473,7 +473,7 @@ unsigned int __do_IRQ(unsigned int irq) return 1; } - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); if (desc->chip->ack) desc->chip->ack(irq); /* @@ -517,13 +517,13 @@ unsigned int __do_IRQ(unsigned int irq) for (;;) { irqreturn_t action_ret; - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); action_ret = handle_IRQ_event(irq, action); if (!noirqdebug) note_interrupt(irq, desc, action_ret); - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); if (likely(!(desc->status & IRQ_PENDING))) break; desc->status &= ~IRQ_PENDING; @@ -536,7 +536,7 @@ out: * disabled while the handler was running. */ desc->chip->end(irq); - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); return 1; } diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 1b5d742c6a77..b2821f070a3d 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -18,7 +18,7 @@ extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume); extern struct lock_class_key irq_desc_lock_class; extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); extern void clear_kstat_irqs(struct irq_desc *desc); -extern spinlock_t sparse_irq_lock; +extern raw_spinlock_t sparse_irq_lock; #ifdef CONFIG_SPARSE_IRQ /* irq_desc_ptrs allocated at boot time */ diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 7305b297d1eb..eb6078ca60c7 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -46,9 +46,9 @@ void synchronize_irq(unsigned int irq) cpu_relax(); /* Ok, that indicated we're done: double-check carefully. */ - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); status = desc->status; - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); /* Oops, that failed? */ } while (status & IRQ_INPROGRESS); @@ -114,7 +114,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) if (!desc->chip->set_affinity) return -EINVAL; - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); #ifdef CONFIG_GENERIC_PENDING_IRQ if (desc->status & IRQ_MOVE_PCNTXT) { @@ -134,7 +134,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) } #endif desc->status |= IRQ_AFFINITY_SET; - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); return 0; } @@ -181,11 +181,11 @@ int irq_select_affinity_usr(unsigned int irq) unsigned long flags; int ret; - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); ret = setup_affinity(irq, desc); if (!ret) irq_set_thread_affinity(desc); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); return ret; } @@ -231,9 +231,9 @@ void disable_irq_nosync(unsigned int irq) return; chip_bus_lock(irq, desc); - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); __disable_irq(desc, irq, false); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); chip_bus_sync_unlock(irq, desc); } EXPORT_SYMBOL(disable_irq_nosync); @@ -308,9 +308,9 @@ void enable_irq(unsigned int irq) return; chip_bus_lock(irq, desc); - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); __enable_irq(desc, irq, false); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); chip_bus_sync_unlock(irq, desc); } EXPORT_SYMBOL(enable_irq); @@ -347,7 +347,7 @@ int set_irq_wake(unsigned int irq, unsigned int on) /* wakeup-capable irqs can be shared between drivers that * don't need to have the same sleep mode behaviors. */ - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); if (on) { if (desc->wake_depth++ == 0) { ret = set_irq_wake_real(irq, on); @@ -368,7 +368,7 @@ int set_irq_wake(unsigned int irq, unsigned int on) } } - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); return ret; } EXPORT_SYMBOL(set_irq_wake); @@ -484,12 +484,12 @@ static int irq_wait_for_interrupt(struct irqaction *action) static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc) { chip_bus_lock(irq, desc); - spin_lock_irq(&desc->lock); + raw_spin_lock_irq(&desc->lock); if (!(desc->status & IRQ_DISABLED) && (desc->status & IRQ_MASKED)) { desc->status &= ~IRQ_MASKED; desc->chip->unmask(irq); } - spin_unlock_irq(&desc->lock); + raw_spin_unlock_irq(&desc->lock); chip_bus_sync_unlock(irq, desc); } @@ -514,9 +514,9 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) return; } - spin_lock_irq(&desc->lock); + raw_spin_lock_irq(&desc->lock); cpumask_copy(mask, desc->affinity); - spin_unlock_irq(&desc->lock); + raw_spin_unlock_irq(&desc->lock); set_cpus_allowed_ptr(current, mask); free_cpumask_var(mask); @@ -545,7 +545,7 @@ static int irq_thread(void *data) atomic_inc(&desc->threads_active); - spin_lock_irq(&desc->lock); + raw_spin_lock_irq(&desc->lock); if (unlikely(desc->status & IRQ_DISABLED)) { /* * CHECKME: We might need a dedicated @@ -555,9 +555,9 @@ static int irq_thread(void *data) * retriggers the interrupt itself --- tglx */ desc->status |= IRQ_PENDING; - spin_unlock_irq(&desc->lock); + raw_spin_unlock_irq(&desc->lock); } else { - spin_unlock_irq(&desc->lock); + raw_spin_unlock_irq(&desc->lock); action->thread_fn(action->irq, action->dev_id); @@ -679,7 +679,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) /* * The following block of code has to be executed atomically */ - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); old_ptr = &desc->action; old = *old_ptr; if (old) { @@ -775,7 +775,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) __enable_irq(desc, irq, false); } - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); /* * Strictly no need to wake it up, but hung_task complains @@ -802,7 +802,7 @@ mismatch: ret = -EBUSY; out_thread: - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); if (new->thread) { struct task_struct *t = new->thread; @@ -844,7 +844,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) if (!desc) return NULL; - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); /* * There can be multiple actions per IRQ descriptor, find the right @@ -856,7 +856,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) if (!action) { WARN(1, "Trying to free already-free IRQ %d\n", irq); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); return NULL; } @@ -884,7 +884,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) desc->chip->disable(irq); } - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); unregister_handler_proc(irq, action); diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index fcb6c96f2627..241962280836 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c @@ -27,7 +27,7 @@ void move_masked_irq(int irq) if (!desc->chip->set_affinity) return; - assert_spin_locked(&desc->lock); + assert_raw_spin_locked(&desc->lock); /* * If there was a valid mask to work with, please diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c index 3fd30197da2e..26bac9d8f860 100644 --- a/kernel/irq/numa_migrate.c +++ b/kernel/irq/numa_migrate.c @@ -42,7 +42,7 @@ static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, "for migration.\n", irq); return false; } - spin_lock_init(&desc->lock); + raw_spin_lock_init(&desc->lock); desc->node = node; lockdep_set_class(&desc->lock, &irq_desc_lock_class); init_copy_kstat_irqs(old_desc, desc, node, nr_cpu_ids); @@ -67,7 +67,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, irq = old_desc->irq; - spin_lock_irqsave(&sparse_irq_lock, flags); + raw_spin_lock_irqsave(&sparse_irq_lock, flags); /* We have to check it to avoid races with another CPU */ desc = irq_desc_ptrs[irq]; @@ -91,7 +91,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, } irq_desc_ptrs[irq] = desc; - spin_unlock_irqrestore(&sparse_irq_lock, flags); + raw_spin_unlock_irqrestore(&sparse_irq_lock, flags); /* free the old one */ free_one_irq_desc(old_desc, desc); @@ -100,7 +100,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, return desc; out_unlock: - spin_unlock_irqrestore(&sparse_irq_lock, flags); + raw_spin_unlock_irqrestore(&sparse_irq_lock, flags); return desc; } diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index a0bb09e79867..0d4005d85b03 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -28,9 +28,9 @@ void suspend_device_irqs(void) for_each_irq_desc(irq, desc) { unsigned long flags; - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); __disable_irq(desc, irq, true); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } for_each_irq_desc(irq, desc) @@ -56,9 +56,9 @@ void resume_device_irqs(void) if (!(desc->status & IRQ_SUSPENDED)) continue; - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); __enable_irq(desc, irq, true); - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } } EXPORT_SYMBOL_GPL(resume_device_irqs); diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 0832145fea97..6f50eccc79c0 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -179,7 +179,7 @@ static int name_unique(unsigned int irq, struct irqaction *new_action) unsigned long flags; int ret = 1; - spin_lock_irqsave(&desc->lock, flags); + raw_spin_lock_irqsave(&desc->lock, flags); for (action = desc->action ; action; action = action->next) { if ((action != new_action) && action->name && !strcmp(new_action->name, action->name)) { @@ -187,7 +187,7 @@ static int name_unique(unsigned int irq, struct irqaction *new_action) break; } } - spin_unlock_irqrestore(&desc->lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); return ret; } diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index e49ea1c5232d..89fb90ae534f 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -28,7 +28,7 @@ static int try_one_irq(int irq, struct irq_desc *desc) struct irqaction *action; int ok = 0, work = 0; - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); /* Already running on another processor */ if (desc->status & IRQ_INPROGRESS) { /* @@ -37,13 +37,13 @@ static int try_one_irq(int irq, struct irq_desc *desc) */ if (desc->action && (desc->action->flags & IRQF_SHARED)) desc->status |= IRQ_PENDING; - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); return ok; } /* Honour the normal IRQ locking */ desc->status |= IRQ_INPROGRESS; action = desc->action; - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); while (action) { /* Only shared IRQ handlers are safe to call */ @@ -56,7 +56,7 @@ static int try_one_irq(int irq, struct irq_desc *desc) } local_irq_disable(); /* Now clean up the flags */ - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); action = desc->action; /* @@ -68,9 +68,9 @@ static int try_one_irq(int irq, struct irq_desc *desc) * Perform real IRQ processing for the IRQ we deferred */ work = 1; - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); handle_IRQ_event(irq, action); - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); desc->status &= ~IRQ_PENDING; } desc->status &= ~IRQ_INPROGRESS; @@ -80,7 +80,7 @@ static int try_one_irq(int irq, struct irq_desc *desc) */ if (work && desc->chip && desc->chip->end) desc->chip->end(irq); - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); return ok; } -- cgit v1.2.3 From 03a05ed1152944000151d57b71000de287a1eb02 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 11 Dec 2009 15:17:20 +0800 Subject: ACPI: Use the ARB_DISABLE for the CPU which model id is less than 0x0f. Currently, ARB_DISABLE is a NOP on all of the recent Intel platforms. For such platforms, reduce contention on c3_lock by skipping the fake ARB_DISABLE. The cpu model id on one laptop is 14. If we disable ARB_DISABLE on this box, the box can't be booted correctly. But if we still enable ARB_DISABLE on this box, the box can be booted correctly. So we still use the ARB_DISABLE for the cpu which mode id is less than 0x0f. http://bugzilla.kernel.org/show_bug.cgi?id=14700 Signed-off-by: Zhao Yakui Acked-by: Pallipadi, Venkatesh cc: stable@kernel.org Signed-off-by: Len Brown --- arch/x86/kernel/acpi/cstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 59cdfa4686b2..2e837f5080fe 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -48,7 +48,7 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, * P4, Core and beyond CPUs */ if (c->x86_vendor == X86_VENDOR_INTEL && - (c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 14))) + (c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 0x0f))) flags->bm_control = 0; } EXPORT_SYMBOL(acpi_processor_power_init_bm_check); -- cgit v1.2.3 From e6428047725d72d63c1d9c4ba852e635e3ffe52a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 15 Dec 2009 16:28:13 -0600 Subject: x86: don't export inline function For CONFIG_PARAVIRT, load_gs_index is an inline function (it's #defined to native_load_gs_index otherwise). Exporting an inline function breaks the new assembler-based alphabetical sorted symbol list: Today's linux-next build (x86_64 allmodconfig) failed like this: .tmp_exports-asm.o: In function `__ksymtab_load_gs_index': (__ksymtab_sorted+0x5b40): undefined reference to `load_gs_index' Signed-off-by: Rusty Russell To: x86@kernel.org Cc: alan-jenkins@tuffmail.co.uk --- arch/x86/kernel/x8664_ksyms_64.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index a1029769b6f2..084c1adc45f5 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -56,4 +56,6 @@ EXPORT_SYMBOL(__memcpy); EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(init_level4_pgt); -EXPORT_SYMBOL(load_gs_index); +#ifndef CONFIG_PARAVIRT +EXPORT_SYMBOL(native_load_gs_index); +#endif -- cgit v1.2.3 From 186a25026c44d1bfa97671110ff14dcd0c99678e Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 15 Dec 2009 20:47:56 +0900 Subject: x86: Split swiotlb initialization into two stages The commit f4780ca005404166cc40af77ef0e86132ab98a81 moves swiotlb initialization before dma32_free_bootmem(). It's supposed to fix a bug that the commit 75f1cdf1dda92cae037ec848ae63690d91913eac introduced, we initialize SWIOTLB right after dma32_free_bootmem so we wrongly steal memory area allocated for GART with broken BIOS earlier. However, the above commit introduced another problem, which likely breaks machines with huge amount of memory. Such a box use the majority of DMA32_ZONE so there is no memory for swiotlb. With this patch, the x86 IOMMU initialization sequence are: 1. We set swiotlb to 1 in the case of (max_pfn > MAX_DMA32_PFN && !no_iommu). If swiotlb usage is forced by the boot option, we go to the step 3 and finish (we don't try to detect IOMMUs). 2. We call the detection functions of all the IOMMUs. The detection function sets x86_init.iommu.iommu_init to the IOMMU initialization function (so we can avoid calling the initialization functions of all the IOMMUs needlessly). 3. We initialize swiotlb (and set dma_ops to swiotlb_dma_ops) if swiotlb is set to 1. 4. If the IOMMU initialization function doesn't need swiotlb (e.g. the initialization is sucessful) then sets swiotlb to zero. 5. If we find that swiotlb is set to zero, we free swiotlb resource. Reported-by: Yinghai Lu Reported-by: Roland Dreier Signed-off-by: FUJITA Tomonori LKML-Reference: <20091215204729A.fujita.tomonori@lab.ntt.co.jp> Tested-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/swiotlb.h | 8 ++++++-- arch/x86/kernel/pci-dma.c | 9 ++++----- arch/x86/kernel/pci-swiotlb.c | 11 +++++++---- 3 files changed, 17 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h index 87ffcb12a1b8..8085277e1b8b 100644 --- a/arch/x86/include/asm/swiotlb.h +++ b/arch/x86/include/asm/swiotlb.h @@ -5,13 +5,17 @@ #ifdef CONFIG_SWIOTLB extern int swiotlb; -extern int pci_swiotlb_init(void); +extern int __init pci_swiotlb_detect(void); +extern void __init pci_swiotlb_init(void); #else #define swiotlb 0 -static inline int pci_swiotlb_init(void) +static inline int pci_swiotlb_detect(void) { return 0; } +static inline void pci_swiotlb_init(void) +{ +} #endif static inline void dma_mark_clean(void *addr, size_t size) {} diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index fcc2f2bfa39c..75e14e21f61a 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -120,15 +120,12 @@ static void __init dma32_free_bootmem(void) void __init pci_iommu_alloc(void) { - int use_swiotlb; - - use_swiotlb = pci_swiotlb_init(); #ifdef CONFIG_X86_64 /* free the range so iommu could get some range less than 4G */ dma32_free_bootmem(); #endif - if (use_swiotlb) - return; + if (pci_swiotlb_detect()) + goto out; gart_iommu_hole_init(); @@ -138,6 +135,8 @@ void __init pci_iommu_alloc(void) /* needs to be called after gart_iommu_hole_init */ amd_iommu_detect(); +out: + pci_swiotlb_init(); } void *dma_generic_alloc_coherent(struct device *dev, size_t size, diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index e3c0a66b9e77..7d2829dde20e 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -43,12 +43,12 @@ static struct dma_map_ops swiotlb_dma_ops = { }; /* - * pci_swiotlb_init - initialize swiotlb if necessary + * pci_swiotlb_detect - set swiotlb to 1 if necessary * * This returns non-zero if we are forced to use swiotlb (by the boot * option). */ -int __init pci_swiotlb_init(void) +int __init pci_swiotlb_detect(void) { int use_swiotlb = swiotlb | swiotlb_force; @@ -60,10 +60,13 @@ int __init pci_swiotlb_init(void) if (swiotlb_force) swiotlb = 1; + return use_swiotlb; +} + +void __init pci_swiotlb_init(void) +{ if (swiotlb) { swiotlb_init(0); dma_ops = &swiotlb_dma_ops; } - - return use_swiotlb; } -- cgit v1.2.3 From 4e25b2576efda24c02e2d6b9bcb5965a3f865f33 Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Mon, 14 Dec 2009 17:58:23 -0800 Subject: hugetlb: add generic definition of NUMA_NO_NODE Move definition of NUMA_NO_NODE from ia64 and x86_64 arch specific headers to generic header 'linux/numa.h' for use in generic code. NUMA_NO_NODE replaces bare '-1' where it's used in this series to indicate "no node id specified". Ultimately, it can be used to replace the -1 elsewhere where it is used similarly. Signed-off-by: Lee Schermerhorn Acked-by: David Rientjes Acked-by: Mel Gorman Reviewed-by: Andi Kleen Cc: KAMEZAWA Hiroyuki Cc: Randy Dunlap Cc: Nishanth Aravamudan Cc: Adam Litke Cc: Andy Whitcroft Cc: Eric Whitney Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/numa.h | 2 -- arch/x86/include/asm/topology.h | 9 +++++++-- include/linux/numa.h | 2 ++ 3 files changed, 9 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/include/asm/numa.h b/arch/ia64/include/asm/numa.h index 3499ff57bf42..6a8a27cfae3e 100644 --- a/arch/ia64/include/asm/numa.h +++ b/arch/ia64/include/asm/numa.h @@ -22,8 +22,6 @@ #include -#define NUMA_NO_NODE -1 - extern u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned; extern cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; extern pg_data_t *pgdat_list[MAX_NUMNODES]; diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 40e37b10c6c0..c5087d796587 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -35,11 +35,16 @@ # endif #endif -/* Node not present */ -#define NUMA_NO_NODE (-1) +/* + * to preserve the visibility of NUMA_NO_NODE definition, + * moved to there from here. May be used independent of + * CONFIG_NUMA. + */ +#include #ifdef CONFIG_NUMA #include + #include #ifdef CONFIG_X86_32 diff --git a/include/linux/numa.h b/include/linux/numa.h index a31a7301b159..3aaa31603a86 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -10,4 +10,6 @@ #define MAX_NUMNODES (1 << NODES_SHIFT) +#define NUMA_NO_NODE (-1) + #endif /* _LINUX_NUMA_H */ -- cgit v1.2.3 From 5f0a96b044d8edaee20f4a32ef6c393599ca55f8 Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Mon, 14 Dec 2009 18:00:32 -0800 Subject: cs5535-gpio: add AMD CS5535/CS5536 GPIO driver support This creates a CS5535/CS5536 GPIO driver which uses a gpio_chip backend (allowing GPIO users to use the generic GPIO API if desired) while also allowing architecture-specific users directly (via the cs5535_gpio_* functions). Tested on an OLPC machine. Some Leemotes also use CS5536 (with a mips cpu), which is why this is in drivers/gpio rather than arch/x86. Currently, it conflicts with older geode GPIO support; once MFGPT support is reworked to also be more generic, the older geode code will be removed. Signed-off-by: Andres Salomon Cc: Takashi Iwai Cc: Jordan Crouse Cc: David Brownell Reviewed-by: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/geode.h | 28 +---- drivers/gpio/Kconfig | 10 ++ drivers/gpio/Makefile | 1 + drivers/gpio/cs5535-gpio.c | 282 +++++++++++++++++++++++++++++++++++++++++++ include/linux/cs5535.h | 58 +++++++++ 5 files changed, 352 insertions(+), 27 deletions(-) create mode 100644 drivers/gpio/cs5535-gpio.c create mode 100644 include/linux/cs5535.h (limited to 'arch/x86') diff --git a/arch/x86/include/asm/geode.h b/arch/x86/include/asm/geode.h index ad3c2ed75481..5716214d37d9 100644 --- a/arch/x86/include/asm/geode.h +++ b/arch/x86/include/asm/geode.h @@ -12,6 +12,7 @@ #include #include +#include /* Generic southbridge functions */ @@ -115,33 +116,6 @@ extern int geode_get_dev_base(unsigned int dev); #define VSA_VR_MEM_SIZE 0x0200 #define AMD_VSA_SIG 0x4132 /* signature is ascii 'VSA2' */ #define GSW_VSA_SIG 0x534d /* General Software signature */ -/* GPIO */ - -#define GPIO_OUTPUT_VAL 0x00 -#define GPIO_OUTPUT_ENABLE 0x04 -#define GPIO_OUTPUT_OPEN_DRAIN 0x08 -#define GPIO_OUTPUT_INVERT 0x0C -#define GPIO_OUTPUT_AUX1 0x10 -#define GPIO_OUTPUT_AUX2 0x14 -#define GPIO_PULL_UP 0x18 -#define GPIO_PULL_DOWN 0x1C -#define GPIO_INPUT_ENABLE 0x20 -#define GPIO_INPUT_INVERT 0x24 -#define GPIO_INPUT_FILTER 0x28 -#define GPIO_INPUT_EVENT_COUNT 0x2C -#define GPIO_READ_BACK 0x30 -#define GPIO_INPUT_AUX1 0x34 -#define GPIO_EVENTS_ENABLE 0x38 -#define GPIO_LOCK_ENABLE 0x3C -#define GPIO_POSITIVE_EDGE_EN 0x40 -#define GPIO_NEGATIVE_EDGE_EN 0x44 -#define GPIO_POSITIVE_EDGE_STS 0x48 -#define GPIO_NEGATIVE_EDGE_STS 0x4C - -#define GPIO_MAP_X 0xE0 -#define GPIO_MAP_Y 0xE4 -#define GPIO_MAP_Z 0xE8 -#define GPIO_MAP_W 0xEC static inline u32 geode_gpio(unsigned int nr) { diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 2ad0128c63c6..df4b82d1348e 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -174,6 +174,16 @@ config GPIO_ADP5520 comment "PCI GPIO expanders:" +config GPIO_CS5535 + tristate "AMD CS5535/CS5536 GPIO support" + depends on PCI && !CS5535_GPIO && !MGEODE_LX + help + The AMD CS5535 and CS5536 southbridges support 28 GPIO pins that + can be used for quite a number of things. The CS5535/6 is found on + AMD Geode and Lemote Yeeloong devices. + + If unsure, say N. + config GPIO_BT8XX tristate "BT8XX GPIO abuser" depends on PCI && VIDEO_BT848=n diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index 00a532c9a1e2..270b6d7839f5 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_GPIO_PL061) += pl061.o obj-$(CONFIG_GPIO_TWL4030) += twl4030-gpio.o obj-$(CONFIG_GPIO_UCB1400) += ucb1400_gpio.o obj-$(CONFIG_GPIO_XILINX) += xilinx_gpio.o +obj-$(CONFIG_GPIO_CS5535) += cs5535-gpio.o obj-$(CONFIG_GPIO_BT8XX) += bt8xxgpio.o obj-$(CONFIG_GPIO_VR41XX) += vr41xx_giu.o obj-$(CONFIG_GPIO_WM831X) += wm831x-gpio.o diff --git a/drivers/gpio/cs5535-gpio.c b/drivers/gpio/cs5535-gpio.c new file mode 100644 index 000000000000..56138893819a --- /dev/null +++ b/drivers/gpio/cs5535-gpio.c @@ -0,0 +1,282 @@ +/* + * AMD CS5535/CS5536 GPIO driver + * Copyright (C) 2006 Advanced Micro Devices, Inc. + * Copyright (C) 2007-2009 Andres Salomon + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "cs5535-gpio" +#define GPIO_BAR 1 + +static struct cs5535_gpio_chip { + struct gpio_chip chip; + resource_size_t base; + + struct pci_dev *pdev; + spinlock_t lock; +} cs5535_gpio_chip; + +/* + * The CS5535/CS5536 GPIOs support a number of extra features not defined + * by the gpio_chip API, so these are exported. For a full list of the + * registers, see include/linux/cs5535.h. + */ + +static void __cs5535_gpio_set(struct cs5535_gpio_chip *chip, unsigned offset, + unsigned int reg) +{ + if (offset < 16) + /* low bank register */ + outl(1 << offset, chip->base + reg); + else + /* high bank register */ + outl(1 << (offset - 16), chip->base + 0x80 + reg); +} + +void cs5535_gpio_set(unsigned offset, unsigned int reg) +{ + struct cs5535_gpio_chip *chip = &cs5535_gpio_chip; + unsigned long flags; + + spin_lock_irqsave(&chip->lock, flags); + __cs5535_gpio_set(chip, offset, reg); + spin_unlock_irqrestore(&chip->lock, flags); +} +EXPORT_SYMBOL_GPL(cs5535_gpio_set); + +static void __cs5535_gpio_clear(struct cs5535_gpio_chip *chip, unsigned offset, + unsigned int reg) +{ + if (offset < 16) + /* low bank register */ + outl(1 << (offset + 16), chip->base + reg); + else + /* high bank register */ + outl(1 << offset, chip->base + 0x80 + reg); +} + +void cs5535_gpio_clear(unsigned offset, unsigned int reg) +{ + struct cs5535_gpio_chip *chip = &cs5535_gpio_chip; + unsigned long flags; + + spin_lock_irqsave(&chip->lock, flags); + __cs5535_gpio_clear(chip, offset, reg); + spin_unlock_irqrestore(&chip->lock, flags); +} +EXPORT_SYMBOL_GPL(cs5535_gpio_clear); + +int cs5535_gpio_isset(unsigned offset, unsigned int reg) +{ + struct cs5535_gpio_chip *chip = &cs5535_gpio_chip; + unsigned long flags; + long val; + + spin_lock_irqsave(&chip->lock, flags); + if (offset < 16) + /* low bank register */ + val = inl(chip->base + reg); + else { + /* high bank register */ + val = inl(chip->base + 0x80 + reg); + offset -= 16; + } + spin_unlock_irqrestore(&chip->lock, flags); + + return (val & (1 << offset)) ? 1 : 0; +} +EXPORT_SYMBOL_GPL(cs5535_gpio_isset); + +/* + * Generic gpio_chip API support. + */ + +static int chip_gpio_get(struct gpio_chip *chip, unsigned offset) +{ + return cs5535_gpio_isset(offset, GPIO_OUTPUT_VAL); +} + +static void chip_gpio_set(struct gpio_chip *chip, unsigned offset, int val) +{ + if (val) + cs5535_gpio_set(offset, GPIO_OUTPUT_VAL); + else + cs5535_gpio_clear(offset, GPIO_OUTPUT_VAL); +} + +static int chip_direction_input(struct gpio_chip *c, unsigned offset) +{ + struct cs5535_gpio_chip *chip = (struct cs5535_gpio_chip *) c; + unsigned long flags; + + spin_lock_irqsave(&chip->lock, flags); + __cs5535_gpio_set(chip, offset, GPIO_INPUT_ENABLE); + spin_unlock_irqrestore(&chip->lock, flags); + + return 0; +} + +static int chip_direction_output(struct gpio_chip *c, unsigned offset, int val) +{ + struct cs5535_gpio_chip *chip = (struct cs5535_gpio_chip *) c; + unsigned long flags; + + spin_lock_irqsave(&chip->lock, flags); + + __cs5535_gpio_set(chip, offset, GPIO_OUTPUT_ENABLE); + if (val) + __cs5535_gpio_set(chip, offset, GPIO_OUTPUT_VAL); + else + __cs5535_gpio_clear(chip, offset, GPIO_OUTPUT_VAL); + + spin_unlock_irqrestore(&chip->lock, flags); + + return 0; +} + +static struct cs5535_gpio_chip cs5535_gpio_chip = { + .chip = { + .owner = THIS_MODULE, + .label = DRV_NAME, + + .base = 0, + .ngpio = 28, + + .get = chip_gpio_get, + .set = chip_gpio_set, + + .direction_input = chip_direction_input, + .direction_output = chip_direction_output, + }, +}; + +static int __init cs5535_gpio_probe(struct pci_dev *pdev, + const struct pci_device_id *pci_id) +{ + int err; + + /* There are two ways to get the GPIO base address; one is by + * fetching it from MSR_LBAR_GPIO, the other is by reading the + * PCI BAR info. The latter method is easier (especially across + * different architectures), so we'll stick with that for now. If + * it turns out to be unreliable in the face of crappy BIOSes, we + * can always go back to using MSRs.. */ + + err = pci_enable_device_io(pdev); + if (err) { + dev_err(&pdev->dev, "can't enable device IO\n"); + goto done; + } + + err = pci_request_region(pdev, GPIO_BAR, DRV_NAME); + if (err) { + dev_err(&pdev->dev, "can't alloc PCI BAR #%d\n", GPIO_BAR); + goto done; + } + + /* set up the driver-specific struct */ + cs5535_gpio_chip.base = pci_resource_start(pdev, GPIO_BAR); + cs5535_gpio_chip.pdev = pdev; + spin_lock_init(&cs5535_gpio_chip.lock); + + dev_info(&pdev->dev, "allocated PCI BAR #%d: base 0x%llx\n", GPIO_BAR, + (unsigned long long) cs5535_gpio_chip.base); + + /* finally, register with the generic GPIO API */ + err = gpiochip_add(&cs5535_gpio_chip.chip); + if (err) { + dev_err(&pdev->dev, "failed to register gpio chip\n"); + goto release_region; + } + + printk(KERN_INFO DRV_NAME ": GPIO support successfully loaded.\n"); + return 0; + +release_region: + pci_release_region(pdev, GPIO_BAR); +done: + return err; +} + +static void __exit cs5535_gpio_remove(struct pci_dev *pdev) +{ + int err; + + err = gpiochip_remove(&cs5535_gpio_chip.chip); + if (err) { + /* uhh? */ + dev_err(&pdev->dev, "unable to remove gpio_chip?\n"); + } + pci_release_region(pdev, GPIO_BAR); +} + +static struct pci_device_id cs5535_gpio_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_CS5535_ISA) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA) }, + { 0, }, +}; +MODULE_DEVICE_TABLE(pci, cs5535_gpio_pci_tbl); + +/* + * We can't use the standard PCI driver registration stuff here, since + * that allows only one driver to bind to each PCI device (and we want + * multiple drivers to be able to bind to the device). Instead, manually + * scan for the PCI device, request a single region, and keep track of the + * devices that we're using. + */ + +static int __init cs5535_gpio_scan_pci(void) +{ + struct pci_dev *pdev; + int err = -ENODEV; + int i; + + for (i = 0; i < ARRAY_SIZE(cs5535_gpio_pci_tbl); i++) { + pdev = pci_get_device(cs5535_gpio_pci_tbl[i].vendor, + cs5535_gpio_pci_tbl[i].device, NULL); + if (pdev) { + err = cs5535_gpio_probe(pdev, &cs5535_gpio_pci_tbl[i]); + if (err) + pci_dev_put(pdev); + + /* we only support a single CS5535/6 southbridge */ + break; + } + } + + return err; +} + +static void __exit cs5535_gpio_free_pci(void) +{ + cs5535_gpio_remove(cs5535_gpio_chip.pdev); + pci_dev_put(cs5535_gpio_chip.pdev); +} + +static int __init cs5535_gpio_init(void) +{ + return cs5535_gpio_scan_pci(); +} + +static void __exit cs5535_gpio_exit(void) +{ + cs5535_gpio_free_pci(); +} + +module_init(cs5535_gpio_init); +module_exit(cs5535_gpio_exit); + +MODULE_AUTHOR("Andres Salomon "); +MODULE_DESCRIPTION("AMD CS5535/CS5536 GPIO driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/cs5535.h b/include/linux/cs5535.h new file mode 100644 index 000000000000..cfea689dfa34 --- /dev/null +++ b/include/linux/cs5535.h @@ -0,0 +1,58 @@ +/* + * AMD CS5535/CS5536 definitions + * Copyright (C) 2006 Advanced Micro Devices, Inc. + * Copyright (C) 2009 Andres Salomon + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#ifndef _CS5535_H +#define _CS5535_H + +/* MSRs */ +#define MSR_LBAR_SMB 0x5140000B +#define MSR_LBAR_GPIO 0x5140000C +#define MSR_LBAR_MFGPT 0x5140000D +#define MSR_LBAR_ACPI 0x5140000E +#define MSR_LBAR_PMS 0x5140000F + +/* resource sizes */ +#define LBAR_GPIO_SIZE 0xFF +#define LBAR_MFGPT_SIZE 0x40 +#define LBAR_ACPI_SIZE 0x40 +#define LBAR_PMS_SIZE 0x80 + +/* GPIOs */ +#define GPIO_OUTPUT_VAL 0x00 +#define GPIO_OUTPUT_ENABLE 0x04 +#define GPIO_OUTPUT_OPEN_DRAIN 0x08 +#define GPIO_OUTPUT_INVERT 0x0C +#define GPIO_OUTPUT_AUX1 0x10 +#define GPIO_OUTPUT_AUX2 0x14 +#define GPIO_PULL_UP 0x18 +#define GPIO_PULL_DOWN 0x1C +#define GPIO_INPUT_ENABLE 0x20 +#define GPIO_INPUT_INVERT 0x24 +#define GPIO_INPUT_FILTER 0x28 +#define GPIO_INPUT_EVENT_COUNT 0x2C +#define GPIO_READ_BACK 0x30 +#define GPIO_INPUT_AUX1 0x34 +#define GPIO_EVENTS_ENABLE 0x38 +#define GPIO_LOCK_ENABLE 0x3C +#define GPIO_POSITIVE_EDGE_EN 0x40 +#define GPIO_NEGATIVE_EDGE_EN 0x44 +#define GPIO_POSITIVE_EDGE_STS 0x48 +#define GPIO_NEGATIVE_EDGE_STS 0x4C + +#define GPIO_MAP_X 0xE0 +#define GPIO_MAP_Y 0xE4 +#define GPIO_MAP_Z 0xE8 +#define GPIO_MAP_W 0xEC + +void cs5535_gpio_set(unsigned offset, unsigned int reg); +void cs5535_gpio_clear(unsigned offset, unsigned int reg); +int cs5535_gpio_isset(unsigned offset, unsigned int reg); + +#endif -- cgit v1.2.3 From 3c55494670745e523f69b56edb66ca0b50a470c2 Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Mon, 14 Dec 2009 18:00:36 -0800 Subject: ALSA: cs5535audio: free OLPC quirks from reliance on MGEODE_LX cpu optimization Previously, OLPC support for the mic extensions was only enabled in the ALSA driver if CONFIG_OLPC and CONFIG_MGEODE_LX were both set. This was because the old geode GPIO code was written in a manner that assumed CONFIG_MGEODE_LX. With the new cs553x-gpio driver, this is no longer the case; as such, we can drop the requirement on CONFIG_MGEODE_LX and instead include a requirement on GPIOLIB. We use the generic GPIO API rather than the cs553x-specific API. Signed-off-by: Andres Salomon Cc: Takashi Iwai Cc: Jordan Crouse Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 1 + arch/x86/include/asm/olpc.h | 2 +- sound/pci/cs5535audio/Makefile | 2 -- sound/pci/cs5535audio/cs5535audio.c | 1 + sound/pci/cs5535audio/cs5535audio.h | 4 +++- sound/pci/cs5535audio/cs5535audio_olpc.c | 26 +++++++++++++++++++------- 6 files changed, 25 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 32a1918e1b88..6ad0985e8d76 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2024,6 +2024,7 @@ config GEODE_MFGPT_TIMER config OLPC bool "One Laptop Per Child support" + select GPIOLIB default n ---help--- Add support for detecting the unique features of the OLPC diff --git a/arch/x86/include/asm/olpc.h b/arch/x86/include/asm/olpc.h index 834a30295fab..3a57385d9fa7 100644 --- a/arch/x86/include/asm/olpc.h +++ b/arch/x86/include/asm/olpc.h @@ -120,7 +120,7 @@ extern int olpc_ec_mask_unset(uint8_t bits); /* GPIO assignments */ -#define OLPC_GPIO_MIC_AC geode_gpio(1) +#define OLPC_GPIO_MIC_AC 1 #define OLPC_GPIO_DCON_IRQ geode_gpio(7) #define OLPC_GPIO_THRM_ALRM geode_gpio(10) #define OLPC_GPIO_SMB_CLK geode_gpio(14) diff --git a/sound/pci/cs5535audio/Makefile b/sound/pci/cs5535audio/Makefile index fda7a94c992f..ccc642269b9e 100644 --- a/sound/pci/cs5535audio/Makefile +++ b/sound/pci/cs5535audio/Makefile @@ -4,9 +4,7 @@ snd-cs5535audio-y := cs5535audio.o cs5535audio_pcm.o snd-cs5535audio-$(CONFIG_PM) += cs5535audio_pm.o -ifdef CONFIG_MGEODE_LX snd-cs5535audio-$(CONFIG_OLPC) += cs5535audio_olpc.o -endif # Toplevel Module Dependency obj-$(CONFIG_SND_CS5535AUDIO) += snd-cs5535audio.o diff --git a/sound/pci/cs5535audio/cs5535audio.c b/sound/pci/cs5535audio/cs5535audio.c index 05f56e04849b..91e7faf69bbb 100644 --- a/sound/pci/cs5535audio/cs5535audio.c +++ b/sound/pci/cs5535audio/cs5535audio.c @@ -389,6 +389,7 @@ probefail_out: static void __devexit snd_cs5535audio_remove(struct pci_dev *pci) { + olpc_quirks_cleanup(); snd_card_free(pci_get_drvdata(pci)); pci_set_drvdata(pci, NULL); } diff --git a/sound/pci/cs5535audio/cs5535audio.h b/sound/pci/cs5535audio/cs5535audio.h index 7a298ac662e3..51966d782a3c 100644 --- a/sound/pci/cs5535audio/cs5535audio.h +++ b/sound/pci/cs5535audio/cs5535audio.h @@ -99,10 +99,11 @@ int snd_cs5535audio_suspend(struct pci_dev *pci, pm_message_t state); int snd_cs5535audio_resume(struct pci_dev *pci); #endif -#if defined(CONFIG_OLPC) && defined(CONFIG_MGEODE_LX) +#ifdef CONFIG_OLPC void __devinit olpc_prequirks(struct snd_card *card, struct snd_ac97_template *ac97); int __devinit olpc_quirks(struct snd_card *card, struct snd_ac97 *ac97); +void __devexit olpc_quirks_cleanup(void); void olpc_analog_input(struct snd_ac97 *ac97, int on); void olpc_mic_bias(struct snd_ac97 *ac97, int on); @@ -128,6 +129,7 @@ static inline int olpc_quirks(struct snd_card *card, struct snd_ac97 *ac97) { return 0; } +static inline void olpc_quirks_cleanup(void) { } static inline void olpc_analog_input(struct snd_ac97 *ac97, int on) { } static inline void olpc_mic_bias(struct snd_ac97 *ac97, int on) { } static inline void olpc_capture_open(struct snd_ac97 *ac97) { } diff --git a/sound/pci/cs5535audio/cs5535audio_olpc.c b/sound/pci/cs5535audio/cs5535audio_olpc.c index 5c6814335cd7..50da49be9ae5 100644 --- a/sound/pci/cs5535audio/cs5535audio_olpc.c +++ b/sound/pci/cs5535audio/cs5535audio_olpc.c @@ -13,10 +13,13 @@ #include #include #include +#include #include #include "cs5535audio.h" +#define DRV_NAME "cs5535audio-olpc" + /* * OLPC has an additional feature on top of the regular AD1888 codec features. * It has an Analog Input mode that is switched into (after disabling the @@ -38,10 +41,7 @@ void olpc_analog_input(struct snd_ac97 *ac97, int on) } /* set Analog Input through GPIO */ - if (on) - geode_gpio_set(OLPC_GPIO_MIC_AC, GPIO_OUTPUT_VAL); - else - geode_gpio_clear(OLPC_GPIO_MIC_AC, GPIO_OUTPUT_VAL); + gpio_set_value(OLPC_GPIO_MIC_AC, on); } /* @@ -73,8 +73,7 @@ static int olpc_dc_info(struct snd_kcontrol *kctl, static int olpc_dc_get(struct snd_kcontrol *kctl, struct snd_ctl_elem_value *v) { - v->value.integer.value[0] = geode_gpio_isset(OLPC_GPIO_MIC_AC, - GPIO_OUTPUT_VAL); + v->value.integer.value[0] = gpio_get_value(OLPC_GPIO_MIC_AC); return 0; } @@ -153,6 +152,12 @@ int __devinit olpc_quirks(struct snd_card *card, struct snd_ac97 *ac97) if (!machine_is_olpc()) return 0; + if (gpio_request(OLPC_GPIO_MIC_AC, DRV_NAME)) { + printk(KERN_ERR DRV_NAME ": unable to allocate MIC GPIO\n"); + return -EIO; + } + gpio_direction_output(OLPC_GPIO_MIC_AC, 0); + /* drop the original AD1888 HPF control */ memset(&elem, 0, sizeof(elem)); elem.iface = SNDRV_CTL_ELEM_IFACE_MIXER; @@ -169,11 +174,18 @@ int __devinit olpc_quirks(struct snd_card *card, struct snd_ac97 *ac97) for (i = 0; i < ARRAY_SIZE(olpc_cs5535audio_ctls); i++) { err = snd_ctl_add(card, snd_ctl_new1(&olpc_cs5535audio_ctls[i], ac97->private_data)); - if (err < 0) + if (err < 0) { + gpio_free(OLPC_GPIO_MIC_AC); return err; + } } /* turn off the mic by default */ olpc_mic_bias(ac97, 0); return 0; } + +void __devexit olpc_quirks_cleanup(void) +{ + gpio_free(OLPC_GPIO_MIC_AC); +} -- cgit v1.2.3 From 82dca611bb516ec5fb7d04077733d6a4b70f52d1 Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Mon, 14 Dec 2009 18:00:37 -0800 Subject: cs5535: add a generic MFGPT driver This is based on the old code on arch/x86/kernel/mfgpt_32.c, except it's not x86 specific, it's modular, and it makes use of a PCI BAR rather than a random MSR. Currently module unloading is not supported; it's uncertain whether or not it can be made work with the hardware. [akpm@linux-foundation.org: add X86 dependency] Signed-off-by: Andres Salomon Cc: Jordan Crouse Cc: Ingo Molnar Cc: Thomas Gleixner Cc: john stultz Cc: Chris Ball Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/geode.h | 40 ----- drivers/misc/Kconfig | 24 +++ drivers/misc/Makefile | 1 + drivers/misc/cs5535-mfgpt.c | 370 +++++++++++++++++++++++++++++++++++++++++++ include/linux/cs5535.h | 67 ++++++++ 5 files changed, 462 insertions(+), 40 deletions(-) create mode 100644 drivers/misc/cs5535-mfgpt.c (limited to 'arch/x86') diff --git a/arch/x86/include/asm/geode.h b/arch/x86/include/asm/geode.h index 5716214d37d9..547e9642642a 100644 --- a/arch/x86/include/asm/geode.h +++ b/arch/x86/include/asm/geode.h @@ -47,16 +47,6 @@ extern int geode_get_dev_base(unsigned int dev); #define MSR_DIVIL_SOFT_RESET 0x51400017 -#define MSR_PIC_YSEL_LOW 0x51400020 -#define MSR_PIC_YSEL_HIGH 0x51400021 -#define MSR_PIC_ZSEL_LOW 0x51400022 -#define MSR_PIC_ZSEL_HIGH 0x51400023 -#define MSR_PIC_IRQM_LPC 0x51400025 - -#define MSR_MFGPT_IRQ 0x51400028 -#define MSR_MFGPT_NR 0x51400029 -#define MSR_MFGPT_SETUP 0x5140002B - #define MSR_LX_SPARE_MSR 0x80000011 /* DC-specific */ #define MSR_GX_GLD_MSR_CONFIG 0xC0002001 @@ -169,36 +159,6 @@ static inline int geode_has_vsa2(void) } #endif -/* MFGPTs */ - -#define MFGPT_MAX_TIMERS 8 -#define MFGPT_TIMER_ANY (-1) - -#define MFGPT_DOMAIN_WORKING 1 -#define MFGPT_DOMAIN_STANDBY 2 -#define MFGPT_DOMAIN_ANY (MFGPT_DOMAIN_WORKING | MFGPT_DOMAIN_STANDBY) - -#define MFGPT_CMP1 0 -#define MFGPT_CMP2 1 - -#define MFGPT_EVENT_IRQ 0 -#define MFGPT_EVENT_NMI 1 -#define MFGPT_EVENT_RESET 3 - -#define MFGPT_REG_CMP1 0 -#define MFGPT_REG_CMP2 2 -#define MFGPT_REG_COUNTER 4 -#define MFGPT_REG_SETUP 6 - -#define MFGPT_SETUP_CNTEN (1 << 15) -#define MFGPT_SETUP_CMP2 (1 << 14) -#define MFGPT_SETUP_CMP1 (1 << 13) -#define MFGPT_SETUP_SETUP (1 << 12) -#define MFGPT_SETUP_STOPEN (1 << 11) -#define MFGPT_SETUP_EXTEN (1 << 10) -#define MFGPT_SETUP_REVEN (1 << 5) -#define MFGPT_SETUP_CLKSEL (1 << 4) - static inline void geode_mfgpt_write(int timer, u16 reg, u16 value) { u32 base = geode_get_dev_base(GEODE_DEV_MFGPT); diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 724d1188a322..d6d6d846f0d6 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -187,6 +187,30 @@ config SGI_XP this feature will allow for direct communication between SSIs based on a network adapter and DMA messaging. +config CS5535_MFGPT + tristate "CS5535/CS5536 Geode Multi-Function General Purpose Timer (MFGPT) support" + depends on PCI && !MGEODE_LX + depends on X86 + default n + help + This driver provides access to MFGPT functionality for other + drivers that need timers. MFGPTs are available in the CS5535 and + CS5536 companion chips that are found in AMD Geode and several + other platforms. They have a better resolution and max interval + than the generic PIT, and are suitable for use as high-res timers. + You probably don't want to enable this manually; other drivers that + make use of it should enable it. + +config CS5535_MFGPT_DEFAULT_IRQ + int + default 7 + help + MFGPTs on the CS5535 require an interrupt. The selected IRQ + can be overridden as a module option as well as by driver that + use the cs5535_mfgpt_ API; however, different architectures might + want to use a different IRQ by default. This is here for + architectures to set as necessary. + config HP_ILO tristate "Channel interface driver for HP iLO/iLO2 processor" depends on PCI diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index e76b77977442..049ff2482f30 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -18,6 +18,7 @@ obj-$(CONFIG_ENCLOSURE_SERVICES) += enclosure.o obj-$(CONFIG_KGDB_TESTS) += kgdbts.o obj-$(CONFIG_SGI_XP) += sgi-xp/ obj-$(CONFIG_SGI_GRU) += sgi-gru/ +obj-$(CONFIG_CS5535_MFGPT) += cs5535-mfgpt.o obj-$(CONFIG_HP_ILO) += hpilo.o obj-$(CONFIG_ISL29003) += isl29003.o obj-$(CONFIG_EP93XX_PWM) += ep93xx_pwm.o diff --git a/drivers/misc/cs5535-mfgpt.c b/drivers/misc/cs5535-mfgpt.c new file mode 100644 index 000000000000..8110460558ff --- /dev/null +++ b/drivers/misc/cs5535-mfgpt.c @@ -0,0 +1,370 @@ +/* + * Driver for the CS5535/CS5536 Multi-Function General Purpose Timers (MFGPT) + * + * Copyright (C) 2006, Advanced Micro Devices, Inc. + * Copyright (C) 2007 Andres Salomon + * Copyright (C) 2009 Andres Salomon + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * The MFGPTs are documented in AMD Geode CS5536 Companion Device Data Book. + */ + +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "cs5535-mfgpt" +#define MFGPT_BAR 2 + +static int mfgpt_reset_timers; +module_param_named(mfgptfix, mfgpt_reset_timers, int, 0644); +MODULE_PARM_DESC(mfgptfix, "Reset the MFGPT timers during init; " + "required by some broken BIOSes (ie, TinyBIOS < 0.99)."); + +struct cs5535_mfgpt_timer { + struct cs5535_mfgpt_chip *chip; + int nr; +}; + +static struct cs5535_mfgpt_chip { + DECLARE_BITMAP(avail, MFGPT_MAX_TIMERS); + resource_size_t base; + + struct pci_dev *pdev; + spinlock_t lock; + int initialized; +} cs5535_mfgpt_chip; + +int cs5535_mfgpt_toggle_event(struct cs5535_mfgpt_timer *timer, int cmp, + int event, int enable) +{ + uint32_t msr, mask, value, dummy; + int shift = (cmp == MFGPT_CMP1) ? 0 : 8; + + if (!timer) { + WARN_ON(1); + return -EIO; + } + + /* + * The register maps for these are described in sections 6.17.1.x of + * the AMD Geode CS5536 Companion Device Data Book. + */ + switch (event) { + case MFGPT_EVENT_RESET: + /* + * XXX: According to the docs, we cannot reset timers above + * 6; that is, resets for 7 and 8 will be ignored. Is this + * a problem? -dilinger + */ + msr = MSR_MFGPT_NR; + mask = 1 << (timer->nr + 24); + break; + + case MFGPT_EVENT_NMI: + msr = MSR_MFGPT_NR; + mask = 1 << (timer->nr + shift); + break; + + case MFGPT_EVENT_IRQ: + msr = MSR_MFGPT_IRQ; + mask = 1 << (timer->nr + shift); + break; + + default: + return -EIO; + } + + rdmsr(msr, value, dummy); + + if (enable) + value |= mask; + else + value &= ~mask; + + wrmsr(msr, value, dummy); + return 0; +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_toggle_event); + +int cs5535_mfgpt_set_irq(struct cs5535_mfgpt_timer *timer, int cmp, int *irq, + int enable) +{ + uint32_t zsel, lpc, dummy; + int shift; + + if (!timer) { + WARN_ON(1); + return -EIO; + } + + /* + * Unfortunately, MFGPTs come in pairs sharing their IRQ lines. If VSA + * is using the same CMP of the timer's Siamese twin, the IRQ is set to + * 2, and we mustn't use nor change it. + * XXX: Likewise, 2 Linux drivers might clash if the 2nd overwrites the + * IRQ of the 1st. This can only happen if forcing an IRQ, calling this + * with *irq==0 is safe. Currently there _are_ no 2 drivers. + */ + rdmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); + shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer->nr % 4) * 4; + if (((zsel >> shift) & 0xF) == 2) + return -EIO; + + /* Choose IRQ: if none supplied, keep IRQ already set or use default */ + if (!*irq) + *irq = (zsel >> shift) & 0xF; + if (!*irq) + *irq = CONFIG_CS5535_MFGPT_DEFAULT_IRQ; + + /* Can't use IRQ if it's 0 (=disabled), 2, or routed to LPC */ + if (*irq < 1 || *irq == 2 || *irq > 15) + return -EIO; + rdmsr(MSR_PIC_IRQM_LPC, lpc, dummy); + if (lpc & (1 << *irq)) + return -EIO; + + /* All chosen and checked - go for it */ + if (cs5535_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable)) + return -EIO; + if (enable) { + zsel = (zsel & ~(0xF << shift)) | (*irq << shift); + wrmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); + } + + return 0; +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_set_irq); + +struct cs5535_mfgpt_timer *cs5535_mfgpt_alloc_timer(int timer_nr, int domain) +{ + struct cs5535_mfgpt_chip *mfgpt = &cs5535_mfgpt_chip; + struct cs5535_mfgpt_timer *timer = NULL; + unsigned long flags; + int max; + + if (!mfgpt->initialized) + goto done; + + /* only allocate timers from the working domain if requested */ + if (domain == MFGPT_DOMAIN_WORKING) + max = 6; + else + max = MFGPT_MAX_TIMERS; + + if (timer_nr >= max) { + /* programmer error. silly programmers! */ + WARN_ON(1); + goto done; + } + + spin_lock_irqsave(&mfgpt->lock, flags); + if (timer_nr < 0) { + unsigned long t; + + /* try to find any available timer */ + t = find_first_bit(mfgpt->avail, max); + /* set timer_nr to -1 if no timers available */ + timer_nr = t < max ? (int) t : -1; + } else { + /* check if the requested timer's available */ + if (test_bit(timer_nr, mfgpt->avail)) + timer_nr = -1; + } + + if (timer_nr >= 0) + /* if timer_nr is not -1, it's an available timer */ + __clear_bit(timer_nr, mfgpt->avail); + spin_unlock_irqrestore(&mfgpt->lock, flags); + + if (timer_nr < 0) + goto done; + + timer = kmalloc(sizeof(*timer), GFP_KERNEL); + if (!timer) { + /* aw hell */ + spin_lock_irqsave(&mfgpt->lock, flags); + __set_bit(timer_nr, mfgpt->avail); + spin_unlock_irqrestore(&mfgpt->lock, flags); + goto done; + } + timer->chip = mfgpt; + timer->nr = timer_nr; + dev_info(&mfgpt->pdev->dev, "registered timer %d\n", timer_nr); + +done: + return timer; +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_alloc_timer); + +/* + * XXX: This frees the timer memory, but never resets the actual hardware + * timer. The old geode_mfgpt code did this; it would be good to figure + * out a way to actually release the hardware timer. See comments below. + */ +void cs5535_mfgpt_free_timer(struct cs5535_mfgpt_timer *timer) +{ + kfree(timer); +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_free_timer); + +uint16_t cs5535_mfgpt_read(struct cs5535_mfgpt_timer *timer, uint16_t reg) +{ + return inw(timer->chip->base + reg + (timer->nr * 8)); +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_read); + +void cs5535_mfgpt_write(struct cs5535_mfgpt_timer *timer, uint16_t reg, + uint16_t value) +{ + outw(value, timer->chip->base + reg + (timer->nr * 8)); +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_write); + +/* + * This is a sledgehammer that resets all MFGPT timers. This is required by + * some broken BIOSes which leave the system in an unstable state + * (TinyBIOS 0.98, for example; fixed in 0.99). It's uncertain as to + * whether or not this secret MSR can be used to release individual timers. + * Jordan tells me that he and Mitch once played w/ it, but it's unclear + * what the results of that were (and they experienced some instability). + */ +static void __init reset_all_timers(void) +{ + uint32_t val, dummy; + + /* The following undocumented bit resets the MFGPT timers */ + val = 0xFF; dummy = 0; + wrmsr(MSR_MFGPT_SETUP, val, dummy); +} + +/* + * Check whether any MFGPTs are available for the kernel to use. In most + * cases, firmware that uses AMD's VSA code will claim all timers during + * bootup; we certainly don't want to take them if they're already in use. + * In other cases (such as with VSAless OpenFirmware), the system firmware + * leaves timers available for us to use. + */ +static int __init scan_timers(struct cs5535_mfgpt_chip *mfgpt) +{ + struct cs5535_mfgpt_timer timer = { .chip = mfgpt }; + unsigned long flags; + int timers = 0; + uint16_t val; + int i; + + /* bios workaround */ + if (mfgpt_reset_timers) + reset_all_timers(); + + /* just to be safe, protect this section w/ lock */ + spin_lock_irqsave(&mfgpt->lock, flags); + for (i = 0; i < MFGPT_MAX_TIMERS; i++) { + timer.nr = i; + val = cs5535_mfgpt_read(&timer, MFGPT_REG_SETUP); + if (!(val & MFGPT_SETUP_SETUP)) { + __set_bit(i, mfgpt->avail); + timers++; + } + } + spin_unlock_irqrestore(&mfgpt->lock, flags); + + return timers; +} + +static int __init cs5535_mfgpt_probe(struct pci_dev *pdev, + const struct pci_device_id *pci_id) +{ + int err, t; + + /* There are two ways to get the MFGPT base address; one is by + * fetching it from MSR_LBAR_MFGPT, the other is by reading the + * PCI BAR info. The latter method is easier (especially across + * different architectures), so we'll stick with that for now. If + * it turns out to be unreliable in the face of crappy BIOSes, we + * can always go back to using MSRs.. */ + + err = pci_enable_device_io(pdev); + if (err) { + dev_err(&pdev->dev, "can't enable device IO\n"); + goto done; + } + + err = pci_request_region(pdev, MFGPT_BAR, DRV_NAME); + if (err) { + dev_err(&pdev->dev, "can't alloc PCI BAR #%d\n", MFGPT_BAR); + goto done; + } + + /* set up the driver-specific struct */ + cs5535_mfgpt_chip.base = pci_resource_start(pdev, MFGPT_BAR); + cs5535_mfgpt_chip.pdev = pdev; + spin_lock_init(&cs5535_mfgpt_chip.lock); + + dev_info(&pdev->dev, "allocated PCI BAR #%d: base 0x%llx\n", MFGPT_BAR, + (unsigned long long) cs5535_mfgpt_chip.base); + + /* detect the available timers */ + t = scan_timers(&cs5535_mfgpt_chip); + dev_info(&pdev->dev, DRV_NAME ": %d MFGPT timers available\n", t); + cs5535_mfgpt_chip.initialized = 1; + return 0; + +done: + return err; +} + +static struct pci_device_id cs5535_mfgpt_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_CS5535_ISA) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA) }, + { 0, }, +}; +MODULE_DEVICE_TABLE(pci, cs5535_mfgpt_pci_tbl); + +/* + * Just like with the cs5535-gpio driver, we can't use the standard PCI driver + * registration stuff. It only allows only one driver to bind to each PCI + * device, and we want the GPIO and MFGPT drivers to be able to share a PCI + * device. Instead, we manually scan for the PCI device, request a single + * region, and keep track of the devices that we're using. + */ + +static int __init cs5535_mfgpt_scan_pci(void) +{ + struct pci_dev *pdev; + int err = -ENODEV; + int i; + + for (i = 0; i < ARRAY_SIZE(cs5535_mfgpt_pci_tbl); i++) { + pdev = pci_get_device(cs5535_mfgpt_pci_tbl[i].vendor, + cs5535_mfgpt_pci_tbl[i].device, NULL); + if (pdev) { + err = cs5535_mfgpt_probe(pdev, + &cs5535_mfgpt_pci_tbl[i]); + if (err) + pci_dev_put(pdev); + + /* we only support a single CS5535/6 southbridge */ + break; + } + } + + return err; +} + +static int __init cs5535_mfgpt_init(void) +{ + return cs5535_mfgpt_scan_pci(); +} + +module_init(cs5535_mfgpt_init); + +MODULE_AUTHOR("Andres Salomon "); +MODULE_DESCRIPTION("CS5535/CS5536 MFGPT timer driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/cs5535.h b/include/linux/cs5535.h index cfea689dfa34..30ecb04fa570 100644 --- a/include/linux/cs5535.h +++ b/include/linux/cs5535.h @@ -18,6 +18,16 @@ #define MSR_LBAR_ACPI 0x5140000E #define MSR_LBAR_PMS 0x5140000F +#define MSR_PIC_YSEL_LOW 0x51400020 +#define MSR_PIC_YSEL_HIGH 0x51400021 +#define MSR_PIC_ZSEL_LOW 0x51400022 +#define MSR_PIC_ZSEL_HIGH 0x51400023 +#define MSR_PIC_IRQM_LPC 0x51400025 + +#define MSR_MFGPT_IRQ 0x51400028 +#define MSR_MFGPT_NR 0x51400029 +#define MSR_MFGPT_SETUP 0x5140002B + /* resource sizes */ #define LBAR_GPIO_SIZE 0xFF #define LBAR_MFGPT_SIZE 0x40 @@ -55,4 +65,61 @@ void cs5535_gpio_set(unsigned offset, unsigned int reg); void cs5535_gpio_clear(unsigned offset, unsigned int reg); int cs5535_gpio_isset(unsigned offset, unsigned int reg); +/* MFGPTs */ + +#define MFGPT_MAX_TIMERS 8 +#define MFGPT_TIMER_ANY (-1) + +#define MFGPT_DOMAIN_WORKING 1 +#define MFGPT_DOMAIN_STANDBY 2 +#define MFGPT_DOMAIN_ANY (MFGPT_DOMAIN_WORKING | MFGPT_DOMAIN_STANDBY) + +#define MFGPT_CMP1 0 +#define MFGPT_CMP2 1 + +#define MFGPT_EVENT_IRQ 0 +#define MFGPT_EVENT_NMI 1 +#define MFGPT_EVENT_RESET 3 + +#define MFGPT_REG_CMP1 0 +#define MFGPT_REG_CMP2 2 +#define MFGPT_REG_COUNTER 4 +#define MFGPT_REG_SETUP 6 + +#define MFGPT_SETUP_CNTEN (1 << 15) +#define MFGPT_SETUP_CMP2 (1 << 14) +#define MFGPT_SETUP_CMP1 (1 << 13) +#define MFGPT_SETUP_SETUP (1 << 12) +#define MFGPT_SETUP_STOPEN (1 << 11) +#define MFGPT_SETUP_EXTEN (1 << 10) +#define MFGPT_SETUP_REVEN (1 << 5) +#define MFGPT_SETUP_CLKSEL (1 << 4) + +struct cs5535_mfgpt_timer; + +extern uint16_t cs5535_mfgpt_read(struct cs5535_mfgpt_timer *timer, + uint16_t reg); +extern void cs5535_mfgpt_write(struct cs5535_mfgpt_timer *timer, uint16_t reg, + uint16_t value); + +extern int cs5535_mfgpt_toggle_event(struct cs5535_mfgpt_timer *timer, int cmp, + int event, int enable); +extern int cs5535_mfgpt_set_irq(struct cs5535_mfgpt_timer *timer, int cmp, + int *irq, int enable); +extern struct cs5535_mfgpt_timer *cs5535_mfgpt_alloc_timer(int timer, + int domain); +extern void cs5535_mfgpt_free_timer(struct cs5535_mfgpt_timer *timer); + +static inline int cs5535_mfgpt_setup_irq(struct cs5535_mfgpt_timer *timer, + int cmp, int *irq) +{ + return cs5535_mfgpt_set_irq(timer, cmp, irq, 1); +} + +static inline int cs5535_mfgpt_release_irq(struct cs5535_mfgpt_timer *timer, + int cmp, int *irq) +{ + return cs5535_mfgpt_set_irq(timer, cmp, irq, 0); +} + #endif -- cgit v1.2.3 From 2e8c12436f540d3c40137ebf10268803dc972f6a Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Mon, 14 Dec 2009 18:00:39 -0800 Subject: cs5535: move the DIVIL MSR definition into linux/cs5535.h The only thing that uses this is the reboot_fixups code. Signed-off-by: Andres Salomon Cc: Jordan Crouse Cc: Ingo Molnar Cc: Thomas Gleixner Cc: john stultz Cc: Chris Ball Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/geode.h | 2 -- arch/x86/kernel/reboot_fixups_32.c | 2 +- include/linux/cs5535.h | 2 ++ 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/geode.h b/arch/x86/include/asm/geode.h index 547e9642642a..976b3f11c009 100644 --- a/arch/x86/include/asm/geode.h +++ b/arch/x86/include/asm/geode.h @@ -45,8 +45,6 @@ extern int geode_get_dev_base(unsigned int dev); #define MSR_LBAR_ACPI 0x5140000E #define MSR_LBAR_PMS 0x5140000F -#define MSR_DIVIL_SOFT_RESET 0x51400017 - #define MSR_LX_SPARE_MSR 0x80000011 /* DC-specific */ #define MSR_GX_GLD_MSR_CONFIG 0xC0002001 diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c index 201eab63b05f..fda313ebbb03 100644 --- a/arch/x86/kernel/reboot_fixups_32.c +++ b/arch/x86/kernel/reboot_fixups_32.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include static void cs5530a_warm_reset(struct pci_dev *dev) { diff --git a/include/linux/cs5535.h b/include/linux/cs5535.h index 30ecb04fa570..39e93e8ed95d 100644 --- a/include/linux/cs5535.h +++ b/include/linux/cs5535.h @@ -18,6 +18,8 @@ #define MSR_LBAR_ACPI 0x5140000E #define MSR_LBAR_PMS 0x5140000F +#define MSR_DIVIL_SOFT_RESET 0x51400017 + #define MSR_PIC_YSEL_LOW 0x51400020 #define MSR_PIC_YSEL_HIGH 0x51400021 #define MSR_PIC_ZSEL_LOW 0x51400022 -- cgit v1.2.3 From f060f27007b393bac6e50ee6fc26d8505acf6fe4 Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Mon, 14 Dec 2009 18:00:40 -0800 Subject: cs5535: move VSA2 checks into linux/cs5535.h Signed-off-by: Andres Salomon Cc: Jordan Crouse Cc: Ingo Molnar Cc: Thomas Gleixner Cc: john stultz Cc: Chris Ball Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/geode.h | 19 ------------------- arch/x86/kernel/geode_32.c | 22 ---------------------- arch/x86/kernel/olpc.c | 4 ++-- drivers/video/geode/display_gx.c | 2 +- drivers/video/geode/lxfb_ops.c | 2 +- include/linux/cs5535.h | 32 ++++++++++++++++++++++++++++++++ 6 files changed, 36 insertions(+), 45 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/geode.h b/arch/x86/include/asm/geode.h index 976b3f11c009..df1eaf87426a 100644 --- a/arch/x86/include/asm/geode.h +++ b/arch/x86/include/asm/geode.h @@ -95,16 +95,6 @@ extern int geode_get_dev_base(unsigned int dev); #define PM_AWKD 0x50 #define PM_SSC 0x54 -/* VSA2 magic values */ - -#define VSA_VRC_INDEX 0xAC1C -#define VSA_VRC_DATA 0xAC1E -#define VSA_VR_UNLOCK 0xFC53 /* unlock virtual register */ -#define VSA_VR_SIGNATURE 0x0003 -#define VSA_VR_MEM_SIZE 0x0200 -#define AMD_VSA_SIG 0x4132 /* signature is ascii 'VSA2' */ -#define GSW_VSA_SIG 0x534d /* General Software signature */ - static inline u32 geode_gpio(unsigned int nr) { BUG_ON(nr > 28); @@ -148,15 +138,6 @@ static inline int is_geode(void) return (is_geode_gx() || is_geode_lx()); } -#ifdef CONFIG_MGEODE_LX -extern int geode_has_vsa2(void); -#else -static inline int geode_has_vsa2(void) -{ - return 0; -} -#endif - static inline void geode_mfgpt_write(int timer, u16 reg, u16 value) { u32 base = geode_get_dev_base(GEODE_DEV_MFGPT); diff --git a/arch/x86/kernel/geode_32.c b/arch/x86/kernel/geode_32.c index 9b08e852fd1a..9dad6ca6cd70 100644 --- a/arch/x86/kernel/geode_32.c +++ b/arch/x86/kernel/geode_32.c @@ -161,28 +161,6 @@ void geode_gpio_setup_event(unsigned int gpio, int pair, int pme) } EXPORT_SYMBOL_GPL(geode_gpio_setup_event); -int geode_has_vsa2(void) -{ - static int has_vsa2 = -1; - - if (has_vsa2 == -1) { - u16 val; - - /* - * The VSA has virtual registers that we can query for a - * signature. - */ - outw(VSA_VR_UNLOCK, VSA_VRC_INDEX); - outw(VSA_VR_SIGNATURE, VSA_VRC_INDEX); - - val = inw(VSA_VRC_DATA); - has_vsa2 = (val == AMD_VSA_SIG || val == GSW_VSA_SIG); - } - - return has_vsa2; -} -EXPORT_SYMBOL_GPL(geode_has_vsa2); - static int __init geode_southbridge_init(void) { if (!is_geode()) diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c index 4006c522adc7..9d1d263f786f 100644 --- a/arch/x86/kernel/olpc.c +++ b/arch/x86/kernel/olpc.c @@ -212,7 +212,7 @@ static int __init olpc_init(void) unsigned char *romsig; /* The ioremap check is dangerous; limit what we run it on */ - if (!is_geode() || geode_has_vsa2()) + if (!is_geode() || cs5535_has_vsa2()) return 0; spin_lock_init(&ec_lock); @@ -244,7 +244,7 @@ static int __init olpc_init(void) (unsigned char *) &olpc_platform_info.ecver, 1); /* check to see if the VSA exists */ - if (geode_has_vsa2()) + if (cs5535_has_vsa2()) olpc_platform_info.flags |= OLPC_F_VSA; printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n", diff --git a/drivers/video/geode/display_gx.c b/drivers/video/geode/display_gx.c index e759895bf3d3..3ddf055e302e 100644 --- a/drivers/video/geode/display_gx.c +++ b/drivers/video/geode/display_gx.c @@ -25,7 +25,7 @@ unsigned int gx_frame_buffer_size(void) { unsigned int val; - if (!geode_has_vsa2()) { + if (!cs5535_has_vsa2()) { uint32_t hi, lo; /* The number of pages is (PMAX - PMIN)+1 */ diff --git a/drivers/video/geode/lxfb_ops.c b/drivers/video/geode/lxfb_ops.c index b1cd49c99356..fe1ee7cbccda 100644 --- a/drivers/video/geode/lxfb_ops.c +++ b/drivers/video/geode/lxfb_ops.c @@ -307,7 +307,7 @@ unsigned int lx_framebuffer_size(void) { unsigned int val; - if (!geode_has_vsa2()) { + if (!cs5535_has_vsa2()) { uint32_t hi, lo; /* The number of pages is (PMAX - PMIN)+1 */ diff --git a/include/linux/cs5535.h b/include/linux/cs5535.h index 39e93e8ed95d..eb34108a608b 100644 --- a/include/linux/cs5535.h +++ b/include/linux/cs5535.h @@ -36,6 +36,38 @@ #define LBAR_ACPI_SIZE 0x40 #define LBAR_PMS_SIZE 0x80 +/* VSA2 magic values */ +#define VSA_VRC_INDEX 0xAC1C +#define VSA_VRC_DATA 0xAC1E +#define VSA_VR_UNLOCK 0xFC53 /* unlock virtual register */ +#define VSA_VR_SIGNATURE 0x0003 +#define VSA_VR_MEM_SIZE 0x0200 +#define AMD_VSA_SIG 0x4132 /* signature is ascii 'VSA2' */ +#define GSW_VSA_SIG 0x534d /* General Software signature */ + +#include + +static inline int cs5535_has_vsa2(void) +{ + static int has_vsa2 = -1; + + if (has_vsa2 == -1) { + uint16_t val; + + /* + * The VSA has virtual registers that we can query for a + * signature. + */ + outw(VSA_VR_UNLOCK, VSA_VRC_INDEX); + outw(VSA_VR_SIGNATURE, VSA_VRC_INDEX); + + val = inw(VSA_VRC_DATA); + has_vsa2 = (val == AMD_VSA_SIG || val == GSW_VSA_SIG); + } + + return has_vsa2; +} + /* GPIOs */ #define GPIO_OUTPUT_VAL 0x00 #define GPIO_OUTPUT_ENABLE 0x04 -- cgit v1.2.3 From f3a57a60d3e107d17aebb9e52b61c503e5bc14f9 Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Mon, 14 Dec 2009 18:00:40 -0800 Subject: cs5535: define lxfb/gxfb MSRs in linux/cs5535.h ..and include them in the lxfb/gxfb drivers rather than asm/geode.h (where possible). Signed-off-by: Andres Salomon Cc: Jordan Crouse Cc: Ingo Molnar Cc: Thomas Gleixner Cc: john stultz Cc: Chris Ball Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/geode.h | 13 ------------- drivers/video/geode/display_gx.c | 2 +- drivers/video/geode/gxfb.h | 2 +- drivers/video/geode/gxfb_core.c | 2 +- drivers/video/geode/lxfb.h | 2 +- drivers/video/geode/lxfb_ops.c | 2 +- drivers/video/geode/suspend_gx.c | 2 +- drivers/video/geode/video_gx.c | 2 +- include/linux/cs5535.h | 13 +++++++++++++ 9 files changed, 20 insertions(+), 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/geode.h b/arch/x86/include/asm/geode.h index df1eaf87426a..ae104da6ad5a 100644 --- a/arch/x86/include/asm/geode.h +++ b/arch/x86/include/asm/geode.h @@ -31,25 +31,12 @@ extern int geode_get_dev_base(unsigned int dev); /* MSRS */ -#define MSR_GLIU_P2D_RO0 0x10000029 - -#define MSR_LX_GLD_MSR_CONFIG 0x48002001 -#define MSR_LX_MSR_PADSEL 0x48002011 /* NOT 0x48000011; the data - * sheet has the wrong value */ -#define MSR_GLCP_SYS_RSTPLL 0x4C000014 -#define MSR_GLCP_DOTPLL 0x4C000015 - #define MSR_LBAR_SMB 0x5140000B #define MSR_LBAR_GPIO 0x5140000C #define MSR_LBAR_MFGPT 0x5140000D #define MSR_LBAR_ACPI 0x5140000E #define MSR_LBAR_PMS 0x5140000F -#define MSR_LX_SPARE_MSR 0x80000011 /* DC-specific */ - -#define MSR_GX_GLD_MSR_CONFIG 0xC0002001 -#define MSR_GX_MSR_PADSEL 0xC0002011 - /* Resource Sizes */ #define LBAR_GPIO_SIZE 0xFF diff --git a/drivers/video/geode/display_gx.c b/drivers/video/geode/display_gx.c index 3ddf055e302e..f0af911a096d 100644 --- a/drivers/video/geode/display_gx.c +++ b/drivers/video/geode/display_gx.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include "gxfb.h" diff --git a/drivers/video/geode/gxfb.h b/drivers/video/geode/gxfb.h index 16a96f8fd8c5..d19e9378b0c0 100644 --- a/drivers/video/geode/gxfb.h +++ b/drivers/video/geode/gxfb.h @@ -340,7 +340,7 @@ static inline void write_fp(struct gxfb_par *par, int reg, uint32_t val) } -/* MSRs are defined in asm/geode.h; their bitfields are here */ +/* MSRs are defined in linux/cs5535.h; their bitfields are here */ #define MSR_GLCP_SYS_RSTPLL_DOTPOSTDIV3 (1 << 3) #define MSR_GLCP_SYS_RSTPLL_DOTPREMULT2 (1 << 2) diff --git a/drivers/video/geode/gxfb_core.c b/drivers/video/geode/gxfb_core.c index 2552cac39e1c..b3e639d1e12c 100644 --- a/drivers/video/geode/gxfb_core.c +++ b/drivers/video/geode/gxfb_core.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include "gxfb.h" diff --git a/drivers/video/geode/lxfb.h b/drivers/video/geode/lxfb.h index 6a51448fd3f7..fc68a8b0a144 100644 --- a/drivers/video/geode/lxfb.h +++ b/drivers/video/geode/lxfb.h @@ -409,7 +409,7 @@ static inline void write_fp(struct lxfb_par *par, int reg, uint32_t val) } -/* MSRs are defined in asm/geode.h; their bitfields are here */ +/* MSRs are defined in linux/cs5535.h; their bitfields are here */ #define MSR_GLCP_DOTPLL_LOCK (1 << 25) /* r/o */ #define MSR_GLCP_DOTPLL_HALFPIX (1 << 24) diff --git a/drivers/video/geode/lxfb_ops.c b/drivers/video/geode/lxfb_ops.c index fe1ee7cbccda..0e5d8c7c3eba 100644 --- a/drivers/video/geode/lxfb_ops.c +++ b/drivers/video/geode/lxfb_ops.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include "lxfb.h" diff --git a/drivers/video/geode/suspend_gx.c b/drivers/video/geode/suspend_gx.c index 9aff32ef8bb6..1bb043d70c64 100644 --- a/drivers/video/geode/suspend_gx.c +++ b/drivers/video/geode/suspend_gx.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include "gxfb.h" diff --git a/drivers/video/geode/video_gx.c b/drivers/video/geode/video_gx.c index b8d52a8360db..6082f653c68a 100644 --- a/drivers/video/geode/video_gx.c +++ b/drivers/video/geode/video_gx.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include "gxfb.h" diff --git a/include/linux/cs5535.h b/include/linux/cs5535.h index eb34108a608b..d5a1d4810b80 100644 --- a/include/linux/cs5535.h +++ b/include/linux/cs5535.h @@ -12,6 +12,14 @@ #define _CS5535_H /* MSRs */ +#define MSR_GLIU_P2D_RO0 0x10000029 + +#define MSR_LX_GLD_MSR_CONFIG 0x48002001 +#define MSR_LX_MSR_PADSEL 0x48002011 /* NOT 0x48000011; the data + * sheet has the wrong value */ +#define MSR_GLCP_SYS_RSTPLL 0x4C000014 +#define MSR_GLCP_DOTPLL 0x4C000015 + #define MSR_LBAR_SMB 0x5140000B #define MSR_LBAR_GPIO 0x5140000C #define MSR_LBAR_MFGPT 0x5140000D @@ -30,6 +38,11 @@ #define MSR_MFGPT_NR 0x51400029 #define MSR_MFGPT_SETUP 0x5140002B +#define MSR_LX_SPARE_MSR 0x80000011 /* DC-specific */ + +#define MSR_GX_GLD_MSR_CONFIG 0xC0002001 +#define MSR_GX_MSR_PADSEL 0xC0002011 + /* resource sizes */ #define LBAR_GPIO_SIZE 0xFF #define LBAR_MFGPT_SIZE 0x40 -- cgit v1.2.3 From c95d1e53ed89b75a4d7b68d1cbae4607b1479243 Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Mon, 14 Dec 2009 18:00:41 -0800 Subject: cs5535: drop the Geode-specific MFGPT/GPIO code With generic modular drivers handling all of this stuff, the geode-specific code can go away. The cs5535-gpio, cs5535-mfgpt, and cs5535-clockevt drivers now handle this. Signed-off-by: Andres Salomon Cc: Jordan Crouse Cc: Ingo Molnar Cc: Thomas Gleixner Cc: john stultz Cc: Chris Ball Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 10 -- arch/x86/include/asm/geode.h | 117 ------------ arch/x86/kernel/Makefile | 1 - arch/x86/kernel/geode_32.c | 174 ------------------ arch/x86/kernel/mfgpt_32.c | 410 ------------------------------------------- drivers/gpio/Kconfig | 2 +- drivers/misc/Kconfig | 2 +- 7 files changed, 2 insertions(+), 714 deletions(-) delete mode 100644 arch/x86/kernel/geode_32.c delete mode 100644 arch/x86/kernel/mfgpt_32.c (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6ad0985e8d76..3b2a5aca4edb 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2012,16 +2012,6 @@ config SCx200HR_TIMER processor goes idle (as is done by the scheduler). The other workaround is idle=poll boot option. -config GEODE_MFGPT_TIMER - def_bool y - prompt "Geode Multi-Function General Purpose Timer (MFGPT) events" - depends on MGEODE_LX && GENERIC_TIME && GENERIC_CLOCKEVENTS - ---help--- - This driver provides a clock event source based on the MFGPT - timer(s) in the CS5535 and CS5536 companion chip for the geode. - MFGPTs have a better resolution and max interval than the - generic PIT, and are suitable for use as high-res timers. - config OLPC bool "One Laptop Per Child support" select GPIOLIB diff --git a/arch/x86/include/asm/geode.h b/arch/x86/include/asm/geode.h index ae104da6ad5a..7cd73552a4e8 100644 --- a/arch/x86/include/asm/geode.h +++ b/arch/x86/include/asm/geode.h @@ -14,98 +14,6 @@ #include #include -/* Generic southbridge functions */ - -#define GEODE_DEV_PMS 0 -#define GEODE_DEV_ACPI 1 -#define GEODE_DEV_GPIO 2 -#define GEODE_DEV_MFGPT 3 - -extern int geode_get_dev_base(unsigned int dev); - -/* Useful macros */ -#define geode_pms_base() geode_get_dev_base(GEODE_DEV_PMS) -#define geode_acpi_base() geode_get_dev_base(GEODE_DEV_ACPI) -#define geode_gpio_base() geode_get_dev_base(GEODE_DEV_GPIO) -#define geode_mfgpt_base() geode_get_dev_base(GEODE_DEV_MFGPT) - -/* MSRS */ - -#define MSR_LBAR_SMB 0x5140000B -#define MSR_LBAR_GPIO 0x5140000C -#define MSR_LBAR_MFGPT 0x5140000D -#define MSR_LBAR_ACPI 0x5140000E -#define MSR_LBAR_PMS 0x5140000F - -/* Resource Sizes */ - -#define LBAR_GPIO_SIZE 0xFF -#define LBAR_MFGPT_SIZE 0x40 -#define LBAR_ACPI_SIZE 0x40 -#define LBAR_PMS_SIZE 0x80 - -/* ACPI registers (PMS block) */ - -/* - * PM1_EN is only valid when VSA is enabled for 16 bit reads. - * When VSA is not enabled, *always* read both PM1_STS and PM1_EN - * with a 32 bit read at offset 0x0 - */ - -#define PM1_STS 0x00 -#define PM1_EN 0x02 -#define PM1_CNT 0x08 -#define PM2_CNT 0x0C -#define PM_TMR 0x10 -#define PM_GPE0_STS 0x18 -#define PM_GPE0_EN 0x1C - -/* PMC registers (PMS block) */ - -#define PM_SSD 0x00 -#define PM_SCXA 0x04 -#define PM_SCYA 0x08 -#define PM_OUT_SLPCTL 0x0C -#define PM_SCLK 0x10 -#define PM_SED 0x1 -#define PM_SCXD 0x18 -#define PM_SCYD 0x1C -#define PM_IN_SLPCTL 0x20 -#define PM_WKD 0x30 -#define PM_WKXD 0x34 -#define PM_RD 0x38 -#define PM_WKXA 0x3C -#define PM_FSD 0x40 -#define PM_TSD 0x44 -#define PM_PSD 0x48 -#define PM_NWKD 0x4C -#define PM_AWKD 0x50 -#define PM_SSC 0x54 - -static inline u32 geode_gpio(unsigned int nr) -{ - BUG_ON(nr > 28); - return 1 << nr; -} - -extern void geode_gpio_set(u32, unsigned int); -extern void geode_gpio_clear(u32, unsigned int); -extern int geode_gpio_isset(u32, unsigned int); -extern void geode_gpio_setup_event(unsigned int, int, int); -extern void geode_gpio_set_irq(unsigned int, unsigned int); - -static inline void geode_gpio_event_irq(unsigned int gpio, int pair) -{ - geode_gpio_setup_event(gpio, pair, 0); -} - -static inline void geode_gpio_event_pme(unsigned int gpio, int pair) -{ - geode_gpio_setup_event(gpio, pair, 1); -} - -/* Specific geode tests */ - static inline int is_geode_gx(void) { return ((boot_cpu_data.x86_vendor == X86_VENDOR_NSC) && @@ -125,29 +33,4 @@ static inline int is_geode(void) return (is_geode_gx() || is_geode_lx()); } -static inline void geode_mfgpt_write(int timer, u16 reg, u16 value) -{ - u32 base = geode_get_dev_base(GEODE_DEV_MFGPT); - outw(value, base + reg + (timer * 8)); -} - -static inline u16 geode_mfgpt_read(int timer, u16 reg) -{ - u32 base = geode_get_dev_base(GEODE_DEV_MFGPT); - return inw(base + reg + (timer * 8)); -} - -extern int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable); -extern int geode_mfgpt_set_irq(int timer, int cmp, int *irq, int enable); -extern int geode_mfgpt_alloc_timer(int timer, int domain); - -#define geode_mfgpt_setup_irq(t, c, i) geode_mfgpt_set_irq((t), (c), (i), 1) -#define geode_mfgpt_release_irq(t, c, i) geode_mfgpt_set_irq((t), (c), (i), 0) - -#ifdef CONFIG_GEODE_MFGPT_TIMER -extern int __init mfgpt_timer_setup(void); -#else -static inline int mfgpt_timer_setup(void) { return 0; } -#endif - #endif /* _ASM_X86_GEODE_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 4f2e66e29ecc..d87f09bc5a52 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -89,7 +89,6 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_HPET_TIMER) += hpet.o obj-$(CONFIG_K8_NB) += k8.o -obj-$(CONFIG_MGEODE_LX) += geode_32.o mfgpt_32.o obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o diff --git a/arch/x86/kernel/geode_32.c b/arch/x86/kernel/geode_32.c deleted file mode 100644 index 9dad6ca6cd70..000000000000 --- a/arch/x86/kernel/geode_32.c +++ /dev/null @@ -1,174 +0,0 @@ -/* - * AMD Geode southbridge support code - * Copyright (C) 2006, Advanced Micro Devices, Inc. - * Copyright (C) 2007, Andres Salomon - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public License - * as published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include - -static struct { - char *name; - u32 msr; - int size; - u32 base; -} lbars[] = { - { "geode-pms", MSR_LBAR_PMS, LBAR_PMS_SIZE, 0 }, - { "geode-acpi", MSR_LBAR_ACPI, LBAR_ACPI_SIZE, 0 }, - { "geode-gpio", MSR_LBAR_GPIO, LBAR_GPIO_SIZE, 0 }, - { "geode-mfgpt", MSR_LBAR_MFGPT, LBAR_MFGPT_SIZE, 0 } -}; - -static void __init init_lbars(void) -{ - u32 lo, hi; - int i; - - for (i = 0; i < ARRAY_SIZE(lbars); i++) { - rdmsr(lbars[i].msr, lo, hi); - if (hi & 0x01) - lbars[i].base = lo & 0x0000ffff; - - if (lbars[i].base == 0) - printk(KERN_ERR "geode: Couldn't initialize '%s'\n", - lbars[i].name); - } -} - -int geode_get_dev_base(unsigned int dev) -{ - BUG_ON(dev >= ARRAY_SIZE(lbars)); - return lbars[dev].base; -} -EXPORT_SYMBOL_GPL(geode_get_dev_base); - -/* === GPIO API === */ - -void geode_gpio_set(u32 gpio, unsigned int reg) -{ - u32 base = geode_get_dev_base(GEODE_DEV_GPIO); - - if (!base) - return; - - /* low bank register */ - if (gpio & 0xFFFF) - outl(gpio & 0xFFFF, base + reg); - /* high bank register */ - gpio >>= 16; - if (gpio) - outl(gpio, base + 0x80 + reg); -} -EXPORT_SYMBOL_GPL(geode_gpio_set); - -void geode_gpio_clear(u32 gpio, unsigned int reg) -{ - u32 base = geode_get_dev_base(GEODE_DEV_GPIO); - - if (!base) - return; - - /* low bank register */ - if (gpio & 0xFFFF) - outl((gpio & 0xFFFF) << 16, base + reg); - /* high bank register */ - gpio &= (0xFFFF << 16); - if (gpio) - outl(gpio, base + 0x80 + reg); -} -EXPORT_SYMBOL_GPL(geode_gpio_clear); - -int geode_gpio_isset(u32 gpio, unsigned int reg) -{ - u32 base = geode_get_dev_base(GEODE_DEV_GPIO); - u32 val; - - if (!base) - return 0; - - /* low bank register */ - if (gpio & 0xFFFF) { - val = inl(base + reg) & (gpio & 0xFFFF); - if ((gpio & 0xFFFF) == val) - return 1; - } - /* high bank register */ - gpio >>= 16; - if (gpio) { - val = inl(base + 0x80 + reg) & gpio; - if (gpio == val) - return 1; - } - return 0; -} -EXPORT_SYMBOL_GPL(geode_gpio_isset); - -void geode_gpio_set_irq(unsigned int group, unsigned int irq) -{ - u32 lo, hi; - - if (group > 7 || irq > 15) - return; - - rdmsr(MSR_PIC_ZSEL_HIGH, lo, hi); - - lo &= ~(0xF << (group * 4)); - lo |= (irq & 0xF) << (group * 4); - - wrmsr(MSR_PIC_ZSEL_HIGH, lo, hi); -} -EXPORT_SYMBOL_GPL(geode_gpio_set_irq); - -void geode_gpio_setup_event(unsigned int gpio, int pair, int pme) -{ - u32 base = geode_get_dev_base(GEODE_DEV_GPIO); - u32 offset, shift, val; - - if (gpio >= 24) - offset = GPIO_MAP_W; - else if (gpio >= 16) - offset = GPIO_MAP_Z; - else if (gpio >= 8) - offset = GPIO_MAP_Y; - else - offset = GPIO_MAP_X; - - shift = (gpio % 8) * 4; - - val = inl(base + offset); - - /* Clear whatever was there before */ - val &= ~(0xF << shift); - - /* And set the new value */ - - val |= ((pair & 7) << shift); - - /* Set the PME bit if this is a PME event */ - - if (pme) - val |= (1 << (shift + 3)); - - outl(val, base + offset); -} -EXPORT_SYMBOL_GPL(geode_gpio_setup_event); - -static int __init geode_southbridge_init(void) -{ - if (!is_geode()) - return -ENODEV; - - init_lbars(); - (void) mfgpt_timer_setup(); - return 0; -} - -postcore_initcall(geode_southbridge_init); diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c deleted file mode 100644 index 2a62d843f015..000000000000 --- a/arch/x86/kernel/mfgpt_32.c +++ /dev/null @@ -1,410 +0,0 @@ -/* - * Driver/API for AMD Geode Multi-Function General Purpose Timers (MFGPT) - * - * Copyright (C) 2006, Advanced Micro Devices, Inc. - * Copyright (C) 2007, Andres Salomon - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public License - * as published by the Free Software Foundation. - * - * The MFGPTs are documented in AMD Geode CS5536 Companion Device Data Book. - */ - -/* - * We are using the 32.768kHz input clock - it's the only one that has the - * ranges we find desirable. The following table lists the suitable - * divisors and the associated Hz, minimum interval and the maximum interval: - * - * Divisor Hz Min Delta (s) Max Delta (s) - * 1 32768 .00048828125 2.000 - * 2 16384 .0009765625 4.000 - * 4 8192 .001953125 8.000 - * 8 4096 .00390625 16.000 - * 16 2048 .0078125 32.000 - * 32 1024 .015625 64.000 - * 64 512 .03125 128.000 - * 128 256 .0625 256.000 - * 256 128 .125 512.000 - */ - -#include -#include -#include -#include - -#define MFGPT_DEFAULT_IRQ 7 - -static struct mfgpt_timer_t { - unsigned int avail:1; -} mfgpt_timers[MFGPT_MAX_TIMERS]; - -/* Selected from the table above */ - -#define MFGPT_DIVISOR 16 -#define MFGPT_SCALE 4 /* divisor = 2^(scale) */ -#define MFGPT_HZ (32768 / MFGPT_DIVISOR) -#define MFGPT_PERIODIC (MFGPT_HZ / HZ) - -/* Allow for disabling of MFGPTs */ -static int disable; -static int __init mfgpt_disable(char *s) -{ - disable = 1; - return 1; -} -__setup("nomfgpt", mfgpt_disable); - -/* Reset the MFGPT timers. This is required by some broken BIOSes which already - * do the same and leave the system in an unstable state. TinyBIOS 0.98 is - * affected at least (0.99 is OK with MFGPT workaround left to off). - */ -static int __init mfgpt_fix(char *s) -{ - u32 val, dummy; - - /* The following udocumented bit resets the MFGPT timers */ - val = 0xFF; dummy = 0; - wrmsr(MSR_MFGPT_SETUP, val, dummy); - return 1; -} -__setup("mfgptfix", mfgpt_fix); - -/* - * Check whether any MFGPTs are available for the kernel to use. In most - * cases, firmware that uses AMD's VSA code will claim all timers during - * bootup; we certainly don't want to take them if they're already in use. - * In other cases (such as with VSAless OpenFirmware), the system firmware - * leaves timers available for us to use. - */ - - -static int timers = -1; - -static void geode_mfgpt_detect(void) -{ - int i; - u16 val; - - timers = 0; - - if (disable) { - printk(KERN_INFO "geode-mfgpt: MFGPT support is disabled\n"); - goto done; - } - - if (!geode_get_dev_base(GEODE_DEV_MFGPT)) { - printk(KERN_INFO "geode-mfgpt: MFGPT LBAR is not set up\n"); - goto done; - } - - for (i = 0; i < MFGPT_MAX_TIMERS; i++) { - val = geode_mfgpt_read(i, MFGPT_REG_SETUP); - if (!(val & MFGPT_SETUP_SETUP)) { - mfgpt_timers[i].avail = 1; - timers++; - } - } - -done: - printk(KERN_INFO "geode-mfgpt: %d MFGPT timers available.\n", timers); -} - -int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable) -{ - u32 msr, mask, value, dummy; - int shift = (cmp == MFGPT_CMP1) ? 0 : 8; - - if (timer < 0 || timer >= MFGPT_MAX_TIMERS) - return -EIO; - - /* - * The register maps for these are described in sections 6.17.1.x of - * the AMD Geode CS5536 Companion Device Data Book. - */ - switch (event) { - case MFGPT_EVENT_RESET: - /* - * XXX: According to the docs, we cannot reset timers above - * 6; that is, resets for 7 and 8 will be ignored. Is this - * a problem? -dilinger - */ - msr = MSR_MFGPT_NR; - mask = 1 << (timer + 24); - break; - - case MFGPT_EVENT_NMI: - msr = MSR_MFGPT_NR; - mask = 1 << (timer + shift); - break; - - case MFGPT_EVENT_IRQ: - msr = MSR_MFGPT_IRQ; - mask = 1 << (timer + shift); - break; - - default: - return -EIO; - } - - rdmsr(msr, value, dummy); - - if (enable) - value |= mask; - else - value &= ~mask; - - wrmsr(msr, value, dummy); - return 0; -} -EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event); - -int geode_mfgpt_set_irq(int timer, int cmp, int *irq, int enable) -{ - u32 zsel, lpc, dummy; - int shift; - - if (timer < 0 || timer >= MFGPT_MAX_TIMERS) - return -EIO; - - /* - * Unfortunately, MFGPTs come in pairs sharing their IRQ lines. If VSA - * is using the same CMP of the timer's Siamese twin, the IRQ is set to - * 2, and we mustn't use nor change it. - * XXX: Likewise, 2 Linux drivers might clash if the 2nd overwrites the - * IRQ of the 1st. This can only happen if forcing an IRQ, calling this - * with *irq==0 is safe. Currently there _are_ no 2 drivers. - */ - rdmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); - shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer % 4) * 4; - if (((zsel >> shift) & 0xF) == 2) - return -EIO; - - /* Choose IRQ: if none supplied, keep IRQ already set or use default */ - if (!*irq) - *irq = (zsel >> shift) & 0xF; - if (!*irq) - *irq = MFGPT_DEFAULT_IRQ; - - /* Can't use IRQ if it's 0 (=disabled), 2, or routed to LPC */ - if (*irq < 1 || *irq == 2 || *irq > 15) - return -EIO; - rdmsr(MSR_PIC_IRQM_LPC, lpc, dummy); - if (lpc & (1 << *irq)) - return -EIO; - - /* All chosen and checked - go for it */ - if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable)) - return -EIO; - if (enable) { - zsel = (zsel & ~(0xF << shift)) | (*irq << shift); - wrmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); - } - - return 0; -} - -static int mfgpt_get(int timer) -{ - mfgpt_timers[timer].avail = 0; - printk(KERN_INFO "geode-mfgpt: Registered timer %d\n", timer); - return timer; -} - -int geode_mfgpt_alloc_timer(int timer, int domain) -{ - int i; - - if (timers == -1) { - /* timers haven't been detected yet */ - geode_mfgpt_detect(); - } - - if (!timers) - return -1; - - if (timer >= MFGPT_MAX_TIMERS) - return -1; - - if (timer < 0) { - /* Try to find an available timer */ - for (i = 0; i < MFGPT_MAX_TIMERS; i++) { - if (mfgpt_timers[i].avail) - return mfgpt_get(i); - - if (i == 5 && domain == MFGPT_DOMAIN_WORKING) - break; - } - } else { - /* If they requested a specific timer, try to honor that */ - if (mfgpt_timers[timer].avail) - return mfgpt_get(timer); - } - - /* No timers available - too bad */ - return -1; -} -EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer); - - -#ifdef CONFIG_GEODE_MFGPT_TIMER - -/* - * The MFPGT timers on the CS5536 provide us with suitable timers to use - * as clock event sources - not as good as a HPET or APIC, but certainly - * better than the PIT. This isn't a general purpose MFGPT driver, but - * a simplified one designed specifically to act as a clock event source. - * For full details about the MFGPT, please consult the CS5536 data sheet. - */ - -#include -#include - -static unsigned int mfgpt_tick_mode = CLOCK_EVT_MODE_SHUTDOWN; -static u16 mfgpt_event_clock; - -static int irq; -static int __init mfgpt_setup(char *str) -{ - get_option(&str, &irq); - return 1; -} -__setup("mfgpt_irq=", mfgpt_setup); - -static void mfgpt_disable_timer(u16 clock) -{ - /* avoid races by clearing CMP1 and CMP2 unconditionally */ - geode_mfgpt_write(clock, MFGPT_REG_SETUP, (u16) ~MFGPT_SETUP_CNTEN | - MFGPT_SETUP_CMP1 | MFGPT_SETUP_CMP2); -} - -static int mfgpt_next_event(unsigned long, struct clock_event_device *); -static void mfgpt_set_mode(enum clock_event_mode, struct clock_event_device *); - -static struct clock_event_device mfgpt_clockevent = { - .name = "mfgpt-timer", - .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, - .set_mode = mfgpt_set_mode, - .set_next_event = mfgpt_next_event, - .rating = 250, - .cpumask = cpu_all_mask, - .shift = 32 -}; - -static void mfgpt_start_timer(u16 delta) -{ - geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_CMP2, (u16) delta); - geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_COUNTER, 0); - - geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP, - MFGPT_SETUP_CNTEN | MFGPT_SETUP_CMP2); -} - -static void mfgpt_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt) -{ - mfgpt_disable_timer(mfgpt_event_clock); - - if (mode == CLOCK_EVT_MODE_PERIODIC) - mfgpt_start_timer(MFGPT_PERIODIC); - - mfgpt_tick_mode = mode; -} - -static int mfgpt_next_event(unsigned long delta, struct clock_event_device *evt) -{ - mfgpt_start_timer(delta); - return 0; -} - -static irqreturn_t mfgpt_tick(int irq, void *dev_id) -{ - u16 val = geode_mfgpt_read(mfgpt_event_clock, MFGPT_REG_SETUP); - - /* See if the interrupt was for us */ - if (!(val & (MFGPT_SETUP_SETUP | MFGPT_SETUP_CMP2 | MFGPT_SETUP_CMP1))) - return IRQ_NONE; - - /* Turn off the clock (and clear the event) */ - mfgpt_disable_timer(mfgpt_event_clock); - - if (mfgpt_tick_mode == CLOCK_EVT_MODE_SHUTDOWN) - return IRQ_HANDLED; - - /* Clear the counter */ - geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_COUNTER, 0); - - /* Restart the clock in periodic mode */ - - if (mfgpt_tick_mode == CLOCK_EVT_MODE_PERIODIC) { - geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP, - MFGPT_SETUP_CNTEN | MFGPT_SETUP_CMP2); - } - - mfgpt_clockevent.event_handler(&mfgpt_clockevent); - return IRQ_HANDLED; -} - -static struct irqaction mfgptirq = { - .handler = mfgpt_tick, - .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER, - .name = "mfgpt-timer" -}; - -int __init mfgpt_timer_setup(void) -{ - int timer, ret; - u16 val; - - timer = geode_mfgpt_alloc_timer(MFGPT_TIMER_ANY, MFGPT_DOMAIN_WORKING); - if (timer < 0) { - printk(KERN_ERR - "mfgpt-timer: Could not allocate a MFPGT timer\n"); - return -ENODEV; - } - - mfgpt_event_clock = timer; - - /* Set up the IRQ on the MFGPT side */ - if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, &irq)) { - printk(KERN_ERR "mfgpt-timer: Could not set up IRQ %d\n", irq); - return -EIO; - } - - /* And register it with the kernel */ - ret = setup_irq(irq, &mfgptirq); - - if (ret) { - printk(KERN_ERR - "mfgpt-timer: Unable to set up the interrupt.\n"); - goto err; - } - - /* Set the clock scale and enable the event mode for CMP2 */ - val = MFGPT_SCALE | (3 << 8); - - geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP, val); - - /* Set up the clock event */ - mfgpt_clockevent.mult = div_sc(MFGPT_HZ, NSEC_PER_SEC, - mfgpt_clockevent.shift); - mfgpt_clockevent.min_delta_ns = clockevent_delta2ns(0xF, - &mfgpt_clockevent); - mfgpt_clockevent.max_delta_ns = clockevent_delta2ns(0xFFFE, - &mfgpt_clockevent); - - printk(KERN_INFO - "mfgpt-timer: Registering MFGPT timer %d as a clock event, using IRQ %d\n", - timer, irq); - clockevents_register_device(&mfgpt_clockevent); - - return 0; - -err: - geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, &irq); - printk(KERN_ERR - "mfgpt-timer: Unable to set up the MFGPT clock source\n"); - return -EIO; -} - -#endif diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index df4b82d1348e..57ca339924ef 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -176,7 +176,7 @@ comment "PCI GPIO expanders:" config GPIO_CS5535 tristate "AMD CS5535/CS5536 GPIO support" - depends on PCI && !CS5535_GPIO && !MGEODE_LX + depends on PCI && !CS5535_GPIO help The AMD CS5535 and CS5536 southbridges support 28 GPIO pins that can be used for quite a number of things. The CS5535/6 is found on diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index d6d6d846f0d6..59f4ba1b7034 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -189,7 +189,7 @@ config SGI_XP config CS5535_MFGPT tristate "CS5535/CS5536 Geode Multi-Function General Purpose Timer (MFGPT) support" - depends on PCI && !MGEODE_LX + depends on PCI depends on X86 default n help -- cgit v1.2.3 From e7d2860b690d4f3bed6824757c540579638e3d1e Mon Sep 17 00:00:00 2001 From: André Goddard Rosa Date: Mon, 14 Dec 2009 18:01:06 -0800 Subject: tree-wide: convert open calls to remove spaces to skip_spaces() lib function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Makes use of skip_spaces() defined in lib/string.c for removing leading spaces from strings all over the tree. It decreases lib.a code size by 47 bytes and reuses the function tree-wide: text data bss dec hex filename 64688 584 592 65864 10148 (TOTALS-BEFORE) 64641 584 592 65817 10119 (TOTALS-AFTER) Also, while at it, if we see (*str && isspace(*str)), we can be sure to remove the first condition (*str) as the second one (isspace(*str)) also evaluates to 0 whenever *str == 0, making it redundant. In other words, "a char equals zero is never a space". Julia Lawall tried the semantic patch (http://coccinelle.lip6.fr) below, and found occurrences of this pattern on 3 more files: drivers/leds/led-class.c drivers/leds/ledtrig-timer.c drivers/video/output.c @@ expression str; @@ ( // ignore skip_spaces cases while (*str && isspace(*str)) { \(str++;\|++str;\) } | - *str && isspace(*str) ) Signed-off-by: André Goddard Rosa Cc: Julia Lawall Cc: Martin Schwidefsky Cc: Jeff Dike Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Richard Purdie Cc: Neil Brown Cc: Kyle McMartin Cc: Henrique de Moraes Holschuh Cc: David Howells Cc: Cc: Samuel Ortiz Cc: Patrick McHardy Cc: Takashi Iwai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/kernel/debug.c | 3 ++- arch/um/drivers/mconsole_kern.c | 16 +++++++--------- arch/x86/kernel/cpu/mtrr/if.c | 11 ++++------- drivers/leds/led-class.c | 2 +- drivers/leds/ledtrig-timer.c | 4 ++-- drivers/md/dm-table.c | 6 ++---- drivers/md/md.c | 4 ++-- drivers/parisc/pdc_stable.c | 9 +++------ drivers/platform/x86/thinkpad_acpi.c | 7 ++----- drivers/pnp/interface.c | 36 ++++++++++------------------------- drivers/s390/block/dasd_proc.c | 5 +++-- drivers/video/backlight/lcd.c | 4 ++-- drivers/video/display/display-sysfs.c | 2 +- drivers/video/output.c | 2 +- fs/cachefiles/daemon.c | 4 ++-- fs/ext4/super.c | 7 ++----- kernel/params.c | 8 +++----- lib/argv_split.c | 13 +++---------- lib/dynamic_debug.c | 4 ++-- lib/vsprintf.c | 15 ++++----------- net/irda/irnet/irnet.h | 1 + net/irda/irnet/irnet_ppp.c | 8 +++----- net/netfilter/xt_recent.c | 3 +-- sound/pci/hda/hda_hwdep.c | 7 +++---- 24 files changed, 66 insertions(+), 115 deletions(-) (limited to 'arch/x86') diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 071c81f179ef..0168472b2fdf 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -1178,7 +1179,7 @@ debug_get_uint(char *buf) { int rc; - for(; isspace(*buf); buf++); + buf = skip_spaces(buf); rc = simple_strtoul(buf, &buf, 10); if(*buf){ rc = -EINVAL; diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index f0fa47f10e6c..51069245b79a 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -131,7 +132,7 @@ void mconsole_proc(struct mc_request *req) char *ptr = req->request.data, *buf; ptr += strlen("proc"); - while (isspace(*ptr)) ptr++; + ptr = skip_spaces(ptr); proc = get_fs_type("proc"); if (proc == NULL) { @@ -212,8 +213,7 @@ void mconsole_proc(struct mc_request *req) char *ptr = req->request.data; ptr += strlen("proc"); - while (isspace(*ptr)) - ptr++; + ptr = skip_spaces(ptr); snprintf(path, sizeof(path), "/proc/%s", ptr); fd = sys_open(path, 0, 0); @@ -560,8 +560,7 @@ void mconsole_config(struct mc_request *req) int err; ptr += strlen("config"); - while (isspace(*ptr)) - ptr++; + ptr = skip_spaces(ptr); dev = mconsole_find_dev(ptr); if (dev == NULL) { mconsole_reply(req, "Bad configuration option", 1, 0); @@ -588,7 +587,7 @@ void mconsole_remove(struct mc_request *req) int err, start, end, n; ptr += strlen("remove"); - while (isspace(*ptr)) ptr++; + ptr = skip_spaces(ptr); dev = mconsole_find_dev(ptr); if (dev == NULL) { mconsole_reply(req, "Bad remove option", 1, 0); @@ -712,7 +711,7 @@ void mconsole_sysrq(struct mc_request *req) char *ptr = req->request.data; ptr += strlen("sysrq"); - while (isspace(*ptr)) ptr++; + ptr = skip_spaces(ptr); /* * With 'b', the system will shut down without a chance to reply, @@ -757,8 +756,7 @@ void mconsole_stack(struct mc_request *req) */ ptr += strlen("stack"); - while (isspace(*ptr)) - ptr++; + ptr = skip_spaces(ptr); /* * Should really check for multiple pids or reject bad args here diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 3c1b12d461d1..e006e56f699c 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #define LINE_SIZE 80 @@ -133,8 +134,7 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) return -EINVAL; base = simple_strtoull(line + 5, &ptr, 0); - while (isspace(*ptr)) - ptr++; + ptr = skip_spaces(ptr); if (strncmp(ptr, "size=", 5)) return -EINVAL; @@ -142,14 +142,11 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) size = simple_strtoull(ptr + 5, &ptr, 0); if ((base & 0xfff) || (size & 0xfff)) return -EINVAL; - while (isspace(*ptr)) - ptr++; + ptr = skip_spaces(ptr); if (strncmp(ptr, "type=", 5)) return -EINVAL; - ptr += 5; - while (isspace(*ptr)) - ptr++; + ptr = skip_spaces(ptr + 5); for (i = 0; i < MTRR_NUM_TYPES; ++i) { if (strcmp(ptr, mtrr_strings[i])) diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index f2cc13d76810..782f95822eab 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -50,7 +50,7 @@ static ssize_t led_brightness_store(struct device *dev, unsigned long state = simple_strtoul(buf, &after, 10); size_t count = after - buf; - if (*after && isspace(*after)) + if (isspace(*after)) count++; if (count == size) { diff --git a/drivers/leds/ledtrig-timer.c b/drivers/leds/ledtrig-timer.c index 3b83406de752..38b3378be442 100644 --- a/drivers/leds/ledtrig-timer.c +++ b/drivers/leds/ledtrig-timer.c @@ -83,7 +83,7 @@ static ssize_t led_delay_on_store(struct device *dev, unsigned long state = simple_strtoul(buf, &after, 10); size_t count = after - buf; - if (*after && isspace(*after)) + if (isspace(*after)) count++; if (count == size) { @@ -127,7 +127,7 @@ static ssize_t led_delay_off_store(struct device *dev, unsigned long state = simple_strtoul(buf, &after, 10); size_t count = after - buf; - if (*after && isspace(*after)) + if (isspace(*after)) count++; if (count == size) { diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 1a6cb3c7822e..91976e8fae5f 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -600,11 +601,8 @@ int dm_split_args(int *argc, char ***argvp, char *input) return -ENOMEM; while (1) { - start = end; - /* Skip whitespace */ - while (*start && isspace(*start)) - start++; + start = skip_spaces(end); if (!*start) break; /* success, we hit the end */ diff --git a/drivers/md/md.c b/drivers/md/md.c index e1f3c1715cca..6f9148623a4d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -39,6 +39,7 @@ #include /* for invalidate_bdev */ #include #include +#include #include #include #include @@ -3439,8 +3440,7 @@ bitmap_store(mddev_t *mddev, const char *buf, size_t len) } if (*end && !isspace(*end)) break; bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk); - buf = end; - while (isspace(*buf)) buf++; + buf = skip_spaces(end); } bitmap_unplug(mddev->bitmap); /* flush the bits to disk */ out: diff --git a/drivers/parisc/pdc_stable.c b/drivers/parisc/pdc_stable.c index 13a64bc081b6..0bc5d474b168 100644 --- a/drivers/parisc/pdc_stable.c +++ b/drivers/parisc/pdc_stable.c @@ -779,12 +779,9 @@ static ssize_t pdcs_auto_write(struct kobject *kobj, read_unlock(&pathentry->rw_lock); DPRINTK("%s: flags before: 0x%X\n", __func__, flags); - - temp = in; - - while (*temp && isspace(*temp)) - temp++; - + + temp = skip_spaces(in); + c = *temp++ - '0'; if ((c != 0) && (c != 1)) goto parse_error; diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 0ed84806f8ae..cf61d6a8ef6f 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -1006,11 +1006,8 @@ static int parse_strtoul(const char *buf, { char *endp; - while (*buf && isspace(*buf)) - buf++; - *value = simple_strtoul(buf, &endp, 0); - while (*endp && isspace(*endp)) - endp++; + *value = simple_strtoul(skip_spaces(buf), &endp, 0); + endp = skip_spaces(endp); if (*endp || *value > max) return -EINVAL; diff --git a/drivers/pnp/interface.c b/drivers/pnp/interface.c index c3f1c8e9d254..68b0c04987e4 100644 --- a/drivers/pnp/interface.c +++ b/drivers/pnp/interface.c @@ -310,8 +310,7 @@ static ssize_t pnp_set_current_resources(struct device *dmdev, goto done; } - while (isspace(*buf)) - ++buf; + buf = skip_spaces(buf); if (!strnicmp(buf, "disable", 7)) { retval = pnp_disable_dev(dev); goto done; @@ -353,19 +352,13 @@ static ssize_t pnp_set_current_resources(struct device *dmdev, pnp_init_resources(dev); mutex_lock(&pnp_res_mutex); while (1) { - while (isspace(*buf)) - ++buf; + buf = skip_spaces(buf); if (!strnicmp(buf, "io", 2)) { - buf += 2; - while (isspace(*buf)) - ++buf; + buf = skip_spaces(buf + 2); start = simple_strtoul(buf, &buf, 0); - while (isspace(*buf)) - ++buf; + buf = skip_spaces(buf); if (*buf == '-') { - buf += 1; - while (isspace(*buf)) - ++buf; + buf = skip_spaces(buf + 1); end = simple_strtoul(buf, &buf, 0); } else end = start; @@ -373,16 +366,11 @@ static ssize_t pnp_set_current_resources(struct device *dmdev, continue; } if (!strnicmp(buf, "mem", 3)) { - buf += 3; - while (isspace(*buf)) - ++buf; + buf = skip_spaces(buf + 3); start = simple_strtoul(buf, &buf, 0); - while (isspace(*buf)) - ++buf; + buf = skip_spaces(buf); if (*buf == '-') { - buf += 1; - while (isspace(*buf)) - ++buf; + buf = skip_spaces(buf + 1); end = simple_strtoul(buf, &buf, 0); } else end = start; @@ -390,17 +378,13 @@ static ssize_t pnp_set_current_resources(struct device *dmdev, continue; } if (!strnicmp(buf, "irq", 3)) { - buf += 3; - while (isspace(*buf)) - ++buf; + buf = skip_spaces(buf + 3); start = simple_strtoul(buf, &buf, 0); pnp_add_irq_resource(dev, start, 0); continue; } if (!strnicmp(buf, "dma", 3)) { - buf += 3; - while (isspace(*buf)) - ++buf; + buf = skip_spaces(buf + 3); start = simple_strtoul(buf, &buf, 0); pnp_add_dma_resource(dev, start, 0); continue; diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c index 5f23eca82804..6315fbd8e68b 100644 --- a/drivers/s390/block/dasd_proc.c +++ b/drivers/s390/block/dasd_proc.c @@ -14,6 +14,7 @@ #define KMSG_COMPONENT "dasd" #include +#include #include #include #include @@ -272,10 +273,10 @@ dasd_statistics_write(struct file *file, const char __user *user_buf, DBF_EVENT(DBF_DEBUG, "/proc/dasd/statictics: '%s'\n", buffer); /* check for valid verbs */ - for (str = buffer; isspace(*str); str++); + str = skip_spaces(buffer); if (strncmp(str, "set", 3) == 0 && isspace(str[3])) { /* 'set xxx' was given */ - for (str = str + 4; isspace(*str); str++); + str = skip_spaces(str + 4); if (strcmp(str, "on") == 0) { /* switch on statistics profiling */ dasd_profile_level = DASD_PROFILE_ON; diff --git a/drivers/video/backlight/lcd.c b/drivers/video/backlight/lcd.c index a482dd7b0311..9b3be74cee5a 100644 --- a/drivers/video/backlight/lcd.c +++ b/drivers/video/backlight/lcd.c @@ -101,7 +101,7 @@ static ssize_t lcd_store_power(struct device *dev, int power = simple_strtoul(buf, &endp, 0); size_t size = endp - buf; - if (*endp && isspace(*endp)) + if (isspace(*endp)) size++; if (size != count) return -EINVAL; @@ -140,7 +140,7 @@ static ssize_t lcd_store_contrast(struct device *dev, int contrast = simple_strtoul(buf, &endp, 0); size_t size = endp - buf; - if (*endp && isspace(*endp)) + if (isspace(*endp)) size++; if (size != count) return -EINVAL; diff --git a/drivers/video/display/display-sysfs.c b/drivers/video/display/display-sysfs.c index 4830b1bf51e5..80abbf323b99 100644 --- a/drivers/video/display/display-sysfs.c +++ b/drivers/video/display/display-sysfs.c @@ -67,7 +67,7 @@ static ssize_t display_store_contrast(struct device *dev, contrast = simple_strtoul(buf, &endp, 0); size = endp - buf; - if (*endp && isspace(*endp)) + if (isspace(*endp)) size++; if (size != count) diff --git a/drivers/video/output.c b/drivers/video/output.c index 5e6439ae7394..5137aa016b83 100644 --- a/drivers/video/output.c +++ b/drivers/video/output.c @@ -50,7 +50,7 @@ static ssize_t video_output_store_state(struct device *dev, int request_state = simple_strtoul(buf,&endp,0); size_t size = endp - buf; - if (*endp && isspace(*endp)) + if (isspace(*endp)) size++; if (size != count) return -EINVAL; diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index 4618516dd994..c2413561ea75 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "internal.h" @@ -257,8 +258,7 @@ static ssize_t cachefiles_daemon_write(struct file *file, if (args == data) goto error; *args = '\0'; - for (args++; isspace(*args); args++) - continue; + args = skip_spaces(++args); } /* run the appropriate command handler */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 768c111a77ec..827bde1f2594 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2137,11 +2137,8 @@ static int parse_strtoul(const char *buf, { char *endp; - while (*buf && isspace(*buf)) - buf++; - *value = simple_strtoul(buf, &endp, 0); - while (*endp && isspace(*endp)) - endp++; + *value = simple_strtoul(skip_spaces(buf), &endp, 0); + endp = skip_spaces(endp); if (*endp || *value > max) return -EINVAL; diff --git a/kernel/params.c b/kernel/params.c index d656c276508d..cf1b69183127 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -24,6 +24,7 @@ #include #include #include +#include #if 0 #define DEBUGP printk @@ -122,9 +123,7 @@ static char *next_arg(char *args, char **param, char **val) next = args + i; /* Chew up trailing spaces. */ - while (isspace(*next)) - next++; - return next; + return skip_spaces(next); } /* Args looks like "foo=bar,bar2 baz=fuz wiz". */ @@ -139,8 +138,7 @@ int parse_args(const char *name, DEBUGP("Parsing ARGS: %s\n", args); /* Chew leading spaces */ - while (isspace(*args)) - args++; + args = skip_spaces(args); while (*args) { int ret; diff --git a/lib/argv_split.c b/lib/argv_split.c index 5205a8dae5bc..4b1b083f219c 100644 --- a/lib/argv_split.c +++ b/lib/argv_split.c @@ -4,17 +4,10 @@ #include #include +#include #include #include -static const char *skip_sep(const char *cp) -{ - while (*cp && isspace(*cp)) - cp++; - - return cp; -} - static const char *skip_arg(const char *cp) { while (*cp && !isspace(*cp)) @@ -28,7 +21,7 @@ static int count_argc(const char *str) int count = 0; while (*str) { - str = skip_sep(str); + str = skip_spaces(str); if (*str) { count++; str = skip_arg(str); @@ -82,7 +75,7 @@ char **argv_split(gfp_t gfp, const char *str, int *argcp) argvp = argv; while (*str) { - str = skip_sep(str); + str = skip_spaces(str); if (*str) { const char *p = str; diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index e22c148e4b7f..f93502915988 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -209,8 +210,7 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords) char *end; /* Skip leading whitespace */ - while (*buf && isspace(*buf)) - buf++; + buf = skip_spaces(buf); if (!*buf) break; /* oh, it was trailing whitespace */ diff --git a/lib/vsprintf.c b/lib/vsprintf.c index c50733a690f0..7857d4dd62d3 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1766,13 +1766,6 @@ EXPORT_SYMBOL_GPL(bprintf); #endif /* CONFIG_BINARY_PRINTF */ -static noinline char *skip_space(const char *str) -{ - while (isspace(*str)) - ++str; - return (char *)str; -} - /** * vsscanf - Unformat a buffer into a list of arguments * @buf: input buffer @@ -1794,8 +1787,8 @@ int vsscanf(const char *buf, const char *fmt, va_list args) * white space, including none, in the input. */ if (isspace(*fmt)) { - fmt = skip_space(fmt); - str = skip_space(str); + fmt = skip_spaces(++fmt); + str = skip_spaces(str); } /* anything that is not a conversion must match exactly */ @@ -1865,7 +1858,7 @@ int vsscanf(const char *buf, const char *fmt, va_list args) if (field_width == -1) field_width = INT_MAX; /* first, skip leading white space in buffer */ - str = skip_space(str); + str = skip_spaces(str); /* now copy until next white space */ while (*str && !isspace(*str) && field_width--) @@ -1907,7 +1900,7 @@ int vsscanf(const char *buf, const char *fmt, va_list args) /* have some sort of integer conversion. * first, skip white space in buffer. */ - str = skip_space(str); + str = skip_spaces(str); digit = *str; if (is_sign && digit == '-') diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h index b001c361ad30..4300df35d37d 100644 --- a/net/irda/irnet/irnet.h +++ b/net/irda/irnet/irnet.h @@ -249,6 +249,7 @@ #include #include #include /* isspace() */ +#include /* skip_spaces() */ #include #include diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 7dea882dbb75..156020d138b5 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -76,9 +76,8 @@ irnet_ctrl_write(irnet_socket * ap, /* Look at the next command */ start = next; - /* Scrap whitespaces before the command */ - while(isspace(*start)) - start++; + /* Scrap whitespaces before the command */ + start = skip_spaces(start); /* ',' is our command separator */ next = strchr(start, ','); @@ -133,8 +132,7 @@ irnet_ctrl_write(irnet_socket * ap, char * endp; /* Scrap whitespaces before the command */ - while(isspace(*begp)) - begp++; + begp = skip_spaces(begp); /* Convert argument to a number (last arg is the base) */ addr = simple_strtoul(begp, &endp, 16); diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index eb0ceb846527..fc70a49c0afd 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -482,8 +482,7 @@ static ssize_t recent_old_proc_write(struct file *file, if (copy_from_user(buf, input, size)) return -EFAULT; - while (isspace(*c)) - c++; + c = skip_spaces(c); if (size - (c - buf) < 5) return c - buf; diff --git a/sound/pci/hda/hda_hwdep.c b/sound/pci/hda/hda_hwdep.c index d24328661c6a..40ccb419b6e9 100644 --- a/sound/pci/hda/hda_hwdep.c +++ b/sound/pci/hda/hda_hwdep.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include "hda_codec.h" @@ -428,8 +429,7 @@ static int parse_hints(struct hda_codec *codec, const char *buf) char *key, *val; struct hda_hint *hint; - while (isspace(*buf)) - buf++; + buf = skip_spaces(buf); if (!*buf || *buf == '#' || *buf == '\n') return 0; if (*buf == '=') @@ -444,8 +444,7 @@ static int parse_hints(struct hda_codec *codec, const char *buf) return -EINVAL; } *val++ = 0; - while (isspace(*val)) - val++; + val = skip_spaces(val); remove_trail_spaces(key); remove_trail_spaces(val); hint = get_hint(codec, key); -- cgit v1.2.3 From 23637568ad0c9b5ab0ad27d2f2f26d1e9282c527 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 13 Dec 2009 16:04:38 -0600 Subject: x86: Fix kprobes build with non-gawk awk The instruction attribute table generator fails when run by mawk or original-awk: $ mawk -f arch/x86/tools/gen-insn-attr-x86.awk \ arch/x86/lib/x86-opcode-map.txt > /dev/null Semantic error at 240: Second IMM error $ echo $? 1 Line 240 contains "c8: ENTER Iw,Ib", which indicates that this instruction has two immediate operands, the second of which is one byte. The script loops through the immediate operands using a for loop. Unfortunately, there is no guarantee in awk that a for (variable in array) loop will return the indices in increasing order. Internally, both original-awk and mawk iterate over a hash table for this purpose, and both implementations happen to produce the index 2 before 1. The supposed second immediate operand is more than one byte wide, producing the error. So loop over the indices in increasing order instead. As a side-effect, with mawk this means the silly two-entry hash table never has to be built. Signed-off-by: Jonathan Nieder Acked-by Masami Hiramatsu Cc: Jim Keniston Cc: Frederic Weisbecker LKML-Reference: <20091213220437.GA27718@progeny.tock> Signed-off-by: Ingo Molnar --- arch/x86/tools/gen-insn-attr-x86.awk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index e34e92a28eb6..7a6850683c34 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -226,12 +226,12 @@ function add_flags(old,new) { } # convert operands to flags. -function convert_operands(opnd, i,imm,mod) +function convert_operands(count,opnd, i,j,imm,mod) { imm = null mod = null - for (i in opnd) { - i = opnd[i] + for (j = 1; j <= count; j++) { + i = opnd[j] if (match(i, imm_expr) == 1) { if (!imm_flag[i]) semantic_error("Unknown imm opnd: " i) @@ -282,8 +282,8 @@ function convert_operands(opnd, i,imm,mod) # parse one opcode if (match($i, opnd_expr)) { opnd = $i - split($(i++), opnds, ",") - flags = convert_operands(opnds) + count = split($(i++), opnds, ",") + flags = convert_operands(count, opnds) } if (match($i, ext_expr)) ext = $(i++) -- cgit v1.2.3 From 0b962d473af32ec334df271b54ff4973cb2b4c73 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 15 Dec 2009 15:13:07 -0800 Subject: x86, msr/cpuid: Register enough minors for the MSR and CPUID drivers register_chrdev() hardcodes registering 256 minors, presumably to avoid breaking old drivers. However, we need to register enough minors so that we have all possible CPUs. checkpatch warns on this patch, but the patch is correct: NR_CPUS here is a static *upper bound* on the *maximum CPU index* (not *number of CPUs!*) and that is what we want. Reported-and-tested-by: Russ Anderson Cc: Tejun Heo Cc: Alan Cox Cc: Takashi Iwai Cc: Alexander Viro Signed-off-by: H. Peter Anvin LKML-Reference: --- arch/x86/kernel/cpuid.c | 5 +++-- arch/x86/kernel/msr.c | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 7ef24a796992..cb27fd6136c9 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -187,7 +187,8 @@ static int __init cpuid_init(void) int i, err = 0; i = 0; - if (register_chrdev(CPUID_MAJOR, "cpu/cpuid", &cpuid_fops)) { + if (__register_chrdev(CPUID_MAJOR, 0, NR_CPUS, + "cpu/cpuid", &cpuid_fops)) { printk(KERN_ERR "cpuid: unable to get major %d for cpuid\n", CPUID_MAJOR); err = -EBUSY; @@ -216,7 +217,7 @@ out_class: } class_destroy(cpuid_class); out_chrdev: - unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); + __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); out: return err; } diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 572b07eee3f4..4bd93c9b2b27 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -246,7 +246,7 @@ static int __init msr_init(void) int i, err = 0; i = 0; - if (register_chrdev(MSR_MAJOR, "cpu/msr", &msr_fops)) { + if (__register_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr", &msr_fops)) { printk(KERN_ERR "msr: unable to get major %d for msr\n", MSR_MAJOR); err = -EBUSY; @@ -274,7 +274,7 @@ out_class: msr_device_destroy(i); class_destroy(msr_class); out_chrdev: - unregister_chrdev(MSR_MAJOR, "cpu/msr"); + __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); out: return err; } -- cgit v1.2.3 From 5df974009fe513c664303de24725ea0f8b47f12e Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 16 Dec 2009 13:48:04 +0800 Subject: x86: Add IA32_TSC_AUX MSR and use it Clean up write_tsc() and write_tscp_aux() by replacing hardcoded values. No change in functionality. Signed-off-by: Sheng Yang Cc: Avi Kivity Cc: Marcelo Tosatti LKML-Reference: <1260942485-19156-4-git-send-email-sheng@linux.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/include/asm/msr.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 4ffe09b2ad75..ac98d2914ebf 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -12,6 +12,7 @@ #define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ #define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ #define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */ +#define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */ /* EFER bits: */ #define _EFER_SCE 0 /* SYSCALL/SYSRET */ diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 2d228fc9b4b7..cf985aa00660 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -240,9 +240,9 @@ do { \ #define checking_wrmsrl(msr, val) wrmsr_safe((msr), (u32)(val), \ (u32)((val) >> 32)) -#define write_tsc(val1, val2) wrmsr(0x10, (val1), (val2)) +#define write_tsc(val1, val2) wrmsr(MSR_IA32_TSC, (val1), (val2)) -#define write_rdtscp_aux(val) wrmsr(0xc0000103, (val), 0) +#define write_rdtscp_aux(val) wrmsr(MSR_TSC_AUX, (val), 0) struct msr *msrs_alloc(void); void msrs_free(struct msr *msrs); -- cgit v1.2.3 From 7f38551fc3ff0e17a38d6f3f0f8831380a88f3cc Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 15 Dec 2009 16:47:20 -0800 Subject: ptrace: x86: implement user_single_step_siginfo() Suggested by Roland. Implement user_single_step_siginfo() for x86. Extract this code from send_sigtrap(). Since x86 calls tracehook_report_syscall_exit(step => 0) the new helper is not used yet. Signed-off-by: Oleg Nesterov Acked-by: Roland McGrath Cc: Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/ptrace.h | 2 ++ arch/x86/kernel/ptrace.c | 30 +++++++++++++++++++++--------- 2 files changed, 23 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 3d11fd0f44c5..9d369f680321 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -292,6 +292,8 @@ extern void user_enable_block_step(struct task_struct *); #define arch_has_block_step() (boot_cpu_data.x86 >= 6) #endif +#define ARCH_HAS_USER_SINGLE_STEP_INFO + struct user_desc; extern int do_get_thread_area(struct task_struct *p, int idx, struct user_desc __user *info); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 7079ddaf0731..77b60085a810 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1676,21 +1676,33 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) #endif } -void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, - int error_code, int si_code) +static void fill_sigtrap_info(struct task_struct *tsk, + struct pt_regs *regs, + int error_code, int si_code, + struct siginfo *info) { - struct siginfo info; - tsk->thread.trap_no = 1; tsk->thread.error_code = error_code; - memset(&info, 0, sizeof(info)); - info.si_signo = SIGTRAP; - info.si_code = si_code; + memset(info, 0, sizeof(*info)); + info->si_signo = SIGTRAP; + info->si_code = si_code; + info->si_addr = user_mode_vm(regs) ? (void __user *)regs->ip : NULL; +} + +void user_single_step_siginfo(struct task_struct *tsk, + struct pt_regs *regs, + struct siginfo *info) +{ + fill_sigtrap_info(tsk, regs, 0, TRAP_BRKPT, info); +} - /* User-mode ip? */ - info.si_addr = user_mode_vm(regs) ? (void __user *) regs->ip : NULL; +void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, + int error_code, int si_code) +{ + struct siginfo info; + fill_sigtrap_info(tsk, regs, error_code, si_code, &info); /* Send us the fake SIGTRAP */ force_sig_info(SIGTRAP, &info, tsk); } -- cgit v1.2.3 From d51965037325e51f6cd68583413243c3573e47b0 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 15 Dec 2009 16:47:21 -0800 Subject: ptrace: x86: change syscall_trace_leave() to rely on tracehook when stepping Suggested by Roland. Unlike powepc, x86 always calls tracehook_report_syscall_exit(step) with step = 0, and sends the trap by hand. This results in unnecessary SIGTRAP when PTRACE_SINGLESTEP follows the syscall-exit stop. Change syscall_trace_leave() to pass the correct "step" argument to tracehook and remove the send_sigtrap() logic. Signed-off-by: Oleg Nesterov Acked-by: Roland McGrath Cc: Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/ptrace.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 77b60085a810..2779321046bd 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1767,29 +1767,22 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs) asmregparm void syscall_trace_leave(struct pt_regs *regs) { + bool step; + if (unlikely(current->audit_context)) audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_exit(regs, regs->ax); - if (test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(regs, 0); - /* * If TIF_SYSCALL_EMU is set, we only get here because of * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP). * We already reported this syscall instruction in - * syscall_trace_enter(), so don't do any more now. - */ - if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) - return; - - /* - * If we are single-stepping, synthesize a trap to follow the - * system call instruction. + * syscall_trace_enter(). */ - if (test_thread_flag(TIF_SINGLESTEP) && - tracehook_consider_fatal_signal(current, SIGTRAP)) - send_sigtrap(current, regs, 0, TRAP_BRKPT); + step = unlikely(test_thread_flag(TIF_SINGLESTEP)) && + !test_thread_flag(TIF_SYSCALL_EMU); + if (step || test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(regs, step); } -- cgit v1.2.3 From 698ba7b5a3a7be772922340fade365c675b8243f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Dec 2009 16:47:37 -0800 Subject: elf: kill USE_ELF_CORE_DUMP Currently all architectures but microblaze unconditionally define USE_ELF_CORE_DUMP. The microblaze omission seems like an error to me, so let's kill this ifdef and make sure we are the same everywhere. Signed-off-by: Christoph Hellwig Acked-by: Hugh Dickins Cc: Cc: Michal Simek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/elf.h | 1 - arch/arm/include/asm/elf.h | 1 - arch/avr32/include/asm/elf.h | 1 - arch/blackfin/include/asm/elf.h | 1 - arch/cris/include/asm/elf.h | 2 -- arch/frv/include/asm/elf.h | 1 - arch/h8300/include/asm/elf.h | 1 - arch/ia64/ia32/elfcore32.h | 2 -- arch/ia64/include/asm/elf.h | 1 - arch/m32r/include/asm/elf.h | 1 - arch/m68k/include/asm/elf.h | 1 - arch/microblaze/include/asm/elf.h | 1 - arch/mips/include/asm/elf.h | 1 - arch/mn10300/include/asm/elf.h | 1 - arch/parisc/include/asm/elf.h | 1 - arch/powerpc/include/asm/elf.h | 1 - arch/s390/include/asm/elf.h | 1 - arch/score/include/asm/elf.h | 1 - arch/sh/include/asm/elf.h | 1 - arch/sparc/include/asm/elf_32.h | 2 -- arch/sparc/include/asm/elf_64.h | 1 - arch/um/sys-i386/asm/elf.h | 1 - arch/um/sys-ppc/asm/elf.h | 2 -- arch/um/sys-x86_64/asm/elf.h | 1 - arch/x86/include/asm/elf.h | 1 - arch/xtensa/include/asm/elf.h | 1 - fs/binfmt_elf.c | 11 +++-------- fs/binfmt_elf_fdpic.c | 8 ++++---- fs/proc/base.c | 4 ++-- 29 files changed, 9 insertions(+), 44 deletions(-) (limited to 'arch/x86') diff --git a/arch/alpha/include/asm/elf.h b/arch/alpha/include/asm/elf.h index 5c75c1b2352a..9baae8afe8a3 100644 --- a/arch/alpha/include/asm/elf.h +++ b/arch/alpha/include/asm/elf.h @@ -81,7 +81,6 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; #define ELF_DATA ELFDATA2LSB #define ELF_ARCH EM_ALPHA -#define USE_ELF_CORE_DUMP #define ELF_EXEC_PAGESIZE 8192 /* This is the location that an ET_DYN program is loaded if exec'ed. Typical diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index 6aac3f5bb2f3..a399bb5730f1 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -101,7 +101,6 @@ extern int arm_elf_read_implies_exec(const struct elf32_hdr *, int); int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs); #define ELF_CORE_COPY_TASK_REGS dump_task_regs -#define USE_ELF_CORE_DUMP #define ELF_EXEC_PAGESIZE 4096 /* This is the location that an ET_DYN program is loaded if exec'ed. Typical diff --git a/arch/avr32/include/asm/elf.h b/arch/avr32/include/asm/elf.h index d5d1d41c600a..3b3159b710d4 100644 --- a/arch/avr32/include/asm/elf.h +++ b/arch/avr32/include/asm/elf.h @@ -77,7 +77,6 @@ typedef struct user_fpu_struct elf_fpregset_t; #endif #define ELF_ARCH EM_AVR32 -#define USE_ELF_CORE_DUMP #define ELF_EXEC_PAGESIZE 4096 /* This is the location that an ET_DYN program is loaded if exec'ed. Typical diff --git a/arch/blackfin/include/asm/elf.h b/arch/blackfin/include/asm/elf.h index 8e0764c81eaf..5b50f0ecacf8 100644 --- a/arch/blackfin/include/asm/elf.h +++ b/arch/blackfin/include/asm/elf.h @@ -55,7 +55,6 @@ do { \ _regs->p2 = _dynamic_addr; \ } while(0) -#define USE_ELF_CORE_DUMP #define ELF_FDPIC_CORE_EFLAGS EF_BFIN_FDPIC #define ELF_EXEC_PAGESIZE 4096 diff --git a/arch/cris/include/asm/elf.h b/arch/cris/include/asm/elf.h index 0f51b10b9f4f..8a3d8e2b33c1 100644 --- a/arch/cris/include/asm/elf.h +++ b/arch/cris/include/asm/elf.h @@ -64,8 +64,6 @@ typedef unsigned long elf_fpregset_t; #define EF_CRIS_VARIANT_COMMON_V10_V32 0x00000004 /* End of excerpt from {binutils}/include/elf/cris.h. */ -#define USE_ELF_CORE_DUMP - #define ELF_EXEC_PAGESIZE 8192 /* This is the location that an ET_DYN program is loaded if exec'ed. Typical diff --git a/arch/frv/include/asm/elf.h b/arch/frv/include/asm/elf.h index 7bbf6e47f8c8..c3819804a74b 100644 --- a/arch/frv/include/asm/elf.h +++ b/arch/frv/include/asm/elf.h @@ -115,7 +115,6 @@ do { \ __kernel_frame0_ptr->gr29 = 0; \ } while(0) -#define USE_ELF_CORE_DUMP #define CORE_DUMP_USE_REGSET #define ELF_FDPIC_CORE_EFLAGS EF_FRV_FDPIC #define ELF_EXEC_PAGESIZE 16384 diff --git a/arch/h8300/include/asm/elf.h b/arch/h8300/include/asm/elf.h index 94e2284c8816..c24fa250d653 100644 --- a/arch/h8300/include/asm/elf.h +++ b/arch/h8300/include/asm/elf.h @@ -34,7 +34,6 @@ typedef unsigned long elf_fpregset_t; #define ELF_PLAT_INIT(_r) _r->er1 = 0 -#define USE_ELF_CORE_DUMP #define ELF_EXEC_PAGESIZE 4096 /* This is the location that an ET_DYN program is loaded if exec'ed. Typical diff --git a/arch/ia64/ia32/elfcore32.h b/arch/ia64/ia32/elfcore32.h index 9a3abf58cea3..657725742617 100644 --- a/arch/ia64/ia32/elfcore32.h +++ b/arch/ia64/ia32/elfcore32.h @@ -11,8 +11,6 @@ #include #include -#define USE_ELF_CORE_DUMP 1 - /* Override elfcore.h */ #define _LINUX_ELFCORE_H 1 typedef unsigned int elf_greg_t; diff --git a/arch/ia64/include/asm/elf.h b/arch/ia64/include/asm/elf.h index 86eddee029cb..e14108b19c09 100644 --- a/arch/ia64/include/asm/elf.h +++ b/arch/ia64/include/asm/elf.h @@ -25,7 +25,6 @@ #define ELF_DATA ELFDATA2LSB #define ELF_ARCH EM_IA_64 -#define USE_ELF_CORE_DUMP #define CORE_DUMP_USE_REGSET /* Least-significant four bits of ELF header's e_flags are OS-specific. The bits are diff --git a/arch/m32r/include/asm/elf.h b/arch/m32r/include/asm/elf.h index 0cc34c94bf2b..2f85412ef730 100644 --- a/arch/m32r/include/asm/elf.h +++ b/arch/m32r/include/asm/elf.h @@ -102,7 +102,6 @@ typedef elf_fpreg_t elf_fpregset_t; */ #define ELF_PLAT_INIT(_r, load_addr) (_r)->r0 = 0 -#define USE_ELF_CORE_DUMP #define ELF_EXEC_PAGESIZE PAGE_SIZE /* diff --git a/arch/m68k/include/asm/elf.h b/arch/m68k/include/asm/elf.h index 0b0f49eb876b..01c193d91412 100644 --- a/arch/m68k/include/asm/elf.h +++ b/arch/m68k/include/asm/elf.h @@ -59,7 +59,6 @@ typedef struct user_m68kfp_struct elf_fpregset_t; is actually used on ASV. */ #define ELF_PLAT_INIT(_r, load_addr) _r->a1 = 0 -#define USE_ELF_CORE_DUMP #ifndef CONFIG_SUN3 #define ELF_EXEC_PAGESIZE 4096 #else diff --git a/arch/microblaze/include/asm/elf.h b/arch/microblaze/include/asm/elf.h index f92fc0dda006..7d4acf2b278e 100644 --- a/arch/microblaze/include/asm/elf.h +++ b/arch/microblaze/include/asm/elf.h @@ -77,7 +77,6 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; #define ELF_DATA ELFDATA2MSB #endif -#define USE_ELF_CORE_DUMP #define ELF_EXEC_PAGESIZE 4096 diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h index 7990694cda22..7a6a35dbe529 100644 --- a/arch/mips/include/asm/elf.h +++ b/arch/mips/include/asm/elf.h @@ -326,7 +326,6 @@ extern int dump_task_fpu(struct task_struct *, elf_fpregset_t *); #define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) \ dump_task_fpu(tsk, elf_fpregs) -#define USE_ELF_CORE_DUMP #define ELF_EXEC_PAGESIZE PAGE_SIZE /* This yields a mask that user programs can use to figure out what diff --git a/arch/mn10300/include/asm/elf.h b/arch/mn10300/include/asm/elf.h index 75a70aa9fd6f..e5fa97cd9a14 100644 --- a/arch/mn10300/include/asm/elf.h +++ b/arch/mn10300/include/asm/elf.h @@ -77,7 +77,6 @@ do { \ _ur->a1 = 0; _ur->a0 = 0; _ur->d1 = 0; _ur->d0 = 0; \ } while (0) -#define USE_ELF_CORE_DUMP #define CORE_DUMP_USE_REGSET #define ELF_EXEC_PAGESIZE 4096 diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h index 9c802eb4be84..19f6cb1a4a1c 100644 --- a/arch/parisc/include/asm/elf.h +++ b/arch/parisc/include/asm/elf.h @@ -328,7 +328,6 @@ struct pt_regs; /* forward declaration... */ such function. */ #define ELF_PLAT_INIT(_r, load_addr) _r->gr[23] = 0 -#define USE_ELF_CORE_DUMP #define ELF_EXEC_PAGESIZE 4096 /* This is the location that an ET_DYN program is loaded if exec'ed. Typical diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index 014a624f4c8e..17828ad411eb 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -170,7 +170,6 @@ typedef elf_fpreg_t elf_vsrreghalf_t32[ELF_NVSRHALFREG]; #define elf_check_arch(x) ((x)->e_machine == ELF_ARCH) #define compat_elf_check_arch(x) ((x)->e_machine == EM_PPC) -#define USE_ELF_CORE_DUMP #define CORE_DUMP_USE_REGSET #define ELF_EXEC_PAGESIZE PAGE_SIZE diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index e885442c1dfe..354d42616c7e 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -155,7 +155,6 @@ extern unsigned int vdso_enabled; } while (0) #define CORE_DUMP_USE_REGSET -#define USE_ELF_CORE_DUMP #define ELF_EXEC_PAGESIZE 4096 /* This is the location that an ET_DYN program is loaded if exec'ed. Typical diff --git a/arch/score/include/asm/elf.h b/arch/score/include/asm/elf.h index 43526d9fda93..f478ce94181f 100644 --- a/arch/score/include/asm/elf.h +++ b/arch/score/include/asm/elf.h @@ -61,7 +61,6 @@ struct task_struct; struct pt_regs; #define CORE_DUMP_USE_REGSET -#define USE_ELF_CORE_DUMP #define ELF_EXEC_PAGESIZE PAGE_SIZE /* This yields a mask that user programs can use to figure out what diff --git a/arch/sh/include/asm/elf.h b/arch/sh/include/asm/elf.h index ccb1d93bb043..ac04255022b6 100644 --- a/arch/sh/include/asm/elf.h +++ b/arch/sh/include/asm/elf.h @@ -114,7 +114,6 @@ typedef struct user_fpu_struct elf_fpregset_t; */ #define CORE_DUMP_USE_REGSET -#define USE_ELF_CORE_DUMP #define ELF_FDPIC_CORE_EFLAGS EF_SH_FDPIC #define ELF_EXEC_PAGESIZE PAGE_SIZE diff --git a/arch/sparc/include/asm/elf_32.h b/arch/sparc/include/asm/elf_32.h index 381a1b5256d6..4269ca6ad18a 100644 --- a/arch/sparc/include/asm/elf_32.h +++ b/arch/sparc/include/asm/elf_32.h @@ -104,8 +104,6 @@ typedef struct { #define ELF_CLASS ELFCLASS32 #define ELF_DATA ELFDATA2MSB -#define USE_ELF_CORE_DUMP - #define ELF_EXEC_PAGESIZE 4096 diff --git a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h index d42e393078c4..ff66bb88537b 100644 --- a/arch/sparc/include/asm/elf_64.h +++ b/arch/sparc/include/asm/elf_64.h @@ -152,7 +152,6 @@ typedef struct { (x)->e_machine == EM_SPARC32PLUS) #define compat_start_thread start_thread32 -#define USE_ELF_CORE_DUMP #define ELF_EXEC_PAGESIZE PAGE_SIZE /* This is the location that an ET_DYN program is loaded if exec'ed. Typical diff --git a/arch/um/sys-i386/asm/elf.h b/arch/um/sys-i386/asm/elf.h index d0da9d7c5371..770885472ed4 100644 --- a/arch/um/sys-i386/asm/elf.h +++ b/arch/um/sys-i386/asm/elf.h @@ -48,7 +48,6 @@ typedef struct user_i387_struct elf_fpregset_t; PT_REGS_EAX(regs) = 0; \ } while (0) -#define USE_ELF_CORE_DUMP #define ELF_EXEC_PAGESIZE 4096 #define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) diff --git a/arch/um/sys-ppc/asm/elf.h b/arch/um/sys-ppc/asm/elf.h index af9463cd8ce5..8aacaf56508d 100644 --- a/arch/um/sys-ppc/asm/elf.h +++ b/arch/um/sys-ppc/asm/elf.h @@ -17,8 +17,6 @@ extern long elf_aux_hwcap; #define ELF_CLASS ELFCLASS32 #endif -#define USE_ELF_CORE_DUMP - #define R_386_NONE 0 #define R_386_32 1 #define R_386_PC32 2 diff --git a/arch/um/sys-x86_64/asm/elf.h b/arch/um/sys-x86_64/asm/elf.h index 04b9e87c8dad..49655c83efd2 100644 --- a/arch/um/sys-x86_64/asm/elf.h +++ b/arch/um/sys-x86_64/asm/elf.h @@ -104,7 +104,6 @@ extern int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu); clear_thread_flag(TIF_IA32); #endif -#define USE_ELF_CORE_DUMP #define ELF_EXEC_PAGESIZE 4096 #define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 8a024babe5e6..b4501ee223ad 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -239,7 +239,6 @@ extern int force_personality32; #endif /* !CONFIG_X86_32 */ #define CORE_DUMP_USE_REGSET -#define USE_ELF_CORE_DUMP #define ELF_EXEC_PAGESIZE 4096 /* This is the location that an ET_DYN program is loaded if exec'ed. Typical diff --git a/arch/xtensa/include/asm/elf.h b/arch/xtensa/include/asm/elf.h index c3f53e755ca5..5eb6d695e987 100644 --- a/arch/xtensa/include/asm/elf.h +++ b/arch/xtensa/include/asm/elf.h @@ -123,7 +123,6 @@ extern void xtensa_elf_core_copy_regs (xtensa_gregset_t *, struct pt_regs *); #define ELF_CLASS ELFCLASS32 #define ELF_ARCH EM_XTENSA -#define USE_ELF_CORE_DUMP #define ELF_EXEC_PAGESIZE PAGE_SIZE /* diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index d15ea1790bfb..97b6e9efeb7f 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -44,7 +44,7 @@ static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *, * If we don't support core dumping, then supply a NULL so we * don't even try. */ -#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) +#ifdef CONFIG_ELF_CORE static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit); #else #define elf_core_dump NULL @@ -1101,12 +1101,7 @@ out: return error; } -/* - * Note that some platforms still use traditional core dumps and not - * the ELF core dump. Each platform can select it as appropriate. - */ -#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) - +#ifdef CONFIG_ELF_CORE /* * ELF core dumper * @@ -2063,7 +2058,7 @@ out: return has_dumped; } -#endif /* USE_ELF_CORE_DUMP */ +#endif /* CONFIG_ELF_CORE */ static int __init init_elf_binfmt(void) { diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 79d2b1aa389f..7b055385db8e 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -75,14 +75,14 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *, static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *, struct file *, struct mm_struct *); -#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) +#ifdef CONFIG_ELF_CORE static int elf_fdpic_core_dump(long, struct pt_regs *, struct file *, unsigned long limit); #endif static struct linux_binfmt elf_fdpic_format = { .module = THIS_MODULE, .load_binary = load_elf_fdpic_binary, -#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) +#ifdef CONFIG_ELF_CORE .core_dump = elf_fdpic_core_dump, #endif .min_coredump = ELF_EXEC_PAGESIZE, @@ -1201,7 +1201,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, * * Modelled on fs/binfmt_elf.c core dumper */ -#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) +#ifdef CONFIG_ELF_CORE /* * These are the only things you should do on a core-file: use only these @@ -1826,4 +1826,4 @@ cleanup: #undef NUM_NOTES } -#endif /* USE_ELF_CORE_DUMP */ +#endif /* CONFIG_ELF_CORE */ diff --git a/fs/proc/base.c b/fs/proc/base.c index 4df4a464a919..18d5cc62d8ed 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2266,7 +2266,7 @@ static const struct inode_operations proc_attr_dir_inode_operations = { #endif -#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) +#ifdef CONFIG_ELF_CORE static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -2623,7 +2623,7 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_FAULT_INJECTION REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), #endif -#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) +#ifdef CONFIG_ELF_CORE REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations), #endif #ifdef CONFIG_TASK_IO_ACCOUNTING -- cgit v1.2.3 From ac2b3e67dd59b8c6ef8c199641444c6ea03535a6 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 15 Dec 2009 16:47:43 -0800 Subject: dma-mapping: fix off-by-one error in dma_capable() dma_mask is, when interpreted as address, the last valid byte, and hence comparison msut also be done using the last valid of the buffer in question. Also fix the open-coded instances in lib/swiotlb.c. Signed-off-by: Jan Beulich Cc: FUJITA Tomonori Cc: Becky Bruce Cc: "Luck, Tony" Cc: Benjamin Herrenschmidt Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/dma-mapping.h | 2 +- arch/powerpc/include/asm/dma-mapping.h | 2 +- arch/x86/include/asm/dma-mapping.h | 2 +- lib/swiotlb.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h index 8d3c79cd81e7..7d09a09cdaad 100644 --- a/arch/ia64/include/asm/dma-mapping.h +++ b/arch/ia64/include/asm/dma-mapping.h @@ -73,7 +73,7 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) if (!dev->dma_mask) return 0; - return addr + size <= *dev->dma_mask; + return addr + size - 1 <= *dev->dma_mask; } static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index e281daebddca..80a973bb9e71 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -197,7 +197,7 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) if (!dev->dma_mask) return 0; - return addr + size <= *dev->dma_mask; + return addr + size - 1 <= *dev->dma_mask; } static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 0f6c02f3b7d4..ac91eed21061 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -67,7 +67,7 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) if (!dev->dma_mask) return 0; - return addr + size <= *dev->dma_mask; + return addr + size - 1 <= *dev->dma_mask; } static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 5bc01803f8f8..437eedb5a53b 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -549,7 +549,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_mask = hwdev->coherent_dma_mask; ret = (void *)__get_free_pages(flags, order); - if (ret && swiotlb_virt_to_bus(hwdev, ret) + size > dma_mask) { + if (ret && swiotlb_virt_to_bus(hwdev, ret) + size - 1 > dma_mask) { /* * The allocated memory isn't reachable by the device. */ @@ -571,7 +571,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, dev_addr = swiotlb_virt_to_bus(hwdev, ret); /* Confirm address can be DMA'd by device */ - if (dev_addr + size > dma_mask) { + if (dev_addr + size - 1 > dma_mask) { printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", (unsigned long long)dma_mask, (unsigned long long)dev_addr); -- cgit v1.2.3 From 729d69e6995fc4dea8ff70df256a7d4034a3d21d Mon Sep 17 00:00:00 2001 From: Robin Holt Date: Tue, 15 Dec 2009 16:47:52 -0800 Subject: x86: uv: introduce a means to translate from gpa -> socket_paddr The UV BIOS has been updated to implement some of our interface functionality differently than originally expected. These patches update the kernel to the bios implementation and include a few minor bug fixes which prevent us from doing significant testing on real hardware. This patch: For SGI UV systems, translate from a global physical address back to a socket physical address. This does nothing to ensure the socket physical address is actually addressable by the kernel. That is the responsibility of the user of the function. Signed-off-by: Robin Holt Cc: Jack Steiner Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/uv/uv_hub.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index d1414af98559..0cc955f16aec 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -232,6 +232,19 @@ static inline unsigned long uv_gpa(void *v) return uv_soc_phys_ram_to_gpa(__pa(v)); } +/* UV global physical address --> socket phys RAM */ +static inline unsigned long uv_gpa_to_soc_phys_ram(unsigned long gpa) +{ + unsigned long paddr = gpa & uv_hub_info->gpa_mask; + unsigned long remap_base = uv_hub_info->lowmem_remap_base; + unsigned long remap_top = uv_hub_info->lowmem_remap_top; + + if (paddr >= remap_base && paddr < remap_base + remap_top) + paddr -= remap_base; + return paddr; +} + + /* gnode -> pnode */ static inline unsigned long uv_gpa_to_gnode(unsigned long gpa) { -- cgit v1.2.3 From fae419f2abd15ab7d1cd1413e6683a276a4e14e2 Mon Sep 17 00:00:00 2001 From: Robin Holt Date: Tue, 15 Dec 2009 16:47:54 -0800 Subject: x86: uv: introduce uv_gpa_is_mmr Provide a mechanism for determining if a global physical address is pointing to a UV hub MMR. Signed-off-by: Robin Holt Cc: Jack Steiner Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/uv/uv_hub.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 0cc955f16aec..8f1332bbfd72 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -232,6 +232,13 @@ static inline unsigned long uv_gpa(void *v) return uv_soc_phys_ram_to_gpa(__pa(v)); } +/* Top two bits indicate the requested address is in MMR space. */ +static inline int +uv_gpa_in_mmr_space(unsigned long gpa) +{ + return (gpa >> 62) == 0x3UL; +} + /* UV global physical address --> socket phys RAM */ static inline unsigned long uv_gpa_to_soc_phys_ram(unsigned long gpa) { -- cgit v1.2.3 From c2c9f115741453715d6b4da1cd2de65af8c7ad86 Mon Sep 17 00:00:00 2001 From: Robin Holt Date: Tue, 15 Dec 2009 16:47:56 -0800 Subject: x86: uv: update XPC to handle updated BIOS interface The UV BIOS has moved the location of some of their pointers to the "partition reserved page" from memory into a uv hub MMR. The GRU does not support bcopy operations from MMR space so we need to special case the MMR addresses using VLOAD operations. Additionally, the BIOS call for registering a message queue watchlist has removed the 'blade' value and eliminated the structure that was being passed in. This is also reflected in this patch. Signed-off-by: Robin Holt Cc: Jack Steiner Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/uv/bios.h | 11 +---------- arch/x86/kernel/bios_uv.c | 8 ++------ drivers/misc/sgi-xp/xp_uv.c | 23 +++++++++++++++++++++++ drivers/misc/sgi-xp/xpc_partition.c | 13 +++++++++---- drivers/misc/sgi-xp/xpc_uv.c | 27 +++++++++++++++------------ 5 files changed, 50 insertions(+), 32 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h index 7ed17ff502b9..2751f3075d8b 100644 --- a/arch/x86/include/asm/uv/bios.h +++ b/arch/x86/include/asm/uv/bios.h @@ -76,15 +76,6 @@ union partition_info_u { }; }; -union uv_watchlist_u { - u64 val; - struct { - u64 blade : 16, - size : 32, - filler : 16; - }; -}; - enum uv_memprotect { UV_MEMPROT_RESTRICT_ACCESS, UV_MEMPROT_ALLOW_AMO, @@ -100,7 +91,7 @@ extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64); extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *); extern s64 uv_bios_freq_base(u64, u64 *); -extern int uv_bios_mq_watchlist_alloc(int, unsigned long, unsigned int, +extern int uv_bios_mq_watchlist_alloc(unsigned long, unsigned int, unsigned long *); extern int uv_bios_mq_watchlist_free(int, int); extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect); diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c index 63a88e1f987d..b0206a211b09 100644 --- a/arch/x86/kernel/bios_uv.c +++ b/arch/x86/kernel/bios_uv.c @@ -101,21 +101,17 @@ s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, } int -uv_bios_mq_watchlist_alloc(int blade, unsigned long addr, unsigned int mq_size, +uv_bios_mq_watchlist_alloc(unsigned long addr, unsigned int mq_size, unsigned long *intr_mmr_offset) { - union uv_watchlist_u size_blade; u64 watchlist; s64 ret; - size_blade.size = mq_size; - size_blade.blade = blade; - /* * bios returns watchlist number or negative error number. */ ret = (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_ALLOC, addr, - size_blade.val, (u64)intr_mmr_offset, + mq_size, (u64)intr_mmr_offset, (u64)&watchlist, 0); if (ret < BIOS_STATUS_SUCCESS) return ret; diff --git a/drivers/misc/sgi-xp/xp_uv.c b/drivers/misc/sgi-xp/xp_uv.c index 1e61f8a61a30..a0d093274dc0 100644 --- a/drivers/misc/sgi-xp/xp_uv.c +++ b/drivers/misc/sgi-xp/xp_uv.c @@ -41,12 +41,35 @@ xp_socket_pa_uv(unsigned long gpa) return uv_gpa_to_soc_phys_ram(gpa); } +static enum xp_retval +xp_remote_mmr_read(unsigned long dst_gpa, const unsigned long src_gpa, + size_t len) +{ + int ret; + unsigned long *dst_va = __va(uv_gpa_to_soc_phys_ram(dst_gpa)); + + BUG_ON(!uv_gpa_in_mmr_space(src_gpa)); + BUG_ON(len != 8); + + ret = gru_read_gpa(dst_va, src_gpa); + if (ret == 0) + return xpSuccess; + + dev_err(xp, "gru_read_gpa() failed, dst_gpa=0x%016lx src_gpa=0x%016lx " + "len=%ld\n", dst_gpa, src_gpa, len); + return xpGruCopyError; +} + + static enum xp_retval xp_remote_memcpy_uv(unsigned long dst_gpa, const unsigned long src_gpa, size_t len) { int ret; + if (uv_gpa_in_mmr_space(src_gpa)) + return xp_remote_mmr_read(dst_gpa, src_gpa, len); + ret = gru_copy_gpa(dst_gpa, src_gpa, len); if (ret == 0) return xpSuccess; diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c index 65877bc5edaa..9a6268c89fdd 100644 --- a/drivers/misc/sgi-xp/xpc_partition.c +++ b/drivers/misc/sgi-xp/xpc_partition.c @@ -18,6 +18,7 @@ #include #include #include "xpc.h" +#include /* XPC is exiting flag */ int xpc_exiting; @@ -92,8 +93,12 @@ xpc_get_rsvd_page_pa(int nasid) break; /* !!! L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */ - if (L1_CACHE_ALIGN(len) > buf_len) { - kfree(buf_base); + if (is_shub()) + len = L1_CACHE_ALIGN(len); + + if (len > buf_len) { + if (buf_base != NULL) + kfree(buf_base); buf_len = L1_CACHE_ALIGN(len); buf = xpc_kmalloc_cacheline_aligned(buf_len, GFP_KERNEL, &buf_base); @@ -105,7 +110,7 @@ xpc_get_rsvd_page_pa(int nasid) } } - ret = xp_remote_memcpy(xp_pa(buf), rp_pa, buf_len); + ret = xp_remote_memcpy(xp_pa(buf), rp_pa, len); if (ret != xpSuccess) { dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret); break; @@ -143,7 +148,7 @@ xpc_setup_rsvd_page(void) dev_err(xpc_part, "SAL failed to locate the reserved page\n"); return -ESRCH; } - rp = (struct xpc_rsvd_page *)__va(rp_pa); + rp = (struct xpc_rsvd_page *)__va(xp_socket_pa(rp_pa)); if (rp->SAL_version < 3) { /* SAL_versions < 3 had a SAL_partid defined as a u8 */ diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c index b5bbe59f9c57..bbf0e2ee6fd9 100644 --- a/drivers/misc/sgi-xp/xpc_uv.c +++ b/drivers/misc/sgi-xp/xpc_uv.c @@ -157,22 +157,24 @@ xpc_gru_mq_watchlist_alloc_uv(struct xpc_gru_mq_uv *mq) { int ret; -#if defined CONFIG_X86_64 - ret = uv_bios_mq_watchlist_alloc(mq->mmr_blade, uv_gpa(mq->address), - mq->order, &mq->mmr_offset); - if (ret < 0) { - dev_err(xpc_part, "uv_bios_mq_watchlist_alloc() failed, " - "ret=%d\n", ret); - return ret; - } -#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV - ret = sn_mq_watchlist_alloc(mq->mmr_blade, (void *)uv_gpa(mq->address), +#if defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV + int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); + + ret = sn_mq_watchlist_alloc(mmr_pnode, (void *)uv_gpa(mq->address), mq->order, &mq->mmr_offset); if (ret < 0) { dev_err(xpc_part, "sn_mq_watchlist_alloc() failed, ret=%d\n", ret); return -EBUSY; } +#elif defined CONFIG_X86_64 + ret = uv_bios_mq_watchlist_alloc(uv_gpa(mq->address), + mq->order, &mq->mmr_offset); + if (ret < 0) { + dev_err(xpc_part, "uv_bios_mq_watchlist_alloc() failed, " + "ret=%d\n", ret); + return ret; + } #else #error not a supported configuration #endif @@ -185,12 +187,13 @@ static void xpc_gru_mq_watchlist_free_uv(struct xpc_gru_mq_uv *mq) { int ret; + int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); #if defined CONFIG_X86_64 - ret = uv_bios_mq_watchlist_free(mq->mmr_blade, mq->watchlist_num); + ret = uv_bios_mq_watchlist_free(mmr_pnode, mq->watchlist_num); BUG_ON(ret != BIOS_STATUS_SUCCESS); #elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV - ret = sn_mq_watchlist_free(mq->mmr_blade, mq->watchlist_num); + ret = sn_mq_watchlist_free(mmr_pnode, mq->watchlist_num); BUG_ON(ret != SALRET_OK); #else #error not a supported configuration -- cgit v1.2.3 From 56abcf24ff993291b20efd6e3402cd3d12f5cee2 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Tue, 15 Dec 2009 16:48:20 -0800 Subject: gru: function to generate chipset IPI values Create a function to generate the value that is written to the UV hub MMR to cause an IPI interrupt to be sent. The function will be used in the GRU message queue error recovery code that sends IPIs to nodes in remote partitions. Signed-off-by: Jack Steiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/uv/uv_hub.h | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 8f1332bbfd72..811bfabc80b7 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -172,6 +172,8 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); #define UV_LOCAL_MMR_SIZE (64UL * 1024 * 1024) #define UV_GLOBAL_MMR32_SIZE (64UL * 1024 * 1024) +#define UV_GLOBAL_GRU_MMR_BASE 0x4000000 + #define UV_GLOBAL_MMR32_PNODE_SHIFT 15 #define UV_GLOBAL_MMR64_PNODE_SHIFT 26 @@ -327,6 +329,15 @@ static inline unsigned long uv_read_global_mmr64(int pnode, return readq(uv_global_mmr64_address(pnode, offset)); } +/* + * Global MMR space addresses when referenced by the GRU. (GRU does + * NOT use socket addressing). + */ +static inline unsigned long uv_global_gru_mmr_address(int pnode, unsigned long offset) +{ + return UV_GLOBAL_GRU_MMR_BASE | offset | (pnode << uv_hub_info->m_val); +} + /* * Access hub local MMRs. Faster than using global space but only local MMRs * are accessible. @@ -454,6 +465,14 @@ static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value) } } +static unsigned long uv_hub_ipi_value(int apicid, int vector, int mode) +{ + return (1UL << UVH_IPI_INT_SEND_SHFT) | + ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) | + (mode << UVH_IPI_INT_DELIVERY_MODE_SHFT) | + (vector << UVH_IPI_INT_VECTOR_SHFT); +} + static inline void uv_hub_send_ipi(int pnode, int apicid, int vector) { unsigned long val; @@ -462,10 +481,7 @@ static inline void uv_hub_send_ipi(int pnode, int apicid, int vector) if (vector == NMI_VECTOR) dmode = dest_NMI; - val = (1UL << UVH_IPI_INT_SEND_SHFT) | - ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) | - (dmode << UVH_IPI_INT_DELIVERY_MODE_SHFT) | - (vector << UVH_IPI_INT_VECTOR_SHFT); + val = uv_hub_ipi_value(apicid, vector, dmode); uv_write_global_mmr64(pnode, UVH_IPI_INT, val); } -- cgit v1.2.3 From a66022c457755b5eef61e30866114679c95e1f54 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Tue, 15 Dec 2009 16:48:28 -0800 Subject: iommu-helper: use bitmap library Use bitmap library and kill some unused iommu helper functions. 1. s/iommu_area_free/bitmap_clear/ 2. s/iommu_area_reserve/bitmap_set/ 3. Use bitmap_find_next_zero_area instead of find_next_zero_area This cannot be simple substitution because find_next_zero_area doesn't check the last bit of the limit in bitmap 4. Remove iommu_area_free, iommu_area_reserve, and find_next_zero_area Signed-off-by: Akinobu Mita Cc: "David S. Miller" Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: FUJITA Tomonori Cc: Joerg Roedel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/iommu.c | 4 +-- arch/sparc/kernel/iommu.c | 3 +- arch/x86/kernel/amd_iommu.c | 4 +-- arch/x86/kernel/pci-calgary_64.c | 6 ++-- arch/x86/kernel/pci-gart_64.c | 6 ++-- include/linux/iommu-helper.h | 3 -- lib/iommu-helper.c | 59 ++++++---------------------------------- 7 files changed, 21 insertions(+), 64 deletions(-) (limited to 'arch/x86') diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index fd51578e29dd..5547ae6e6b0b 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include @@ -251,7 +251,7 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, } ppc_md.tce_free(tbl, entry, npages); - iommu_area_free(tbl->it_map, free_entry, npages); + bitmap_clear(tbl->it_map, free_entry, npages); } static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index 7690cc219ecc..5fad94950e76 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -11,6 +11,7 @@ #include #include #include +#include #ifdef CONFIG_PCI #include @@ -169,7 +170,7 @@ void iommu_range_free(struct iommu *iommu, dma_addr_t dma_addr, unsigned long np entry = (dma_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT; - iommu_area_free(arena->map, entry, npages); + bitmap_clear(arena->map, entry, npages); } int iommu_table_init(struct iommu *iommu, int tsbsize, diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index b990b5cc9541..23824fef789c 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include #include @@ -1162,7 +1162,7 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT; - iommu_area_free(range->bitmap, address, pages); + bitmap_clear(range->bitmap, address, pages); } diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index c563e4c8ff39..2bbde6078143 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include @@ -212,7 +212,7 @@ static void iommu_range_reserve(struct iommu_table *tbl, spin_lock_irqsave(&tbl->it_lock, flags); - iommu_area_reserve(tbl->it_map, index, npages); + bitmap_set(tbl->it_map, index, npages); spin_unlock_irqrestore(&tbl->it_lock, flags); } @@ -303,7 +303,7 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, spin_lock_irqsave(&tbl->it_lock, flags); - iommu_area_free(tbl->it_map, entry, npages); + bitmap_clear(tbl->it_map, entry, npages); spin_unlock_irqrestore(&tbl->it_lock, flags); } diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 56c0e730d3fe..34de53b46f87 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include @@ -126,7 +126,7 @@ static void free_iommu(unsigned long offset, int size) unsigned long flags; spin_lock_irqsave(&iommu_bitmap_lock, flags); - iommu_area_free(iommu_gart_bitmap, offset, size); + bitmap_clear(iommu_gart_bitmap, offset, size); if (offset >= next_bit) next_bit = offset + size; spin_unlock_irqrestore(&iommu_bitmap_lock, flags); @@ -792,7 +792,7 @@ int __init gart_iommu_init(void) * Out of IOMMU space handling. * Reserve some invalid pages at the beginning of the GART. */ - iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES); + bitmap_set(iommu_gart_bitmap, 0, EMERGENCY_PAGES); pr_info("PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", iommu_size >> 20); diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h index 3b068e5b5671..64d1b638745d 100644 --- a/include/linux/iommu-helper.h +++ b/include/linux/iommu-helper.h @@ -14,14 +14,11 @@ static inline unsigned long iommu_device_max_index(unsigned long size, extern int iommu_is_span_boundary(unsigned int index, unsigned int nr, unsigned long shift, unsigned long boundary_size); -extern void iommu_area_reserve(unsigned long *map, unsigned long i, int len); extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, unsigned long shift, unsigned long boundary_size, unsigned long align_mask); -extern void iommu_area_free(unsigned long *map, unsigned long start, - unsigned int nr); extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len, unsigned long io_page_size); diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c index 75dbda03f4fb..c0251f4ad08b 100644 --- a/lib/iommu-helper.c +++ b/lib/iommu-helper.c @@ -3,41 +3,7 @@ */ #include -#include - -static unsigned long find_next_zero_area(unsigned long *map, - unsigned long size, - unsigned long start, - unsigned int nr, - unsigned long align_mask) -{ - unsigned long index, end, i; -again: - index = find_next_zero_bit(map, size, start); - - /* Align allocation */ - index = (index + align_mask) & ~align_mask; - - end = index + nr; - if (end >= size) - return -1; - for (i = index; i < end; i++) { - if (test_bit(i, map)) { - start = i+1; - goto again; - } - } - return index; -} - -void iommu_area_reserve(unsigned long *map, unsigned long i, int len) -{ - unsigned long end = i + len; - while (i < end) { - __set_bit(i, map); - i++; - } -} +#include int iommu_is_span_boundary(unsigned int index, unsigned int nr, unsigned long shift, @@ -55,31 +21,24 @@ unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, unsigned long align_mask) { unsigned long index; + + /* We don't want the last of the limit */ + size -= 1; again: - index = find_next_zero_area(map, size, start, nr, align_mask); - if (index != -1) { + index = bitmap_find_next_zero_area(map, size, start, nr, align_mask); + if (index < size) { if (iommu_is_span_boundary(index, nr, shift, boundary_size)) { /* we could do more effectively */ start = index + 1; goto again; } - iommu_area_reserve(map, index, nr); + bitmap_set(map, index, nr); + return index; } - return index; + return -1; } EXPORT_SYMBOL(iommu_area_alloc); -void iommu_area_free(unsigned long *map, unsigned long start, unsigned int nr) -{ - unsigned long end = start + nr; - - while (start < end) { - __clear_bit(start, map); - start++; - } -} -EXPORT_SYMBOL(iommu_area_free); - unsigned long iommu_num_pages(unsigned long addr, unsigned long len, unsigned long io_page_size) { -- cgit v1.2.3 From 853b3da10d617f08340e5fe569c99e7b54f2a568 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 16 Dec 2009 00:34:13 -0500 Subject: sanitize do_pipe_flags() callers in arch * hpux_pipe() - no need to take BKL * sys32_pipe() in arch/x86/ia32 and xtensa_pipe() in arch/xtensa - no need at all, since both functions are open-coded sys_pipe() Signed-off-by: Al Viro --- arch/parisc/hpux/sys_hpux.c | 7 +------ arch/x86/include/asm/sys_ia32.h | 1 - arch/xtensa/include/asm/syscall.h | 1 - arch/xtensa/include/asm/unistd.h | 2 +- arch/xtensa/kernel/syscall.c | 18 ------------------ 5 files changed, 2 insertions(+), 27 deletions(-) (limited to 'arch/x86') diff --git a/arch/parisc/hpux/sys_hpux.c b/arch/parisc/hpux/sys_hpux.c index 18072e03a019..92343bd35fa3 100644 --- a/arch/parisc/hpux/sys_hpux.c +++ b/arch/parisc/hpux/sys_hpux.c @@ -445,12 +445,7 @@ done: int hpux_pipe(int *kstack_fildes) { - int error; - - lock_kernel(); - error = do_pipe_flags(kstack_fildes, 0); - unlock_kernel(); - return error; + return do_pipe_flags(kstack_fildes, 0); } /* lies - says it works, but it really didn't lock anything */ diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 4a5a089e1c62..d5f69045c100 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -30,7 +30,6 @@ struct mmap_arg_struct; asmlinkage long sys32_mmap(struct mmap_arg_struct __user *); asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long); -asmlinkage long sys32_pipe(int __user *); struct sigaction32; struct old_sigaction32; asmlinkage long sys32_rt_sigaction(int, struct sigaction32 __user *, diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h index 4352dbe1186a..efcf33b92e4c 100644 --- a/arch/xtensa/include/asm/syscall.h +++ b/arch/xtensa/include/asm/syscall.h @@ -12,7 +12,6 @@ struct pt_regs; struct sigaction; asmlinkage long xtensa_execve(char*, char**, char**, struct pt_regs*); asmlinkage long xtensa_clone(unsigned long, unsigned long, struct pt_regs*); -asmlinkage long xtensa_pipe(int __user *); asmlinkage long xtensa_ptrace(long, long, long, long); asmlinkage long xtensa_sigreturn(struct pt_regs*); asmlinkage long xtensa_rt_sigreturn(struct pt_regs*); diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h index fbf318b3af3e..528042c2951e 100644 --- a/arch/xtensa/include/asm/unistd.h +++ b/arch/xtensa/include/asm/unistd.h @@ -94,7 +94,7 @@ __SYSCALL( 35, sys_readlink, 3) #define __NR_mknod 36 __SYSCALL( 36, sys_mknod, 3) #define __NR_pipe 37 -__SYSCALL( 37, xtensa_pipe, 1) +__SYSCALL( 37, sys_pipe, 1) #define __NR_unlink 38 __SYSCALL( 38, sys_unlink, 1) #define __NR_rmdir 39 diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c index 1e67bab775c1..816e6d0d686c 100644 --- a/arch/xtensa/kernel/syscall.c +++ b/arch/xtensa/kernel/syscall.c @@ -39,24 +39,6 @@ syscall_t sys_call_table[__NR_syscall_count] /* FIXME __cacheline_aligned */= { #include }; -/* - * xtensa_pipe() is the normal C calling standard for creating a pipe. It's not - * the way unix traditional does this, though. - */ - -asmlinkage long xtensa_pipe(int __user *userfds) -{ - int fd[2]; - int error; - - error = do_pipe_flags(fd, 0); - if (!error) { - if (copy_to_user(userfds, fd, 2 * sizeof(int))) - error = -EFAULT; - } - return error; -} - asmlinkage long xtensa_shmat(int shmid, char __user *shmaddr, int shmflg) { unsigned long ret; -- cgit v1.2.3 From 5714868812b563ba8816c1d974f4f07c76941c30 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 15 Dec 2009 10:19:50 +0900 Subject: PCI: fix section mismatch on update_res() Remark update_res from __init to __devinit as it is called also from __devinit functions. This patch removes the following warning message: WARNING: vmlinux.o(.devinit.text+0x774a): Section mismatch in reference from the function pci_root_bus_res() to the function .init.text:update_res() The function __devinit pci_root_bus_res() references a function __init update_res(). If update_res is only used by pci_root_bus_res then annotate update_res with a matching annotation. Signed-off-by: Jiri Slaby Cc: Aristeu Sergio Cc: Jesse Barnes Cc: linux-pci@vger.kernel.org Cc: x86@kernel.org Signed-off-by: Hidetoshi Seto Signed-off-by: Jesse Barnes --- arch/x86/pci/bus_numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c index 145df00e0387..f939d603adfa 100644 --- a/arch/x86/pci/bus_numa.c +++ b/arch/x86/pci/bus_numa.c @@ -51,7 +51,7 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b) } } -void __init update_res(struct pci_root_info *info, size_t start, +void __devinit update_res(struct pci_root_info *info, size_t start, size_t end, unsigned long flags, int merge) { int i; -- cgit v1.2.3 From 9d260ebc09a0ad6b5c73e17676df42c7bc75ff64 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 16 Dec 2009 15:43:55 +0100 Subject: x86, amd: Get multi-node CPU info from NodeId MSR instead of PCI config space Use NodeId MSR to get NodeId and number of nodes per processor. Signed-off-by: Andreas Herrmann LKML-Reference: <20091216144355.GB28798@alberich.amd.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/cpufeature.h | 1 + arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/cpu/amd.c | 53 +++++++++++---------------------------- 3 files changed, 17 insertions(+), 38 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 613700f27a4a..637e1ec963c3 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -153,6 +153,7 @@ #define X86_FEATURE_SSE5 (6*32+11) /* SSE-5 */ #define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */ #define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */ +#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */ /* * Auxiliary flags: Linux defined - For features scattered in various diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ac98d2914ebf..1cd58cdbc03f 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -124,6 +124,7 @@ #define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2 #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffff #define FAM10H_MMIO_CONF_BASE_SHIFT 20 +#define MSR_FAM10H_NODE_ID 0xc001100c /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 8dc3ea145c97..e485825130d2 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -254,59 +254,36 @@ static int __cpuinit nearby_node(int apicid) /* * Fixup core topology information for AMD multi-node processors. - * Assumption 1: Number of cores in each internal node is the same. - * Assumption 2: Mixed systems with both single-node and dual-node - * processors are not supported. + * Assumption: Number of cores in each internal node is the same. */ #ifdef CONFIG_X86_HT static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c) { -#ifdef CONFIG_PCI - u32 t, cpn; - u8 n, n_id; + unsigned long long value; + u32 nodes, cores_per_node; int cpu = smp_processor_id(); + if (!cpu_has(c, X86_FEATURE_NODEID_MSR)) + return; + /* fixup topology information only once for a core */ if (cpu_has(c, X86_FEATURE_AMD_DCM)) return; - /* check for multi-node processor on boot cpu */ - t = read_pci_config(0, 24, 3, 0xe8); - if (!(t & (1 << 29))) + rdmsrl(MSR_FAM10H_NODE_ID, value); + + nodes = ((value >> 3) & 7) + 1; + if (nodes == 1) return; set_cpu_cap(c, X86_FEATURE_AMD_DCM); + cores_per_node = c->x86_max_cores / nodes; - /* cores per node: each internal node has half the number of cores */ - cpn = c->x86_max_cores >> 1; + /* store NodeID, use llc_shared_map to store sibling info */ + per_cpu(cpu_llc_id, cpu) = value & 7; - /* even-numbered NB_id of this dual-node processor */ - n = c->phys_proc_id << 1; - - /* - * determine internal node id and assign cores fifty-fifty to - * each node of the dual-node processor - */ - t = read_pci_config(0, 24 + n, 3, 0xe8); - n = (t>>30) & 0x3; - if (n == 0) { - if (c->cpu_core_id < cpn) - n_id = 0; - else - n_id = 1; - } else { - if (c->cpu_core_id < cpn) - n_id = 1; - else - n_id = 0; - } - - /* compute entire NodeID, use llc_shared_map to store sibling info */ - per_cpu(cpu_llc_id, cpu) = (c->phys_proc_id << 1) + n_id; - - /* fixup core id to be in range from 0 to cpn */ - c->cpu_core_id = c->cpu_core_id % cpn; -#endif + /* fixup core id to be in range from 0 to (cores_per_node - 1) */ + c->cpu_core_id = c->cpu_core_id % cores_per_node; } #endif -- cgit v1.2.3 From 6ede31e03084ee084bcee073ef3d1136f68d0906 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 17 Dec 2009 00:16:25 +0100 Subject: x86, msr: msrs_alloc/free for CONFIG_SMP=n Randy Dunlap reported the following build error: "When CONFIG_SMP=n, CONFIG_X86_MSR=m: ERROR: "msrs_free" [drivers/edac/amd64_edac_mod.ko] undefined! ERROR: "msrs_alloc" [drivers/edac/amd64_edac_mod.ko] undefined!" This is due to the fact that is conditioned on CONFIG_SMP and in the UP case we have only the stubs in the header. Fork off SMP functionality into a new file (msr-smp.c) and build msrs_{alloc,free} unconditionally. Reported-by: Randy Dunlap Cc: H. Peter Anvin Signed-off-by: Borislav Petkov LKML-Reference: <20091216231625.GD27228@liondog.tnic> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/msr.h | 12 +++ arch/x86/lib/Makefile | 4 +- arch/x86/lib/msr-smp.c | 204 +++++++++++++++++++++++++++++++++++++++++++ arch/x86/lib/msr.c | 213 --------------------------------------------- 4 files changed, 218 insertions(+), 215 deletions(-) create mode 100644 arch/x86/lib/msr-smp.c (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index cf985aa00660..c5bc4c2d33f5 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -27,6 +27,18 @@ struct msr { }; }; +struct msr_info { + u32 msr_no; + struct msr reg; + struct msr *msrs; + int err; +}; + +struct msr_regs_info { + u32 *regs; + int err; +}; + static inline unsigned long long native_read_tscp(unsigned int *aux) { unsigned long low, high; diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index a2d6472895fb..706be8bf967b 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -14,7 +14,7 @@ $(obj)/inat.o: $(obj)/inat-tables.c clean-files := inat-tables.c -obj-$(CONFIG_SMP) := msr.o +obj-$(CONFIG_SMP) += msr-smp.o lib-y := delay.o lib-y += thunk_$(BITS).o @@ -22,7 +22,7 @@ lib-y += usercopy_$(BITS).o getuser.o putuser.o lib-y += memcpy_$(BITS).o lib-y += insn.o inat.o -obj-y += msr-reg.o msr-reg-export.o +obj-y += msr.o msr-reg.o msr-reg-export.o ifeq ($(CONFIG_X86_32),y) obj-y += atomic64_32.o diff --git a/arch/x86/lib/msr-smp.c b/arch/x86/lib/msr-smp.c new file mode 100644 index 000000000000..a6b1b86d2253 --- /dev/null +++ b/arch/x86/lib/msr-smp.c @@ -0,0 +1,204 @@ +#include +#include +#include +#include + +static void __rdmsr_on_cpu(void *info) +{ + struct msr_info *rv = info; + struct msr *reg; + int this_cpu = raw_smp_processor_id(); + + if (rv->msrs) + reg = per_cpu_ptr(rv->msrs, this_cpu); + else + reg = &rv->reg; + + rdmsr(rv->msr_no, reg->l, reg->h); +} + +static void __wrmsr_on_cpu(void *info) +{ + struct msr_info *rv = info; + struct msr *reg; + int this_cpu = raw_smp_processor_id(); + + if (rv->msrs) + reg = per_cpu_ptr(rv->msrs, this_cpu); + else + reg = &rv->reg; + + wrmsr(rv->msr_no, reg->l, reg->h); +} + +int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) +{ + int err; + struct msr_info rv; + + memset(&rv, 0, sizeof(rv)); + + rv.msr_no = msr_no; + err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1); + *l = rv.reg.l; + *h = rv.reg.h; + + return err; +} +EXPORT_SYMBOL(rdmsr_on_cpu); + +int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) +{ + int err; + struct msr_info rv; + + memset(&rv, 0, sizeof(rv)); + + rv.msr_no = msr_no; + rv.reg.l = l; + rv.reg.h = h; + err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1); + + return err; +} +EXPORT_SYMBOL(wrmsr_on_cpu); + +static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no, + struct msr *msrs, + void (*msr_func) (void *info)) +{ + struct msr_info rv; + int this_cpu; + + memset(&rv, 0, sizeof(rv)); + + rv.msrs = msrs; + rv.msr_no = msr_no; + + this_cpu = get_cpu(); + + if (cpumask_test_cpu(this_cpu, mask)) + msr_func(&rv); + + smp_call_function_many(mask, msr_func, &rv, 1); + put_cpu(); +} + +/* rdmsr on a bunch of CPUs + * + * @mask: which CPUs + * @msr_no: which MSR + * @msrs: array of MSR values + * + */ +void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) +{ + __rwmsr_on_cpus(mask, msr_no, msrs, __rdmsr_on_cpu); +} +EXPORT_SYMBOL(rdmsr_on_cpus); + +/* + * wrmsr on a bunch of CPUs + * + * @mask: which CPUs + * @msr_no: which MSR + * @msrs: array of MSR values + * + */ +void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) +{ + __rwmsr_on_cpus(mask, msr_no, msrs, __wrmsr_on_cpu); +} +EXPORT_SYMBOL(wrmsr_on_cpus); + +/* These "safe" variants are slower and should be used when the target MSR + may not actually exist. */ +static void __rdmsr_safe_on_cpu(void *info) +{ + struct msr_info *rv = info; + + rv->err = rdmsr_safe(rv->msr_no, &rv->reg.l, &rv->reg.h); +} + +static void __wrmsr_safe_on_cpu(void *info) +{ + struct msr_info *rv = info; + + rv->err = wrmsr_safe(rv->msr_no, rv->reg.l, rv->reg.h); +} + +int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) +{ + int err; + struct msr_info rv; + + memset(&rv, 0, sizeof(rv)); + + rv.msr_no = msr_no; + err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1); + *l = rv.reg.l; + *h = rv.reg.h; + + return err ? err : rv.err; +} +EXPORT_SYMBOL(rdmsr_safe_on_cpu); + +int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) +{ + int err; + struct msr_info rv; + + memset(&rv, 0, sizeof(rv)); + + rv.msr_no = msr_no; + rv.reg.l = l; + rv.reg.h = h; + err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1); + + return err ? err : rv.err; +} +EXPORT_SYMBOL(wrmsr_safe_on_cpu); + +/* + * These variants are significantly slower, but allows control over + * the entire 32-bit GPR set. + */ +static void __rdmsr_safe_regs_on_cpu(void *info) +{ + struct msr_regs_info *rv = info; + + rv->err = rdmsr_safe_regs(rv->regs); +} + +static void __wrmsr_safe_regs_on_cpu(void *info) +{ + struct msr_regs_info *rv = info; + + rv->err = wrmsr_safe_regs(rv->regs); +} + +int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) +{ + int err; + struct msr_regs_info rv; + + rv.regs = regs; + rv.err = -EIO; + err = smp_call_function_single(cpu, __rdmsr_safe_regs_on_cpu, &rv, 1); + + return err ? err : rv.err; +} +EXPORT_SYMBOL(rdmsr_safe_regs_on_cpu); + +int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) +{ + int err; + struct msr_regs_info rv; + + rv.regs = regs; + rv.err = -EIO; + err = smp_call_function_single(cpu, __wrmsr_safe_regs_on_cpu, &rv, 1); + + return err ? err : rv.err; +} +EXPORT_SYMBOL(wrmsr_safe_regs_on_cpu); diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index 872834177937..8f8eebdca7d4 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c @@ -1,123 +1,7 @@ #include #include -#include #include -struct msr_info { - u32 msr_no; - struct msr reg; - struct msr *msrs; - int err; -}; - -static void __rdmsr_on_cpu(void *info) -{ - struct msr_info *rv = info; - struct msr *reg; - int this_cpu = raw_smp_processor_id(); - - if (rv->msrs) - reg = per_cpu_ptr(rv->msrs, this_cpu); - else - reg = &rv->reg; - - rdmsr(rv->msr_no, reg->l, reg->h); -} - -static void __wrmsr_on_cpu(void *info) -{ - struct msr_info *rv = info; - struct msr *reg; - int this_cpu = raw_smp_processor_id(); - - if (rv->msrs) - reg = per_cpu_ptr(rv->msrs, this_cpu); - else - reg = &rv->reg; - - wrmsr(rv->msr_no, reg->l, reg->h); -} - -int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) -{ - int err; - struct msr_info rv; - - memset(&rv, 0, sizeof(rv)); - - rv.msr_no = msr_no; - err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1); - *l = rv.reg.l; - *h = rv.reg.h; - - return err; -} -EXPORT_SYMBOL(rdmsr_on_cpu); - -int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) -{ - int err; - struct msr_info rv; - - memset(&rv, 0, sizeof(rv)); - - rv.msr_no = msr_no; - rv.reg.l = l; - rv.reg.h = h; - err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1); - - return err; -} -EXPORT_SYMBOL(wrmsr_on_cpu); - -static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no, - struct msr *msrs, - void (*msr_func) (void *info)) -{ - struct msr_info rv; - int this_cpu; - - memset(&rv, 0, sizeof(rv)); - - rv.msrs = msrs; - rv.msr_no = msr_no; - - this_cpu = get_cpu(); - - if (cpumask_test_cpu(this_cpu, mask)) - msr_func(&rv); - - smp_call_function_many(mask, msr_func, &rv, 1); - put_cpu(); -} - -/* rdmsr on a bunch of CPUs - * - * @mask: which CPUs - * @msr_no: which MSR - * @msrs: array of MSR values - * - */ -void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) -{ - __rwmsr_on_cpus(mask, msr_no, msrs, __rdmsr_on_cpu); -} -EXPORT_SYMBOL(rdmsr_on_cpus); - -/* - * wrmsr on a bunch of CPUs - * - * @mask: which CPUs - * @msr_no: which MSR - * @msrs: array of MSR values - * - */ -void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) -{ - __rwmsr_on_cpus(mask, msr_no, msrs, __wrmsr_on_cpu); -} -EXPORT_SYMBOL(wrmsr_on_cpus); - struct msr *msrs_alloc(void) { struct msr *msrs = NULL; @@ -137,100 +21,3 @@ void msrs_free(struct msr *msrs) free_percpu(msrs); } EXPORT_SYMBOL(msrs_free); - -/* These "safe" variants are slower and should be used when the target MSR - may not actually exist. */ -static void __rdmsr_safe_on_cpu(void *info) -{ - struct msr_info *rv = info; - - rv->err = rdmsr_safe(rv->msr_no, &rv->reg.l, &rv->reg.h); -} - -static void __wrmsr_safe_on_cpu(void *info) -{ - struct msr_info *rv = info; - - rv->err = wrmsr_safe(rv->msr_no, rv->reg.l, rv->reg.h); -} - -int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) -{ - int err; - struct msr_info rv; - - memset(&rv, 0, sizeof(rv)); - - rv.msr_no = msr_no; - err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1); - *l = rv.reg.l; - *h = rv.reg.h; - - return err ? err : rv.err; -} -EXPORT_SYMBOL(rdmsr_safe_on_cpu); - -int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) -{ - int err; - struct msr_info rv; - - memset(&rv, 0, sizeof(rv)); - - rv.msr_no = msr_no; - rv.reg.l = l; - rv.reg.h = h; - err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1); - - return err ? err : rv.err; -} -EXPORT_SYMBOL(wrmsr_safe_on_cpu); - -/* - * These variants are significantly slower, but allows control over - * the entire 32-bit GPR set. - */ -struct msr_regs_info { - u32 *regs; - int err; -}; - -static void __rdmsr_safe_regs_on_cpu(void *info) -{ - struct msr_regs_info *rv = info; - - rv->err = rdmsr_safe_regs(rv->regs); -} - -static void __wrmsr_safe_regs_on_cpu(void *info) -{ - struct msr_regs_info *rv = info; - - rv->err = wrmsr_safe_regs(rv->regs); -} - -int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) -{ - int err; - struct msr_regs_info rv; - - rv.regs = regs; - rv.err = -EIO; - err = smp_call_function_single(cpu, __rdmsr_safe_regs_on_cpu, &rv, 1); - - return err ? err : rv.err; -} -EXPORT_SYMBOL(rdmsr_safe_regs_on_cpu); - -int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) -{ - int err; - struct msr_regs_info rv; - - rv.regs = regs; - rv.err = -EIO; - err = smp_call_function_single(cpu, __wrmsr_safe_regs_on_cpu, &rv, 1); - - return err ? err : rv.err; -} -EXPORT_SYMBOL(wrmsr_safe_regs_on_cpu); -- cgit v1.2.3 From 45a94d7cd45ed991914011919e7d40eb6d2546d1 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 16 Dec 2009 16:25:42 -0800 Subject: x86, cpuid: Add "volatile" to asm in native_cpuid() xsave_cntxt_init() does something like: cpuid(0xd, ..); // find out what features FP/SSE/.. etc are supported xsetbv(); // enable the features known to OS cpuid(0xd, ..); // find out the size of the context for features enabled Depending on what features get enabled in xsetbv(), value of the cpuid.eax=0xd.ecx=0.ebx changes correspondingly (representing the size of the context that is enabled). As we don't have volatile keyword for native_cpuid(), gcc 4.1.2 optimizes away the second cpuid and the kernel continues to use the cpuid information obtained before xsetbv(), ultimately leading to kernel crash on processors supporting more state than the legacy FP/SSE. Add "volatile" for native_cpuid(). Signed-off-by: Suresh Siddha LKML-Reference: <1261009542.2745.55.camel@sbs-t61.sc.intel.com> Cc: stable@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 6f8ec1c37e0a..fc801bab1b3b 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -181,7 +181,7 @@ static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { /* ecx is often an input as well as an output. */ - asm("cpuid" + asm volatile("cpuid" : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), -- cgit v1.2.3 From 329962503692b42d8088f31584e42d52db179d52 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 15 Dec 2009 17:59:02 -0800 Subject: x86: Fix checking of SRAT when node 0 ram is not from 0 Found one system that boot from socket1 instead of socket0, SRAT get rejected... [ 0.000000] SRAT: Node 1 PXM 0 0-a0000 [ 0.000000] SRAT: Node 1 PXM 0 100000-80000000 [ 0.000000] SRAT: Node 1 PXM 0 100000000-2080000000 [ 0.000000] SRAT: Node 0 PXM 1 2080000000-4080000000 [ 0.000000] SRAT: Node 2 PXM 2 4080000000-6080000000 [ 0.000000] SRAT: Node 3 PXM 3 6080000000-8080000000 [ 0.000000] SRAT: Node 4 PXM 4 8080000000-a080000000 [ 0.000000] SRAT: Node 5 PXM 5 a080000000-c080000000 [ 0.000000] SRAT: Node 6 PXM 6 c080000000-e080000000 [ 0.000000] SRAT: Node 7 PXM 7 e080000000-10080000000 ... [ 0.000000] NUMA: Allocated memnodemap from 500000 - 701040 [ 0.000000] NUMA: Using 20 for the hash shift. [ 0.000000] Adding active range (0, 0x2080000, 0x4080000) 0 entries of 3200 used [ 0.000000] Adding active range (1, 0x0, 0x96) 1 entries of 3200 used [ 0.000000] Adding active range (1, 0x100, 0x7f750) 2 entries of 3200 used [ 0.000000] Adding active range (1, 0x100000, 0x2080000) 3 entries of 3200 used [ 0.000000] Adding active range (2, 0x4080000, 0x6080000) 4 entries of 3200 used [ 0.000000] Adding active range (3, 0x6080000, 0x8080000) 5 entries of 3200 used [ 0.000000] Adding active range (4, 0x8080000, 0xa080000) 6 entries of 3200 used [ 0.000000] Adding active range (5, 0xa080000, 0xc080000) 7 entries of 3200 used [ 0.000000] Adding active range (6, 0xc080000, 0xe080000) 8 entries of 3200 used [ 0.000000] Adding active range (7, 0xe080000, 0x10080000) 9 entries of 3200 used [ 0.000000] SRAT: PXMs only cover 917504MB of your 1048566MB e820 RAM. Not used. [ 0.000000] SRAT: SRAT not used. the early_node_map is not sorted because node0 with non zero start come first. so try to sort it right away after all regions are registered. also fixs refression by 8716273c (x86: Export srat physical topology) -v2: make it more solid to handle cross node case like node0 [0,4g), [8,12g) and node1 [4g, 8g), [12g, 16g) -v3: update comments. Reported-and-tested-by: Jens Axboe Signed-off-by: Yinghai Lu LKML-Reference: <4B2579D2.3010201@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/mm/srat_32.c | 2 ++ arch/x86/mm/srat_64.c | 4 +++- include/linux/mm.h | 3 +++ mm/page_alloc.c | 4 ++-- 4 files changed, 10 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c index 6f8aa33031c7..9324f13492d5 100644 --- a/arch/x86/mm/srat_32.c +++ b/arch/x86/mm/srat_32.c @@ -267,6 +267,8 @@ int __init get_memcfg_from_srat(void) e820_register_active_regions(chunk->nid, chunk->start_pfn, min(chunk->end_pfn, max_pfn)); } + /* for out of order entries in SRAT */ + sort_node_map(); for_each_online_node(nid) { unsigned long start = node_start_pfn[nid]; diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index d89075489664..a27124185fc1 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -317,7 +317,7 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) unsigned long s = nodes[i].start >> PAGE_SHIFT; unsigned long e = nodes[i].end >> PAGE_SHIFT; pxmram += e - s; - pxmram -= absent_pages_in_range(s, e); + pxmram -= __absent_pages_in_range(i, s, e); if ((long)pxmram < 0) pxmram = 0; } @@ -373,6 +373,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) for_each_node_mask(i, nodes_parsed) e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, nodes[i].end >> PAGE_SHIFT); + /* for out of order entries in SRAT */ + sort_node_map(); if (!nodes_cover_memory(nodes)) { bad_srat(); return -1; diff --git a/include/linux/mm.h b/include/linux/mm.h index 24c395694f4d..20a2036f3a94 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1022,6 +1022,9 @@ extern void add_active_range(unsigned int nid, unsigned long start_pfn, extern void remove_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); extern void remove_all_active_ranges(void); +void sort_node_map(void); +unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn, + unsigned long end_pfn); extern unsigned long absent_pages_in_range(unsigned long start_pfn, unsigned long end_pfn); extern void get_pfn_range_for_nid(unsigned int nid, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2bc2ac63f41e..873c86308b4e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3573,7 +3573,7 @@ static unsigned long __meminit zone_spanned_pages_in_node(int nid, * Return the number of holes in a range on a node. If nid is MAX_NUMNODES, * then all holes in the requested range will be accounted for. */ -static unsigned long __meminit __absent_pages_in_range(int nid, +unsigned long __meminit __absent_pages_in_range(int nid, unsigned long range_start_pfn, unsigned long range_end_pfn) { @@ -4102,7 +4102,7 @@ static int __init cmp_node_active_region(const void *a, const void *b) } /* sort the node_map by start_pfn */ -static void __init sort_node_map(void) +void __init sort_node_map(void) { sort(early_node_map, (size_t)nr_nodemap_entries, sizeof(struct node_active_region), -- cgit v1.2.3 From 6a1e008a0915f502eb026fb995ea3e49d5b017f7 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 15 Dec 2009 17:59:03 -0800 Subject: x86: Increase MAX_EARLY_RES; insufficient on 32-bit NUMA Due to recent changes wakeup and mptable, we run out of early reservations on 32-bit NUMA. Thus, adjust the available number. Signed-off-by: Yinghai Lu LKML-Reference: <4B22D754.2020706@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/e820.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index f50447d961c0..05ed7ab2ca48 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -724,7 +724,7 @@ core_initcall(e820_mark_nvs_memory); /* * Early reserved memory areas. */ -#define MAX_EARLY_RES 20 +#define MAX_EARLY_RES 32 struct early_res { u64 start, end; -- cgit v1.2.3 From a4636818f8e0991f32d9528f39cf4f3d6a7d30a3 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 17 Dec 2009 11:43:29 -0600 Subject: cpumask: rename tsk_cpumask to tsk_cpus_allowed Noone uses this wrapper yet, and Ingo asked that it be kept consistent with current task_struct usage. (One user crept in via linux-next: fixed) Signed-off-by: Rusty Russell Cc: Tejun Heo --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 2 +- include/linux/sched.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index a9df9441a9a2..f125e5c551c0 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1136,7 +1136,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, if (!alloc_cpumask_var(&oldmask, GFP_KERNEL)) return -ENOMEM; - cpumask_copy(oldmask, tsk_cpumask(current)); + cpumask_copy(oldmask, tsk_cpus_allowed(current)); set_cpus_allowed_ptr(current, cpumask_of(pol->cpu)); if (smp_processor_id() != pol->cpu) { diff --git a/include/linux/sched.h b/include/linux/sched.h index 244c287a5ac1..4d7adb282bdd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1555,7 +1555,7 @@ struct task_struct { }; /* Future-safe accessor for struct task_struct's cpus_allowed. */ -#define tsk_cpumask(tsk) (&(tsk)->cpus_allowed) +#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) /* * Priority of a process goes from 0..MAX_PRIO-1, valid RT -- cgit v1.2.3 From 61c1917f47f73c968e92d04d15370b1dc3ec4592 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 17 Dec 2009 05:40:33 +0100 Subject: perf events, x86/stacktrace: Make stack walking optional The current print_context_stack helper that does the stack walking job is good for usual stacktraces as it walks through all the stack and reports even addresses that look unreliable, which is nice when we don't have frame pointers for example. But we have users like perf that only require reliable stacktraces, and those may want a more adapted stack walker, so lets make this function a callback in stacktrace_ops that users can tune for their needs. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras LKML-Reference: <1261024834-5336-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/stacktrace.h | 18 ++++++++++++++++++ arch/x86/kernel/cpu/perf_event.c | 1 + arch/x86/kernel/dumpstack.c | 9 +++++---- arch/x86/kernel/dumpstack.h | 6 ------ arch/x86/kernel/dumpstack_32.c | 2 +- arch/x86/kernel/dumpstack_64.c | 4 ++-- arch/x86/kernel/stacktrace.c | 18 ++++++++++-------- arch/x86/oprofile/backtrace.c | 9 +++++---- kernel/trace/trace_sysprof.c | 1 + 9 files changed, 43 insertions(+), 25 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index cf86a5e73815..6c75151a3cca 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -5,6 +5,23 @@ extern int kstack_depth_to_print; int x86_is_stack_id(int id, char *name); +struct thread_info; +struct stacktrace_ops; + +typedef unsigned long (*walk_stack_t)(struct thread_info *tinfo, + unsigned long *stack, + unsigned long bp, + const struct stacktrace_ops *ops, + void *data, + unsigned long *end, + int *graph); + +extern unsigned long +print_context_stack(struct thread_info *tinfo, + unsigned long *stack, unsigned long bp, + const struct stacktrace_ops *ops, void *data, + unsigned long *end, int *graph); + /* Generic stack tracer with callbacks */ struct stacktrace_ops { @@ -14,6 +31,7 @@ struct stacktrace_ops { void (*address)(void *data, unsigned long address, int reliable); /* On negative return stop dumping */ int (*stack)(void *data, char *name); + walk_stack_t walk_stack; }; void dump_trace(struct task_struct *tsk, struct pt_regs *regs, diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 45506d5dd8df..d3802ee5a416 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2336,6 +2336,7 @@ static const struct stacktrace_ops backtrace_ops = { .warning_symbol = backtrace_warning_symbol, .stack = backtrace_stack, .address = backtrace_address, + .walk_stack = print_context_stack, }; #include "../dumpstack.h" diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 0a0aa1cec8f1..8aaa119b7cad 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -141,10 +141,11 @@ static void print_trace_address(void *data, unsigned long addr, int reliable) } static const struct stacktrace_ops print_trace_ops = { - .warning = print_trace_warning, - .warning_symbol = print_trace_warning_symbol, - .stack = print_trace_stack, - .address = print_trace_address, + .warning = print_trace_warning, + .warning_symbol = print_trace_warning_symbol, + .stack = print_trace_stack, + .address = print_trace_address, + .walk_stack = print_context_stack, }; void diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h index 81086c227ab7..4fd1420faffa 100644 --- a/arch/x86/kernel/dumpstack.h +++ b/arch/x86/kernel/dumpstack.h @@ -14,12 +14,6 @@ #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) #endif -extern unsigned long -print_context_stack(struct thread_info *tinfo, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data, - unsigned long *end, int *graph); - extern void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, char *log_lvl); diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index e0ed4c7abb62..ae775ca47b25 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -58,7 +58,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, context = (struct thread_info *) ((unsigned long)stack & (~(THREAD_SIZE - 1))); - bp = print_context_stack(context, stack, bp, ops, data, NULL, &graph); + bp = ops->walk_stack(context, stack, bp, ops, data, NULL, &graph); stack = (unsigned long *)context->previous_esp; if (!stack) diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index b13af53883aa..0ad9597073f5 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -188,8 +188,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (ops->stack(data, id) < 0) break; - bp = print_context_stack(tinfo, stack, bp, ops, - data, estack_end, &graph); + bp = ops->walk_stack(tinfo, stack, bp, ops, + data, estack_end, &graph); ops->stack(data, ""); /* * We link to the next stack via the diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index c3eb207181fe..922eefbb3f6c 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -53,17 +53,19 @@ save_stack_address_nosched(void *data, unsigned long addr, int reliable) } static const struct stacktrace_ops save_stack_ops = { - .warning = save_stack_warning, - .warning_symbol = save_stack_warning_symbol, - .stack = save_stack_stack, - .address = save_stack_address, + .warning = save_stack_warning, + .warning_symbol = save_stack_warning_symbol, + .stack = save_stack_stack, + .address = save_stack_address, + .walk_stack = print_context_stack, }; static const struct stacktrace_ops save_stack_ops_nosched = { - .warning = save_stack_warning, - .warning_symbol = save_stack_warning_symbol, - .stack = save_stack_stack, - .address = save_stack_address_nosched, + .warning = save_stack_warning, + .warning_symbol = save_stack_warning_symbol, + .stack = save_stack_stack, + .address = save_stack_address_nosched, + .walk_stack = print_context_stack, }; /* diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index 044897be021f..3855096c59b8 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c @@ -41,10 +41,11 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) } static struct stacktrace_ops backtrace_ops = { - .warning = backtrace_warning, - .warning_symbol = backtrace_warning_symbol, - .stack = backtrace_stack, - .address = backtrace_address, + .warning = backtrace_warning, + .warning_symbol = backtrace_warning_symbol, + .stack = backtrace_stack, + .address = backtrace_address, + .walk_stack = print_context_stack, }; struct frame_head { diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index f6693969287d..a7974a552ca9 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -93,6 +93,7 @@ static const struct stacktrace_ops backtrace_ops = { .warning_symbol = backtrace_warning_symbol, .stack = backtrace_stack, .address = backtrace_address, + .walk_stack = print_context_stack, }; static int -- cgit v1.2.3 From 06d65bda75341485d32f33da474b0664819ad497 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 17 Dec 2009 05:40:34 +0100 Subject: perf events, x86/stacktrace: Fix performance/softlockup by providing a special frame pointer-only stack walker It's just wasteful for stacktrace users like perf to walk through every entries on the stack whereas these only accept reliable ones, ie: that the frame pointer validates. Since perf requires pure reliable stacktraces, it needs a stack walker based on frame pointers-only to optimize the stacktrace processing. This might solve some near-lockup scenarios that can be triggered by call-graph tracing timer events. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras LKML-Reference: <1261024834-5336-2-git-send-regression-fweisbec@gmail.com> [ v2: fix for modular builds and small detail tidyup ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/stacktrace.h | 6 ++++++ arch/x86/kernel/cpu/perf_event.c | 2 +- arch/x86/kernel/dumpstack.c | 28 ++++++++++++++++++++++++++-- 3 files changed, 33 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 6c75151a3cca..35e89122a42f 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -22,6 +22,12 @@ print_context_stack(struct thread_info *tinfo, const struct stacktrace_ops *ops, void *data, unsigned long *end, int *graph); +extern unsigned long +print_context_stack_bp(struct thread_info *tinfo, + unsigned long *stack, unsigned long bp, + const struct stacktrace_ops *ops, void *data, + unsigned long *end, int *graph); + /* Generic stack tracer with callbacks */ struct stacktrace_ops { diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index d3802ee5a416..c223b7e895d9 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2336,7 +2336,7 @@ static const struct stacktrace_ops backtrace_ops = { .warning_symbol = backtrace_warning_symbol, .stack = backtrace_stack, .address = backtrace_address, - .walk_stack = print_context_stack, + .walk_stack = print_context_stack_bp, }; #include "../dumpstack.h" diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 8aaa119b7cad..c56bc2873030 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -109,6 +109,30 @@ print_context_stack(struct thread_info *tinfo, } return bp; } +EXPORT_SYMBOL_GPL(print_context_stack); + +unsigned long +print_context_stack_bp(struct thread_info *tinfo, + unsigned long *stack, unsigned long bp, + const struct stacktrace_ops *ops, void *data, + unsigned long *end, int *graph) +{ + struct stack_frame *frame = (struct stack_frame *)bp; + unsigned long *ret_addr = &frame->return_address; + + while (valid_stack_ptr(tinfo, ret_addr, sizeof(*ret_addr), end)) { + unsigned long addr = *ret_addr; + + if (__kernel_text_address(addr)) { + ops->address(data, addr, 1); + frame = frame->next_frame; + ret_addr = &frame->return_address; + print_ftrace_graph_addr(addr, data, ops, tinfo, graph); + } + } + return (unsigned long)frame; +} +EXPORT_SYMBOL_GPL(print_context_stack_bp); static void @@ -143,8 +167,8 @@ static void print_trace_address(void *data, unsigned long addr, int reliable) static const struct stacktrace_ops print_trace_ops = { .warning = print_trace_warning, .warning_symbol = print_trace_warning_symbol, - .stack = print_trace_stack, - .address = print_trace_address, + .stack = print_trace_stack, + .address = print_trace_address, .walk_stack = print_context_stack, }; -- cgit v1.2.3 From 4beb3d6d144c41525541cce2b611858b2645c725 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 16 Dec 2009 17:39:48 -0800 Subject: x86: Don't use POSIX character classes in gen-insn-attr-x86.awk Not all awk implementations (including the default awk in Ubuntu 9.10) support POSIX character classes. Since x86-opcode-map.txt is plain ASCII, we can just use explicit ranges for lower case, alphabetic, and alphanumeric characters instead. Signed-off-by: Roland Dreier Acked-by: Masami Hiramatsu LKML-Reference: Signed-off-by: H. Peter Anvin --- arch/x86/tools/gen-insn-attr-x86.awk | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index 7a6850683c34..eaf11f52fc0b 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -6,8 +6,6 @@ # Awk implementation sanity check function check_awk_implement() { - if (!match("abc", "[[:lower:]]+")) - return "Your awk doesn't support charactor-class." if (sprintf("%x", 0) != "0") return "Your awk has a printf-format problem." return "" @@ -44,12 +42,12 @@ BEGIN { delete gtable delete atable - opnd_expr = "^[[:alpha:]/]" + opnd_expr = "^[A-Za-z/]" ext_expr = "^\\(" sep_expr = "^\\|$" - group_expr = "^Grp[[:alnum:]]+" + group_expr = "^Grp[0-9A-Za-z]+" - imm_expr = "^[IJAO][[:lower:]]" + imm_expr = "^[IJAO][a-z]" imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" @@ -62,7 +60,7 @@ BEGIN { imm_flag["Ob"] = "INAT_MOFFSET" imm_flag["Ov"] = "INAT_MOFFSET" - modrm_expr = "^([CDEGMNPQRSUVW/][[:lower:]]+|NTA|T[012])" + modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])" force64_expr = "\\([df]64\\)" rex_expr = "^REX(\\.[XRWB]+)*" fpu_expr = "^ESC" # TODO -- cgit v1.2.3 From 04a1e62c2cec820501f93526ad1e46073b802dc4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 17 Dec 2009 07:04:56 -0800 Subject: x86/ptrace: make genregs[32]_get/set more robust The loop condition is fragile: we compare an unsigned value to zero, and then decrement it by something larger than one in the loop. All the callers should be passing in appropriately aligned buffer lengths, but it's better to just not rely on it, and have some appropriate defensive loop limits. Acked-by: Roland McGrath Signed-off-by: Linus Torvalds --- arch/x86/kernel/ptrace.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 2779321046bd..017d937639fe 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -509,14 +509,14 @@ static int genregs_get(struct task_struct *target, { if (kbuf) { unsigned long *k = kbuf; - while (count > 0) { + while (count >= sizeof(*k)) { *k++ = getreg(target, pos); count -= sizeof(*k); pos += sizeof(*k); } } else { unsigned long __user *u = ubuf; - while (count > 0) { + while (count >= sizeof(*u)) { if (__put_user(getreg(target, pos), u++)) return -EFAULT; count -= sizeof(*u); @@ -535,14 +535,14 @@ static int genregs_set(struct task_struct *target, int ret = 0; if (kbuf) { const unsigned long *k = kbuf; - while (count > 0 && !ret) { + while (count >= sizeof(*k) && !ret) { ret = putreg(target, pos, *k++); count -= sizeof(*k); pos += sizeof(*k); } } else { const unsigned long __user *u = ubuf; - while (count > 0 && !ret) { + while (count >= sizeof(*u) && !ret) { unsigned long word; ret = __get_user(word, u++); if (ret) @@ -1458,14 +1458,14 @@ static int genregs32_get(struct task_struct *target, { if (kbuf) { compat_ulong_t *k = kbuf; - while (count > 0) { + while (count >= sizeof(*k)) { getreg32(target, pos, k++); count -= sizeof(*k); pos += sizeof(*k); } } else { compat_ulong_t __user *u = ubuf; - while (count > 0) { + while (count >= sizeof(*u)) { compat_ulong_t word; getreg32(target, pos, &word); if (__put_user(word, u++)) @@ -1486,14 +1486,14 @@ static int genregs32_set(struct task_struct *target, int ret = 0; if (kbuf) { const compat_ulong_t *k = kbuf; - while (count > 0 && !ret) { + while (count >= sizeof(*k) && !ret) { ret = putreg32(target, pos, *k++); count -= sizeof(*k); pos += sizeof(*k); } } else { const compat_ulong_t __user *u = ubuf; - while (count > 0 && !ret) { + while (count >= sizeof(*u) && !ret) { compat_ulong_t word; ret = __get_user(word, u++); if (ret) -- cgit v1.2.3 From 6c56ccecf05fafe100ab4ea94f6fccbf5ff00db7 Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Thu, 17 Dec 2009 12:27:02 -0800 Subject: x86: Reenable TSC sync check at boot, even with NONSTOP_TSC Commit 83ce4009 did the following change If the TSC is constant and non-stop, also set it reliable. But, there seems to be few systems that will end up with TSC warp across sockets, depending on how the cpus come out of reset. Skipping TSC sync test on such systems may result in time inconsistency later. So, reenable TSC sync test even on constant and non-stop TSC systems. Set, sched_clock_stable to 1 by default and reset it in mark_tsc_unstable, if TSC sync fails. This change still gives perf benefit mentioned in 83ce4009 for systems where TSC is reliable. Signed-off-by: Venkatesh Pallipadi Acked-by: Suresh Siddha LKML-Reference: <20091217202702.GA18015@linux-os.sc.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/intel.c | 1 - arch/x86/kernel/tsc.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 9c31e8b09d2c..879666f4d871 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -70,7 +70,6 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); sched_clock_stable = 1; } diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index cd982f48e23e..597683aa5ba0 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -763,6 +763,7 @@ void mark_tsc_unstable(char *reason) { if (!tsc_unstable) { tsc_unstable = 1; + sched_clock_stable = 0; printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); /* Change only the rating, when not registered */ if (clocksource_tsc.mult) -- cgit v1.2.3 From 8c63450718ea62ee3a70bffde170b4d15fc72d3c Mon Sep 17 00:00:00 2001 From: "akpm@linux-foundation.org" Date: Thu, 17 Dec 2009 15:26:36 -0800 Subject: x86: Fix objdump version check in arch/x86/tools/chkobjdump.awk It says Warning: objdump version is older than 2.19 Warning: Skipping posttest. because it used the wrong field from `objdump -v': akpm:/usr/src/25> /opt/crosstool/gcc-4.0.2-glibc-2.3.6/x86_64-unknown-linux-gnu/bin/x86_64-unknown-linux-gnu-objdump -v GNU objdump 2.16.1 Copyright 2005 Free Software Foundation, Inc. This program is free software; you may redistribute it under the terms of the GNU General Public License. This program has absolutely no warranty. Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Andrew Morton LKML-Reference: <200912172326.nBHNQaQl024796@imap1.linux-foundation.org> Signed-off-by: H. Peter Anvin Cc: Masami Hiramatsu --- arch/x86/tools/chkobjdump.awk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/tools/chkobjdump.awk b/arch/x86/tools/chkobjdump.awk index 0d13cd9fdcff..5bbb5a33f220 100644 --- a/arch/x86/tools/chkobjdump.awk +++ b/arch/x86/tools/chkobjdump.awk @@ -9,7 +9,7 @@ BEGIN { } /^GNU/ { - split($4, ver, "."); + split($3, ver, "."); if (ver[1] > od_ver || (ver[1] == od_ver && ver[2] >= od_sver)) { exit 1; -- cgit v1.2.3 From 18374d89e5fe96772102f44f535efb1198d9be08 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 17 Dec 2009 18:29:46 -0800 Subject: x86, irq: Allow 0xff for /proc/irq/[n]/smp_affinity on an 8-cpu system John Blackwood reported: > on an older Dell PowerEdge 6650 system with 8 cpus (4 are hyper-threaded), > and 32 bit (x86) kernel, once you change the irq smp_affinity of an irq > to be less than all cpus in the system, you can never change really the > irq smp_affinity back to be all cpus in the system (0xff) again, > even though no error status is returned on the "/bin/echo ff > > /proc/irq/[n]/smp_affinity" operation. > > This is due to that fact that BAD_APICID has the same value as > all cpus (0xff) on 32bit kernels, and thus the value returned from > set_desc_affinity() via the cpu_mask_to_apicid_and() function is treated > as a failure in set_ioapic_affinity_irq_desc(), and no affinity changes > are made. set_desc_affinity() is already checking if the incoming cpu mask intersects with the cpu online mask or not. So there is no need for the apic op cpu_mask_to_apicid_and() to check again and return BAD_APICID. Remove the BAD_APICID return value from cpu_mask_to_apicid_and() and also fix set_desc_affinity() to return -1 instead of using BAD_APICID to represent error conditions (as cpu_mask_to_apicid_and() can return logical or physical apicid values and BAD_APICID is really to represent bad physical apic id). Reported-by: John Blackwood Root-caused-by: John Blackwood Signed-off-by: Suresh Siddha LKML-Reference: <1261103386.2535.409.camel@sbs-t61> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/hw_irq.h | 3 ++- arch/x86/kernel/apic/apic_flat_64.c | 5 +---- arch/x86/kernel/apic/bigsmp_32.c | 5 +---- arch/x86/kernel/apic/io_apic.c | 32 ++++++++++++++------------------ arch/x86/kernel/apic/x2apic_cluster.c | 5 +---- arch/x86/kernel/apic/x2apic_phys.c | 5 +---- arch/x86/kernel/apic/x2apic_uv_x.c | 5 +---- arch/x86/kernel/uv_irq.c | 3 +-- 8 files changed, 22 insertions(+), 41 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 08c48a81841f..eeac829a0f44 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -103,7 +103,8 @@ extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *); extern void send_cleanup_vector(struct irq_cfg *); struct irq_desc; -extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *); +extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *, + unsigned int *dest_id); extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr); extern void setup_ioapic_dest(void); diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index d0c99abc26c3..eacbd2b31d27 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -306,10 +306,7 @@ physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, if (cpumask_test_cpu(cpu, cpu_online_mask)) break; } - if (cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - - return BAD_APICID; + return per_cpu(x86_cpu_to_apicid, cpu); } struct apic apic_physflat = { diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 38dcecfa5818..cb804c5091b9 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -131,10 +131,7 @@ static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, if (cpumask_test_cpu(cpu, cpu_online_mask)) break; } - if (cpu < nr_cpu_ids) - return bigsmp_cpu_to_logical_apicid(cpu); - - return BAD_APICID; + return bigsmp_cpu_to_logical_apicid(cpu); } static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index d5d498fbee4b..98ced709e829 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2276,26 +2276,28 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq /* * Either sets desc->affinity to a valid value, and returns - * ->cpu_mask_to_apicid of that, or returns BAD_APICID and + * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and * leaves desc->affinity untouched. */ unsigned int -set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) +set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask, + unsigned int *dest_id) { struct irq_cfg *cfg; unsigned int irq; if (!cpumask_intersects(mask, cpu_online_mask)) - return BAD_APICID; + return -1; irq = desc->irq; cfg = desc->chip_data; if (assign_irq_vector(irq, cfg, mask)) - return BAD_APICID; + return -1; cpumask_copy(desc->affinity, mask); - return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); + *dest_id = apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); + return 0; } static int @@ -2311,12 +2313,11 @@ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) cfg = desc->chip_data; spin_lock_irqsave(&ioapic_lock, flags); - dest = set_desc_affinity(desc, mask); - if (dest != BAD_APICID) { + ret = set_desc_affinity(desc, mask, &dest); + if (!ret) { /* Only the high 8 bits are valid. */ dest = SET_APIC_LOGICAL_ID(dest); __target_IO_APIC_irq(irq, dest, cfg); - ret = 0; } spin_unlock_irqrestore(&ioapic_lock, flags); @@ -3351,8 +3352,7 @@ static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) struct msi_msg msg; unsigned int dest; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + if (set_desc_affinity(desc, mask, &dest)) return -1; cfg = desc->chip_data; @@ -3384,8 +3384,7 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) if (get_irte(irq, &irte)) return -1; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + if (set_desc_affinity(desc, mask, &dest)) return -1; irte.vector = cfg->vector; @@ -3567,8 +3566,7 @@ static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) struct msi_msg msg; unsigned int dest; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + if (set_desc_affinity(desc, mask, &dest)) return -1; cfg = desc->chip_data; @@ -3623,8 +3621,7 @@ static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) struct msi_msg msg; unsigned int dest; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + if (set_desc_affinity(desc, mask, &dest)) return -1; cfg = desc->chip_data; @@ -3730,8 +3727,7 @@ static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) struct irq_cfg *cfg; unsigned int dest; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + if (set_desc_affinity(desc, mask, &dest)) return -1; cfg = desc->chip_data; diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index a5371ec36776..cf69c59f4910 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -148,10 +148,7 @@ x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, break; } - if (cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_logical_apicid, cpu); - - return BAD_APICID; + return per_cpu(x86_cpu_to_logical_apicid, cpu); } static unsigned int x2apic_cluster_phys_get_apic_id(unsigned long x) diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index a8989aadc99a..8972f38c5ced 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -146,10 +146,7 @@ x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, break; } - if (cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - - return BAD_APICID; + return per_cpu(x86_cpu_to_apicid, cpu); } static unsigned int x2apic_phys_get_apic_id(unsigned long x) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index b684bb303cbf..d56b0efb2057 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -225,10 +225,7 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, if (cpumask_test_cpu(cpu, cpu_online_mask)) break; } - if (cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - - return BAD_APICID; + return per_cpu(x86_cpu_to_apicid, cpu); } static unsigned int x2apic_get_apic_id(unsigned long x) diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c index 61d805df4c91..ece73d8e3240 100644 --- a/arch/x86/kernel/uv_irq.c +++ b/arch/x86/kernel/uv_irq.c @@ -215,8 +215,7 @@ static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) unsigned long mmr_offset; unsigned mmr_pnode; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + if (set_desc_affinity(desc, mask, &dest)) return -1; mmr_value = 0; -- cgit v1.2.3 From 99e8c5a3b875a34d894a711c9a3669858d6adf45 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 17 Dec 2009 01:33:54 +0100 Subject: hw-breakpoints: Fix hardware breakpoints -> perf events dependency The kbuild's select command doesn't propagate through the config dependencies. Hence the current rules of hardware breakpoint's config can't ensure perf can never be disabled under us. We have: config X86 selects HAVE_HW_BREAKPOINTS config HAVE_HW_BREAKPOINTS select PERF_EVENTS config PERF_EVENTS [...] x86 will select the breakpoints but that won't propagate to perf events. The user can still disable the latter, but it is necessary for the breakpoints. What we need is: - x86 selects HAVE_HW_BREAKPOINTS and PERF_EVENTS - HAVE_HW_BREAKPOINTS depends on PERF_EVENTS so that we ensure PERF_EVENTS is enabled and frozen for x86. This fixes the following kind of build errors: In file included from arch/x86/kernel/hw_breakpoint.c:31: include/linux/hw_breakpoint.h: In function 'hw_breakpoint_addr': include/linux/hw_breakpoint.h:39: error: 'struct perf_event' has no member named 'attr' v2: Select also ANON_INODES from x86, required for perf Reported-by: Cyrill Gorcunov Reported-by: Michal Marek Reported-by: Andrew Randrianasulu Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Randy Dunlap Cc: K.Prasad LKML-Reference: <1261010034-7786-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- arch/Kconfig | 4 +--- arch/x86/Kconfig | 2 ++ 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/Kconfig b/arch/Kconfig index d82875820a15..9d055b4f0585 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -135,9 +135,7 @@ config HAVE_DEFAULT_NO_SPIN_MUTEXES config HAVE_HW_BREAKPOINT bool - depends on HAVE_PERF_EVENTS - select ANON_INODES - select PERF_EVENTS + depends on PERF_EVENTS config HAVE_USER_RETURN_NOTIFIER bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3b2a5aca4edb..55298e891571 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -50,6 +50,8 @@ config X86 select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA select HAVE_HW_BREAKPOINT + select PERF_EVENTS + select ANON_INODES select HAVE_ARCH_KMEMCHECK select HAVE_USER_RETURN_NOTIFIER -- cgit v1.2.3 From 8bee738bb1979c8bf7b42716b772522ab7d26b0c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 18 Dec 2009 10:40:13 -0500 Subject: x86: Fix objdump version check in chkobjdump.awk for different formats. Different version of objdump says its version in different way; GNU objdump 2.16.1 or GNU objdump version 2.19.51.0.14-1.fc11 20090722 This patch uses the first argument which starts with a number as version string. Changes in v2: - Remove unneeded increment. Signed-off-by: Masami Hiramatsu LKML-Reference: <20091218154012.16960.5113.stgit@dhcp-100-2-132.bos.redhat.com> Suggested-by: H. Peter Anvin Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andrew Morton Signed-off-by: H. Peter Anvin --- arch/x86/tools/chkobjdump.awk | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/tools/chkobjdump.awk b/arch/x86/tools/chkobjdump.awk index 5bbb5a33f220..fd1ab80be0de 100644 --- a/arch/x86/tools/chkobjdump.awk +++ b/arch/x86/tools/chkobjdump.awk @@ -8,14 +8,24 @@ BEGIN { od_sver = 19; } -/^GNU/ { - split($3, ver, "."); +/^GNU objdump/ { + verstr = "" + for (i = 3; i <= NF; i++) + if (match($(i), "^[0-9]")) { + verstr = $(i); + break; + } + if (verstr == "") { + printf("Warning: Failed to find objdump version number.\n"); + exit 0; + } + split(verstr, ver, "."); if (ver[1] > od_ver || (ver[1] == od_ver && ver[2] >= od_sver)) { exit 1; } else { printf("Warning: objdump version %s is older than %d.%d\n", - $4, od_ver, od_sver); + verstr, od_ver, od_sver); print("Warning: Skipping posttest."); # Logic is inverted, because we just skip test without error. exit 0; -- cgit v1.2.3 From 0f764806438d5576ac58898332e5dcf30bb8a679 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 21 Dec 2009 15:51:23 +0100 Subject: x86/amd-iommu: Fix initialization failure panic The assumption that acpi_table_parse passes the return value of the hanlder function to the caller proved wrong recently. The return value of the handler function is totally ignored. This makes the initialization code for AMD IOMMU buggy in a way that could cause a kernel panic on initialization. This patch fixes the issue in the AMD IOMMU driver. Cc: stable@kernel.org Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 1dca9c34eaeb..fb490ce7dd55 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -137,6 +137,11 @@ int amd_iommus_present; /* IOMMUs have a non-present cache? */ bool amd_iommu_np_cache __read_mostly; +/* + * Set to true if ACPI table parsing and hardware intialization went properly + */ +static bool amd_iommu_initialized; + /* * List of protection domains - used during resume */ @@ -929,6 +934,8 @@ static int __init init_iommu_all(struct acpi_table_header *table) } WARN_ON(p != end); + amd_iommu_initialized = true; + return 0; } @@ -1263,6 +1270,9 @@ static int __init amd_iommu_init(void) if (acpi_table_parse("IVRS", init_iommu_all) != 0) goto free; + if (!amd_iommu_initialized) + goto free; + if (acpi_table_parse("IVRS", init_memory_definitions) != 0) goto free; -- cgit v1.2.3 From 1d9cb470a755409ce97c3376174b1e234bd20371 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Sun, 20 Dec 2009 12:19:14 -0700 Subject: ACPI: processor: introduce arch_has_acpi_pdc arch dependent helper function that tells us if we should attempt to evaluate _PDC on this machine or not. The x86 implementation assumes that the CPUs in the machine must be homogeneous, and that you cannot mix CPUs of different vendors. Cc: Tony Luck Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Alex Chiang Signed-off-by: Len Brown --- arch/ia64/include/asm/acpi.h | 2 ++ arch/x86/include/asm/acpi.h | 7 +++++++ arch/x86/kernel/acpi/processor.c | 4 +--- drivers/acpi/processor_pdc.c | 3 +++ 4 files changed, 13 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h index 91df9686a0da..3b7888294648 100644 --- a/arch/ia64/include/asm/acpi.h +++ b/arch/ia64/include/asm/acpi.h @@ -132,6 +132,8 @@ extern int __devinitdata pxm_to_nid_map[MAX_PXM_DOMAINS]; extern int __initdata nid_to_pxm_map[MAX_NUMNODES]; #endif +static inline bool arch_has_acpi_pdc(void) { return true; } + #define acpi_unlazy_tlb(x) #ifdef CONFIG_ACPI_NUMA diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 60d2b2db0bc5..d787e6e92bd1 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -142,6 +142,13 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) return max_cstate; } +static inline bool arch_has_acpi_pdc(void) +{ + struct cpuinfo_x86 *c = &cpu_data(0); + return (c->x86_vendor == X86_VENDOR_INTEL || + c->x86_vendor == X86_VENDOR_CENTAUR); +} + #else /* !CONFIG_ACPI */ #define acpi_lapic 0 diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c index d85d1b2432ba..bcb6efe08c5d 100644 --- a/arch/x86/kernel/acpi/processor.c +++ b/arch/x86/kernel/acpi/processor.c @@ -79,9 +79,7 @@ void arch_acpi_processor_init_pdc(struct acpi_processor *pr) struct cpuinfo_x86 *c = &cpu_data(pr->id); pr->pdc = NULL; - if (c->x86_vendor == X86_VENDOR_INTEL || - c->x86_vendor == X86_VENDOR_CENTAUR) - init_intel_pdc(pr, c); + init_intel_pdc(pr, c); return; } diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c index b416c32dda04..931e735e9e37 100644 --- a/drivers/acpi/processor_pdc.c +++ b/drivers/acpi/processor_pdc.c @@ -69,6 +69,9 @@ static int acpi_processor_eval_pdc(struct acpi_processor *pr) void acpi_processor_set_pdc(struct acpi_processor *pr) { + if (arch_has_acpi_pdc() == false) + return; + arch_acpi_processor_init_pdc(pr); acpi_processor_eval_pdc(pr); arch_acpi_processor_cleanup_pdc(pr); -- cgit v1.2.3 From 407cd87c54e76c266245e8faef8dd4a84b7254fe Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Sun, 20 Dec 2009 12:19:19 -0700 Subject: ACPI: processor: unify arch_acpi_processor_init_pdc The x86 and ia64 implementations of arch_acpi_processor_init_pdc() are almost exactly the same. The only difference is in what bits they set in obj_list buffer. Combine the boilerplate memory management code, and leave the arch-specific bit twiddling in separate implementations. Cc: Tony Luck Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Alex Chiang Signed-off-by: Len Brown --- arch/ia64/kernel/acpi-processor.c | 34 +---------------------------- arch/x86/kernel/acpi/processor.c | 34 +---------------------------- drivers/acpi/processor_pdc.c | 45 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 46 insertions(+), 67 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/kernel/acpi-processor.c b/arch/ia64/kernel/acpi-processor.c index dbda7bde6112..ab72d46f52c6 100644 --- a/arch/ia64/kernel/acpi-processor.c +++ b/arch/ia64/kernel/acpi-processor.c @@ -16,31 +16,7 @@ static void init_intel_pdc(struct acpi_processor *pr) { - struct acpi_object_list *obj_list; - union acpi_object *obj; - u32 *buf; - - /* allocate and initialize pdc. It will be used later. */ - obj_list = kmalloc(sizeof(struct acpi_object_list), GFP_KERNEL); - if (!obj_list) { - printk(KERN_ERR "Memory allocation error\n"); - return; - } - - obj = kmalloc(sizeof(union acpi_object), GFP_KERNEL); - if (!obj) { - printk(KERN_ERR "Memory allocation error\n"); - kfree(obj_list); - return; - } - - buf = kmalloc(12, GFP_KERNEL); - if (!buf) { - printk(KERN_ERR "Memory allocation error\n"); - kfree(obj); - kfree(obj_list); - return; - } + u32 *buf = (u32 *)pr->pdc->pointer->buffer.pointer; buf[0] = ACPI_PDC_REVISION_ID; buf[1] = 1; @@ -52,20 +28,12 @@ static void init_intel_pdc(struct acpi_processor *pr) */ buf[2] |= ACPI_PDC_SMP_T_SWCOORD; - obj->type = ACPI_TYPE_BUFFER; - obj->buffer.length = 12; - obj->buffer.pointer = (u8 *) buf; - obj_list->count = 1; - obj_list->pointer = obj; - pr->pdc = obj_list; - return; } /* Initialize _PDC data based on the CPU vendor */ void arch_acpi_processor_init_pdc(struct acpi_processor *pr) { - pr->pdc = NULL; init_intel_pdc(pr); return; } diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c index bcb6efe08c5d..967860b43f2a 100644 --- a/arch/x86/kernel/acpi/processor.c +++ b/arch/x86/kernel/acpi/processor.c @@ -14,31 +14,7 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) { - struct acpi_object_list *obj_list; - union acpi_object *obj; - u32 *buf; - - /* allocate and initialize pdc. It will be used later. */ - obj_list = kmalloc(sizeof(struct acpi_object_list), GFP_KERNEL); - if (!obj_list) { - printk(KERN_ERR "Memory allocation error\n"); - return; - } - - obj = kmalloc(sizeof(union acpi_object), GFP_KERNEL); - if (!obj) { - printk(KERN_ERR "Memory allocation error\n"); - kfree(obj_list); - return; - } - - buf = kmalloc(12, GFP_KERNEL); - if (!buf) { - printk(KERN_ERR "Memory allocation error\n"); - kfree(obj); - kfree(obj_list); - return; - } + u32 *buf = (u32 *)pr->pdc->pointer->buffer.pointer; buf[0] = ACPI_PDC_REVISION_ID; buf[1] = 1; @@ -62,13 +38,6 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) if (!cpu_has(c, X86_FEATURE_MWAIT)) buf[2] &= ~(ACPI_PDC_C_C2C3_FFH); - obj->type = ACPI_TYPE_BUFFER; - obj->buffer.length = 12; - obj->buffer.pointer = (u8 *) buf; - obj_list->count = 1; - obj_list->pointer = obj; - pr->pdc = obj_list; - return; } @@ -78,7 +47,6 @@ void arch_acpi_processor_init_pdc(struct acpi_processor *pr) { struct cpuinfo_x86 *c = &cpu_data(pr->id); - pr->pdc = NULL; init_intel_pdc(pr, c); return; diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c index 931e735e9e37..87946b6da765 100644 --- a/drivers/acpi/processor_pdc.c +++ b/drivers/acpi/processor_pdc.c @@ -33,6 +33,49 @@ static struct dmi_system_id __cpuinitdata processor_idle_dmi_table[] = { {}, }; +static void acpi_processor_init_pdc(struct acpi_processor *pr) +{ + struct acpi_object_list *obj_list; + union acpi_object *obj; + u32 *buf; + + pr->pdc = NULL; + + /* allocate and initialize pdc. It will be used later. */ + obj_list = kmalloc(sizeof(struct acpi_object_list), GFP_KERNEL); + if (!obj_list) { + printk(KERN_ERR "Memory allocation error\n"); + return; + } + + obj = kmalloc(sizeof(union acpi_object), GFP_KERNEL); + if (!obj) { + printk(KERN_ERR "Memory allocation error\n"); + kfree(obj_list); + return; + } + + buf = kmalloc(12, GFP_KERNEL); + if (!buf) { + printk(KERN_ERR "Memory allocation error\n"); + kfree(obj); + kfree(obj_list); + return; + } + + obj->type = ACPI_TYPE_BUFFER; + obj->buffer.length = 12; + obj->buffer.pointer = (u8 *) buf; + obj_list->count = 1; + obj_list->pointer = obj; + pr->pdc = obj_list; + + /* Now let the arch do the bit-twiddling to buf[] */ + arch_acpi_processor_init_pdc(pr); + + return; +} + /* * _PDC is required for a BIOS-OS handshake for most of the newer * ACPI processor features. @@ -72,7 +115,7 @@ void acpi_processor_set_pdc(struct acpi_processor *pr) if (arch_has_acpi_pdc() == false) return; - arch_acpi_processor_init_pdc(pr); + acpi_processor_init_pdc(pr); acpi_processor_eval_pdc(pr); arch_acpi_processor_cleanup_pdc(pr); } -- cgit v1.2.3 From 08ea48a326d8030ef5b7fb02292faf5a53c95e0a Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Sun, 20 Dec 2009 12:19:24 -0700 Subject: ACPI: processor: factor out common _PDC settings Both x86 and ia64 initialize _PDC with mostly common bit settings. Factor out the common settings and leave the arch-specific ones alone. Cc: Tony Luck Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Alex Chiang Signed-off-by: Len Brown --- arch/ia64/kernel/acpi-processor.c | 10 +--------- arch/x86/kernel/acpi/processor.c | 10 +--------- drivers/acpi/processor_pdc.c | 11 +++++++++++ 3 files changed, 13 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/kernel/acpi-processor.c b/arch/ia64/kernel/acpi-processor.c index ab72d46f52c6..ebe23f58bd6e 100644 --- a/arch/ia64/kernel/acpi-processor.c +++ b/arch/ia64/kernel/acpi-processor.c @@ -18,15 +18,7 @@ static void init_intel_pdc(struct acpi_processor *pr) { u32 *buf = (u32 *)pr->pdc->pointer->buffer.pointer; - buf[0] = ACPI_PDC_REVISION_ID; - buf[1] = 1; - buf[2] = ACPI_PDC_EST_CAPABILITY_SMP; - /* - * The default of PDC_SMP_T_SWCOORD bit is set for IA64 cpu so - * that OSPM is capable of native ACPI throttling software - * coordination using BIOS supplied _TSD info. - */ - buf[2] |= ACPI_PDC_SMP_T_SWCOORD; + buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP; return; } diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c index 967860b43f2a..d722ca8cb4c7 100644 --- a/arch/x86/kernel/acpi/processor.c +++ b/arch/x86/kernel/acpi/processor.c @@ -16,16 +16,8 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) { u32 *buf = (u32 *)pr->pdc->pointer->buffer.pointer; - buf[0] = ACPI_PDC_REVISION_ID; - buf[1] = 1; - buf[2] = ACPI_PDC_C_CAPABILITY_SMP; + buf[2] |= ACPI_PDC_C_CAPABILITY_SMP; - /* - * The default of PDC_SMP_T_SWCOORD bit is set for intel x86 cpu so - * that OSPM is capable of native ACPI throttling software - * coordination using BIOS supplied _TSD info. - */ - buf[2] |= ACPI_PDC_SMP_T_SWCOORD; if (cpu_has(c, X86_FEATURE_EST)) buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP; diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c index 87946b6da765..ccda7c95d073 100644 --- a/drivers/acpi/processor_pdc.c +++ b/drivers/acpi/processor_pdc.c @@ -33,6 +33,15 @@ static struct dmi_system_id __cpuinitdata processor_idle_dmi_table[] = { {}, }; +static void acpi_set_pdc_bits(u32 *buf) +{ + buf[0] = ACPI_PDC_REVISION_ID; + buf[1] = 1; + + /* Enable coordination with firmware's _TSD info */ + buf[2] = ACPI_PDC_SMP_T_SWCOORD; +} + static void acpi_processor_init_pdc(struct acpi_processor *pr) { struct acpi_object_list *obj_list; @@ -63,6 +72,8 @@ static void acpi_processor_init_pdc(struct acpi_processor *pr) return; } + acpi_set_pdc_bits(buf); + obj->type = ACPI_TYPE_BUFFER; obj->buffer.length = 12; obj->buffer.pointer = (u8 *) buf; -- cgit v1.2.3 From 6c5807d7bc7d051fce00863ffb98d36325501eb2 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Sun, 20 Dec 2009 12:19:29 -0700 Subject: ACPI: processor: finish unifying arch_acpi_processor_init_pdc() The only thing arch-specific about calling _PDC is what bits get set in the input obj_list buffer. There's no need for several levels of indirection to twiddle those bits. Additionally, since we're just messing around with a buffer, we can simplify the interface; no need to pass around the entire struct acpi_processor * just to get at the buffer. Cc: Tony Luck Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Alex Chiang Signed-off-by: Len Brown --- arch/ia64/include/asm/acpi.h | 4 ++++ arch/ia64/kernel/acpi-processor.c | 18 ------------------ arch/x86/include/asm/acpi.h | 19 +++++++++++++++++++ arch/x86/kernel/acpi/processor.c | 34 ---------------------------------- drivers/acpi/processor_pdc.c | 6 +++--- include/acpi/processor.h | 1 - 6 files changed, 26 insertions(+), 56 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h index 3b7888294648..7ae58892ba8d 100644 --- a/arch/ia64/include/asm/acpi.h +++ b/arch/ia64/include/asm/acpi.h @@ -133,6 +133,10 @@ extern int __initdata nid_to_pxm_map[MAX_NUMNODES]; #endif static inline bool arch_has_acpi_pdc(void) { return true; } +static inline void arch_acpi_set_pdc_bits(u32 *buf) +{ + buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP; +} #define acpi_unlazy_tlb(x) diff --git a/arch/ia64/kernel/acpi-processor.c b/arch/ia64/kernel/acpi-processor.c index ebe23f58bd6e..7ba5accebf66 100644 --- a/arch/ia64/kernel/acpi-processor.c +++ b/arch/ia64/kernel/acpi-processor.c @@ -14,24 +14,6 @@ #include #include -static void init_intel_pdc(struct acpi_processor *pr) -{ - u32 *buf = (u32 *)pr->pdc->pointer->buffer.pointer; - - buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP; - - return; -} - -/* Initialize _PDC data based on the CPU vendor */ -void arch_acpi_processor_init_pdc(struct acpi_processor *pr) -{ - init_intel_pdc(pr); - return; -} - -EXPORT_SYMBOL(arch_acpi_processor_init_pdc); - void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr) { if (pr->pdc) { diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index d787e6e92bd1..56f462cf22d2 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -149,6 +149,25 @@ static inline bool arch_has_acpi_pdc(void) c->x86_vendor == X86_VENDOR_CENTAUR); } +static inline void arch_acpi_set_pdc_bits(u32 *buf) +{ + struct cpuinfo_x86 *c = &cpu_data(0); + + buf[2] |= ACPI_PDC_C_CAPABILITY_SMP; + + if (cpu_has(c, X86_FEATURE_EST)) + buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP; + + if (cpu_has(c, X86_FEATURE_ACPI)) + buf[2] |= ACPI_PDC_T_FFH; + + /* + * If mwait/monitor is unsupported, C2/C3_FFH will be disabled + */ + if (!cpu_has(c, X86_FEATURE_MWAIT)) + buf[2] &= ~(ACPI_PDC_C_C2C3_FFH); +} + #else /* !CONFIG_ACPI */ #define acpi_lapic 0 diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c index d722ca8cb4c7..0f57307f8224 100644 --- a/arch/x86/kernel/acpi/processor.c +++ b/arch/x86/kernel/acpi/processor.c @@ -12,40 +12,6 @@ #include #include -static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) -{ - u32 *buf = (u32 *)pr->pdc->pointer->buffer.pointer; - - buf[2] |= ACPI_PDC_C_CAPABILITY_SMP; - - if (cpu_has(c, X86_FEATURE_EST)) - buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP; - - if (cpu_has(c, X86_FEATURE_ACPI)) - buf[2] |= ACPI_PDC_T_FFH; - - /* - * If mwait/monitor is unsupported, C2/C3_FFH will be disabled - */ - if (!cpu_has(c, X86_FEATURE_MWAIT)) - buf[2] &= ~(ACPI_PDC_C_C2C3_FFH); - - return; -} - - -/* Initialize _PDC data based on the CPU vendor */ -void arch_acpi_processor_init_pdc(struct acpi_processor *pr) -{ - struct cpuinfo_x86 *c = &cpu_data(pr->id); - - init_intel_pdc(pr, c); - - return; -} - -EXPORT_SYMBOL(arch_acpi_processor_init_pdc); - void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr) { if (pr->pdc) { diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c index ccda7c95d073..48df08ebcec4 100644 --- a/drivers/acpi/processor_pdc.c +++ b/drivers/acpi/processor_pdc.c @@ -40,6 +40,9 @@ static void acpi_set_pdc_bits(u32 *buf) /* Enable coordination with firmware's _TSD info */ buf[2] = ACPI_PDC_SMP_T_SWCOORD; + + /* Twiddle arch-specific bits needed for _PDC */ + arch_acpi_set_pdc_bits(buf); } static void acpi_processor_init_pdc(struct acpi_processor *pr) @@ -81,9 +84,6 @@ static void acpi_processor_init_pdc(struct acpi_processor *pr) obj_list->pointer = obj; pr->pdc = obj_list; - /* Now let the arch do the bit-twiddling to buf[] */ - arch_acpi_processor_init_pdc(pr); - return; } diff --git a/include/acpi/processor.h b/include/acpi/processor.h index a1b748a7a766..50edd734aec6 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -257,7 +257,6 @@ int acpi_processor_notify_smm(struct module *calling_module); DECLARE_PER_CPU(struct acpi_processor *, processors); extern struct acpi_processor_errata errata; -void arch_acpi_processor_init_pdc(struct acpi_processor *pr); void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr); #ifdef ARCH_HAS_POWER_INIT -- cgit v1.2.3 From 47817254b8637b56730aec26eed2c337d3938bb5 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Sun, 20 Dec 2009 12:19:34 -0700 Subject: ACPI: processor: unify arch_acpi_processor_cleanup_pdc The x86 and ia64 implementations of the function in $subject are exactly the same. Also, since the arch-specific implementations of setting _PDC have been completely hollowed out, remove the empty shells. Cc: Tony Luck Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Alex Chiang Signed-off-by: Len Brown --- arch/ia64/kernel/Makefile | 4 ---- arch/ia64/kernel/acpi-processor.c | 27 --------------------------- arch/x86/kernel/acpi/Makefile | 2 +- arch/x86/kernel/acpi/processor.c | 25 ------------------------- drivers/acpi/processor_pdc.c | 21 ++++++++++++++++++++- include/acpi/processor.h | 2 -- 6 files changed, 21 insertions(+), 60 deletions(-) delete mode 100644 arch/ia64/kernel/acpi-processor.c delete mode 100644 arch/x86/kernel/acpi/processor.c (limited to 'arch/x86') diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 2a75e937ae8d..e1236349c99f 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -18,10 +18,6 @@ obj-$(CONFIG_IA64_GENERIC) += acpi-ext.o obj-$(CONFIG_IA64_HP_ZX1) += acpi-ext.o obj-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += acpi-ext.o -ifneq ($(CONFIG_ACPI_PROCESSOR),) -obj-y += acpi-processor.o -endif - obj-$(CONFIG_IA64_PALINFO) += palinfo.o obj-$(CONFIG_IOSAPIC) += iosapic.o obj-$(CONFIG_MODULES) += module.o diff --git a/arch/ia64/kernel/acpi-processor.c b/arch/ia64/kernel/acpi-processor.c deleted file mode 100644 index 7ba5accebf66..000000000000 --- a/arch/ia64/kernel/acpi-processor.c +++ /dev/null @@ -1,27 +0,0 @@ -/* - * arch/ia64/kernel/acpi-processor.c - * - * Copyright (C) 2005 Intel Corporation - * Venkatesh Pallipadi - * - Added _PDC for platforms with Intel CPUs - */ - -#include -#include -#include -#include - -#include -#include - -void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr) -{ - if (pr->pdc) { - kfree(pr->pdc->pointer->buffer.pointer); - kfree(pr->pdc->pointer); - kfree(pr->pdc); - pr->pdc = NULL; - } -} - -EXPORT_SYMBOL(arch_acpi_processor_cleanup_pdc); diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile index fd5ca97a2ad5..6f35260bb3ef 100644 --- a/arch/x86/kernel/acpi/Makefile +++ b/arch/x86/kernel/acpi/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_ACPI) += boot.o obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_rm.o wakeup_$(BITS).o ifneq ($(CONFIG_ACPI_PROCESSOR),) -obj-y += cstate.o processor.o +obj-y += cstate.o endif $(obj)/wakeup_rm.o: $(obj)/realmode/wakeup.bin diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c deleted file mode 100644 index 0f57307f8224..000000000000 --- a/arch/x86/kernel/acpi/processor.c +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (C) 2005 Intel Corporation - * Venkatesh Pallipadi - * - Added _PDC for platforms with Intel CPUs - */ - -#include -#include -#include -#include - -#include -#include - -void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr) -{ - if (pr->pdc) { - kfree(pr->pdc->pointer->buffer.pointer); - kfree(pr->pdc->pointer); - kfree(pr->pdc); - pr->pdc = NULL; - } -} - -EXPORT_SYMBOL(arch_acpi_processor_cleanup_pdc); diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c index 48df08ebcec4..e786e2ce1882 100644 --- a/drivers/acpi/processor_pdc.c +++ b/drivers/acpi/processor_pdc.c @@ -1,3 +1,12 @@ +/* + * Copyright (C) 2005 Intel Corporation + * Copyright (C) 2009 Hewlett-Packard Development Company, L.P. + * + * Alex Chiang + * - Unified x86/ia64 implementations + * Venkatesh Pallipadi + * - Added _PDC for platforms with Intel CPUs + */ #include #include @@ -121,6 +130,16 @@ static int acpi_processor_eval_pdc(struct acpi_processor *pr) return status; } +static void acpi_processor_cleanup_pdc(struct acpi_processor *pr) +{ + if (pr->pdc) { + kfree(pr->pdc->pointer->buffer.pointer); + kfree(pr->pdc->pointer); + kfree(pr->pdc); + pr->pdc = NULL; + } +} + void acpi_processor_set_pdc(struct acpi_processor *pr) { if (arch_has_acpi_pdc() == false) @@ -128,7 +147,7 @@ void acpi_processor_set_pdc(struct acpi_processor *pr) acpi_processor_init_pdc(pr); acpi_processor_eval_pdc(pr); - arch_acpi_processor_cleanup_pdc(pr); + acpi_processor_cleanup_pdc(pr); } EXPORT_SYMBOL_GPL(acpi_processor_set_pdc); diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 50edd734aec6..0873cd57510c 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -257,8 +257,6 @@ int acpi_processor_notify_smm(struct module *calling_module); DECLARE_PER_CPU(struct acpi_processor *, processors); extern struct acpi_processor_errata errata; -void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr); - #ifdef ARCH_HAS_POWER_INIT void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, unsigned int cpu); -- cgit v1.2.3 From 4a28395d72a956f2dad24e343d06bc08c9afb89a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 21 Dec 2009 16:19:58 -0800 Subject: arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c: avoid cross-CPU interrupts by using smp_call_function_any() Presently acpi-cpufreq will perform the MSR read on the first CPU in the mask. That's inefficient if that CPU differs from the current CPU. Because we have to perform a cross-CPU call, but we could have run the rdmsr on the current CPU. So switch to using the new smp_call_function_any(), which will perform the call on the current CPU if that CPU is present in the mask (it is). Cc: "Zhang, Yanmin" Cc: Dave Jones Cc: Ingo Molnar Cc: Jaswinder Singh Rajput Cc: Len Brown Cc: Mike Galbraith Cc: Rusty Russell Cc: Thomas Gleixner Cc: Venkatesh Pallipadi Cc: Zhao Yakui Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index f28decf8dde3..1b1920fa7c80 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -190,9 +190,11 @@ static void do_drv_write(void *_cmd) static void drv_read(struct drv_cmd *cmd) { + int err; cmd->val = 0; - smp_call_function_single(cpumask_any(cmd->mask), do_drv_read, cmd, 1); + err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1); + WARN_ON_ONCE(err); /* smp_call_function_any() was buggy? */ } static void drv_write(struct drv_cmd *cmd) -- cgit v1.2.3 From 2f99f5c8f05e02f3df1bb4d93b6704e6f5972872 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 23 Dec 2009 15:04:53 -0800 Subject: Revert "x86, ucode-amd: Ensure ucode update on suspend/resume after CPU off/online cycle" This reverts commit 9f15226e75583547aaf542c6be4bdac1060dd425. It's just wrong, and broke resume for Rafael even on a non-AMD CPU. As Rafael says: "... it causes microcode_init_cpu() to be called during resume even for CPUs for which there's no microcode to apply. That, in turn, results in executing request_firmware() (on Intel CPUs at least) which doesn't work at this stage of resume (we have device interrupts disabled, I/O devices are still suspended and so on). If I'm not mistaken, the "if (uci->valid)" logic means "if that CPU is known to us" , so before commit 9f15226e755 microcode_resume_cpu() was called for all CPUs already in the system during suspend, which was the right thing to do. The commit changed it so that the CPUs without microcode to apply are now treated as "unknown", which is not quite right. The problem this commit attempted to solve has to be handled differently." Bisected-and -requested-by: Rafael J. Wysocki Signed-off-by: Linus Torvalds --- arch/x86/kernel/microcode_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 844c02c65fcb..0c8632433090 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -394,7 +394,7 @@ static enum ucode_state microcode_update_cpu(int cpu) struct ucode_cpu_info *uci = ucode_cpu_info + cpu; enum ucode_state ustate; - if (uci->valid && uci->mc) + if (uci->valid) ustate = microcode_resume_cpu(cpu); else ustate = microcode_init_cpu(cpu); -- cgit v1.2.3 From 17a2a9b57a9a7d2fd8f97df951b5e63e0bd56ef5 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 25 Dec 2009 15:40:38 -0800 Subject: x86, compress: Force i386 instructions for the decompressor Recently, some distros have started shipping versions of gcc which default to -march=i686. This breaks building kernels for pre-i686 machines, even if they have been selected in Kconfig, due to the generation of CMOV instructions. There isn't enough benefit to try to preserve the generation of these instructions even when selected, so simply force -march=i386 for the decompressor when building a 32-bit kernel. Reported-and-tested-by: Chris Rankin Signed-off-by: H. Peter Anvin LKML-Reference: <219280.97558.qm@web52907.mail.re2.yahoo.com> --- arch/x86/boot/compressed/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index f8ed0658404c..f25bbd37765a 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -9,6 +9,7 @@ targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinu KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 KBUILD_CFLAGS += -fno-strict-aliasing -fPIC KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING +cflags-$(CONFIG_X86_32) := -march=i386 cflags-$(CONFIG_X86_64) := -mcmodel=small KBUILD_CFLAGS += $(cflags-y) KBUILD_CFLAGS += $(call cc-option,-ffreestanding) -- cgit v1.2.3 From fb341f572d26e0786167cd96b90cc4febed830cf Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Sat, 5 Dec 2009 12:34:11 -0200 Subject: KVM: MMU: remove prefault from invlpg handler The invlpg prefault optimization breaks Windows 2008 R2 occasionally. The visible effect is that the invlpg handler instantiates a pte which is, microseconds later, written with a different gfn by another vcpu. The OS could have other mechanisms to prevent a present translation from being used, which the hypervisor is unaware of. While the documentation states that the cpu is at liberty to prefetch tlb entries, it looks like this is not heeded, so remove tlb prefetch from invlpg. Cc: stable@kernel.org Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index a6017132fba8..58a0f1e88596 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -455,8 +455,6 @@ out_unlock: static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) { struct kvm_shadow_walk_iterator iterator; - pt_element_t gpte; - gpa_t pte_gpa = -1; int level; u64 *sptep; int need_flush = 0; @@ -470,10 +468,6 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) if (level == PT_PAGE_TABLE_LEVEL || ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { - struct kvm_mmu_page *sp = page_header(__pa(sptep)); - - pte_gpa = (sp->gfn << PAGE_SHIFT); - pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); if (is_shadow_present_pte(*sptep)) { rmap_remove(vcpu->kvm, sptep); @@ -492,18 +486,6 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) if (need_flush) kvm_flush_remote_tlbs(vcpu->kvm); spin_unlock(&vcpu->kvm->mmu_lock); - - if (pte_gpa == -1) - return; - if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, - sizeof(pt_element_t))) - return; - if (is_present_gpte(gpte) && (gpte & PT_ACCESSED_MASK)) { - if (mmu_topup_memory_caches(vcpu)) - return; - kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte, - sizeof(pt_element_t), 0); - } } static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) -- cgit v1.2.3 From 6e24a6eff4571002cd48b99a2b92dc829ce39cb9 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 14 Dec 2009 17:37:35 -0200 Subject: KVM: LAPIC: make sure IRR bitmap is scanned after vm load The vcpus are initialized with irr_pending set to false, but loading the LAPIC registers with pending IRR fails to reset the irr_pending variable. Cc: stable@kernel.org Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/lapic.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index cd60c0bd1b32..3063a0c4858b 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1150,6 +1150,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) hrtimer_cancel(&apic->lapic_timer.timer); update_divide_count(apic); start_apic_timer(apic); + apic->irr_pending = true; } void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From dab4b911a5327859bb8f969249c6978c26cd4853 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 6 Dec 2009 18:24:15 +0100 Subject: KVM: x86: Extend KVM_SET_VCPU_EVENTS with selective updates User space may not want to overwrite asynchronously changing VCPU event states on write-back. So allow to skip nmi.pending and sipi_vector by setting corresponding bits in the flags field of kvm_vcpu_events. [avi: advertise the bits in KVM_GET_VCPU_EVENTS] Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- Documentation/kvm/api.txt | 10 +++++++++- arch/x86/include/asm/kvm.h | 4 ++++ arch/x86/kvm/x86.c | 12 ++++++++---- 3 files changed, 21 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index e1a114161027..2811e452f756 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -685,7 +685,7 @@ struct kvm_vcpu_events { __u8 pad; } nmi; __u32 sipi_vector; - __u32 flags; /* must be zero */ + __u32 flags; }; 4.30 KVM_SET_VCPU_EVENTS @@ -701,6 +701,14 @@ vcpu. See KVM_GET_VCPU_EVENTS for the data structure. +Fields that may be modified asynchronously by running VCPUs can be excluded +from the update. These fields are nmi.pending and sipi_vector. Keep the +corresponding bits in the flags field cleared to suppress overwriting the +current in-kernel state. The bits are: + +KVM_VCPUEVENT_VALID_NMI_PENDING - transfer nmi.pending to the kernel +KVM_VCPUEVENT_VALID_SIPI_VECTOR - transfer sipi_vector + 5. The kvm_run structure diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 950df434763f..f46b79f6c16c 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -254,6 +254,10 @@ struct kvm_reinject_control { __u8 reserved[31]; }; +/* When set in flags, include corresponding fields on KVM_SET_VCPU_EVENTS */ +#define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001 +#define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 + /* for KVM_GET/SET_VCPU_EVENTS */ struct kvm_vcpu_events { struct { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9d068966fb2a..6651dbf58675 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1913,7 +1913,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, events->sipi_vector = vcpu->arch.sipi_vector; - events->flags = 0; + events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING + | KVM_VCPUEVENT_VALID_SIPI_VECTOR); vcpu_put(vcpu); } @@ -1921,7 +1922,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events) { - if (events->flags) + if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING + | KVM_VCPUEVENT_VALID_SIPI_VECTOR)) return -EINVAL; vcpu_load(vcpu); @@ -1938,10 +1940,12 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, kvm_pic_clear_isr_ack(vcpu->kvm); vcpu->arch.nmi_injected = events->nmi.injected; - vcpu->arch.nmi_pending = events->nmi.pending; + if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) + vcpu->arch.nmi_pending = events->nmi.pending; kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); - vcpu->arch.sipi_vector = events->sipi_vector; + if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) + vcpu->arch.sipi_vector = events->sipi_vector; vcpu_put(vcpu); -- cgit v1.2.3 From d015a092989d673df44a5ad6866dc5d5006b7a2a Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Mon, 28 Dec 2009 10:26:59 +0200 Subject: x86: Use KERN_DEFAULT log-level in __show_regs() Andrew Morton reported a strange looking kmemcheck warning: WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (ffff88004fba6c20) 0000000000000000310000000000000000000000000000002413000000c9ffff u u u u u u u u u u u u u u u u i i i i i i i i u u u u u u u u [] kmemleak_scan+0x25a/0x540 [] kmemleak_scan_thread+0x5b/0xe0 [] kthread+0x9e/0xb0 [] kernel_thread_helper+0x4/0x10 [] 0xffffffffffffffff The above printout is missing register dump completely. The problem here is that the output comes from syslog which doesn't show KERN_INFO log-level messages. We didn't see this before because both of us were testing on 32-bit kernels which use the _default_ log-level. Fix that up by explicitly using KERN_DEFAULT log-level for __show_regs() printks. Signed-off-by: Pekka Enberg Cc: Vegard Nossum Cc: Andrew Morton Cc: Arjan van de Ven Cc: Linus Torvalds LKML-Reference: <1261988819.4641.2.camel@penberg-laptop> Signed-off-by: Ingo Molnar --- arch/x86/kernel/process.c | 4 ++-- arch/x86/kernel/process_32.c | 14 +++++++------- arch/x86/kernel/process_64.c | 24 ++++++++++++------------ 3 files changed, 21 insertions(+), 21 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 98c2cdeb599e..c6ee241c8a98 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -103,8 +103,8 @@ void show_regs_common(void) if (!product) product = ""; - printk("\n"); - printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s/%s\n", + printk(KERN_CONT "\n"); + printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s %s/%s\n", current->pid, current->comm, print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 9c517b5858f0..37ad1e046aae 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -139,16 +139,16 @@ void __show_regs(struct pt_regs *regs, int all) show_regs_common(); - printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", + printk(KERN_DEFAULT "EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", (u16)regs->cs, regs->ip, regs->flags, smp_processor_id()); print_symbol("EIP is at %s\n", regs->ip); - printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", + printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", regs->ax, regs->bx, regs->cx, regs->dx); - printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", + printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", regs->si, regs->di, regs->bp, sp); - printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", + printk(KERN_DEFAULT " DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss); if (!all) @@ -158,19 +158,19 @@ void __show_regs(struct pt_regs *regs, int all) cr2 = read_cr2(); cr3 = read_cr3(); cr4 = read_cr4_safe(); - printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", + printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); get_debugreg(d0, 0); get_debugreg(d1, 1); get_debugreg(d2, 2); get_debugreg(d3, 3); - printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", + printk(KERN_DEFAULT "DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", d0, d1, d2, d3); get_debugreg(d6, 6); get_debugreg(d7, 7); - printk("DR6: %08lx DR7: %08lx\n", + printk(KERN_DEFAULT "DR6: %08lx DR7: %08lx\n", d6, d7); } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 52fbd0c60198..f9e033150cdf 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -161,19 +161,19 @@ void __show_regs(struct pt_regs *regs, int all) unsigned int ds, cs, es; show_regs_common(); - printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); + printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); printk_address(regs->ip, 1); - printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, + printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp, regs->flags); - printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n", + printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n", regs->ax, regs->bx, regs->cx); - printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n", + printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n", regs->dx, regs->si, regs->di); - printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n", + printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n", regs->bp, regs->r8, regs->r9); - printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n", + printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n", regs->r10, regs->r11, regs->r12); - printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n", + printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n", regs->r13, regs->r14, regs->r15); asm("movl %%ds,%0" : "=r" (ds)); @@ -194,21 +194,21 @@ void __show_regs(struct pt_regs *regs, int all) cr3 = read_cr3(); cr4 = read_cr4(); - printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", + printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", fs, fsindex, gs, gsindex, shadowgs); - printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, + printk(KERN_DEFAULT "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); - printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, + printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4); get_debugreg(d0, 0); get_debugreg(d1, 1); get_debugreg(d2, 2); - printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); + printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); get_debugreg(d3, 3); get_debugreg(d6, 6); get_debugreg(d7, 7); - printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); + printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); } void show_regs(struct pt_regs *regs) -- cgit v1.2.3 From c0ca9da442df82b67095f230f24762042f9f3b7d Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Mon, 28 Dec 2009 11:02:15 +0200 Subject: x86, kmemcheck: Use KERN_WARNING for error reporting As suggested by Vegard Nossum, use KERN_WARNING for error reporting to make sure kmemcheck reports end up in syslog. Suggested-by: Vegard Nossum Signed-off-by: Pekka Enberg Cc: Andrew Morton LKML-Reference: <1261990935.4641.7.camel@penberg-laptop> Signed-off-by: Ingo Molnar --- arch/x86/mm/kmemcheck/error.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c index 4901d0dafda6..af3b6c8a436f 100644 --- a/arch/x86/mm/kmemcheck/error.c +++ b/arch/x86/mm/kmemcheck/error.c @@ -106,26 +106,25 @@ void kmemcheck_error_recall(void) switch (e->type) { case KMEMCHECK_ERROR_INVALID_ACCESS: - printk(KERN_ERR "WARNING: kmemcheck: Caught %d-bit read " - "from %s memory (%p)\n", + printk(KERN_WARNING "WARNING: kmemcheck: Caught %d-bit read from %s memory (%p)\n", 8 * e->size, e->state < ARRAY_SIZE(desc) ? desc[e->state] : "(invalid shadow state)", (void *) e->address); - printk(KERN_INFO); + printk(KERN_WARNING); for (i = 0; i < SHADOW_COPY_SIZE; ++i) - printk("%02x", e->memory_copy[i]); - printk("\n"); + printk(KERN_CONT "%02x", e->memory_copy[i]); + printk(KERN_CONT "\n"); - printk(KERN_INFO); + printk(KERN_WARNING); for (i = 0; i < SHADOW_COPY_SIZE; ++i) { if (e->shadow_copy[i] < ARRAY_SIZE(short_desc)) - printk(" %c", short_desc[e->shadow_copy[i]]); + printk(KERN_CONT " %c", short_desc[e->shadow_copy[i]]); else - printk(" ?"); + printk(KERN_CONT " ?"); } - printk("\n"); - printk(KERN_INFO "%*c\n", 2 + 2 + printk(KERN_CONT "\n"); + printk(KERN_WARNING "%*c\n", 2 + 2 * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^'); break; case KMEMCHECK_ERROR_BUG: -- cgit v1.2.3 From 39d30770992895d55789de64bad2349510af68d0 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 28 Dec 2009 13:28:25 -0800 Subject: x86: SGI UV: Fix writes to led registers on remote uv hubs The wrong address was being used to write the SCIR led regs on remote hubs. Also, there was an inconsistency between how BIOS and the kernel indexed these regs. Standardize on using the lower 6 bits of the APIC ID as the index. This patch fixes the problem of writing to an errant address to a cpu # >= 64. Signed-off-by: Mike Travis Reviewed-by: Jack Steiner Cc: Robin Holt Cc: Linus Torvalds Cc: stable@kernel.org LKML-Reference: <4B3922F9.3060905@sgi.com> [ v2: fix a number of annoying checkpatch artifacts and whitespace noise ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_hub.h | 86 +++++++++++++++++++++----------------- arch/x86/kernel/apic/x2apic_uv_x.c | 12 +++--- 2 files changed, 54 insertions(+), 44 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 811bfabc80b7..bc54fa965af3 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -31,20 +31,20 @@ * contiguous (although various IO spaces may punch holes in * it).. * - * N - Number of bits in the node portion of a socket physical - * address. + * N - Number of bits in the node portion of a socket physical + * address. * - * NASID - network ID of a router, Mbrick or Cbrick. Nasid values of - * routers always have low bit of 1, C/MBricks have low bit - * equal to 0. Most addressing macros that target UV hub chips - * right shift the NASID by 1 to exclude the always-zero bit. - * NASIDs contain up to 15 bits. + * NASID - network ID of a router, Mbrick or Cbrick. Nasid values of + * routers always have low bit of 1, C/MBricks have low bit + * equal to 0. Most addressing macros that target UV hub chips + * right shift the NASID by 1 to exclude the always-zero bit. + * NASIDs contain up to 15 bits. * * GNODE - NASID right shifted by 1 bit. Most mmrs contain gnodes instead * of nasids. * - * PNODE - the low N bits of the GNODE. The PNODE is the most useful variant - * of the nasid for socket usage. + * PNODE - the low N bits of the GNODE. The PNODE is the most useful variant + * of the nasid for socket usage. * * * NumaLink Global Physical Address Format: @@ -71,12 +71,12 @@ * * * APICID format - * NOTE!!!!!! This is the current format of the APICID. However, code - * should assume that this will change in the future. Use functions - * in this file for all APICID bit manipulations and conversion. + * NOTE!!!!!! This is the current format of the APICID. However, code + * should assume that this will change in the future. Use functions + * in this file for all APICID bit manipulations and conversion. * - * 1111110000000000 - * 5432109876543210 + * 1111110000000000 + * 5432109876543210 * pppppppppplc0cch * sssssssssss * @@ -89,9 +89,9 @@ * Note: Processor only supports 12 bits in the APICID register. The ACPI * tables hold all 16 bits. Software needs to be aware of this. * - * Unless otherwise specified, all references to APICID refer to - * the FULL value contained in ACPI tables, not the subset in the - * processor APICID register. + * Unless otherwise specified, all references to APICID refer to + * the FULL value contained in ACPI tables, not the subset in the + * processor APICID register. */ @@ -151,16 +151,16 @@ struct uv_hub_info_s { }; DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); -#define uv_hub_info (&__get_cpu_var(__uv_hub_info)) +#define uv_hub_info (&__get_cpu_var(__uv_hub_info)) #define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu)) /* * Local & Global MMR space macros. - * Note: macros are intended to be used ONLY by inline functions - * in this file - not by other kernel code. - * n - NASID (full 15-bit global nasid) - * g - GNODE (full 15-bit global nasid, right shifted 1) - * p - PNODE (local part of nsids, right shifted 1) + * Note: macros are intended to be used ONLY by inline functions + * in this file - not by other kernel code. + * n - NASID (full 15-bit global nasid) + * g - GNODE (full 15-bit global nasid, right shifted 1) + * p - PNODE (local part of nsids, right shifted 1) */ #define UV_NASID_TO_PNODE(n) (((n) >> 1) & uv_hub_info->pnode_mask) #define UV_PNODE_TO_GNODE(p) ((p) |uv_hub_info->gnode_extra) @@ -215,8 +215,8 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); /* * Macros for converting between kernel virtual addresses, socket local physical * addresses, and UV global physical addresses. - * Note: use the standard __pa() & __va() macros for converting - * between socket virtual and socket physical addresses. + * Note: use the standard __pa() & __va() macros for converting + * between socket virtual and socket physical addresses. */ /* socket phys RAM --> UV global physical address */ @@ -287,21 +287,18 @@ static inline int uv_apicid_to_pnode(int apicid) * Access global MMRs using the low memory MMR32 space. This region supports * faster MMR access but not all MMRs are accessible in this space. */ -static inline unsigned long *uv_global_mmr32_address(int pnode, - unsigned long offset) +static inline unsigned long *uv_global_mmr32_address(int pnode, unsigned long offset) { return __va(UV_GLOBAL_MMR32_BASE | UV_GLOBAL_MMR32_PNODE_BITS(pnode) | offset); } -static inline void uv_write_global_mmr32(int pnode, unsigned long offset, - unsigned long val) +static inline void uv_write_global_mmr32(int pnode, unsigned long offset, unsigned long val) { writeq(val, uv_global_mmr32_address(pnode, offset)); } -static inline unsigned long uv_read_global_mmr32(int pnode, - unsigned long offset) +static inline unsigned long uv_read_global_mmr32(int pnode, unsigned long offset) { return readq(uv_global_mmr32_address(pnode, offset)); } @@ -310,21 +307,18 @@ static inline unsigned long uv_read_global_mmr32(int pnode, * Access Global MMR space using the MMR space located at the top of physical * memory. */ -static inline unsigned long *uv_global_mmr64_address(int pnode, - unsigned long offset) +static inline unsigned long *uv_global_mmr64_address(int pnode, unsigned long offset) { return __va(UV_GLOBAL_MMR64_BASE | UV_GLOBAL_MMR64_PNODE_BITS(pnode) | offset); } -static inline void uv_write_global_mmr64(int pnode, unsigned long offset, - unsigned long val) +static inline void uv_write_global_mmr64(int pnode, unsigned long offset, unsigned long val) { writeq(val, uv_global_mmr64_address(pnode, offset)); } -static inline unsigned long uv_read_global_mmr64(int pnode, - unsigned long offset) +static inline unsigned long uv_read_global_mmr64(int pnode, unsigned long offset) { return readq(uv_global_mmr64_address(pnode, offset)); } @@ -338,6 +332,16 @@ static inline unsigned long uv_global_gru_mmr_address(int pnode, unsigned long o return UV_GLOBAL_GRU_MMR_BASE | offset | (pnode << uv_hub_info->m_val); } +static inline void uv_write_global_mmr8(int pnode, unsigned long offset, unsigned char val) +{ + writeb(val, uv_global_mmr64_address(pnode, offset)); +} + +static inline unsigned char uv_read_global_mmr8(int pnode, unsigned long offset) +{ + return readb(uv_global_mmr64_address(pnode, offset)); +} + /* * Access hub local MMRs. Faster than using global space but only local MMRs * are accessible. @@ -457,11 +461,17 @@ static inline void uv_set_scir_bits(unsigned char value) } } +static inline unsigned long uv_scir_offset(int apicid) +{ + return SCIR_LOCAL_MMR_BASE | (apicid & 0x3f); +} + static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value) { if (uv_cpu_hub_info(cpu)->scir.state != value) { + uv_write_global_mmr8(uv_cpu_to_pnode(cpu), + uv_cpu_hub_info(cpu)->scir.offset, value); uv_cpu_hub_info(cpu)->scir.state = value; - uv_write_local_mmr8(uv_cpu_hub_info(cpu)->scir.offset, value); } } diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index d56b0efb2057..5f92494dab61 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -629,8 +629,10 @@ void __init uv_system_init(void) uv_rtc_init(); for_each_present_cpu(cpu) { + int apicid = per_cpu(x86_cpu_to_apicid, cpu); + nid = cpu_to_node(cpu); - pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu)); + pnode = uv_apicid_to_pnode(apicid); blade = boot_pnode_to_blade(pnode); lcpu = uv_blade_info[blade].nr_possible_cpus; uv_blade_info[blade].nr_possible_cpus++; @@ -651,15 +653,13 @@ void __init uv_system_init(void) uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id; - uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu; + uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid); uv_node_to_blade[nid] = blade; uv_cpu_to_blade[cpu] = blade; max_pnode = max(pnode, max_pnode); - printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, " - "lcpu %d, blade %d\n", - cpu, per_cpu(x86_cpu_to_apicid, cpu), pnode, nid, - lcpu, blade); + printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, lcpu %d, blade %d\n", + cpu, apicid, pnode, nid, lcpu, blade); } /* Add blade/pnode info for nodes without cpus */ -- cgit v1.2.3 From 9a7262a0563da6b91019156abf487bcdf1a41526 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 28 Dec 2009 13:28:25 -0800 Subject: x86_64 SGI UV: Fix writes to led registers on remote uv hubs. The wrong address was being used to write the SCIR led regs on remote hubs. Also, there was an inconsistency between how BIOS and the kernel indexed these regs. Standardize on using the lower 6 bits of the APIC ID as the index. This patch fixes the problem of writing to an errant address to a cpu # >= 64. Signed-off-by: Mike Travis Reviewed-by: Jack Steiner Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- arch/x86/include/asm/uv/uv_hub.h | 20 +++++++++++++++++++- arch/x86/kernel/apic/x2apic_uv_x.c | 12 ++++++------ 2 files changed, 25 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 811bfabc80b7..bcdb708993d2 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -338,6 +338,18 @@ static inline unsigned long uv_global_gru_mmr_address(int pnode, unsigned long o return UV_GLOBAL_GRU_MMR_BASE | offset | (pnode << uv_hub_info->m_val); } +static inline void uv_write_global_mmr8(int pnode, unsigned long offset, + unsigned char val) +{ + writeb(val, uv_global_mmr64_address(pnode, offset)); +} + +static inline unsigned char uv_read_global_mmr8(int pnode, + unsigned long offset) +{ + return readb(uv_global_mmr64_address(pnode, offset)); +} + /* * Access hub local MMRs. Faster than using global space but only local MMRs * are accessible. @@ -457,11 +469,17 @@ static inline void uv_set_scir_bits(unsigned char value) } } +static inline unsigned long uv_scir_offset(int apicid) +{ + return SCIR_LOCAL_MMR_BASE | (apicid & 0x3f); +} + static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value) { if (uv_cpu_hub_info(cpu)->scir.state != value) { + uv_write_global_mmr8(uv_cpu_to_pnode(cpu), + uv_cpu_hub_info(cpu)->scir.offset, value); uv_cpu_hub_info(cpu)->scir.state = value; - uv_write_local_mmr8(uv_cpu_hub_info(cpu)->scir.offset, value); } } diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index d56b0efb2057..5f92494dab61 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -629,8 +629,10 @@ void __init uv_system_init(void) uv_rtc_init(); for_each_present_cpu(cpu) { + int apicid = per_cpu(x86_cpu_to_apicid, cpu); + nid = cpu_to_node(cpu); - pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu)); + pnode = uv_apicid_to_pnode(apicid); blade = boot_pnode_to_blade(pnode); lcpu = uv_blade_info[blade].nr_possible_cpus; uv_blade_info[blade].nr_possible_cpus++; @@ -651,15 +653,13 @@ void __init uv_system_init(void) uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id; - uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu; + uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid); uv_node_to_blade[nid] = blade; uv_cpu_to_blade[cpu] = blade; max_pnode = max(pnode, max_pnode); - printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, " - "lcpu %d, blade %d\n", - cpu, per_cpu(x86_cpu_to_apicid, cpu), pnode, nid, - lcpu, blade); + printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, lcpu %d, blade %d\n", + cpu, apicid, pnode, nid, lcpu, blade); } /* Add blade/pnode info for nodes without cpus */ -- cgit v1.2.3 From d7f0eea9e431e1b8b0742a74db1a9490730b2a25 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 30 Dec 2009 15:36:42 +0800 Subject: ACPI: introduce kernel parameter acpi_sleep=sci_force_enable Introduce kernel parameter acpi_sleep=sci_force_enable some laptop requires SCI_EN being set directly on resume, or else they hung somewhere in the resume code path. We already have a blacklist for these laptops but we still need this option, especially when debugging some suspend/resume problems, in case there are systems that need this workaround and are not yet in the blacklist. Signed-off-by: Zhang Rui Acked-by: Rafael J. Wysocki Signed-off-by: Len Brown --- Documentation/kernel-parameters.txt | 5 ++++- arch/x86/kernel/acpi/sleep.c | 2 ++ drivers/acpi/sleep.c | 29 +++++++++++++++++------------ include/linux/acpi.h | 1 + 4 files changed, 24 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 5ba4d9dff113..736d45602886 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -240,7 +240,7 @@ and is between 256 and 4096 characters. It is defined in the file acpi_sleep= [HW,ACPI] Sleep options Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig, - old_ordering, s4_nonvs } + old_ordering, s4_nonvs, sci_force_enable } See Documentation/power/video.txt for information on s3_bios and s3_mode. s3_beep is for debugging; it makes the PC's speaker beep @@ -253,6 +253,9 @@ and is between 256 and 4096 characters. It is defined in the file of _PTS is used by default). s4_nonvs prevents the kernel from saving/restoring the ACPI NVS memory during hibernation. + sci_force_enable causes the kernel to set SCI_EN directly + on resume from S1/S3 (which is against the ACPI spec, + but some broken systems don't work without it). acpi_use_timer_override [HW,ACPI] Use timer override. For some broken Nvidia NF5 boards diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 82e508677b91..f9961034e557 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -162,6 +162,8 @@ static int __init acpi_sleep_setup(char *str) #endif if (strncmp(str, "old_ordering", 12) == 0) acpi_old_suspend_ordering(); + if (strncmp(str, "sci_force_enable", 16) == 0) + acpi_set_sci_en_on_resume(); str = strchr(str, ','); if (str != NULL) str += strspn(str, ", \t"); diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 5f2c379ab7bf..79d33d908b5a 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -80,6 +80,23 @@ static int acpi_sleep_prepare(u32 acpi_state) #ifdef CONFIG_ACPI_SLEEP static u32 acpi_target_sleep_state = ACPI_STATE_S0; +/* + * According to the ACPI specification the BIOS should make sure that ACPI is + * enabled and SCI_EN bit is set on wake-up from S1 - S3 sleep states. Still, + * some BIOSes don't do that and therefore we use acpi_enable() to enable ACPI + * on such systems during resume. Unfortunately that doesn't help in + * particularly pathological cases in which SCI_EN has to be set directly on + * resume, although the specification states very clearly that this flag is + * owned by the hardware. The set_sci_en_on_resume variable will be set in such + * cases. + */ +static bool set_sci_en_on_resume; + +void __init acpi_set_sci_en_on_resume(void) +{ + set_sci_en_on_resume = true; +} + /* * ACPI 1.0 wants us to execute _PTS before suspending devices, so we allow the * user to request that behavior by using the 'acpi_old_suspend_ordering' @@ -170,18 +187,6 @@ static void acpi_pm_end(void) #endif /* CONFIG_ACPI_SLEEP */ #ifdef CONFIG_SUSPEND -/* - * According to the ACPI specification the BIOS should make sure that ACPI is - * enabled and SCI_EN bit is set on wake-up from S1 - S3 sleep states. Still, - * some BIOSes don't do that and therefore we use acpi_enable() to enable ACPI - * on such systems during resume. Unfortunately that doesn't help in - * particularly pathological cases in which SCI_EN has to be set directly on - * resume, although the specification states very clearly that this flag is - * owned by the hardware. The set_sci_en_on_resume variable will be set in such - * cases. - */ -static bool set_sci_en_on_resume; - extern void do_suspend_lowlevel(void); static u32 acpi_suspend_states[] = { diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ce945d4845fc..36924255c0d5 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -251,6 +251,7 @@ int acpi_check_mem_region(resource_size_t start, resource_size_t n, void __init acpi_no_s4_hw_signature(void); void __init acpi_old_suspend_ordering(void); void __init acpi_s4_no_nvs(void); +void __init acpi_set_sci_en_on_resume(void); #endif /* CONFIG_PM_SLEEP */ struct acpi_osc_context { -- cgit v1.2.3 From 48b5ba9cc98d676712da29d9931f1c88e5185ff2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 31 Dec 2009 05:53:02 +0100 Subject: perf: Pass appropriate frame pointer to dump_trace() Pass the frame pointer from the regs of the interrupted path to dump_trace() while processing the stack trace. Currently, dump_trace() takes the current bp and starts the callchain from dump_trace() itself. This is wasteful because we need to walk through the entire NMI/DEBUG stack before retrieving the interrupted point. We can fix that by just using the frame pointer from the captured regs. It points exactly where we want to start. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras LKML-Reference: <1262235183-5320-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras --- arch/x86/kernel/cpu/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index c223b7e895d9..d616c06e99b4 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2347,7 +2347,7 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) callchain_store(entry, PERF_CONTEXT_KERNEL); callchain_store(entry, regs->ip); - dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); + dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); } /* -- cgit v1.2.3 From 9dad0fd5a73d4048dff18069733c0b515f68df74 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 22 Dec 2009 15:40:39 -0800 Subject: x86: Fix size for ex trampoline with 32bit fix for error that is introduced by | x86: Use find_e820() instead of hard coded trampoline address it should end with PAGE_SIZE + PAGE_SIZE Signed-off-by: Yinghai Lu LKML-Reference: <1261525263-13763-2-git-send-email-yinghai@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/e820.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 05ed7ab2ca48..a1a7876cadcb 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -733,13 +733,13 @@ struct early_res { }; static struct early_res early_res[MAX_EARLY_RES] __initdata = { { 0, PAGE_SIZE, "BIOS data page", 1 }, /* BIOS data page */ -#ifdef CONFIG_X86_32 +#if defined(CONFIG_X86_32) && defined(CONFIG_X86_TRAMPOLINE) /* * But first pinch a few for the stack/trampoline stuff * FIXME: Don't need the extra page at 4K, but need to fix * trampoline before removing it. (see the GDT stuff) */ - { PAGE_SIZE, PAGE_SIZE, "EX TRAMPOLINE", 1 }, + { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE", 1 }, #endif {} -- cgit v1.2.3 From a557aae29cf5916295c234d4b10ba3f8f29b8a96 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 22 Dec 2009 15:40:40 -0800 Subject: x86/pci: Intel ioh bus num reg accessing fix It is above 0x100 (PCI-Express extended register space), so if mmconf is not enable, we can't access it. [ hpa: changed the bound from 0x200 to 0x120, which is the tight bound. ] Reported-by: Jens Axboe Signed-off-by: Yinghai Lu LKML-Reference: <1261525263-13763-3-git-send-email-yinghai@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/pci/intel_bus.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/pci/intel_bus.c b/arch/x86/pci/intel_bus.c index b7a55dc55d13..f81a2fa8fe25 100644 --- a/arch/x86/pci/intel_bus.c +++ b/arch/x86/pci/intel_bus.c @@ -49,6 +49,10 @@ static void __devinit pci_root_bus_res(struct pci_dev *dev) u64 mmioh_base, mmioh_end; int bus_base, bus_end; + /* some sys doesn't get mmconf enabled */ + if (dev->cfg_size < 0x120) + return; + if (pci_root_num >= PCI_ROOT_NR) { printk(KERN_DEBUG "intel_bus.c: PCI_ROOT_NR is too small\n"); return; -- cgit v1.2.3 From f4b825bde98938f160315d655597bc9731521cae Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 5 Jan 2010 12:48:49 +1030 Subject: Revert "x86: Side-step lguest problem by only building cmpxchg8b_emu for pre-Pentium" This reverts commit ae1b22f6e46c03cede7cea234d0bf2253b4261cf. As Linus said in 982d007a6ee: "There was something really messy about cmpxchg8b and clone CPU's, so if you enable it on other CPUs later, do it carefully." This breaks lguest for those configs, but we can fix that by emulating if we have to. Fixes: http://bugzilla.kernel.org/show_bug.cgi?id=14884 Signed-off-by: Rusty Russell LKML-Reference: <201001051248.49700.rusty@rustcorp.com.au> Cc: stable@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig.cpu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 08e442bc3ab9..f20ddf84a893 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -396,7 +396,7 @@ config X86_TSC config X86_CMPXCHG64 def_bool y - depends on !M386 && !M486 + depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM # this should be set for all -march=.. options where the compiler # generates cmov. -- cgit v1.2.3 From 409d02ef6d74f5e91f5ea4c587b2ee1375f106fc Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 5 Jan 2010 14:19:11 +0100 Subject: x86: copy_from_user() should not return -EFAULT Callers of copy_from_user() expect it to return the number of bytes it could not copy. In no case it is supposed to return -EFAULT. In case of a detected buffer overflow just return the requested length. In addition one could think of a memset that would clear the size of the target object. [ hpa: code is not in .32 so not needed for -stable ] Signed-off-by: Heiko Carstens Acked-by: Arjan van de Ven LKML-Reference: <20100105131911.GC5480@osiris.boeblingen.de.ibm.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/uaccess_32.h | 5 ++--- arch/x86/include/asm/uaccess_64.h | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 0c9825e97f36..088d09fb1615 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -205,14 +205,13 @@ static inline unsigned long __must_check copy_from_user(void *to, unsigned long n) { int sz = __compiletime_object_size(to); - int ret = -EFAULT; if (likely(sz == -1 || sz >= n)) - ret = _copy_from_user(to, from, n); + n = _copy_from_user(to, from, n); else copy_from_user_overflow(); - return ret; + return n; } long __must_check strncpy_from_user(char *dst, const char __user *src, diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 46324c6a4f6e..535e421498f6 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -30,16 +30,15 @@ static inline unsigned long __must_check copy_from_user(void *to, unsigned long n) { int sz = __compiletime_object_size(to); - int ret = -EFAULT; might_fault(); if (likely(sz == -1 || sz >= n)) - ret = _copy_from_user(to, from, n); + n = _copy_from_user(to, from, n); #ifdef CONFIG_DEBUG_VM else WARN(1, "Buffer overflow detected!\n"); #endif - return ret; + return n; } static __always_inline __must_check -- cgit v1.2.3 From db677ffa5f5a4f15b9dad4d132b3477b80766d82 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 5 Jan 2010 12:48:49 +1030 Subject: Revert "x86: Side-step lguest problem by only building cmpxchg8b_emu for pre-Pentium" This reverts commit ae1b22f6e46c03cede7cea234d0bf2253b4261cf. As Linus said in 982d007a6ee: "There was something really messy about cmpxchg8b and clone CPU's, so if you enable it on other CPUs later, do it carefully." This breaks lguest for those configs, but we can fix that by emulating if we have to. Fixes: http://bugzilla.kernel.org/show_bug.cgi?id=14884 Signed-off-by: Rusty Russell Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- arch/x86/Kconfig.cpu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 08e442bc3ab9..f20ddf84a893 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -396,7 +396,7 @@ config X86_TSC config X86_CMPXCHG64 def_bool y - depends on !M386 && !M486 + depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM # this should be set for all -march=.. options where the compiler # generates cmov. -- cgit v1.2.3 From 7f41c2e1523f628cc248e34192162aec5728bed7 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 6 Jan 2010 10:56:31 -0800 Subject: x86, irq: Check move_in_progress before freeing the vector mapping With the recent irq migration fixes (post 2.6.32), Gary Hade has noticed "No IRQ handler for vector" messages during the 2.6.33-rc1 kernel boot on IBM AMD platforms and root caused the issue to this commit: > commit 23359a88e7eca3c4f402562b102f23014db3c2aa > Author: Suresh Siddha > Date: Mon Oct 26 14:24:33 2009 -0800 > > x86: Remove move_cleanup_count from irq_cfg As part of this patch, we have removed the move_cleanup_count check in smp_irq_move_cleanup_interrupt(). With this change, we can run into a situation where an irq cleanup interrupt on a cpu can cleanup the vector mappings associated with multiple irqs, of which one of the irq's migration might be still in progress. As such when that irq hits the old cpu, we get the "No IRQ handler" messages. Fix this by checking for the irq_cfg's move_in_progress and if the move is still in progress delay the vector cleanup to another irq cleanup interrupt request (which will happen when the irq starts arriving at the new cpu destination). Reported-and-tested-by: Gary Hade Signed-off-by: Suresh Siddha LKML-Reference: <1262804191.2732.7.camel@sbs-t61.sc.intel.com> Cc: Eric W. Biederman Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/io_apic.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index de00c4619a55..53243ca7816d 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2434,6 +2434,13 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) cfg = irq_cfg(irq); raw_spin_lock(&desc->lock); + /* + * Check if the irq migration is in progress. If so, we + * haven't received the cleanup request yet for this irq. + */ + if (cfg->move_in_progress) + goto unlock; + if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) goto unlock; -- cgit v1.2.3 From 8558e3943df1c51c3377cb4e8a52ea484d6f357d Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 6 Jan 2010 16:11:06 -0500 Subject: x86, ACPI: delete acpi_boot_table_init() return value cleanup only. setup_arch(), doesn't care care if ACPI initialization succeeded or failed, so delete acpi_boot_table_init()'s return value. Signed-off-by: Len Brown --- arch/x86/kernel/acpi/boot.c | 22 ++++++---------------- include/linux/acpi.h | 6 +++--- 2 files changed, 9 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index fb1035cd9a6a..036d28adf59d 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1529,16 +1529,10 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = { * if acpi_blacklisted() acpi_disabled = 1; * acpi_irq_model=... * ... - * - * return value: (currently ignored) - * 0: success - * !0: failure */ -int __init acpi_boot_table_init(void) +void __init acpi_boot_table_init(void) { - int error; - dmi_check_system(acpi_dmi_table); /* @@ -1546,15 +1540,14 @@ int __init acpi_boot_table_init(void) * One exception: acpi=ht continues far enough to enumerate LAPICs */ if (acpi_disabled && !acpi_ht) - return 1; + return; /* * Initialize the ACPI boot-time table parser. */ - error = acpi_table_init(); - if (error) { + if (acpi_table_init()) { disable_acpi(); - return error; + return; } acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf); @@ -1562,18 +1555,15 @@ int __init acpi_boot_table_init(void) /* * blacklist may disable ACPI entirely */ - error = acpi_blacklisted(); - if (error) { + if (acpi_blacklisted()) { if (acpi_force) { printk(KERN_WARNING PREFIX "acpi=force override\n"); } else { printk(KERN_WARNING PREFIX "Disabling ACPI support\n"); disable_acpi(); - return error; + return; } } - - return 0; } int __init early_acpi_boot_init(void) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 36924255c0d5..b926afe8c03e 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -80,7 +80,7 @@ char * __acpi_map_table (unsigned long phys_addr, unsigned long size); void __acpi_unmap_table(char *map, unsigned long size); int early_acpi_boot_init(void); int acpi_boot_init (void); -int acpi_boot_table_init (void); +void acpi_boot_table_init (void); int acpi_mps_check (void); int acpi_numa_init (void); @@ -321,9 +321,9 @@ static inline int acpi_boot_init(void) return 0; } -static inline int acpi_boot_table_init(void) +static inline void acpi_boot_table_init(void) { - return 0; + return; } static inline int acpi_mps_check(void) -- cgit v1.2.3 From 4b529401c5089cf33f7165607cbc2fde43357bfb Mon Sep 17 00:00:00 2001 From: Andreas Fenkart Date: Fri, 8 Jan 2010 14:42:31 -0800 Subject: mm: make totalhigh_pages unsigned long Makes it consistent with the extern declaration, used when CONFIG_HIGHMEM is set Removes redundant casts in printout messages Signed-off-by: Andreas Fenkart Acked-by: Russell King Cc: Ralf Baechle Cc: David Howells Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Chen Liqin Cc: Lennox Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/init.c | 2 +- arch/mips/mm/init.c | 2 +- arch/mips/sgi-ip27/ip27-memory.c | 2 +- arch/mn10300/mm/init.c | 3 +-- arch/score/mm/init.c | 2 +- arch/x86/mm/init_32.c | 3 +-- include/linux/highmem.h | 2 +- 7 files changed, 7 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 52c40d155672..a04ffbbbe253 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -616,7 +616,7 @@ void __init mem_init(void) "%dK data, %dK init, %luK highmem)\n", nr_free_pages() << (PAGE_SHIFT-10), codesize >> 10, datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); + totalhigh_pages << (PAGE_SHIFT-10)); if (PAGE_SIZE >= 16384 && num_physpages <= 128) { extern int sysctl_overcommit_memory; diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 9e8d00389eef..1651942f7feb 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -424,7 +424,7 @@ void __init mem_init(void) reservedpages << (PAGE_SHIFT-10), datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); + totalhigh_pages << (PAGE_SHIFT-10)); } #endif /* !CONFIG_NEED_MULTIPLE_NODES */ diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index f61c164d1e67..bc1297109cc5 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -505,5 +505,5 @@ void __init mem_init(void) (num_physpages - tmp) << (PAGE_SHIFT-10), datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); + totalhigh_pages << (PAGE_SHIFT-10)); } diff --git a/arch/mn10300/mm/init.c b/arch/mn10300/mm/init.c index ec1420562dc7..dd27a9a35152 100644 --- a/arch/mn10300/mm/init.c +++ b/arch/mn10300/mm/init.c @@ -118,8 +118,7 @@ void __init mem_init(void) reservedpages << (PAGE_SHIFT - 10), datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT - 10)) - ); + totalhigh_pages << (PAGE_SHIFT - 10)); } /* diff --git a/arch/score/mm/init.c b/arch/score/mm/init.c index 8c15b2c85d5a..dfaf458d6702 100644 --- a/arch/score/mm/init.c +++ b/arch/score/mm/init.c @@ -106,7 +106,7 @@ void __init mem_init(void) ram << (PAGE_SHIFT-10), codesize >> 10, reservedpages << (PAGE_SHIFT-10), datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); + totalhigh_pages << (PAGE_SHIFT-10)); } #endif /* !CONFIG_NEED_MULTIPLE_NODES */ diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index c973f8e2a6cf..9a0c258a86be 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -892,8 +892,7 @@ void __init mem_init(void) reservedpages << (PAGE_SHIFT-10), datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) - ); + totalhigh_pages << (PAGE_SHIFT-10)); printk(KERN_INFO "virtual kernel memory layout:\n" " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 211ff4497269..ab2cc20e21a5 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -46,7 +46,7 @@ void kmap_flush_unused(void); static inline unsigned int nr_free_highpages(void) { return 0; } -#define totalhigh_pages 0 +#define totalhigh_pages 0UL #ifndef ARCH_HAS_KMAP static inline void *kmap(struct page *page) -- cgit v1.2.3 From 13510997d600a076e064f10587a8f6d20f8fff41 Mon Sep 17 00:00:00 2001 From: Albin Tonnerre Date: Fri, 8 Jan 2010 14:42:45 -0800 Subject: x86: add support for LZO-compressed kernels The necessary changes to the x86 Kconfig and boot/compressed to allow the use of this new compression method Signed-off-by: Albin Tonnerre Acked-by: H. Peter Anvin Tested-by: Wu Zhangjin Cc: Ingo Molnar Cc: Thomas Gleixner Tested-by: Russell King Acked-by: Russell King Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 1 + arch/x86/boot/compressed/Makefile | 5 ++++- arch/x86/boot/compressed/misc.c | 4 ++++ 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 55298e891571..6bf1f1ac478c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -49,6 +49,7 @@ config X86 select HAVE_KERNEL_GZIP select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA + select HAVE_KERNEL_LZO select HAVE_HW_BREAKPOINT select PERF_EVENTS select ANON_INODES diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index f25bbd37765a..fbb47daf2459 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -4,7 +4,7 @@ # create a compressed vmlinux image from the original vmlinux # -targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma head_$(BITS).o misc.o piggy.o +targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.lzo head_$(BITS).o misc.o piggy.o KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 KBUILD_CFLAGS += -fno-strict-aliasing -fPIC @@ -49,10 +49,13 @@ $(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE $(call if_changed,bzip2) $(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE $(call if_changed,lzma) +$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE + $(call if_changed,lzo) suffix-$(CONFIG_KERNEL_GZIP) := gz suffix-$(CONFIG_KERNEL_BZIP2) := bz2 suffix-$(CONFIG_KERNEL_LZMA) := lzma +suffix-$(CONFIG_KERNEL_LZO) := lzo quiet_cmd_mkpiggy = MKPIGGY $@ cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false ) diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 842b2a36174a..3b22fe8ab91b 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -162,6 +162,10 @@ static int lines, cols; #include "../../../../lib/decompress_unlzma.c" #endif +#ifdef CONFIG_KERNEL_LZO +#include "../../../../lib/decompress_unlzo.c" +#endif + static void scroll(void) { int i; -- cgit v1.2.3 From a29815a333c6c6e677294bbe5958e771d0aad3fd Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 10 Jan 2010 16:28:09 +0200 Subject: core, x86: make LIST_POISON less deadly The list macros use LIST_POISON1 and LIST_POISON2 as undereferencable pointers in order to trap erronous use of freed list_heads. Unfortunately userspace can arrange for those pointers to actually be dereferencable, potentially turning an oops to an expolit. To avoid this allow architectures (currently x86_64 only) to override the default values for these pointers with truly-undereferencable values. This is easy on x86_64 as the virtual address space is large and contains areas that cannot be mapped. Other 64-bit architectures will likely find similar unmapped ranges. [ingo: switch to 0xdead000000000000 as the unmapped area] [ingo: add comments, cleanup] [jaswinder: eliminate sparse warnings] Acked-by: Linus Torvalds Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar Signed-off-by: Avi Kivity Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 5 +++++ include/linux/poison.h | 16 ++++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6bf1f1ac478c..cbcbfdee3ee0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1247,6 +1247,11 @@ config ARCH_MEMORY_PROBE def_bool X86_64 depends on MEMORY_HOTPLUG +config ILLEGAL_POINTER_VALUE + hex + default 0 if X86_32 + default 0xdead000000000000 if X86_64 + source "mm/Kconfig" config HIGHPTE diff --git a/include/linux/poison.h b/include/linux/poison.h index 7fc194aef8c2..2110a81c5e2a 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -2,13 +2,25 @@ #define _LINUX_POISON_H /********** include/linux/list.h **********/ + +/* + * Architectures might want to move the poison pointer offset + * into some well-recognized area such as 0xdead000000000000, + * that is also not mappable by user-space exploits: + */ +#ifdef CONFIG_ILLEGAL_POINTER_VALUE +# define POISON_POINTER_DELTA _AC(CONFIG_ILLEGAL_POINTER_VALUE, UL) +#else +# define POISON_POINTER_DELTA 0 +#endif + /* * These are non-NULL pointers that will result in page faults * under normal circumstances, used to verify that nobody uses * non-initialized list entries. */ -#define LIST_POISON1 ((void *) 0x00100100) -#define LIST_POISON2 ((void *) 0x00200200) +#define LIST_POISON1 ((void *) 0x00100100 + POISON_POINTER_DELTA) +#define LIST_POISON2 ((void *) 0x00200200 + POISON_POINTER_DELTA) /********** include/linux/timer.h **********/ /* -- cgit v1.2.3 From 066000dd856709b6980123eb39b957fe26993f7b Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Mon, 11 Jan 2010 15:51:04 -0800 Subject: Revert "x86, apic: Use logical flat on intel with <= 8 logical cpus" Revert commit 2fbd07a5f5d1295fa9b0c0564ec27da7c276a75a, as this commit breaks an IBM platform with quad-core Xeon cpu's. According to Suresh, this might be an IBM platform issue, as on other Intel platforms with <= 8 logical cpu's, logical flat mode works fine irespective of physical apic id values (inline with the xapic architecture). Revert this for now because of the IBM platform breakage. Another version will be re-submitted after the complete analysis. Signed-off-by: Ananth N Mavinakayanahalli Acked-by: Suresh Siddha Signed-off-by: Linus Torvalds --- arch/x86/kernel/apic/apic.c | 26 ++++++++++++++++++-------- arch/x86/kernel/apic/probe_64.c | 15 ++++----------- 2 files changed, 22 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index aa57c079c98f..e80f291472a4 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -62,7 +62,7 @@ unsigned int boot_cpu_physical_apicid = -1U; /* * The highest APIC ID seen during enumeration. * - * On AMD, this determines the messaging protocol we can use: if all APIC IDs + * This determines the messaging protocol we can use: if all APIC IDs * are in the 0 ... 7 range, then we can use logical addressing which * has some performance advantages (better broadcasting). * @@ -1898,14 +1898,24 @@ void __cpuinit generic_processor_info(int apicid, int version) max_physical_apicid = apicid; #ifdef CONFIG_X86_32 - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - if (num_processors > 8) - def_to_bigsmp = 1; - break; - case X86_VENDOR_AMD: - if (max_physical_apicid >= 8) + /* + * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y + * but we need to work other dependencies like SMP_SUSPEND etc + * before this can be done without some confusion. + * if (CPU_HOTPLUG_ENABLED || num_processors > 8) + * - Ashok Raj + */ + if (max_physical_apicid >= 8) { + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + if (!APIC_XAPIC(version)) { + def_to_bigsmp = 0; + break; + } + /* If P4 and above fall through */ + case X86_VENDOR_AMD: def_to_bigsmp = 1; + } } #endif diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index c4cbd3080c1c..65edc180fc82 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -64,23 +64,16 @@ void __init default_setup_apic_routing(void) apic = &apic_x2apic_phys; else apic = &apic_x2apic_cluster; + printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); } #endif if (apic == &apic_flat) { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - if (num_processors > 8) - apic = &apic_physflat; - break; - case X86_VENDOR_AMD: - if (max_physical_apicid >= 8) - apic = &apic_physflat; - } + if (max_physical_apicid >= 8) + apic = &apic_physflat; + printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); } - printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); - if (is_vsmp_box()) { /* need to update phys_pkg_id */ apic->phys_pkg_id = apicid_phys_pkg_id; -- cgit v1.2.3 From c2c5d45d46c8c0fd34291dec958670ad4816796f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 31 Dec 2009 03:52:25 +0100 Subject: perf: Stop stack frame walking off kernel addresses boundaries While processing kernel perf callchains, an bad entry can be considered as a valid stack pointer but not as a kernel address. In this case, we hang in an endless loop. This can happen in an x86-32 kernel after processing the last entry in a kernel stacktrace. Just stop the stack frame walking after we encounter an invalid kernel address. This fixes a hard lockup in x86-32. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras LKML-Reference: <1262227945-27014-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index c56bc2873030..6d817554780a 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -123,13 +123,15 @@ print_context_stack_bp(struct thread_info *tinfo, while (valid_stack_ptr(tinfo, ret_addr, sizeof(*ret_addr), end)) { unsigned long addr = *ret_addr; - if (__kernel_text_address(addr)) { - ops->address(data, addr, 1); - frame = frame->next_frame; - ret_addr = &frame->return_address; - print_ftrace_graph_addr(addr, data, ops, tinfo, graph); - } + if (!__kernel_text_address(addr)) + break; + + ops->address(data, addr, 1); + frame = frame->next_frame; + ret_addr = &frame->return_address; + print_ftrace_graph_addr(addr, data, ops, tinfo, graph); } + return (unsigned long)frame; } EXPORT_SYMBOL_GPL(print_context_stack_bp); -- cgit v1.2.3 From df39a2e48f99e2d706e8fa4dc99fd148eb59449d Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Mon, 4 Jan 2010 16:17:21 +0000 Subject: x86: mce.h: Fix warning in header checks Someone isn't reading their build output: Move the definition out of the exported header. Signed-off-by: Alan Cox Cc: linux-kernel@vger.kernelorg Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 858baa061cfc..6c3fdd631ed3 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -108,10 +108,11 @@ struct mce_log { #define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) #define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) -extern struct atomic_notifier_head x86_mce_decoder_chain; #ifdef __KERNEL__ +extern struct atomic_notifier_head x86_mce_decoder_chain; + #include #include #include -- cgit v1.2.3 From fcfbb2b5facd65efa7284cc315225bfe3d1856c2 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 8 Jan 2010 12:13:54 -0800 Subject: x86: SGI UV: Fix mapping of MMIO registers This fixes the problem of the initialization code not correctly mapping the entire MMIO space on a UV system. A side effect is the map_high() interface needed to be changed to accommodate different address and size shifts. Signed-off-by: Mike Travis Reviewed-by: Mike Habeck Cc: Cc: Jack Steiner Cc: Linus Torvalds LKML-Reference: <4B479202.7080705@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 5f92494dab61..b8bb869a6618 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -374,13 +374,13 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) enum map_type {map_wb, map_uc}; -static __init void map_high(char *id, unsigned long base, int shift, - int max_pnode, enum map_type map_type) +static __init void map_high(char *id, unsigned long base, int pshift, + int bshift, int max_pnode, enum map_type map_type) { unsigned long bytes, paddr; - paddr = base << shift; - bytes = (1UL << shift) * (max_pnode + 1); + paddr = base << pshift; + bytes = (1UL << bshift) * (max_pnode + 1); printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, paddr + bytes); if (map_type == map_uc) @@ -396,7 +396,7 @@ static __init void map_gru_high(int max_pnode) gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); if (gru.s.enable) { - map_high("GRU", gru.s.base, shift, max_pnode, map_wb); + map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb); gru_start_paddr = ((u64)gru.s.base << shift); gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1); @@ -410,7 +410,7 @@ static __init void map_mmr_high(int max_pnode) mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); if (mmr.s.enable) - map_high("MMR", mmr.s.base, shift, max_pnode, map_uc); + map_high("MMR", mmr.s.base, shift, shift, max_pnode, map_uc); } static __init void map_mmioh_high(int max_pnode) @@ -420,7 +420,8 @@ static __init void map_mmioh_high(int max_pnode) mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); if (mmioh.s.enable) - map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc); + map_high("MMIOH", mmioh.s.base, shift, mmioh.s.m_io, + max_pnode, map_uc); } static __init void map_low_mmrs(void) -- cgit v1.2.3 From 42590a75019a50012f25a962246498dead428433 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 4 Jan 2010 16:16:23 +0900 Subject: x86/agp: Fix agp_amd64_init and agp_amd64_cleanup This fixes the regression introduced by the commit f405d2c02395a74d3883bd03ded36457aa3697ad. The above commit fixes the following issue: http://marc.info/?l=linux-kernel&m=126192729110083&w=2 However, it doesn't work properly when you remove and insert the agp_amd64 module again. agp_amd64_init() and agp_amd64_cleanup should be called only when gart_iommu is not called earlier (that is, the GART IOMMU is not enabled). We need to use 'gart_iommu_aperture' to see if GART IOMMU is enabled or not. Signed-off-by: FUJITA Tomonori Cc: mitov@issp.bas.bg Cc: davej@redhat.com LKML-Reference: <20100104161603L.fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/kernel/aperture_64.c | 1 + drivers/char/agp/amd64-agp.c | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 3704997e8b25..f147a95fd84a 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -31,6 +31,7 @@ #include int gart_iommu_aperture; +EXPORT_SYMBOL_GPL(gart_iommu_aperture); int gart_iommu_aperture_disabled __initdata; int gart_iommu_aperture_allowed __initdata; diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index 5aa7a586a7ff..1afb8968a342 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -725,12 +725,11 @@ static struct pci_driver agp_amd64_pci_driver = { int __init agp_amd64_init(void) { int err = 0; - static int done = 0; if (agp_off) return -EINVAL; - if (done++) + if (gart_iommu_aperture) return agp_bridges_found ? 0 : -ENODEV; err = pci_register_driver(&agp_amd64_pci_driver); @@ -771,6 +770,8 @@ int __init agp_amd64_init(void) static void __exit agp_amd64_cleanup(void) { + if (gart_iommu_aperture) + return; if (aperture_resource) release_resource(aperture_resource); pci_unregister_driver(&agp_amd64_pci_driver); -- cgit v1.2.3 From 864a0922dd128392467611d9857e5138c6a91999 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 13 Jan 2010 10:16:07 +0000 Subject: x86: kernel_thread() -- initialize SS to a known state Before the kernel_thread was converted into "C" we had pt_regs::ss set to __KERNEL_DS (by SAVE_ALL asm macro). Though I must admit I didn't find any *explicit* load of %ss from this structure the better to be on a safe side and set it to a known value. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ian Campbell Cc: Christian Kujau Cc: Jeremy Fitzhardinge Cc: Brian Gerst LKML-Reference: <1263377768-19600-1-git-send-email-ian.campbell@citrix.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/process.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c6ee241c8a98..02c3ee013ccd 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -288,6 +288,8 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) regs.es = __USER_DS; regs.fs = __KERNEL_PERCPU; regs.gs = __KERNEL_STACK_CANARY; +#else + regs.ss = __KERNEL_DS; #endif regs.orig_ax = -1; -- cgit v1.2.3 From e68266b7001a4e29af083716f0c36c0d6dbb1b39 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 13 Jan 2010 10:16:08 +0000 Subject: x86: xen: 64-bit kernel RPL should be 0 Under Xen 64 bit guests actually run their kernel in ring 3, however the hypervisor takes care of squashing descriptor the RPLs transparently (in order to allow them to continue to differentiate between user and kernel space CS using the RPL). Therefore the Xen paravirt backend should use RPL==0 instead of 1 (or 3). Using RPL==1 causes generic arch code to take incorrect code paths because it uses "testl $3, , je foo" type tests for a userspace CS and this considers 1==userspace. This issue was previously masked because get_kernel_rpl() was omitted when setting CS in kernel_thread(). This was fixed when kernel_thread() was unified with 32 bit in f443ff4201dd25cd4dec183f9919ecba90c8edc2. Signed-off-by: Ian Campbell Cc: Christian Kujau Cc: Jeremy Fitzhardinge Cc: Cyrill Gorcunov Cc: Brian Gerst LKML-Reference: <1263377768-19600-2-git-send-email-ian.campbell@citrix.com> Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 2b26dd5930c6..36daccb68642 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1151,9 +1151,13 @@ asmlinkage void __init xen_start_kernel(void) /* keep using Xen gdt for now; no urgent need to change it */ +#ifdef CONFIG_X86_32 pv_info.kernel_rpl = 1; if (xen_feature(XENFEAT_supervisor_mode_kernel)) pv_info.kernel_rpl = 0; +#else + pv_info.kernel_rpl = 0; +#endif /* set the limit of our address space */ xen_reserve_top(); -- cgit v1.2.3 From 557a701c16553b0b691dbb64ef30361115a80f64 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Mon, 14 Dec 2009 11:44:15 +0100 Subject: [CPUFREQ] Fix use after free of struct powernow_k8_data Easy fix for a regression introduced in 2.6.31. On managed CPUs the cpufreq.c core will call driver->exit(cpu) on the managed cpus and powernow_k8 will free the core's data. Later driver->get(cpu) function might get called trying to read out the current freq of a managed cpu and the NULL pointer check does not work on the freed object -> better set it to NULL. ->get() is unsigned and must return 0 as invalid frequency. Reference: http://bugzilla.kernel.org/show_bug.cgi?id=14391 Signed-off-by: Thomas Renninger Tested-by: Michal Schmidt CC: stable@kernel.org Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index a9df9441a9a2..2da4fa3bf6e9 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1356,6 +1356,7 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol) kfree(data->powernow_table); kfree(data); + per_cpu(powernow_data, pol->cpu) = NULL; return 0; } @@ -1375,7 +1376,7 @@ static unsigned int powernowk8_get(unsigned int cpu) int err; if (!data) - return -EINVAL; + return 0; smp_call_function_single(cpu, query_values_on_cpu, &err, true); if (err) -- cgit v1.2.3 From 7a1110e861b2666ac09f5708d6fbe71d18ce64bb Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Tue, 12 Jan 2010 15:09:04 -0600 Subject: x86, uv: Add function retrieving node controller revision number Add function for determining the revision id of the SGI UV node controller chip (HUB). This function is needed in a subsequent patch. Signed-off-by: Jack Steiner LKML-Reference: <20100112210904.GA24546@sgi.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/uv/uv_hub.h | 12 ++++++++++++ arch/x86/kernel/apic/x2apic_uv_x.c | 6 ++++++ 2 files changed, 18 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index bc54fa965af3..40be813fefb1 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -495,5 +495,17 @@ static inline void uv_hub_send_ipi(int pnode, int apicid, int vector) uv_write_global_mmr64(pnode, UVH_IPI_INT, val); } +/* + * Get the minimum revision number of the hub chips within the partition. + * 1 - initial rev 1.0 silicon + * 2 - rev 2.0 production silicon + */ +static inline int uv_get_min_hub_revision_id(void) +{ + extern int uv_min_hub_revision_id; + + return uv_min_hub_revision_id; +} + #endif /* CONFIG_X86_64 */ #endif /* _ASM_X86_UV_UV_HUB_H */ diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index b8bb869a6618..0e48de9ff864 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -36,6 +36,8 @@ DEFINE_PER_CPU(int, x2apic_extra_bits); static enum uv_system_type uv_system_type; static u64 gru_start_paddr, gru_end_paddr; +int uv_min_hub_revision_id; +EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); static inline bool is_GRU_range(u64 start, u64 end) { @@ -55,6 +57,10 @@ static int early_get_nodeid(void) mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr)); node_id.v = *mmr; early_iounmap(mmr, sizeof(*mmr)); + + /* Currently, all blades have same revision number */ + uv_min_hub_revision_id = node_id.s.revision; + return node_id.s.node_id; } -- cgit v1.2.3 From 1d2c867c941d635e53e8ad7bf37d060bb5b25ec5 Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Fri, 15 Jan 2010 12:09:09 -0600 Subject: x86, uv: Ensure hub revision set for all ACPI modes. Ensure that UV hub revision is set for all ACPI modes. Signed-off-by: Russ Anderson LKML-Reference: <20100115180908.GB7757@sgi.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/x2apic_uv_x.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 0e48de9ff864..21db3cbea7dc 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -66,7 +66,10 @@ static int early_get_nodeid(void) static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { + int nodeid; + if (!strcmp(oem_id, "SGI")) { + nodeid = early_get_nodeid(); x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; if (!strcmp(oem_table_id, "UVL")) uv_system_type = UV_LEGACY_APIC; @@ -74,7 +77,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) uv_system_type = UV_X2APIC; else if (!strcmp(oem_table_id, "UVH")) { __get_cpu_var(x2apic_extra_bits) = - early_get_nodeid() << (UV_APIC_PNODE_SHIFT - 1); + nodeid << (UV_APIC_PNODE_SHIFT - 1); uv_system_type = UV_NON_UNIQUE_APIC; return 1; } -- cgit v1.2.3 From 0bb7a95f5455cd87e6a69e5818bc1f509a98d187 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sat, 16 Jan 2010 10:39:30 +0100 Subject: hw-breakpoints, perf: Fix broken mmiotrace due to dr6 by reference change Commit 62edab9056a6cf0c9207339c8892c923a5217e45 (from June 2009 but merged in 2.6.33) changes notify_die to pass dr6 by reference. However, it forgets to fix the check for DR_STEP in kmmio.c, breaking mmiotrace. It also passes a wrong value to the post handler. This simple fix makes mmiotrace work again. Signed-off-by: Luca Barbieri Acked-by: K.Prasad Cc: Frederic Weisbecker LKML-Reference: <1263634770-14578-1-git-send-email-luca@luca-barbieri.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index c0f6198565eb..536fb6823366 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -538,14 +538,15 @@ static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) { struct die_args *arg = args; + unsigned long* dr6_p = (unsigned long *)ERR_PTR(arg->err); - if (val == DIE_DEBUG && (arg->err & DR_STEP)) - if (post_kmmio_handler(arg->err, arg->regs) == 1) { + if (val == DIE_DEBUG && (*dr6_p & DR_STEP)) + if (post_kmmio_handler(*dr6_p, arg->regs) == 1) { /* * Reset the BS bit in dr6 (pointed by args->err) to * denote completion of processing */ - (*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP; + *dr6_p &= ~DR_STEP; return NOTIFY_STOP; } -- cgit v1.2.3 From dfea91d5a7c795fd6f4e1a97489a98e4e767463e Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 18 Jan 2010 12:10:48 -0800 Subject: x86, apic: use physical mode for IBM summit platforms Chris McDermott from IBM confirmed that hurricane chipset in IBM summit platforms doesn't support logical flat mode. Irrespective of the other things like apic_id's, total number of logical cpu's, Linux kernel should default to physical mode for this system. The 32-bit kernel does so using the OEM checks for the IBM summit platform. Add a similar OEM platform check for the 64bit kernel too. Otherwise the linux kernel boot can hang on this platform under certain bios/platform settings. Signed-off-by: Suresh Siddha Tested-by: Ananth N Mavinakayanahalli Cc: Chris McDermott Cc: Yinghai Lu Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- arch/x86/kernel/apic/apic_flat_64.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index eacbd2b31d27..e3c3d820c325 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -240,6 +240,11 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) printk(KERN_DEBUG "system APIC only can use physical flat"); return 1; } + + if (!strncmp(oem_id, "IBM", 3) && !strncmp(oem_table_id, "EXA", 3)) { + printk(KERN_DEBUG "IBM Summit detected, will use apic physical"); + return 1; + } #endif return 0; -- cgit v1.2.3 From bb668da6d6f2bec8a63838c098d9515eccb22cc4 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 18 Jan 2010 12:10:49 -0800 Subject: x86, apic: use logical flat for systems with <= 8 logical cpus We can use logical flat mode if there are <= 8 logical cpu's (irrespective of physical apic id values). This will enable simplified and efficient IPI and device interrupt routing on such platforms. This has been tested to work on both Intel and AMD platforms. Exceptions like IBM summit platform which can't use logical flat mode are addressed by using OEM platform checks. Signed-off-by: Suresh Siddha Signed-off-by: Yinghai Lu Cc: Ananth N Mavinakayanahalli Cc: Chris McDermott Signed-off-by: Linus Torvalds --- arch/x86/kernel/apic/apic.c | 15 +-------------- arch/x86/kernel/apic/probe_64.c | 8 +++----- 2 files changed, 4 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index e80f291472a4..3987e4408f75 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -61,12 +61,6 @@ unsigned int boot_cpu_physical_apicid = -1U; /* * The highest APIC ID seen during enumeration. - * - * This determines the messaging protocol we can use: if all APIC IDs - * are in the 0 ... 7 range, then we can use logical addressing which - * has some performance advantages (better broadcasting). - * - * If there's an APIC ID above 8, we use physical addressing. */ unsigned int max_physical_apicid; @@ -1898,14 +1892,7 @@ void __cpuinit generic_processor_info(int apicid, int version) max_physical_apicid = apicid; #ifdef CONFIG_X86_32 - /* - * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y - * but we need to work other dependencies like SMP_SUSPEND etc - * before this can be done without some confusion. - * if (CPU_HOTPLUG_ENABLED || num_processors > 8) - * - Ashok Raj - */ - if (max_physical_apicid >= 8) { + if (num_processors > 8) { switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_INTEL: if (!APIC_XAPIC(version)) { diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 65edc180fc82..450fe2064a14 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -64,15 +64,13 @@ void __init default_setup_apic_routing(void) apic = &apic_x2apic_phys; else apic = &apic_x2apic_cluster; - printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); } #endif - if (apic == &apic_flat) { - if (max_physical_apicid >= 8) + if (apic == &apic_flat && num_processors > 8) apic = &apic_physflat; - printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); - } + + printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); if (is_vsmp_box()) { /* need to update phys_pkg_id */ -- cgit v1.2.3 From b27d515a49169e5e2a92d621faac761074a8c5b1 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 18 Jan 2010 10:58:01 +0200 Subject: perf: x86: Add support for the ANY bit Propagate the ANY bit into the fixed counter config for v3 and higher. Signed-off-by: Stephane Eranian [a.p.zijlstra@chello.nl: split from larger patch] Signed-off-by: Peter Zijlstra LKML-Reference: <4b5430c6.0f975e0a.1bf9.ffff85fe@mx.google.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event.h | 1 + arch/x86/kernel/cpu/perf_event.c | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 8d9f8548a870..1380367dabd9 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -19,6 +19,7 @@ #define MSR_ARCH_PERFMON_EVENTSEL1 0x187 #define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) +#define ARCH_PERFMON_EVENTSEL_ANY (1 << 21) #define ARCH_PERFMON_EVENTSEL_INT (1 << 20) #define ARCH_PERFMON_EVENTSEL_OS (1 << 17) #define ARCH_PERFMON_EVENTSEL_USR (1 << 16) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index d616c06e99b4..8c1c07073ccc 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1343,6 +1343,13 @@ intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx) bits |= 0x2; if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) bits |= 0x1; + + /* + * ANY bit is supported in v3 and up + */ + if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY) + bits |= 0x4; + bits <<= (idx * 4); mask = 0xfULL << (idx * 4); -- cgit v1.2.3 From d91afd15b041f27d34859c79afa9e172018a86f4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 22 Jan 2010 16:40:20 +0100 Subject: x86/amd-iommu: Fix possible integer overflow The variable i in this function could be increased to over 2**32 which would result in an integer overflow when using int. Fix it by changing i to unsigned long. Cc: stable@kernel.org Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 23824fef789c..c2ccbd7b862f 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -980,7 +980,7 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, { int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; struct amd_iommu *iommu; - int i; + unsigned long i; #ifdef CONFIG_IOMMU_STRESS populate = false; -- cgit v1.2.3 From 2ca762790caf822f7b61430fbaffa3ae4219977f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 22 Jan 2010 16:45:31 +0100 Subject: x86/amd-iommu: Fix NULL pointer dereference in __detach_device() In the __detach_device function the reference count for a device-domain binding may become zero. This results in the device being removed from the domain and dev_data->domain will be NULL. This is bad because this pointer is dereferenced when trying to unlock the domain->lock. This patch fixes the issue by keeping the domain in a seperate variable. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index c2ccbd7b862f..4478a48198a8 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1489,11 +1489,14 @@ static void __detach_device(struct device *dev) { struct iommu_dev_data *dev_data = get_dev_data(dev); struct iommu_dev_data *alias_data; + struct protection_domain *domain; unsigned long flags; BUG_ON(!dev_data->domain); - spin_lock_irqsave(&dev_data->domain->lock, flags); + domain = dev_data->domain; + + spin_lock_irqsave(&domain->lock, flags); if (dev_data->alias != dev) { alias_data = get_dev_data(dev_data->alias); @@ -1504,7 +1507,7 @@ static void __detach_device(struct device *dev) if (atomic_dec_and_test(&dev_data->bind)) do_detach(dev); - spin_unlock_irqrestore(&dev_data->domain->lock, flags); + spin_unlock_irqrestore(&domain->lock, flags); /* * If we run in passthrough mode the device must be assigned to the -- cgit v1.2.3 From f5325094379158e6b876ea0010c807bf7890ec8f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 22 Jan 2010 17:44:35 +0100 Subject: x86/amd-iommu: Fix IOMMU-API initialization for iommu=pt This patch moves the initialization of the iommu-api out of the dma-ops initialization code. This ensures that the iommu-api is initialized even with iommu=pt. Cc: stable@kernel.org Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_proto.h | 1 + arch/x86/kernel/amd_iommu.c | 8 ++++++-- arch/x86/kernel/amd_iommu_init.c | 3 +++ 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h index 4d817f9e6e77..d2544f1d705d 100644 --- a/arch/x86/include/asm/amd_iommu_proto.h +++ b/arch/x86/include/asm/amd_iommu_proto.h @@ -31,6 +31,7 @@ extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); extern int amd_iommu_init_devices(void); extern void amd_iommu_uninit_devices(void); extern void amd_iommu_init_notifier(void); +extern void amd_iommu_init_api(void); #ifndef CONFIG_AMD_IOMMU_STATS static inline void amd_iommu_stats_init(void) { } diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 4478a48198a8..751ce73c6e1b 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -2221,6 +2221,12 @@ static struct dma_map_ops amd_iommu_dma_ops = { /* * The function which clues the AMD IOMMU driver into dma_ops. */ + +void __init amd_iommu_init_api(void) +{ + register_iommu(&amd_iommu_ops); +} + int __init amd_iommu_init_dma_ops(void) { struct amd_iommu *iommu; @@ -2256,8 +2262,6 @@ int __init amd_iommu_init_dma_ops(void) /* Make the driver finally visible to the drivers */ dma_ops = &amd_iommu_dma_ops; - register_iommu(&amd_iommu_ops); - amd_iommu_stats_init(); return 0; diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index fb490ce7dd55..9dc91b431470 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1292,9 +1292,12 @@ static int __init amd_iommu_init(void) ret = amd_iommu_init_passthrough(); else ret = amd_iommu_init_dma_ops(); + if (ret) goto free; + amd_iommu_init_api(); + amd_iommu_init_notifier(); enable_iommus(); -- cgit v1.2.3 From d3ad9373b7c29b63d5e8460a69453718d200cc3b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 22 Jan 2010 17:55:27 +0100 Subject: x86/amd-iommu: Fix deassignment of a device from the pt_domain Deassigning a device from the passthrough domain does not work and breaks device assignment to kvm guests. This patch fixes the issue. Cc: stable@kernel.org Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 751ce73c6e1b..adb0ba025702 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1511,9 +1511,11 @@ static void __detach_device(struct device *dev) /* * If we run in passthrough mode the device must be assigned to the - * passthrough domain if it is detached from any other domain + * passthrough domain if it is detached from any other domain. + * Make sure we can deassign from the pt_domain itself. */ - if (iommu_pass_through && dev_data->domain == NULL) + if (iommu_pass_through && + (dev_data->domain == NULL && domain != pt_domain)) __attach_device(dev, pt_domain); } -- cgit v1.2.3 From 3a5fc0e40cb467e692737bc798bc99773c81e1e2 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 20 Jan 2010 12:10:47 -0800 Subject: x86: Set hotpluggable nodes in nodes_possible_map nodes_possible_map does not currently include nodes that have SRAT entries that are all ACPI_SRAT_MEM_HOT_PLUGGABLE since the bit is cleared in nodes_parsed if it does not have an online address range. Unequivocally setting the bit in nodes_parsed is insufficient since existing code, such as acpi_get_nodes(), assumes all nodes in the map have online address ranges. In fact, all code using nodes_parsed assumes such nodes represent an address range of online memory. nodes_possible_map is created by unioning nodes_parsed and cpu_nodes_parsed; the former represents nodes with online memory and the latter represents memoryless nodes. We now set the bit for hotpluggable nodes in cpu_nodes_parsed so that it also gets set in nodes_possible_map. [ hpa: Haicheng Li points out that this makes the naming of the variable cpu_nodes_parsed somewhat counterintuitive. However, leave it as is in the interest of keeping the pure bug fix patch small. ] Signed-off-by: David Rientjes Tested-by: Haicheng Li LKML-Reference: Cc: Signed-off-by: H. Peter Anvin --- arch/x86/mm/srat_64.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index a27124185fc1..28c68762648f 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -229,9 +229,11 @@ update_nodes_add(int node, unsigned long start, unsigned long end) printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); } - if (changed) + if (changed) { + node_set(node, cpu_nodes_parsed); printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end); + } } /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ -- cgit v1.2.3 From 73472a46b5b28116b145fb5fc05242c1aa8e1461 Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Thu, 21 Jan 2010 11:09:52 -0800 Subject: x86: Disable HPET MSI on ATI SB700/SB800 HPET MSI on platforms with ATI SB700/SB800 as they seem to have some side-effects on floppy DMA. Do not use HPET MSI on such platforms. Original problem report from Mark Hounschell http://lkml.indiana.edu/hypermail/linux/kernel/0912.2/01118.html [ This patch needs to go to stable as well. But, there are some conflicts that prevents the patch from going as is. I can rebase/resubmit to stable once the patch goes upstream. hpa: still Cc:'ing stable@ as an FYI. ] Tested-by: Mark Hounschell Signed-off-by: Venkatesh Pallipadi Cc: LKML-Reference: <20100121190952.GA32523@linux-os.sc.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/hpet.h | 1 + arch/x86/kernel/hpet.c | 8 ++++++++ arch/x86/kernel/quirks.c | 13 +++++++++++++ 3 files changed, 22 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 5d89fd2a3690..1d5c08a1bdfd 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -67,6 +67,7 @@ extern unsigned long hpet_address; extern unsigned long force_hpet_address; extern u8 hpet_blockid; extern int hpet_force_user; +extern u8 hpet_msi_disable; extern int is_hpet_enabled(void); extern int hpet_enable(void); extern void hpet_disable(void); diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index ba6e65884603..ad80a1c718c6 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -34,6 +34,8 @@ */ unsigned long hpet_address; u8 hpet_blockid; /* OS timer block num */ +u8 hpet_msi_disable; + #ifdef CONFIG_PCI_MSI static unsigned long hpet_num_timers; #endif @@ -596,6 +598,9 @@ static void hpet_msi_capability_lookup(unsigned int start_timer) unsigned int num_timers_used = 0; int i; + if (hpet_msi_disable) + return; + if (boot_cpu_has(X86_FEATURE_ARAT)) return; id = hpet_readl(HPET_ID); @@ -928,6 +933,9 @@ static __init int hpet_late_init(void) hpet_reserve_platform_timers(hpet_readl(HPET_ID)); hpet_print_config(); + if (hpet_msi_disable) + return 0; + if (boot_cpu_has(X86_FEATURE_ARAT)) return 0; diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 18093d7498f0..12e9feaa2f7a 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -491,6 +491,19 @@ void force_hpet_resume(void) break; } } + +/* + * HPET MSI on some boards (ATI SB700/SB800) has side effect on + * floppy DMA. Disable HPET MSI on such platforms. + */ +static void force_disable_hpet_msi(struct pci_dev *unused) +{ + hpet_msi_disable = 1; +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, + force_disable_hpet_msi); + #endif #if defined(CONFIG_PCI) && defined(CONFIG_NUMA) -- cgit v1.2.3 From 3b2e3d85aeb80769fb96c15ee4f6e14135328471 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Fri, 22 Jan 2010 21:34:56 +0100 Subject: Revert "x86: ucode-amd: Load ucode-patches once ..." Commit d1c84f79a6ba992dc01e312c44a21496303874d6 leads to a regression when microcode_amd.c is compiled into the kernel. It causes a big boot delay because the firmware is not available. See http://marc.info/?l=linux-kernel&m=126267290920060 It also renders the reload sysfs attribute useless. Fixing this is too intrusive for an -rc5 kernel. Thus I'd like to restore the microcode loading behaviour of kernel 2.6.32. CC: Gene Heskett Signed-off-by: Andreas Herrmann LKML-Reference: <20100122203456.GB13792@alberich.amd.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/microcode.h | 2 -- arch/x86/kernel/microcode_amd.c | 44 ++++++++++++---------------------------- arch/x86/kernel/microcode_core.c | 6 ------ 3 files changed, 13 insertions(+), 39 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index c24ca9a56458..ef51b501e22a 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -12,8 +12,6 @@ struct device; enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND }; struct microcode_ops { - void (*init)(struct device *device); - void (*fini)(void); enum ucode_state (*request_microcode_user) (int cpu, const void __user *buf, size_t size); diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 37542b67c57e..e1af7c055c7d 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -36,9 +36,6 @@ MODULE_LICENSE("GPL v2"); #define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 #define UCODE_UCODE_TYPE 0x00000001 -const struct firmware *firmware; -static int supported_cpu; - struct equiv_cpu_entry { u32 installed_cpu; u32 fixed_errata_mask; @@ -77,12 +74,15 @@ static struct equiv_cpu_entry *equiv_cpu_table; static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) { + struct cpuinfo_x86 *c = &cpu_data(cpu); u32 dummy; - if (!supported_cpu) - return -1; - memset(csig, 0, sizeof(*csig)); + if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { + pr_warning("microcode: CPU%d: AMD CPU family 0x%x not " + "supported\n", cpu, c->x86); + return -1; + } rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); pr_info("CPU%d: patch_level=0x%x\n", cpu, csig->rev); return 0; @@ -294,10 +294,14 @@ generic_load_microcode(int cpu, const u8 *data, size_t size) static enum ucode_state request_microcode_fw(int cpu, struct device *device) { + const char *fw_name = "amd-ucode/microcode_amd.bin"; + const struct firmware *firmware; enum ucode_state ret; - if (firmware == NULL) + if (request_firmware(&firmware, fw_name, device)) { + printk(KERN_ERR "microcode: failed to load file %s\n", fw_name); return UCODE_NFOUND; + } if (*(u32 *)firmware->data != UCODE_MAGIC) { pr_err("invalid UCODE_MAGIC (0x%08x)\n", @@ -307,6 +311,8 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device) ret = generic_load_microcode(cpu, firmware->data, firmware->size); + release_firmware(firmware); + return ret; } @@ -325,31 +331,7 @@ static void microcode_fini_cpu_amd(int cpu) uci->mc = NULL; } -void init_microcode_amd(struct device *device) -{ - const char *fw_name = "amd-ucode/microcode_amd.bin"; - struct cpuinfo_x86 *c = &boot_cpu_data; - - WARN_ON(c->x86_vendor != X86_VENDOR_AMD); - - if (c->x86 < 0x10) { - pr_warning("AMD CPU family 0x%x not supported\n", c->x86); - return; - } - supported_cpu = 1; - - if (request_firmware(&firmware, fw_name, device)) - pr_err("failed to load file %s\n", fw_name); -} - -void fini_microcode_amd(void) -{ - release_firmware(firmware); -} - static struct microcode_ops microcode_amd_ops = { - .init = init_microcode_amd, - .fini = fini_microcode_amd, .request_microcode_user = request_microcode_user, .request_microcode_fw = request_microcode_fw, .collect_cpu_info = collect_cpu_info_amd, diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 0c8632433090..cceb5bc3c3c2 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -521,9 +521,6 @@ static int __init microcode_init(void) return PTR_ERR(microcode_pdev); } - if (microcode_ops->init) - microcode_ops->init(µcode_pdev->dev); - get_online_cpus(); mutex_lock(µcode_mutex); @@ -566,9 +563,6 @@ static void __exit microcode_exit(void) platform_device_unregister(microcode_pdev); - if (microcode_ops->fini) - microcode_ops->fini(); - microcode_ops = NULL; pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); -- cgit v1.2.3 From b160091802d4a76dd063facb09fcf10bf5d5d747 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Sat, 23 Jan 2010 18:27:47 -0800 Subject: x86: Remove "x86 CPU features in debugfs" (CONFIG_X86_CPU_DEBUG) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CONFIG_X86_CPU_DEBUG, which provides some parsed versions of the x86 CPU configuration via debugfs, has caused boot failures on real hardware. The value of this feature has been marginal at best, as all this information is already available to userspace via generic interfaces. Causes crashes that have not been fixed + minimal utility -> remove. See the referenced LKML thread for more information. Reported-by: Ozan ÇaÄŸlayan Signed-off-by: H. Peter Anvin LKML-Reference: Cc: Jaswinder Singh Rajput Cc: Linus Torvalds Cc: Rafael J. Wysocki Cc: Yinghai Lu Cc: --- arch/x86/Kconfig | 6 - arch/x86/include/asm/cpu_debug.h | 127 -------- arch/x86/kernel/cpu/Makefile | 2 - arch/x86/kernel/cpu/cpu_debug.c | 688 --------------------------------------- 4 files changed, 823 deletions(-) delete mode 100644 arch/x86/include/asm/cpu_debug.h delete mode 100644 arch/x86/kernel/cpu/cpu_debug.c (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cbcbfdee3ee0..eb4092568f9e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -989,12 +989,6 @@ config X86_CPUID with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to /dev/cpu/31/cpuid. -config X86_CPU_DEBUG - tristate "/sys/kernel/debug/x86/cpu/* - CPU Debug support" - ---help--- - If you select this option, this will provide various x86 CPUs - information through debugfs. - choice prompt "High Memory Support" default HIGHMEM4G if !X86_NUMAQ diff --git a/arch/x86/include/asm/cpu_debug.h b/arch/x86/include/asm/cpu_debug.h deleted file mode 100644 index d96c1ee3a95c..000000000000 --- a/arch/x86/include/asm/cpu_debug.h +++ /dev/null @@ -1,127 +0,0 @@ -#ifndef _ASM_X86_CPU_DEBUG_H -#define _ASM_X86_CPU_DEBUG_H - -/* - * CPU x86 architecture debug - * - * Copyright(C) 2009 Jaswinder Singh Rajput - */ - -/* Register flags */ -enum cpu_debug_bit { -/* Model Specific Registers (MSRs) */ - CPU_MC_BIT, /* Machine Check */ - CPU_MONITOR_BIT, /* Monitor */ - CPU_TIME_BIT, /* Time */ - CPU_PMC_BIT, /* Performance Monitor */ - CPU_PLATFORM_BIT, /* Platform */ - CPU_APIC_BIT, /* APIC */ - CPU_POWERON_BIT, /* Power-on */ - CPU_CONTROL_BIT, /* Control */ - CPU_FEATURES_BIT, /* Features control */ - CPU_LBRANCH_BIT, /* Last Branch */ - CPU_BIOS_BIT, /* BIOS */ - CPU_FREQ_BIT, /* Frequency */ - CPU_MTTR_BIT, /* MTRR */ - CPU_PERF_BIT, /* Performance */ - CPU_CACHE_BIT, /* Cache */ - CPU_SYSENTER_BIT, /* Sysenter */ - CPU_THERM_BIT, /* Thermal */ - CPU_MISC_BIT, /* Miscellaneous */ - CPU_DEBUG_BIT, /* Debug */ - CPU_PAT_BIT, /* PAT */ - CPU_VMX_BIT, /* VMX */ - CPU_CALL_BIT, /* System Call */ - CPU_BASE_BIT, /* BASE Address */ - CPU_VER_BIT, /* Version ID */ - CPU_CONF_BIT, /* Configuration */ - CPU_SMM_BIT, /* System mgmt mode */ - CPU_SVM_BIT, /*Secure Virtual Machine*/ - CPU_OSVM_BIT, /* OS-Visible Workaround*/ -/* Standard Registers */ - CPU_TSS_BIT, /* Task Stack Segment */ - CPU_CR_BIT, /* Control Registers */ - CPU_DT_BIT, /* Descriptor Table */ -/* End of Registers flags */ - CPU_REG_ALL_BIT, /* Select all Registers */ -}; - -#define CPU_REG_ALL (~0) /* Select all Registers */ - -#define CPU_MC (1 << CPU_MC_BIT) -#define CPU_MONITOR (1 << CPU_MONITOR_BIT) -#define CPU_TIME (1 << CPU_TIME_BIT) -#define CPU_PMC (1 << CPU_PMC_BIT) -#define CPU_PLATFORM (1 << CPU_PLATFORM_BIT) -#define CPU_APIC (1 << CPU_APIC_BIT) -#define CPU_POWERON (1 << CPU_POWERON_BIT) -#define CPU_CONTROL (1 << CPU_CONTROL_BIT) -#define CPU_FEATURES (1 << CPU_FEATURES_BIT) -#define CPU_LBRANCH (1 << CPU_LBRANCH_BIT) -#define CPU_BIOS (1 << CPU_BIOS_BIT) -#define CPU_FREQ (1 << CPU_FREQ_BIT) -#define CPU_MTRR (1 << CPU_MTTR_BIT) -#define CPU_PERF (1 << CPU_PERF_BIT) -#define CPU_CACHE (1 << CPU_CACHE_BIT) -#define CPU_SYSENTER (1 << CPU_SYSENTER_BIT) -#define CPU_THERM (1 << CPU_THERM_BIT) -#define CPU_MISC (1 << CPU_MISC_BIT) -#define CPU_DEBUG (1 << CPU_DEBUG_BIT) -#define CPU_PAT (1 << CPU_PAT_BIT) -#define CPU_VMX (1 << CPU_VMX_BIT) -#define CPU_CALL (1 << CPU_CALL_BIT) -#define CPU_BASE (1 << CPU_BASE_BIT) -#define CPU_VER (1 << CPU_VER_BIT) -#define CPU_CONF (1 << CPU_CONF_BIT) -#define CPU_SMM (1 << CPU_SMM_BIT) -#define CPU_SVM (1 << CPU_SVM_BIT) -#define CPU_OSVM (1 << CPU_OSVM_BIT) -#define CPU_TSS (1 << CPU_TSS_BIT) -#define CPU_CR (1 << CPU_CR_BIT) -#define CPU_DT (1 << CPU_DT_BIT) - -/* Register file flags */ -enum cpu_file_bit { - CPU_INDEX_BIT, /* index */ - CPU_VALUE_BIT, /* value */ -}; - -#define CPU_FILE_VALUE (1 << CPU_VALUE_BIT) - -#define MAX_CPU_FILES 512 - -struct cpu_private { - unsigned cpu; - unsigned type; - unsigned reg; - unsigned file; -}; - -struct cpu_debug_base { - char *name; /* Register name */ - unsigned flag; /* Register flag */ - unsigned write; /* Register write flag */ -}; - -/* - * Currently it looks similar to cpu_debug_base but once we add more files - * cpu_file_base will go in different direction - */ -struct cpu_file_base { - char *name; /* Register file name */ - unsigned flag; /* Register file flag */ - unsigned write; /* Register write flag */ -}; - -struct cpu_cpuX_base { - struct dentry *dentry; /* Register dentry */ - int init; /* Register index file */ -}; - -struct cpu_debug_range { - unsigned min; /* Register range min */ - unsigned max; /* Register range max */ - unsigned flag; /* Supported flags */ -}; - -#endif /* _ASM_X86_CPU_DEBUG_H */ diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 1d2cb383410e..c202b62f3671 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -19,8 +19,6 @@ obj-y += vmware.o hypervisor.o sched.o obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += bugs_64.o -obj-$(CONFIG_X86_CPU_DEBUG) += cpu_debug.o - obj-$(CONFIG_CPU_SUP_INTEL) += intel.o obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c deleted file mode 100644 index b368cd862997..000000000000 --- a/arch/x86/kernel/cpu/cpu_debug.c +++ /dev/null @@ -1,688 +0,0 @@ -/* - * CPU x86 architecture debug code - * - * Copyright(C) 2009 Jaswinder Singh Rajput - * - * For licencing details see kernel-base/COPYING - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -static DEFINE_PER_CPU(struct cpu_cpuX_base [CPU_REG_ALL_BIT], cpud_arr); -static DEFINE_PER_CPU(struct cpu_private * [MAX_CPU_FILES], cpud_priv_arr); -static DEFINE_PER_CPU(int, cpud_priv_count); - -static DEFINE_MUTEX(cpu_debug_lock); - -static struct dentry *cpu_debugfs_dir; - -static struct cpu_debug_base cpu_base[] = { - { "mc", CPU_MC, 0 }, - { "monitor", CPU_MONITOR, 0 }, - { "time", CPU_TIME, 0 }, - { "pmc", CPU_PMC, 1 }, - { "platform", CPU_PLATFORM, 0 }, - { "apic", CPU_APIC, 0 }, - { "poweron", CPU_POWERON, 0 }, - { "control", CPU_CONTROL, 0 }, - { "features", CPU_FEATURES, 0 }, - { "lastbranch", CPU_LBRANCH, 0 }, - { "bios", CPU_BIOS, 0 }, - { "freq", CPU_FREQ, 0 }, - { "mtrr", CPU_MTRR, 0 }, - { "perf", CPU_PERF, 0 }, - { "cache", CPU_CACHE, 0 }, - { "sysenter", CPU_SYSENTER, 0 }, - { "therm", CPU_THERM, 0 }, - { "misc", CPU_MISC, 0 }, - { "debug", CPU_DEBUG, 0 }, - { "pat", CPU_PAT, 0 }, - { "vmx", CPU_VMX, 0 }, - { "call", CPU_CALL, 0 }, - { "base", CPU_BASE, 0 }, - { "ver", CPU_VER, 0 }, - { "conf", CPU_CONF, 0 }, - { "smm", CPU_SMM, 0 }, - { "svm", CPU_SVM, 0 }, - { "osvm", CPU_OSVM, 0 }, - { "tss", CPU_TSS, 0 }, - { "cr", CPU_CR, 0 }, - { "dt", CPU_DT, 0 }, - { "registers", CPU_REG_ALL, 0 }, -}; - -static struct cpu_file_base cpu_file[] = { - { "index", CPU_REG_ALL, 0 }, - { "value", CPU_REG_ALL, 1 }, -}; - -/* CPU Registers Range */ -static struct cpu_debug_range cpu_reg_range[] = { - { 0x00000000, 0x00000001, CPU_MC, }, - { 0x00000006, 0x00000007, CPU_MONITOR, }, - { 0x00000010, 0x00000010, CPU_TIME, }, - { 0x00000011, 0x00000013, CPU_PMC, }, - { 0x00000017, 0x00000017, CPU_PLATFORM, }, - { 0x0000001B, 0x0000001B, CPU_APIC, }, - { 0x0000002A, 0x0000002B, CPU_POWERON, }, - { 0x0000002C, 0x0000002C, CPU_FREQ, }, - { 0x0000003A, 0x0000003A, CPU_CONTROL, }, - { 0x00000040, 0x00000047, CPU_LBRANCH, }, - { 0x00000060, 0x00000067, CPU_LBRANCH, }, - { 0x00000079, 0x00000079, CPU_BIOS, }, - { 0x00000088, 0x0000008A, CPU_CACHE, }, - { 0x0000008B, 0x0000008B, CPU_BIOS, }, - { 0x0000009B, 0x0000009B, CPU_MONITOR, }, - { 0x000000C1, 0x000000C4, CPU_PMC, }, - { 0x000000CD, 0x000000CD, CPU_FREQ, }, - { 0x000000E7, 0x000000E8, CPU_PERF, }, - { 0x000000FE, 0x000000FE, CPU_MTRR, }, - - { 0x00000116, 0x0000011E, CPU_CACHE, }, - { 0x00000174, 0x00000176, CPU_SYSENTER, }, - { 0x00000179, 0x0000017B, CPU_MC, }, - { 0x00000186, 0x00000189, CPU_PMC, }, - { 0x00000198, 0x00000199, CPU_PERF, }, - { 0x0000019A, 0x0000019A, CPU_TIME, }, - { 0x0000019B, 0x0000019D, CPU_THERM, }, - { 0x000001A0, 0x000001A0, CPU_MISC, }, - { 0x000001C9, 0x000001C9, CPU_LBRANCH, }, - { 0x000001D7, 0x000001D8, CPU_LBRANCH, }, - { 0x000001D9, 0x000001D9, CPU_DEBUG, }, - { 0x000001DA, 0x000001E0, CPU_LBRANCH, }, - - { 0x00000200, 0x0000020F, CPU_MTRR, }, - { 0x00000250, 0x00000250, CPU_MTRR, }, - { 0x00000258, 0x00000259, CPU_MTRR, }, - { 0x00000268, 0x0000026F, CPU_MTRR, }, - { 0x00000277, 0x00000277, CPU_PAT, }, - { 0x000002FF, 0x000002FF, CPU_MTRR, }, - - { 0x00000300, 0x00000311, CPU_PMC, }, - { 0x00000345, 0x00000345, CPU_PMC, }, - { 0x00000360, 0x00000371, CPU_PMC, }, - { 0x0000038D, 0x00000390, CPU_PMC, }, - { 0x000003A0, 0x000003BE, CPU_PMC, }, - { 0x000003C0, 0x000003CD, CPU_PMC, }, - { 0x000003E0, 0x000003E1, CPU_PMC, }, - { 0x000003F0, 0x000003F2, CPU_PMC, }, - - { 0x00000400, 0x00000417, CPU_MC, }, - { 0x00000480, 0x0000048B, CPU_VMX, }, - - { 0x00000600, 0x00000600, CPU_DEBUG, }, - { 0x00000680, 0x0000068F, CPU_LBRANCH, }, - { 0x000006C0, 0x000006CF, CPU_LBRANCH, }, - - { 0x000107CC, 0x000107D3, CPU_PMC, }, - - { 0xC0000080, 0xC0000080, CPU_FEATURES, }, - { 0xC0000081, 0xC0000084, CPU_CALL, }, - { 0xC0000100, 0xC0000102, CPU_BASE, }, - { 0xC0000103, 0xC0000103, CPU_TIME, }, - - { 0xC0010000, 0xC0010007, CPU_PMC, }, - { 0xC0010010, 0xC0010010, CPU_CONF, }, - { 0xC0010015, 0xC0010015, CPU_CONF, }, - { 0xC0010016, 0xC001001A, CPU_MTRR, }, - { 0xC001001D, 0xC001001D, CPU_MTRR, }, - { 0xC001001F, 0xC001001F, CPU_CONF, }, - { 0xC0010030, 0xC0010035, CPU_BIOS, }, - { 0xC0010044, 0xC0010048, CPU_MC, }, - { 0xC0010050, 0xC0010056, CPU_SMM, }, - { 0xC0010058, 0xC0010058, CPU_CONF, }, - { 0xC0010060, 0xC0010060, CPU_CACHE, }, - { 0xC0010061, 0xC0010068, CPU_SMM, }, - { 0xC0010069, 0xC001006B, CPU_SMM, }, - { 0xC0010070, 0xC0010071, CPU_SMM, }, - { 0xC0010111, 0xC0010113, CPU_SMM, }, - { 0xC0010114, 0xC0010118, CPU_SVM, }, - { 0xC0010140, 0xC0010141, CPU_OSVM, }, - { 0xC0011022, 0xC0011023, CPU_CONF, }, -}; - -static int is_typeflag_valid(unsigned cpu, unsigned flag) -{ - int i; - - /* Standard Registers should be always valid */ - if (flag >= CPU_TSS) - return 1; - - for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) { - if (cpu_reg_range[i].flag == flag) - return 1; - } - - /* Invalid */ - return 0; -} - -static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max, - int index, unsigned flag) -{ - if (cpu_reg_range[index].flag == flag) { - *min = cpu_reg_range[index].min; - *max = cpu_reg_range[index].max; - } else - *max = 0; - - return *max; -} - -/* This function can also be called with seq = NULL for printk */ -static void print_cpu_data(struct seq_file *seq, unsigned type, - u32 low, u32 high) -{ - struct cpu_private *priv; - u64 val = high; - - if (seq) { - priv = seq->private; - if (priv->file) { - val = (val << 32) | low; - seq_printf(seq, "0x%llx\n", val); - } else - seq_printf(seq, " %08x: %08x_%08x\n", - type, high, low); - } else - printk(KERN_INFO " %08x: %08x_%08x\n", type, high, low); -} - -/* This function can also be called with seq = NULL for printk */ -static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag) -{ - unsigned msr, msr_min, msr_max; - struct cpu_private *priv; - u32 low, high; - int i; - - if (seq) { - priv = seq->private; - if (priv->file) { - if (!rdmsr_safe_on_cpu(priv->cpu, priv->reg, - &low, &high)) - print_cpu_data(seq, priv->reg, low, high); - return; - } - } - - for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) { - if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag)) - continue; - - for (msr = msr_min; msr <= msr_max; msr++) { - if (rdmsr_safe_on_cpu(cpu, msr, &low, &high)) - continue; - print_cpu_data(seq, msr, low, high); - } - } -} - -static void print_tss(void *arg) -{ - struct pt_regs *regs = task_pt_regs(current); - struct seq_file *seq = arg; - unsigned int seg; - - seq_printf(seq, " RAX\t: %016lx\n", regs->ax); - seq_printf(seq, " RBX\t: %016lx\n", regs->bx); - seq_printf(seq, " RCX\t: %016lx\n", regs->cx); - seq_printf(seq, " RDX\t: %016lx\n", regs->dx); - - seq_printf(seq, " RSI\t: %016lx\n", regs->si); - seq_printf(seq, " RDI\t: %016lx\n", regs->di); - seq_printf(seq, " RBP\t: %016lx\n", regs->bp); - seq_printf(seq, " ESP\t: %016lx\n", regs->sp); - -#ifdef CONFIG_X86_64 - seq_printf(seq, " R08\t: %016lx\n", regs->r8); - seq_printf(seq, " R09\t: %016lx\n", regs->r9); - seq_printf(seq, " R10\t: %016lx\n", regs->r10); - seq_printf(seq, " R11\t: %016lx\n", regs->r11); - seq_printf(seq, " R12\t: %016lx\n", regs->r12); - seq_printf(seq, " R13\t: %016lx\n", regs->r13); - seq_printf(seq, " R14\t: %016lx\n", regs->r14); - seq_printf(seq, " R15\t: %016lx\n", regs->r15); -#endif - - asm("movl %%cs,%0" : "=r" (seg)); - seq_printf(seq, " CS\t: %04x\n", seg); - asm("movl %%ds,%0" : "=r" (seg)); - seq_printf(seq, " DS\t: %04x\n", seg); - seq_printf(seq, " SS\t: %04lx\n", regs->ss & 0xffff); - asm("movl %%es,%0" : "=r" (seg)); - seq_printf(seq, " ES\t: %04x\n", seg); - asm("movl %%fs,%0" : "=r" (seg)); - seq_printf(seq, " FS\t: %04x\n", seg); - asm("movl %%gs,%0" : "=r" (seg)); - seq_printf(seq, " GS\t: %04x\n", seg); - - seq_printf(seq, " EFLAGS\t: %016lx\n", regs->flags); - - seq_printf(seq, " EIP\t: %016lx\n", regs->ip); -} - -static void print_cr(void *arg) -{ - struct seq_file *seq = arg; - - seq_printf(seq, " cr0\t: %016lx\n", read_cr0()); - seq_printf(seq, " cr2\t: %016lx\n", read_cr2()); - seq_printf(seq, " cr3\t: %016lx\n", read_cr3()); - seq_printf(seq, " cr4\t: %016lx\n", read_cr4_safe()); -#ifdef CONFIG_X86_64 - seq_printf(seq, " cr8\t: %016lx\n", read_cr8()); -#endif -} - -static void print_desc_ptr(char *str, struct seq_file *seq, struct desc_ptr dt) -{ - seq_printf(seq, " %s\t: %016llx\n", str, (u64)(dt.address | dt.size)); -} - -static void print_dt(void *seq) -{ - struct desc_ptr dt; - unsigned long ldt; - - /* IDT */ - store_idt((struct desc_ptr *)&dt); - print_desc_ptr("IDT", seq, dt); - - /* GDT */ - store_gdt((struct desc_ptr *)&dt); - print_desc_ptr("GDT", seq, dt); - - /* LDT */ - store_ldt(ldt); - seq_printf(seq, " LDT\t: %016lx\n", ldt); - - /* TR */ - store_tr(ldt); - seq_printf(seq, " TR\t: %016lx\n", ldt); -} - -static void print_dr(void *arg) -{ - struct seq_file *seq = arg; - unsigned long dr; - int i; - - for (i = 0; i < 8; i++) { - /* Ignore db4, db5 */ - if ((i == 4) || (i == 5)) - continue; - get_debugreg(dr, i); - seq_printf(seq, " dr%d\t: %016lx\n", i, dr); - } - - seq_printf(seq, "\n MSR\t:\n"); -} - -static void print_apic(void *arg) -{ - struct seq_file *seq = arg; - -#ifdef CONFIG_X86_LOCAL_APIC - seq_printf(seq, " LAPIC\t:\n"); - seq_printf(seq, " ID\t\t: %08x\n", apic_read(APIC_ID) >> 24); - seq_printf(seq, " LVR\t\t: %08x\n", apic_read(APIC_LVR)); - seq_printf(seq, " TASKPRI\t: %08x\n", apic_read(APIC_TASKPRI)); - seq_printf(seq, " ARBPRI\t\t: %08x\n", apic_read(APIC_ARBPRI)); - seq_printf(seq, " PROCPRI\t: %08x\n", apic_read(APIC_PROCPRI)); - seq_printf(seq, " LDR\t\t: %08x\n", apic_read(APIC_LDR)); - seq_printf(seq, " DFR\t\t: %08x\n", apic_read(APIC_DFR)); - seq_printf(seq, " SPIV\t\t: %08x\n", apic_read(APIC_SPIV)); - seq_printf(seq, " ISR\t\t: %08x\n", apic_read(APIC_ISR)); - seq_printf(seq, " ESR\t\t: %08x\n", apic_read(APIC_ESR)); - seq_printf(seq, " ICR\t\t: %08x\n", apic_read(APIC_ICR)); - seq_printf(seq, " ICR2\t\t: %08x\n", apic_read(APIC_ICR2)); - seq_printf(seq, " LVTT\t\t: %08x\n", apic_read(APIC_LVTT)); - seq_printf(seq, " LVTTHMR\t: %08x\n", apic_read(APIC_LVTTHMR)); - seq_printf(seq, " LVTPC\t\t: %08x\n", apic_read(APIC_LVTPC)); - seq_printf(seq, " LVT0\t\t: %08x\n", apic_read(APIC_LVT0)); - seq_printf(seq, " LVT1\t\t: %08x\n", apic_read(APIC_LVT1)); - seq_printf(seq, " LVTERR\t\t: %08x\n", apic_read(APIC_LVTERR)); - seq_printf(seq, " TMICT\t\t: %08x\n", apic_read(APIC_TMICT)); - seq_printf(seq, " TMCCT\t\t: %08x\n", apic_read(APIC_TMCCT)); - seq_printf(seq, " TDCR\t\t: %08x\n", apic_read(APIC_TDCR)); - if (boot_cpu_has(X86_FEATURE_EXTAPIC)) { - unsigned int i, v, maxeilvt; - - v = apic_read(APIC_EFEAT); - maxeilvt = (v >> 16) & 0xff; - seq_printf(seq, " EFEAT\t\t: %08x\n", v); - seq_printf(seq, " ECTRL\t\t: %08x\n", apic_read(APIC_ECTRL)); - - for (i = 0; i < maxeilvt; i++) { - v = apic_read(APIC_EILVTn(i)); - seq_printf(seq, " EILVT%d\t\t: %08x\n", i, v); - } - } -#endif /* CONFIG_X86_LOCAL_APIC */ - seq_printf(seq, "\n MSR\t:\n"); -} - -static int cpu_seq_show(struct seq_file *seq, void *v) -{ - struct cpu_private *priv = seq->private; - - if (priv == NULL) - return -EINVAL; - - switch (cpu_base[priv->type].flag) { - case CPU_TSS: - smp_call_function_single(priv->cpu, print_tss, seq, 1); - break; - case CPU_CR: - smp_call_function_single(priv->cpu, print_cr, seq, 1); - break; - case CPU_DT: - smp_call_function_single(priv->cpu, print_dt, seq, 1); - break; - case CPU_DEBUG: - if (priv->file == CPU_INDEX_BIT) - smp_call_function_single(priv->cpu, print_dr, seq, 1); - print_msr(seq, priv->cpu, cpu_base[priv->type].flag); - break; - case CPU_APIC: - if (priv->file == CPU_INDEX_BIT) - smp_call_function_single(priv->cpu, print_apic, seq, 1); - print_msr(seq, priv->cpu, cpu_base[priv->type].flag); - break; - - default: - print_msr(seq, priv->cpu, cpu_base[priv->type].flag); - break; - } - seq_printf(seq, "\n"); - - return 0; -} - -static void *cpu_seq_start(struct seq_file *seq, loff_t *pos) -{ - if (*pos == 0) /* One time is enough ;-) */ - return seq; - - return NULL; -} - -static void *cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - (*pos)++; - - return cpu_seq_start(seq, pos); -} - -static void cpu_seq_stop(struct seq_file *seq, void *v) -{ -} - -static const struct seq_operations cpu_seq_ops = { - .start = cpu_seq_start, - .next = cpu_seq_next, - .stop = cpu_seq_stop, - .show = cpu_seq_show, -}; - -static int cpu_seq_open(struct inode *inode, struct file *file) -{ - struct cpu_private *priv = inode->i_private; - struct seq_file *seq; - int err; - - err = seq_open(file, &cpu_seq_ops); - if (!err) { - seq = file->private_data; - seq->private = priv; - } - - return err; -} - -static int write_msr(struct cpu_private *priv, u64 val) -{ - u32 low, high; - - high = (val >> 32) & 0xffffffff; - low = val & 0xffffffff; - - if (!wrmsr_safe_on_cpu(priv->cpu, priv->reg, low, high)) - return 0; - - return -EPERM; -} - -static int write_cpu_register(struct cpu_private *priv, const char *buf) -{ - int ret = -EPERM; - u64 val; - - ret = strict_strtoull(buf, 0, &val); - if (ret < 0) - return ret; - - /* Supporting only MSRs */ - if (priv->type < CPU_TSS_BIT) - return write_msr(priv, val); - - return ret; -} - -static ssize_t cpu_write(struct file *file, const char __user *ubuf, - size_t count, loff_t *off) -{ - struct seq_file *seq = file->private_data; - struct cpu_private *priv = seq->private; - char buf[19]; - - if ((priv == NULL) || (count >= sizeof(buf))) - return -EINVAL; - - if (copy_from_user(&buf, ubuf, count)) - return -EFAULT; - - buf[count] = 0; - - if ((cpu_base[priv->type].write) && (cpu_file[priv->file].write)) - if (!write_cpu_register(priv, buf)) - return count; - - return -EACCES; -} - -static const struct file_operations cpu_fops = { - .owner = THIS_MODULE, - .open = cpu_seq_open, - .read = seq_read, - .write = cpu_write, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg, - unsigned file, struct dentry *dentry) -{ - struct cpu_private *priv = NULL; - - /* Already intialized */ - if (file == CPU_INDEX_BIT) - if (per_cpu(cpud_arr[type].init, cpu)) - return 0; - - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (priv == NULL) - return -ENOMEM; - - priv->cpu = cpu; - priv->type = type; - priv->reg = reg; - priv->file = file; - mutex_lock(&cpu_debug_lock); - per_cpu(cpud_priv_arr[type], cpu) = priv; - per_cpu(cpud_priv_count, cpu)++; - mutex_unlock(&cpu_debug_lock); - - if (file) - debugfs_create_file(cpu_file[file].name, S_IRUGO, - dentry, (void *)priv, &cpu_fops); - else { - debugfs_create_file(cpu_base[type].name, S_IRUGO, - per_cpu(cpud_arr[type].dentry, cpu), - (void *)priv, &cpu_fops); - mutex_lock(&cpu_debug_lock); - per_cpu(cpud_arr[type].init, cpu) = 1; - mutex_unlock(&cpu_debug_lock); - } - - return 0; -} - -static int cpu_init_regfiles(unsigned cpu, unsigned int type, unsigned reg, - struct dentry *dentry) -{ - unsigned file; - int err = 0; - - for (file = 0; file < ARRAY_SIZE(cpu_file); file++) { - err = cpu_create_file(cpu, type, reg, file, dentry); - if (err) - return err; - } - - return err; -} - -static int cpu_init_msr(unsigned cpu, unsigned type, struct dentry *dentry) -{ - struct dentry *cpu_dentry = NULL; - unsigned reg, reg_min, reg_max; - int i, err = 0; - char reg_dir[12]; - u32 low, high; - - for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) { - if (!get_cpu_range(cpu, ®_min, ®_max, i, - cpu_base[type].flag)) - continue; - - for (reg = reg_min; reg <= reg_max; reg++) { - if (rdmsr_safe_on_cpu(cpu, reg, &low, &high)) - continue; - - sprintf(reg_dir, "0x%x", reg); - cpu_dentry = debugfs_create_dir(reg_dir, dentry); - err = cpu_init_regfiles(cpu, type, reg, cpu_dentry); - if (err) - return err; - } - } - - return err; -} - -static int cpu_init_allreg(unsigned cpu, struct dentry *dentry) -{ - struct dentry *cpu_dentry = NULL; - unsigned type; - int err = 0; - - for (type = 0; type < ARRAY_SIZE(cpu_base) - 1; type++) { - if (!is_typeflag_valid(cpu, cpu_base[type].flag)) - continue; - cpu_dentry = debugfs_create_dir(cpu_base[type].name, dentry); - per_cpu(cpud_arr[type].dentry, cpu) = cpu_dentry; - - if (type < CPU_TSS_BIT) - err = cpu_init_msr(cpu, type, cpu_dentry); - else - err = cpu_create_file(cpu, type, 0, CPU_INDEX_BIT, - cpu_dentry); - if (err) - return err; - } - - return err; -} - -static int cpu_init_cpu(void) -{ - struct dentry *cpu_dentry = NULL; - struct cpuinfo_x86 *cpui; - char cpu_dir[12]; - unsigned cpu; - int err = 0; - - for (cpu = 0; cpu < nr_cpu_ids; cpu++) { - cpui = &cpu_data(cpu); - if (!cpu_has(cpui, X86_FEATURE_MSR)) - continue; - - sprintf(cpu_dir, "cpu%d", cpu); - cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir); - err = cpu_init_allreg(cpu, cpu_dentry); - - pr_info("cpu%d(%d) debug files %d\n", - cpu, nr_cpu_ids, per_cpu(cpud_priv_count, cpu)); - if (per_cpu(cpud_priv_count, cpu) > MAX_CPU_FILES) { - pr_err("Register files count %d exceeds limit %d\n", - per_cpu(cpud_priv_count, cpu), MAX_CPU_FILES); - per_cpu(cpud_priv_count, cpu) = MAX_CPU_FILES; - err = -ENFILE; - } - if (err) - return err; - } - - return err; -} - -static int __init cpu_debug_init(void) -{ - cpu_debugfs_dir = debugfs_create_dir("cpu", arch_debugfs_dir); - - return cpu_init_cpu(); -} - -static void __exit cpu_debug_exit(void) -{ - int i, cpu; - - if (cpu_debugfs_dir) - debugfs_remove_recursive(cpu_debugfs_dir); - - for (cpu = 0; cpu < nr_cpu_ids; cpu++) - for (i = 0; i < per_cpu(cpud_priv_count, cpu); i++) - kfree(per_cpu(cpud_priv_arr[i], cpu)); -} - -module_init(cpu_debug_init); -module_exit(cpu_debug_exit); - -MODULE_AUTHOR("Jaswinder Singh Rajput"); -MODULE_DESCRIPTION("CPU Debug module"); -MODULE_LICENSE("GPL"); -- cgit v1.2.3 From a5d36f82c4f3e852b61fdf1fee13463c8aa91b90 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 29 Dec 2009 12:42:16 +0200 Subject: KVM: Fix race between APIC TMR and IRR When we queue an interrupt to the local apic, we set the IRR before the TMR. The vcpu can pick up the IRR and inject the interrupt before setting the TMR, and perhaps even EOI it, causing incorrect behaviour. The race is really insignificant since it can only occur on the first interrupt (usually following interrupts will not change TMR), but it's better closed than open. Fixed by reordering setting the TMR vs IRR. Cc: stable@kernel.org Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/lapic.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 3063a0c4858b..ba8c045da782 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -373,6 +373,12 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, if (unlikely(!apic_enabled(apic))) break; + if (trig_mode) { + apic_debug("level trig mode for vector %d", vector); + apic_set_vector(vector, apic->regs + APIC_TMR); + } else + apic_clear_vector(vector, apic->regs + APIC_TMR); + result = !apic_test_and_set_irr(vector, apic); trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, trig_mode, vector, !result); @@ -383,11 +389,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, break; } - if (trig_mode) { - apic_debug("level trig mode for vector %d", vector); - apic_set_vector(vector, apic->regs + APIC_TMR); - } else - apic_clear_vector(vector, apic->regs + APIC_TMR); kvm_vcpu_kick(vcpu); break; -- cgit v1.2.3 From 82b7005f0e72d8d1a8226e4c192cbb0850d10b3f Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 5 Jan 2010 19:02:28 +0800 Subject: KVM: x86: Fix host_mapping_level() When found a error hva, should not return PAGE_SIZE but the level... Also clean up the coding style of the following loop. Cc: stable@kernel.org Signed-off-by: Sheng Yang Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 4c3e5b2314cb..89a49fb46a27 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -477,7 +477,7 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn) addr = gfn_to_hva(kvm, gfn); if (kvm_is_error_hva(addr)) - return page_size; + return PT_PAGE_TABLE_LEVEL; down_read(¤t->mm->mmap_sem); vma = find_vma(current->mm, addr); @@ -515,11 +515,9 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) if (host_level == PT_PAGE_TABLE_LEVEL) return host_level; - for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level) { - + for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level) if (has_wrprotected_page(vcpu->kvm, large_gfn, level)) break; - } return level - 1; } -- cgit v1.2.3 From a6085fbaf65ab09bfb5ec8d902d6d21680fe1895 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 14 Jan 2010 17:41:27 -0200 Subject: KVM: MMU: bail out pagewalk on kvm_read_guest error Exit the guest pagetable walk loop if reading gpte failed. Otherwise its possible to enter an endless loop processing the previous present pte. Cc: stable@kernel.org Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/paging_tmpl.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 58a0f1e88596..ede2131a9225 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -150,7 +150,9 @@ walk: walker->table_gfn[walker->level - 1] = table_gfn; walker->pte_gpa[walker->level - 1] = pte_gpa; - kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte)); + if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte))) + goto not_present; + trace_kvm_mmu_paging_element(pte, walker->level); if (!is_present_gpte(pte)) -- cgit v1.2.3 From 36cb93fd6b6bf7e9163a69a8bf20207aed5fea44 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 22 Jan 2010 14:18:47 +0800 Subject: KVM: x86: Fix probable memory leak of vcpu->arch.mce_banks vcpu->arch.mce_banks is malloc in kvm_arch_vcpu_init(), but never free in any place, this may cause memory leak. So this patch fixed to free it in kvm_arch_vcpu_uninit(). Cc: stable@kernel.org Signed-off-by: Wei Yongjun Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6651dbf58675..b265eecc741f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5088,6 +5088,7 @@ fail: void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { + kfree(vcpu->arch.mce_banks); kvm_free_lapic(vcpu); down_read(&vcpu->kvm->slots_lock); kvm_mmu_destroy(vcpu); -- cgit v1.2.3 From 443c39bc9ef7d8f648408d74c97e943f3bb3f48a Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 22 Jan 2010 14:21:29 +0800 Subject: KVM: x86: Fix leak of free lapic date in kvm_arch_vcpu_init() In function kvm_arch_vcpu_init(), if the memory malloc for vcpu->arch.mce_banks is fail, it does not free the memory of lapic date. This patch fixed it. Cc: stable@kernel.org Signed-off-by: Wei Yongjun Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b265eecc741f..1ddcad452add 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5072,12 +5072,13 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) GFP_KERNEL); if (!vcpu->arch.mce_banks) { r = -ENOMEM; - goto fail_mmu_destroy; + goto fail_free_lapic; } vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; return 0; - +fail_free_lapic: + kvm_free_lapic(vcpu); fail_mmu_destroy: kvm_mmu_destroy(vcpu); fail_free_pio_data: -- cgit v1.2.3 From d8cc108f4fab42b380c6b3f3356f99e8dd5372e2 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Mon, 18 Jan 2010 11:25:36 -0600 Subject: oprofile/x86: fix crash when profiling more than 28 events With multiplexing enabled oprofile crashs when profiling more than 28 events. This patch fixes this. Signed-off-by: Suravee Suthikulpanit Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index cb88b1a0bd5f..76d4f566adee 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -222,7 +222,7 @@ static void nmi_cpu_switch(void *dummy) /* move to next set */ si += model->num_counters; - if ((si > model->num_virt_counters) || (counter_config[si].count == 0)) + if ((si >= model->num_virt_counters) || (counter_config[si].count == 0)) per_cpu(switch_index, cpu) = 0; else per_cpu(switch_index, cpu) = si; -- cgit v1.2.3 From e83e452b0692c9c13372540deb88a77d4ae2553d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 21 Jan 2010 23:26:27 +0100 Subject: oprofile/x86: add Xeon 7500 series support Add Xeon 7500 series support to oprofile. Straight forward: it's the same as Core i7, so just detect the model number. No user space changes needed. Signed-off-by: Andi Kleen Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 76d4f566adee..3347f696edc7 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -598,6 +598,7 @@ static int __init ppro_init(char **cpu_type) case 15: case 23: *cpu_type = "i386/core_2"; break; + case 0x2e: case 26: spec = &op_arch_perfmon_spec; *cpu_type = "i386/core_i7"; -- cgit v1.2.3 From da482474b8396e1a099c37ffc6541b78775aedb4 Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Tue, 26 Jan 2010 20:37:22 -0600 Subject: x86, msr/cpuid: Pass the number of minors when unregistering MSR and CPUID drivers. Pass the number of minors when unregistering MSR and CPUID drivers. Reported-by: Dean Nelson Signed-off-by: Dean Nelson LKML-Reference: <20100127023722.GA22305@sgi.com> Signed-off-by: Russ Anderson Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpuid.c | 2 +- arch/x86/kernel/msr.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index cb27fd6136c9..83e5e628de73 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -229,7 +229,7 @@ static void __exit cpuid_exit(void) for_each_online_cpu(cpu) cpuid_device_destroy(cpu); class_destroy(cpuid_class); - unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); + __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); } diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 4bd93c9b2b27..206735ac8cbd 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -285,7 +285,7 @@ static void __exit msr_exit(void) for_each_online_cpu(cpu) msr_device_destroy(cpu); class_destroy(msr_class); - unregister_chrdev(MSR_MAJOR, "cpu/msr"); + __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); unregister_hotcpu_notifier(&msr_class_cpu_notifier); } -- cgit v1.2.3 From aca3bb5910119d4cf6c28568a642582efb4cc14a Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Fri, 22 Jan 2010 09:41:40 -0600 Subject: x86, UV: Fix RTC latency bug by reading replicated cachelines For SGI UV node controllers (HUB) rev 2.0 or greater, use replicated cachelines to read the RTC timer. This optimization allows faster simulataneous reads from a given socket. Signed-off-by: Dimitri Sivanich Cc: Jack Steiner LKML-Reference: <20100122154140.GB4975@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/uv_time.c | 13 ++++++++++++- drivers/char/uv_mmtimer.c | 18 +++++++++++------- 2 files changed, 23 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c index 3c84aa001c11..2b75ef638dbc 100644 --- a/arch/x86/kernel/uv_time.c +++ b/arch/x86/kernel/uv_time.c @@ -282,10 +282,21 @@ static int uv_rtc_unset_timer(int cpu, int force) /* * Read the RTC. + * + * Starting with HUB rev 2.0, the UV RTC register is replicated across all + * cachelines of it's own page. This allows faster simultaneous reads + * from a given socket. */ static cycle_t uv_read_rtc(struct clocksource *cs) { - return (cycle_t)uv_read_local_mmr(UVH_RTC); + unsigned long offset; + + if (uv_get_min_hub_revision_id() == 1) + offset = 0; + else + offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE; + + return (cycle_t)uv_read_local_mmr(UVH_RTC | offset); } /* diff --git a/drivers/char/uv_mmtimer.c b/drivers/char/uv_mmtimer.c index 867b67be9f0a..c7072ba14f48 100644 --- a/drivers/char/uv_mmtimer.c +++ b/drivers/char/uv_mmtimer.c @@ -89,13 +89,17 @@ static long uv_mmtimer_ioctl(struct file *file, unsigned int cmd, switch (cmd) { case MMTIMER_GETOFFSET: /* offset of the counter */ /* - * UV RTC register is on its own page + * Starting with HUB rev 2.0, the UV RTC register is + * replicated across all cachelines of it's own page. + * This allows faster simultaneous reads from a given socket. + * + * The offset returned is in 64 bit units. */ - if (PAGE_SIZE <= (1 << 16)) - ret = ((UV_LOCAL_MMR_BASE | UVH_RTC) & (PAGE_SIZE-1)) - / 8; + if (uv_get_min_hub_revision_id() == 1) + ret = 0; else - ret = -ENOSYS; + ret = ((uv_blade_processor_id() * L1_CACHE_BYTES) % + PAGE_SIZE) / 8; break; case MMTIMER_GETRES: /* resolution of the clock in 10^-15 s */ @@ -115,8 +119,8 @@ static long uv_mmtimer_ioctl(struct file *file, unsigned int cmd, ret = hweight64(UVH_RTC_REAL_TIME_CLOCK_MASK); break; - case MMTIMER_MMAPAVAIL: /* can we mmap the clock into userspace? */ - ret = (PAGE_SIZE <= (1 << 16)) ? 1 : 0; + case MMTIMER_MMAPAVAIL: + ret = 1; break; case MMTIMER_GETCOUNTER: -- cgit v1.2.3 From 35ea63d70f827a26c150993b4b940925bb02b03f Mon Sep 17 00:00:00 2001 From: Leann Ogasawara Date: Wed, 27 Jan 2010 15:29:18 -0800 Subject: x86: Add Dell OptiPlex 760 reboot quirk Dell OptiPlex 760 hangs on reboot unless reboot=bios is used. Add quirk to reboot through the BIOS. BugLink: https://bugs.launchpad.net/bugs/488319 Signed-off-by: Leann Ogasawara LKML-Reference: <1264634958.27335.1091.camel@emiko> Cc: Signed-off-by: H. Peter Anvin --- arch/x86/kernel/reboot.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 1545bc0c9845..704bddcdf64d 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -203,6 +203,15 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "0T656F"), }, }, + { /* Handle problems with rebooting on Dell OptiPlex 760 with 0G919G*/ + .callback = set_bios_reboot, + .ident = "Dell OptiPlex 760", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 760"), + DMI_MATCH(DMI_BOARD_NAME, "0G919G"), + }, + }, { /* Handle problems with rebooting on Dell 2400's */ .callback = set_bios_reboot, .ident = "Dell PowerEdge 2400", -- cgit v1.2.3 From e8e06eae4ffd683931b928f460c11c40cd3f7fd8 Mon Sep 17 00:00:00 2001 From: Jeff Garrett Date: Wed, 27 Jan 2010 22:02:26 -0600 Subject: x86/PCI: remove IOH range fetching Turned out to cause trouble on single IOH machines, and is superceded by _CRS on multi-IOH machines with production BIOSes. Signed-off-by: Jeff Garrett Signed-off-by: Jesse Barnes --- arch/x86/pci/Makefile | 2 +- arch/x86/pci/intel_bus.c | 94 ------------------------------------------------ 2 files changed, 1 insertion(+), 95 deletions(-) delete mode 100644 arch/x86/pci/intel_bus.c (limited to 'arch/x86') diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index 564b008a51c7..39fba37f702f 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -15,7 +15,7 @@ obj-$(CONFIG_X86_NUMAQ) += numaq_32.o obj-y += common.o early.o obj-y += amd_bus.o -obj-$(CONFIG_X86_64) += bus_numa.o intel_bus.o +obj-$(CONFIG_X86_64) += bus_numa.o ifeq ($(CONFIG_PCI_DEBUG),y) EXTRA_CFLAGS += -DDEBUG diff --git a/arch/x86/pci/intel_bus.c b/arch/x86/pci/intel_bus.c deleted file mode 100644 index f81a2fa8fe25..000000000000 --- a/arch/x86/pci/intel_bus.c +++ /dev/null @@ -1,94 +0,0 @@ -/* - * to read io range from IOH pci conf, need to do it after mmconfig is there - */ - -#include -#include -#include -#include -#include - -#include "bus_numa.h" - -static inline void print_ioh_resources(struct pci_root_info *info) -{ - int res_num; - int busnum; - int i; - - printk(KERN_DEBUG "IOH bus: [%02x, %02x]\n", - info->bus_min, info->bus_max); - res_num = info->res_num; - busnum = info->bus_min; - for (i = 0; i < res_num; i++) { - struct resource *res; - - res = &info->res[i]; - printk(KERN_DEBUG "IOH bus: %02x index %x %s: [%llx, %llx]\n", - busnum, i, - (res->flags & IORESOURCE_IO) ? "io port" : - "mmio", - res->start, res->end); - } -} - -#define IOH_LIO 0x108 -#define IOH_LMMIOL 0x10c -#define IOH_LMMIOH 0x110 -#define IOH_LMMIOH_BASEU 0x114 -#define IOH_LMMIOH_LIMITU 0x118 -#define IOH_LCFGBUS 0x11c - -static void __devinit pci_root_bus_res(struct pci_dev *dev) -{ - u16 word; - u32 dword; - struct pci_root_info *info; - u16 io_base, io_end; - u32 mmiol_base, mmiol_end; - u64 mmioh_base, mmioh_end; - int bus_base, bus_end; - - /* some sys doesn't get mmconf enabled */ - if (dev->cfg_size < 0x120) - return; - - if (pci_root_num >= PCI_ROOT_NR) { - printk(KERN_DEBUG "intel_bus.c: PCI_ROOT_NR is too small\n"); - return; - } - - info = &pci_root_info[pci_root_num]; - pci_root_num++; - - pci_read_config_word(dev, IOH_LCFGBUS, &word); - bus_base = (word & 0xff); - bus_end = (word & 0xff00) >> 8; - sprintf(info->name, "PCI Bus #%02x", bus_base); - info->bus_min = bus_base; - info->bus_max = bus_end; - - pci_read_config_word(dev, IOH_LIO, &word); - io_base = (word & 0xf0) << (12 - 4); - io_end = (word & 0xf000) | 0xfff; - update_res(info, io_base, io_end, IORESOURCE_IO, 0); - - pci_read_config_dword(dev, IOH_LMMIOL, &dword); - mmiol_base = (dword & 0xff00) << (24 - 8); - mmiol_end = (dword & 0xff000000) | 0xffffff; - update_res(info, mmiol_base, mmiol_end, IORESOURCE_MEM, 0); - - pci_read_config_dword(dev, IOH_LMMIOH, &dword); - mmioh_base = ((u64)(dword & 0xfc00)) << (26 - 10); - mmioh_end = ((u64)(dword & 0xfc000000) | 0x3ffffff); - pci_read_config_dword(dev, IOH_LMMIOH_BASEU, &dword); - mmioh_base |= ((u64)(dword & 0x7ffff)) << 32; - pci_read_config_dword(dev, IOH_LMMIOH_LIMITU, &dword); - mmioh_end |= ((u64)(dword & 0x7ffff)) << 32; - update_res(info, mmioh_base, mmioh_end, IORESOURCE_MEM, 0); - - print_ioh_resources(info); -} - -/* intel IOH */ -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x342e, pci_root_bus_res); -- cgit v1.2.3 From 221af7f87b97431e3ee21ce4b0e77d5411cf1549 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 28 Jan 2010 22:14:42 -0800 Subject: Split 'flush_old_exec' into two functions 'flush_old_exec()' is the point of no return when doing an execve(), and it is pretty badly misnamed. It doesn't just flush the old executable environment, it also starts up the new one. Which is very inconvenient for things like setting up the new personality, because we want the new personality to affect the starting of the new environment, but at the same time we do _not_ want the new personality to take effect if flushing the old one fails. As a result, the x86-64 '32-bit' personality is actually done using this insane "I'm going to change the ABI, but I haven't done it yet" bit (TIF_ABI_PENDING), with SET_PERSONALITY() not actually setting the personality, but just the "pending" bit, so that "flush_thread()" can do the actual personality magic. This patch in no way changes any of that insanity, but it does split the 'flush_old_exec()' function up into a preparatory part that can fail (still called flush_old_exec()), and a new part that will actually set up the new exec environment (setup_new_exec()). All callers are changed to trivially comply with the new world order. Signed-off-by: H. Peter Anvin Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- arch/sh/kernel/process_64.c | 2 +- arch/x86/ia32/ia32_aout.c | 10 ++++++---- fs/binfmt_aout.c | 1 + fs/binfmt_elf.c | 27 ++------------------------- fs/binfmt_elf_fdpic.c | 3 +++ fs/binfmt_flat.c | 1 + fs/binfmt_som.c | 1 + fs/exec.c | 26 ++++++++++++++++---------- include/linux/binfmts.h | 1 + include/linux/sched.h | 2 +- 10 files changed, 33 insertions(+), 41 deletions(-) (limited to 'arch/x86') diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index 31f80c61b031..ec79faf6f021 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -368,7 +368,7 @@ void exit_thread(void) void flush_thread(void) { - /* Called by fs/exec.c (flush_old_exec) to remove traces of a + /* Called by fs/exec.c (setup_new_exec) to remove traces of a * previously running executable. */ #ifdef CONFIG_SH_FPU if (last_task_used_math == current) { diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 2a4d073d2cf1..435d2a5323da 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -308,15 +308,17 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (retval) return retval; - regs->cs = __USER32_CS; - regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 = - regs->r13 = regs->r14 = regs->r15 = 0; - /* OK, This is the point of no return */ set_personality(PER_LINUX); set_thread_flag(TIF_IA32); clear_thread_flag(TIF_ABI_PENDING); + setup_new_exec(bprm); + + regs->cs = __USER32_CS; + regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 = + regs->r13 = regs->r14 = regs->r15 = 0; + current->mm->end_code = ex.a_text + (current->mm->start_code = N_TXTADDR(ex)); current->mm->end_data = ex.a_data + diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 346b69405363..fdd397099172 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -264,6 +264,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) #else set_personality(PER_LINUX); #endif + setup_new_exec(bprm); current->mm->end_code = ex.a_text + (current->mm->start_code = N_TXTADDR(ex)); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index edd90c49003c..fd5b2ea5d299 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -662,27 +662,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0') goto out_free_interp; - /* - * The early SET_PERSONALITY here is so that the lookup - * for the interpreter happens in the namespace of the - * to-be-execed image. SET_PERSONALITY can select an - * alternate root. - * - * However, SET_PERSONALITY is NOT allowed to switch - * this task into the new images's memory mapping - * policy - that is, TASK_SIZE must still evaluate to - * that which is appropriate to the execing application. - * This is because exit_mmap() needs to have TASK_SIZE - * evaluate to the size of the old image. - * - * So if (say) a 64-bit application is execing a 32-bit - * application it is the architecture's responsibility - * to defer changing the value of TASK_SIZE until the - * switch really is going to happen - do this in - * flush_thread(). - akpm - */ - SET_PERSONALITY(loc->elf_ex); - interpreter = open_exec(elf_interpreter); retval = PTR_ERR(interpreter); if (IS_ERR(interpreter)) @@ -730,9 +709,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) /* Verify the interpreter has a valid arch */ if (!elf_check_arch(&loc->interp_elf_ex)) goto out_free_dentry; - } else { - /* Executables without an interpreter also need a personality */ - SET_PERSONALITY(loc->elf_ex); } /* Flush all traces of the currently running executable */ @@ -752,7 +728,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) current->flags |= PF_RANDOMIZE; - arch_pick_mmap_layout(current->mm); + + setup_new_exec(bprm); /* Do this so that we can load the interpreter, if need be. We will change some of these later */ diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index c57d9ce5ff7e..18d77297ccc8 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -321,6 +321,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, set_personality(PER_LINUX_FDPIC); if (elf_read_implies_exec(&exec_params.hdr, executable_stack)) current->personality |= READ_IMPLIES_EXEC; + + setup_new_exec(bprm); + set_binfmt(&elf_fdpic_format); current->mm->start_code = 0; diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index d4a00ea1054c..42c6b4a54445 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -519,6 +519,7 @@ static int load_flat_file(struct linux_binprm * bprm, /* OK, This is the point of no return */ set_personality(PER_LINUX_32BIT); + setup_new_exec(bprm); } /* diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c index 2a9b5330cc5e..cc8560f6c9b0 100644 --- a/fs/binfmt_som.c +++ b/fs/binfmt_som.c @@ -227,6 +227,7 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs) /* OK, This is the point of no return */ current->flags &= ~PF_FORKNOEXEC; current->personality = PER_HPUX; + setup_new_exec(bprm); /* Set the task size for HP-UX processes such that * the gateway page is outside the address space. diff --git a/fs/exec.c b/fs/exec.c index 632b02e34ec7..675c3f44c2ea 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -941,9 +941,7 @@ void set_task_comm(struct task_struct *tsk, char *buf) int flush_old_exec(struct linux_binprm * bprm) { - char * name; - int i, ch, retval; - char tcomm[sizeof(current->comm)]; + int retval; /* * Make sure we have a private signal table and that @@ -963,6 +961,20 @@ int flush_old_exec(struct linux_binprm * bprm) goto out; bprm->mm = NULL; /* We're using it now */ + return 0; + +out: + return retval; +} +EXPORT_SYMBOL(flush_old_exec); + +void setup_new_exec(struct linux_binprm * bprm) +{ + int i, ch; + char * name; + char tcomm[sizeof(current->comm)]; + + arch_pick_mmap_layout(current->mm); /* This is the point of no return */ current->sas_ss_sp = current->sas_ss_size = 0; @@ -1019,14 +1031,8 @@ int flush_old_exec(struct linux_binprm * bprm) flush_signal_handlers(current, 0); flush_old_files(current->files); - - return 0; - -out: - return retval; } - -EXPORT_SYMBOL(flush_old_exec); +EXPORT_SYMBOL(setup_new_exec); /* * Prepare credentials and lock ->cred_guard_mutex. diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index cd4349bdc34e..89c6249fc561 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -109,6 +109,7 @@ extern int prepare_binprm(struct linux_binprm *); extern int __must_check remove_arg_zero(struct linux_binprm *); extern int search_binary_handler(struct linux_binprm *,struct pt_regs *); extern int flush_old_exec(struct linux_binprm * bprm); +extern void setup_new_exec(struct linux_binprm * bprm); extern int suid_dumpable; #define SUID_DUMP_DISABLE 0 /* No setuid dumping */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 6f7bba93929b..abdfacc58653 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1369,7 +1369,7 @@ struct task_struct { char comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock it with task_lock()) - - initialized normally by flush_old_exec */ + - initialized normally by setup_new_exec */ /* file system info */ int link_count, total_link_count; #ifdef CONFIG_SYSVIPC -- cgit v1.2.3 From 05d43ed8a89c159ff641d472f970e3f1baa66318 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 28 Jan 2010 22:14:43 -0800 Subject: x86: get rid of the insane TIF_ABI_PENDING bit Now that the previous commit made it possible to do the personality setting at the point of no return, we do just that for ELF binaries. And suddenly all the reasons for that insane TIF_ABI_PENDING bit go away, and we can just make SET_PERSONALITY() just do the obvious thing for a 32-bit compat process. Everything becomes much more straightforward this way. Signed-off-by: H. Peter Anvin Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- arch/x86/ia32/ia32_aout.c | 1 - arch/x86/include/asm/elf.h | 10 ++-------- arch/x86/include/asm/thread_info.h | 2 -- arch/x86/kernel/process.c | 12 ------------ arch/x86/kernel/process_64.c | 11 +++++++++++ 5 files changed, 13 insertions(+), 23 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 435d2a5323da..f9f472462753 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -311,7 +311,6 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) /* OK, This is the point of no return */ set_personality(PER_LINUX); set_thread_flag(TIF_IA32); - clear_thread_flag(TIF_ABI_PENDING); setup_new_exec(bprm); diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index b4501ee223ad..1994d3f58443 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -181,14 +181,8 @@ do { \ void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp); #define compat_start_thread start_thread_ia32 -#define COMPAT_SET_PERSONALITY(ex) \ -do { \ - if (test_thread_flag(TIF_IA32)) \ - clear_thread_flag(TIF_ABI_PENDING); \ - else \ - set_thread_flag(TIF_ABI_PENDING); \ - current->personality |= force_personality32; \ -} while (0) +void set_personality_ia32(void); +#define COMPAT_SET_PERSONALITY(ex) set_personality_ia32() #define COMPAT_ELF_PLATFORM ("i686") diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 375c917c37d2..e0d28901e969 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -87,7 +87,6 @@ struct thread_info { #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* 32bit process */ #define TIF_FORK 18 /* ret_from_fork */ -#define TIF_ABI_PENDING 19 #define TIF_MEMDIE 20 #define TIF_DEBUG 21 /* uses debug registers */ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ @@ -112,7 +111,6 @@ struct thread_info { #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) -#define _TIF_ABI_PENDING (1 << TIF_ABI_PENDING) #define _TIF_DEBUG (1 << TIF_DEBUG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) #define _TIF_FREEZE (1 << TIF_FREEZE) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 02c3ee013ccd..c9b3522b6b46 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -115,18 +115,6 @@ void flush_thread(void) { struct task_struct *tsk = current; -#ifdef CONFIG_X86_64 - if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) { - clear_tsk_thread_flag(tsk, TIF_ABI_PENDING); - if (test_tsk_thread_flag(tsk, TIF_IA32)) { - clear_tsk_thread_flag(tsk, TIF_IA32); - } else { - set_tsk_thread_flag(tsk, TIF_IA32); - current_thread_info()->status |= TS_COMPAT; - } - } -#endif - flush_ptrace_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index f9e033150cdf..41a26a82470a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -521,6 +521,17 @@ void set_personality_64bit(void) current->personality &= ~READ_IMPLIES_EXEC; } +void set_personality_ia32(void) +{ + /* inherit personality from parent */ + + /* Make sure to be in 32bit mode */ + set_thread_flag(TIF_IA32); + + /* Prepare the first "return" to user space */ + current_thread_info()->status |= TS_COMPAT; +} + unsigned long get_wchan(struct task_struct *p) { unsigned long stack; -- cgit v1.2.3 From 7c099ce1575126395f186ecf58b51a60d5c3be7d Mon Sep 17 00:00:00 2001 From: David Härdeman Date: Thu, 28 Jan 2010 21:02:54 +0100 Subject: x86: Add quirk for Intel DG45FC board to avoid low memory corruption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 6aa542a694dc9ea4344a8a590d2628c33d1b9431 added a quirk for the Intel DG45ID board due to low memory corruption. The Intel DG45FC shares the same BIOS (and the same bug) as noted in: http://bugzilla.kernel.org/show_bug.cgi?id=13736 Signed-off-by: David Härdeman LKML-Reference: <20100128200254.GA9134@hardeman.nu> Cc: Cc: Alexey Fisher Cc: ykzhao Cc: Tony Bones Cc: Ingo Molnar Signed-off-by: H. Peter Anvin --- arch/x86/kernel/setup.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f7b8b9894b22..5d9e40c58628 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -642,19 +642,27 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"), }, }, - { /* - * AMI BIOS with low memory corruption was found on Intel DG45ID board. - * It hase different DMI_BIOS_VENDOR = "Intel Corp.", for now we will + * AMI BIOS with low memory corruption was found on Intel DG45ID and + * DG45FC boards. + * It has a different DMI_BIOS_VENDOR = "Intel Corp.", for now we will * match only DMI_BOARD_NAME and see if there is more bad products * with this vendor. */ + { .callback = dmi_low_memory_corruption, .ident = "AMI BIOS", .matches = { DMI_MATCH(DMI_BOARD_NAME, "DG45ID"), }, }, + { + .callback = dmi_low_memory_corruption, + .ident = "AMI BIOS", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "DG45FC"), + }, + }, #endif {} }; -- cgit v1.2.3 From cc0967490c1c3824bc5b75718b6ca8a51d9f2617 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 28 Jan 2010 17:04:42 -0600 Subject: x86, hw_breakpoints, kgdb: Fix kgdb to use hw_breakpoint API In the 2.6.33 kernel, the hw_breakpoint API is now used for the performance event counters. The hw_breakpoint_handler() now consumes the hw breakpoints that were previously set by kgdb arch specific code. In order for kgdb to work in conjunction with this core API change, kgdb must use some of the low level functions of the hw_breakpoint API to install, uninstall, and deal with hw breakpoint reservations. The kgdb core required a change to call kgdb_disable_hw_debug anytime a slave cpu enters kgdb_wait() in order to keep all the hw breakpoints in sync as well as to prevent hitting a hw breakpoint while kgdb is active. During the architecture specific initialization of kgdb, it will pre-allocate 4 disabled (struct perf event **) structures. Kgdb will use these to manage the capabilities for the 4 hw breakpoint registers, per cpu. Right now the hw_breakpoint API does not have a way to ask how many breakpoints are available, on each CPU so it is possible that the install of a breakpoint might fail when kgdb restores the system to the run state. The intent of this patch is to first get the basic functionality of hw breakpoints working and leave it to the person debugging the kernel to understand what hw breakpoints are in use and what restrictions have been imposed as a result. Breakpoint constraints will be dealt with in a future patch. While atomic, the x86 specific kgdb code will call arch_uninstall_hw_breakpoint() and arch_install_hw_breakpoint() to manage the cpu specific hw breakpoints. The net result of these changes allow kgdb to use the same pool of hw_breakpoints that are used by the perf event API, but neither knows about future reservations for the available hw breakpoint slots. Signed-off-by: Jason Wessel Acked-by: Frederic Weisbecker Cc: kgdb-bugreport@lists.sourceforge.net Cc: K.Prasad Cc: Peter Zijlstra Cc: Alan Stern Cc: torvalds@linux-foundation.org LKML-Reference: <1264719883-7285-2-git-send-email-jason.wessel@windriver.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/kgdb.c | 171 ++++++++++++++++++++++++++++++++----------------- kernel/kgdb.c | 3 + 2 files changed, 117 insertions(+), 57 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index dd74fe7273b1..62bea7307eaa 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -204,40 +205,38 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) static struct hw_breakpoint { unsigned enabled; - unsigned type; - unsigned len; unsigned long addr; + int len; + int type; + struct perf_event **pev; } breakinfo[4]; static void kgdb_correct_hw_break(void) { - unsigned long dr7; - int correctit = 0; - int breakbit; int breakno; - get_debugreg(dr7, 7); for (breakno = 0; breakno < 4; breakno++) { - breakbit = 2 << (breakno << 1); - if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { - correctit = 1; - dr7 |= breakbit; - dr7 &= ~(0xf0000 << (breakno << 2)); - dr7 |= ((breakinfo[breakno].len << 2) | - breakinfo[breakno].type) << - ((breakno << 2) + 16); - set_debugreg(breakinfo[breakno].addr, breakno); - - } else { - if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { - correctit = 1; - dr7 &= ~breakbit; - dr7 &= ~(0xf0000 << (breakno << 2)); - } - } + struct perf_event *bp; + struct arch_hw_breakpoint *info; + int val; + int cpu = raw_smp_processor_id(); + if (!breakinfo[breakno].enabled) + continue; + bp = *per_cpu_ptr(breakinfo[breakno].pev, cpu); + info = counter_arch_bp(bp); + if (bp->attr.disabled != 1) + continue; + bp->attr.bp_addr = breakinfo[breakno].addr; + bp->attr.bp_len = breakinfo[breakno].len; + bp->attr.bp_type = breakinfo[breakno].type; + info->address = breakinfo[breakno].addr; + info->len = breakinfo[breakno].len; + info->type = breakinfo[breakno].type; + val = arch_install_hw_breakpoint(bp); + if (!val) + bp->attr.disabled = 0; } - if (correctit) - set_debugreg(dr7, 7); + hw_breakpoint_restore(); } static int @@ -259,15 +258,23 @@ kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) static void kgdb_remove_all_hw_break(void) { int i; + int cpu = raw_smp_processor_id(); + struct perf_event *bp; - for (i = 0; i < 4; i++) - memset(&breakinfo[i], 0, sizeof(struct hw_breakpoint)); + for (i = 0; i < 4; i++) { + if (!breakinfo[i].enabled) + continue; + bp = *per_cpu_ptr(breakinfo[i].pev, cpu); + if (bp->attr.disabled == 1) + continue; + arch_uninstall_hw_breakpoint(bp); + bp->attr.disabled = 1; + } } static int kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) { - unsigned type; int i; for (i = 0; i < 4; i++) @@ -278,27 +285,38 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) switch (bptype) { case BP_HARDWARE_BREAKPOINT: - type = 0; - len = 1; + len = 1; + breakinfo[i].type = X86_BREAKPOINT_EXECUTE; break; case BP_WRITE_WATCHPOINT: - type = 1; + breakinfo[i].type = X86_BREAKPOINT_WRITE; break; case BP_ACCESS_WATCHPOINT: - type = 3; + breakinfo[i].type = X86_BREAKPOINT_RW; break; default: return -1; } - - if (len == 1 || len == 2 || len == 4) - breakinfo[i].len = len - 1; - else + switch (len) { + case 1: + breakinfo[i].len = X86_BREAKPOINT_LEN_1; + break; + case 2: + breakinfo[i].len = X86_BREAKPOINT_LEN_2; + break; + case 4: + breakinfo[i].len = X86_BREAKPOINT_LEN_4; + break; +#ifdef CONFIG_X86_64 + case 8: + breakinfo[i].len = X86_BREAKPOINT_LEN_8; + break; +#endif + default: return -1; - - breakinfo[i].enabled = 1; + } breakinfo[i].addr = addr; - breakinfo[i].type = type; + breakinfo[i].enabled = 1; return 0; } @@ -313,8 +331,21 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) */ void kgdb_disable_hw_debug(struct pt_regs *regs) { + int i; + int cpu = raw_smp_processor_id(); + struct perf_event *bp; + /* Disable hardware debugging while we are in kgdb: */ set_debugreg(0UL, 7); + for (i = 0; i < 4; i++) { + if (!breakinfo[i].enabled) + continue; + bp = *per_cpu_ptr(breakinfo[i].pev, cpu); + if (bp->attr.disabled == 1) + continue; + arch_uninstall_hw_breakpoint(bp); + bp->attr.disabled = 1; + } } /** @@ -378,7 +409,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, struct pt_regs *linux_regs) { unsigned long addr; - unsigned long dr6; char *ptr; int newPC; @@ -404,20 +434,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, raw_smp_processor_id()); } - get_debugreg(dr6, 6); - if (!(dr6 & 0x4000)) { - int breakno; - - for (breakno = 0; breakno < 4; breakno++) { - if (dr6 & (1 << breakno) && - breakinfo[breakno].type == 0) { - /* Set restore flag: */ - linux_regs->flags |= X86_EFLAGS_RF; - break; - } - } - } - set_debugreg(0UL, 6); kgdb_correct_hw_break(); return 0; @@ -485,8 +501,7 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) break; case DIE_DEBUG: - if (atomic_read(&kgdb_cpu_doing_single_step) == - raw_smp_processor_id()) { + if (atomic_read(&kgdb_cpu_doing_single_step) != -1) { if (user_mode(regs)) return single_step_cont(regs, args); break; @@ -539,7 +554,42 @@ static struct notifier_block kgdb_notifier = { */ int kgdb_arch_init(void) { - return register_die_notifier(&kgdb_notifier); + int i, cpu; + int ret; + struct perf_event_attr attr; + struct perf_event **pevent; + + ret = register_die_notifier(&kgdb_notifier); + if (ret != 0) + return ret; + /* + * Pre-allocate the hw breakpoint structions in the non-atomic + * portion of kgdb because this operation requires mutexs to + * complete. + */ + attr.bp_addr = (unsigned long)kgdb_arch_init; + attr.type = PERF_TYPE_BREAKPOINT; + attr.bp_len = HW_BREAKPOINT_LEN_1; + attr.bp_type = HW_BREAKPOINT_W; + attr.disabled = 1; + for (i = 0; i < 4; i++) { + breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL); + if (IS_ERR(breakinfo[i].pev)) { + printk(KERN_ERR "kgdb: Could not allocate hw breakpoints\n"); + breakinfo[i].pev = NULL; + kgdb_arch_exit(); + return -1; + } + for_each_online_cpu(cpu) { + pevent = per_cpu_ptr(breakinfo[i].pev, cpu); + pevent[0]->hw.sample_period = 1; + if (pevent[0]->destroy != NULL) { + pevent[0]->destroy = NULL; + release_bp_slot(*pevent); + } + } + } + return ret; } /** @@ -550,6 +600,13 @@ int kgdb_arch_init(void) */ void kgdb_arch_exit(void) { + int i; + for (i = 0; i < 4; i++) { + if (breakinfo[i].pev) { + unregister_wide_hw_breakpoint(breakinfo[i].pev); + breakinfo[i].pev = NULL; + } + } unregister_die_notifier(&kgdb_notifier); } diff --git a/kernel/kgdb.c b/kernel/kgdb.c index 2eb517e23514..c7ade62e4ef0 100644 --- a/kernel/kgdb.c +++ b/kernel/kgdb.c @@ -583,6 +583,9 @@ static void kgdb_wait(struct pt_regs *regs) smp_wmb(); atomic_set(&cpu_in_kgdb[cpu], 1); + /* Disable any cpu specific hw breakpoints */ + kgdb_disable_hw_debug(regs); + /* Wait till primary CPU is done with debugging */ while (atomic_read(&passive_cpu_wait[cpu])) cpu_relax(); -- cgit v1.2.3 From 5352ae638e2d7d5c9b2e4d528676bbf2af6fd6f3 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 28 Jan 2010 17:04:43 -0600 Subject: perf, hw_breakpoint, kgdb: Do not take mutex for kernel debugger This patch fixes the regression in functionality where the kernel debugger and the perf API do not nicely share hw breakpoint reservations. The kernel debugger cannot use any mutex_lock() calls because it can start the kernel running from an invalid context. A mutex free version of the reservation API needed to get created for the kernel debugger to safely update hw breakpoint reservations. The possibility for a breakpoint reservation to be concurrently processed at the time that kgdb interrupts the system is improbable. Should this corner case occur the end user is warned, and the kernel debugger will prohibit updating the hardware breakpoint reservations. Any time the kernel debugger reserves a hardware breakpoint it will be a system wide reservation. Signed-off-by: Jason Wessel Acked-by: Frederic Weisbecker Cc: kgdb-bugreport@lists.sourceforge.net Cc: K.Prasad Cc: Peter Zijlstra Cc: Alan Stern Cc: torvalds@linux-foundation.org LKML-Reference: <1264719883-7285-3-git-send-email-jason.wessel@windriver.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/kgdb.c | 51 ++++++++++++++++++++++++++++++++++++++++++ include/linux/hw_breakpoint.h | 2 ++ kernel/hw_breakpoint.c | 52 ++++++++++++++++++++++++++++++++++--------- 3 files changed, 95 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 62bea7307eaa..bfba6019d762 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -239,6 +239,49 @@ static void kgdb_correct_hw_break(void) hw_breakpoint_restore(); } +static int hw_break_reserve_slot(int breakno) +{ + int cpu; + int cnt = 0; + struct perf_event **pevent; + + for_each_online_cpu(cpu) { + cnt++; + pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); + if (dbg_reserve_bp_slot(*pevent)) + goto fail; + } + + return 0; + +fail: + for_each_online_cpu(cpu) { + cnt--; + if (!cnt) + break; + pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); + dbg_release_bp_slot(*pevent); + } + return -1; +} + +static int hw_break_release_slot(int breakno) +{ + struct perf_event **pevent; + int cpu; + + for_each_online_cpu(cpu) { + pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); + if (dbg_release_bp_slot(*pevent)) + /* + * The debugger is responisble for handing the retry on + * remove failure. + */ + return -1; + } + return 0; +} + static int kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) { @@ -250,6 +293,10 @@ kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) if (i == 4) return -1; + if (hw_break_release_slot(i)) { + printk(KERN_ERR "Cannot remove hw breakpoint at %lx\n", addr); + return -1; + } breakinfo[i].enabled = 0; return 0; @@ -316,6 +363,10 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) return -1; } breakinfo[i].addr = addr; + if (hw_break_reserve_slot(i)) { + breakinfo[i].addr = 0; + return -1; + } breakinfo[i].enabled = 1; return 0; diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 41235c93e4e9..070ba0621738 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -75,6 +75,8 @@ extern int __register_perf_hw_breakpoint(struct perf_event *bp); extern void unregister_hw_breakpoint(struct perf_event *bp); extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events); +extern int dbg_reserve_bp_slot(struct perf_event *bp); +extern int dbg_release_bp_slot(struct perf_event *bp); extern int reserve_bp_slot(struct perf_event *bp); extern void release_bp_slot(struct perf_event *bp); diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index c030ae657f20..8a5c7d55ac9f 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -243,38 +243,70 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable) * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *)) * + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM */ -int reserve_bp_slot(struct perf_event *bp) +static int __reserve_bp_slot(struct perf_event *bp) { struct bp_busy_slots slots = {0}; - int ret = 0; - - mutex_lock(&nr_bp_mutex); fetch_bp_busy_slots(&slots, bp); /* Flexible counters need to keep at least one slot */ - if (slots.pinned + (!!slots.flexible) == HBP_NUM) { - ret = -ENOSPC; - goto end; - } + if (slots.pinned + (!!slots.flexible) == HBP_NUM) + return -ENOSPC; toggle_bp_slot(bp, true); -end: + return 0; +} + +int reserve_bp_slot(struct perf_event *bp) +{ + int ret; + + mutex_lock(&nr_bp_mutex); + + ret = __reserve_bp_slot(bp); + mutex_unlock(&nr_bp_mutex); return ret; } +static void __release_bp_slot(struct perf_event *bp) +{ + toggle_bp_slot(bp, false); +} + void release_bp_slot(struct perf_event *bp) { mutex_lock(&nr_bp_mutex); - toggle_bp_slot(bp, false); + __release_bp_slot(bp); mutex_unlock(&nr_bp_mutex); } +/* + * Allow the kernel debugger to reserve breakpoint slots without + * taking a lock using the dbg_* variant of for the reserve and + * release breakpoint slots. + */ +int dbg_reserve_bp_slot(struct perf_event *bp) +{ + if (mutex_is_locked(&nr_bp_mutex)) + return -1; + + return __reserve_bp_slot(bp); +} + +int dbg_release_bp_slot(struct perf_event *bp) +{ + if (mutex_is_locked(&nr_bp_mutex)) + return -1; + + __release_bp_slot(bp); + + return 0; +} int register_perf_hw_breakpoint(struct perf_event *bp) { -- cgit v1.2.3 From ea0854170c95245a258b386c7a9314399c949fe0 Mon Sep 17 00:00:00 2001 From: Shaohui Zheng Date: Tue, 2 Feb 2010 13:44:16 -0800 Subject: memory hotplug: fix a bug on /dev/mem for 64-bit kernels Newly added memory can not be accessed via /dev/mem, because we do not update the variables high_memory, max_pfn and max_low_pfn. Add a function update_end_of_memory_vars() to update these variables for 64-bit kernels. [akpm@linux-foundation.org: simplify comment] Signed-off-by: Shaohui Zheng Cc: Andi Kleen Cc: Li Haicheng Reviewed-by: Wu Fengguang Reviewed-by: KAMEZAWA Hiroyuki Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/init_64.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 5198b9bb34ef..69ddfbd91135 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -49,6 +49,7 @@ #include #include #include +#include static unsigned long dma_reserve __initdata; @@ -615,6 +616,21 @@ void __init paging_init(void) * Memory hotplug specific functions */ #ifdef CONFIG_MEMORY_HOTPLUG +/* + * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need + * updating. + */ +static void update_end_of_memory_vars(u64 start, u64 size) +{ + unsigned long end_pfn = PFN_UP(start + size); + + if (end_pfn > max_pfn) { + max_pfn = end_pfn; + max_low_pfn = end_pfn; + high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; + } +} + /* * Memory is added always to NORMAL zone. This means you will never get * additional DMA/DMA32 memory. @@ -634,6 +650,9 @@ int arch_add_memory(int nid, u64 start, u64 size) ret = __add_pages(nid, zone, start_pfn, nr_pages); WARN_ON_ONCE(ret); + /* update max_pfn, max_low_pfn and high_memory */ + update_end_of_memory_vars(start, size); + return ret; } EXPORT_SYMBOL_GPL(arch_add_memory); -- cgit v1.2.3 From 923de3cf5bf12049628019010e36623fca5ef6d1 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 27 Jan 2010 19:13:49 +0800 Subject: kvmclock: count total_sleep_time when updating guest clock Current kvm wallclock does not consider the total_sleep_time which could cause wrong wallclock in guest after host suspend/resume. This patch solve this issue by counting total_sleep_time to get the correct host boot time. Cc: stable@kernel.org Signed-off-by: Jason Wang Acked-by: Glauber Costa Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1ddcad452add..a1e1bc9d412d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -670,7 +670,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) { static int version; struct pvclock_wall_clock wc; - struct timespec now, sys, boot; + struct timespec boot; if (!wall_clock) return; @@ -685,9 +685,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) * wall clock specified here. guest system time equals host * system time for us, thus we must fill in host boot time here. */ - now = current_kernel_time(); - ktime_get_ts(&sys); - boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys)); + getboottime(&boot); wc.sec = boot.tv_sec; wc.nsec = boot.tv_nsec; @@ -762,6 +760,7 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) local_irq_save(flags); kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp); ktime_get_ts(&ts); + monotonic_to_bootbased(&ts); local_irq_restore(flags); /* With all the info we got, fill in the values */ -- cgit v1.2.3 From ee73f656a604d5aa9df86a97102e4e462dd79924 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 29 Jan 2010 17:28:41 -0200 Subject: KVM: PIT: control word is write-only PIT control word (address 0x43) is write-only, reads are undefined. Cc: stable@kernel.org Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/i8254.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 296aba49472a..15578f180e59 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -467,6 +467,9 @@ static int pit_ioport_read(struct kvm_io_device *this, return -EOPNOTSUPP; addr &= KVM_PIT_CHANNEL_MASK; + if (addr == 3) + return 0; + s = &pit_state->channels[addr]; mutex_lock(&pit_state->lock); -- cgit v1.2.3