From f03c4866d31e913a8dbc84f7d1459abdaf0bd326 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 30 Mar 2012 19:29:57 +0900 Subject: sh: fix up fallout from system.h disintegration. Quite a bit of fallout all over the place, nothing terribly exciting. Signed-off-by: Paul Mundt --- arch/sh/boards/board-sh7785lcr.c | 1 + arch/sh/boards/mach-hp6xx/pm.c | 1 + arch/sh/kernel/cpu/fpu.c | 1 + arch/sh/kernel/cpu/sh2a/fpu.c | 1 + arch/sh/kernel/cpu/sh4/fpu.c | 1 + arch/sh/kernel/cpu/shmobile/pm.c | 1 + arch/sh/kernel/idle.c | 2 +- arch/sh/kernel/kgdb.c | 1 + arch/sh/kernel/process_32.c | 1 + arch/sh/kernel/smp.c | 1 + arch/sh/mm/cache-sh4.c | 1 + arch/sh/mm/flush-sh4.c | 1 + arch/sh/mm/sram.c | 1 + 13 files changed, 13 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sh/boards/board-sh7785lcr.c b/arch/sh/boards/board-sh7785lcr.c index d879848f3cdd..d0d6221d7c2e 100644 --- a/arch/sh/boards/board-sh7785lcr.c +++ b/arch/sh/boards/board-sh7785lcr.c @@ -28,6 +28,7 @@ #include #include #include +#include /* * NOTE: This board has 2 physical memory maps. diff --git a/arch/sh/boards/mach-hp6xx/pm.c b/arch/sh/boards/mach-hp6xx/pm.c index adc9b4bba828..8b50cf763c06 100644 --- a/arch/sh/boards/mach-hp6xx/pm.c +++ b/arch/sh/boards/mach-hp6xx/pm.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/cpu/fpu.c b/arch/sh/kernel/cpu/fpu.c index 7f1b70cace35..f8f7af51c128 100644 --- a/arch/sh/kernel/cpu/fpu.c +++ b/arch/sh/kernel/cpu/fpu.c @@ -2,6 +2,7 @@ #include #include #include +#include int init_fpu(struct task_struct *tsk) { diff --git a/arch/sh/kernel/cpu/sh2a/fpu.c b/arch/sh/kernel/cpu/sh2a/fpu.c index 488d24e0cdf0..98bbaa447c93 100644 --- a/arch/sh/kernel/cpu/sh2a/fpu.c +++ b/arch/sh/kernel/cpu/sh2a/fpu.c @@ -14,6 +14,7 @@ #include #include #include +#include /* The PR (precision) bit in the FP Status Register must be clear when * an frchg instruction is executed, otherwise the instruction is undefined. diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c index e74cd6c0f10d..69ab4d3c8d41 100644 --- a/arch/sh/kernel/cpu/sh4/fpu.c +++ b/arch/sh/kernel/cpu/sh4/fpu.c @@ -16,6 +16,7 @@ #include #include #include +#include /* The PR (precision) bit in the FP Status Register must be clear when * an frchg instruction is executed, otherwise the instruction is undefined. diff --git a/arch/sh/kernel/cpu/shmobile/pm.c b/arch/sh/kernel/cpu/shmobile/pm.c index a6f95ae4aae7..08d27fac8d08 100644 --- a/arch/sh/kernel/cpu/shmobile/pm.c +++ b/arch/sh/kernel/cpu/shmobile/pm.c @@ -16,6 +16,7 @@ #include #include #include +#include /* * Notifier lists for pre/post sleep notification diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c index 64852ecc6881..ee226e20c20c 100644 --- a/arch/sh/kernel/idle.c +++ b/arch/sh/kernel/idle.c @@ -17,8 +17,8 @@ #include #include #include -#include #include +#include #include #include diff --git a/arch/sh/kernel/kgdb.c b/arch/sh/kernel/kgdb.c index efb6d398dec3..b117781bfea2 100644 --- a/arch/sh/kernel/kgdb.c +++ b/arch/sh/kernel/kgdb.c @@ -14,6 +14,7 @@ #include #include #include +#include /* Macros for single step instruction identification */ #define OPCODE_BT(op) (((op) & 0xff00) == 0x8900) diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index f72e3a951588..94273aaf78c1 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -26,6 +26,7 @@ #include #include #include +#include void show_regs(struct pt_regs * regs) { diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index a17a14d32340..eaebdf6a5c77 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -27,6 +27,7 @@ #include #include #include +#include int __cpu_number_map[NR_CPUS]; /* Map physical to logical */ int __cpu_logical_map[NR_CPUS]; /* Map logical to physical */ diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c index 112fea12522a..0e529285b28d 100644 --- a/arch/sh/mm/cache-sh4.c +++ b/arch/sh/mm/cache-sh4.c @@ -18,6 +18,7 @@ #include #include #include +#include #include /* diff --git a/arch/sh/mm/flush-sh4.c b/arch/sh/mm/flush-sh4.c index 75a17f5bfa14..0b85dd9dd3a7 100644 --- a/arch/sh/mm/flush-sh4.c +++ b/arch/sh/mm/flush-sh4.c @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/arch/sh/mm/sram.c b/arch/sh/mm/sram.c index bc156ec4545e..2d8fa718d55e 100644 --- a/arch/sh/mm/sram.c +++ b/arch/sh/mm/sram.c @@ -9,6 +9,7 @@ */ #include #include +#include #include /* -- cgit v1.2.3 From da47f4a3186b4f55914cceffb51adfa55ef4897e Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 30 Mar 2012 19:31:22 +0900 Subject: sh: dwarf unwinder depends on SHcompact. Presently there's no SHmedia support plugged in for the dwarf unwinder. While it's trivial to provide an SHmedia version of dwarf_read_arch_reg(), the general sh64 case is more complicated in that the TLB miss handler uses a locked down set of registers for optimization (including the frame pointer) which we need for the unwind table generation. While freeing up the frame pointer for use in the TLB miss handler is reasonably straightforward, it's still more trouble than it's worth, so we simply restrict the unwinder to 32-bit for now. Signed-off-by: Paul Mundt --- arch/sh/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/sh/Kconfig.debug b/arch/sh/Kconfig.debug index c1d5a820b1aa..5f2bb4242c0f 100644 --- a/arch/sh/Kconfig.debug +++ b/arch/sh/Kconfig.debug @@ -61,6 +61,7 @@ config DUMP_CODE config DWARF_UNWINDER bool "Enable the DWARF unwinder for stacktraces" select FRAME_POINTER + depends on SUPERH32 default n help Enabling this option will make stacktraces more accurate, at -- cgit v1.2.3 From 87c34ed3aba9249cb06d1a1f100cd3618f29268c Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 30 Mar 2012 19:36:03 +0900 Subject: sh: dma: Fix up device attribute mismatch from sysdev fallout. This fixes up an attribute mismatch that was introduced in the sysdev->struct device migration. Signed-off-by: Paul Mundt --- arch/sh/drivers/dma/dma-sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sh/drivers/dma/dma-sysfs.c b/arch/sh/drivers/dma/dma-sysfs.c index b1cb2715ad6e..67ee95603813 100644 --- a/arch/sh/drivers/dma/dma-sysfs.c +++ b/arch/sh/drivers/dma/dma-sysfs.c @@ -54,7 +54,7 @@ static int __init dma_subsys_init(void) if (unlikely(ret)) return ret; - return device_create_file(dma_subsys.dev_root, &dev_attr_devices.attr); + return device_create_file(dma_subsys.dev_root, &dev_attr_devices); } postcore_initcall(dma_subsys_init); -- cgit v1.2.3 From cd34e202faa4be7f77fd2f7a59824f13dc14901b Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 30 Mar 2012 19:42:26 +0900 Subject: sh: vsyscall: Fix up .eh_frame generation. Some improper formatting caused the .eh_frame generation to fail, resulting in gcc/g++ testsuite failures with regards to unwinding through the vDSO. Now that someone is actually working on this on the gcc side it's time to fix up the kernel side, too. Signed-off-by: Paul Mundt --- arch/sh/kernel/vsyscall/vsyscall-sigreturn.S | 35 ++++++++++++++++++++++++++++ arch/sh/kernel/vsyscall/vsyscall-trapa.S | 23 ++++++++---------- 2 files changed, 45 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/sh/kernel/vsyscall/vsyscall-sigreturn.S b/arch/sh/kernel/vsyscall/vsyscall-sigreturn.S index 555a64f124ca..23af17584054 100644 --- a/arch/sh/kernel/vsyscall/vsyscall-sigreturn.S +++ b/arch/sh/kernel/vsyscall/vsyscall-sigreturn.S @@ -34,6 +34,41 @@ __kernel_rt_sigreturn: 1: .short __NR_rt_sigreturn .LEND_rt_sigreturn: .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn + .previous .section .eh_frame,"a",@progbits +.LCIE1: + .ualong .LCIE1_end - .LCIE1_start +.LCIE1_start: + .ualong 0 /* CIE ID */ + .byte 0x1 /* Version number */ + .string "zRS" /* NUL-terminated augmentation string */ + .uleb128 0x1 /* Code alignment factor */ + .sleb128 -4 /* Data alignment factor */ + .byte 0x11 /* Return address register column */ + .uleb128 0x1 /* Augmentation length and data */ + .byte 0x1b /* DW_EH_PE_pcrel | DW_EH_PE_sdata4. */ + .byte 0xc, 0xf, 0x0 /* DW_CFA_def_cfa: r15 ofs 0 */ + + .align 2 +.LCIE1_end: + + .ualong .LFDE0_end-.LFDE0_start /* Length FDE0 */ +.LFDE0_start: + .ualong .LFDE0_start-.LCIE1 /* CIE pointer */ + .ualong .LSTART_sigreturn-. /* PC-relative start address */ + .ualong .LEND_sigreturn-.LSTART_sigreturn + .uleb128 0 /* Augmentation */ + .align 2 +.LFDE0_end: + + .ualong .LFDE1_end-.LFDE1_start /* Length FDE1 */ +.LFDE1_start: + .ualong .LFDE1_start-.LCIE1 /* CIE pointer */ + .ualong .LSTART_rt_sigreturn-. /* PC-relative start address */ + .ualong .LEND_rt_sigreturn-.LSTART_rt_sigreturn + .uleb128 0 /* Augmentation */ + .align 2 +.LFDE1_end: + .previous diff --git a/arch/sh/kernel/vsyscall/vsyscall-trapa.S b/arch/sh/kernel/vsyscall/vsyscall-trapa.S index 3e70f851cdc6..0eb74d00690a 100644 --- a/arch/sh/kernel/vsyscall/vsyscall-trapa.S +++ b/arch/sh/kernel/vsyscall/vsyscall-trapa.S @@ -3,37 +3,34 @@ .type __kernel_vsyscall,@function __kernel_vsyscall: .LSTART_vsyscall: - /* XXX: We'll have to do something here once we opt to use the vDSO - * page for something other than the signal trampoline.. as well as - * fill out .eh_frame -- PFM. */ + trapa #0x10 + nop .LEND_vsyscall: .size __kernel_vsyscall,.-.LSTART_vsyscall + .previous .section .eh_frame,"a",@progbits - .previous .LCIE: .ualong .LCIE_end - .LCIE_start .LCIE_start: .ualong 0 /* CIE ID */ .byte 0x1 /* Version number */ - .string "zRS" /* NUL-terminated augmentation string */ + .string "zR" /* NUL-terminated augmentation string */ .uleb128 0x1 /* Code alignment factor */ .sleb128 -4 /* Data alignment factor */ .byte 0x11 /* Return address register column */ - /* Augmentation length and data (none) */ - .byte 0xc /* DW_CFA_def_cfa */ - .uleb128 0xf /* r15 */ - .uleb128 0x0 /* offset 0 */ - + .uleb128 0x1 /* Augmentation length and data */ + .byte 0x1b /* DW_EH_PE_pcrel | DW_EH_PE_sdata4. */ + .byte 0xc,0xf,0x0 /* DW_CFA_def_cfa: r15 ofs 0 */ .align 2 .LCIE_end: .ualong .LFDE_end-.LFDE_start /* Length FDE */ .LFDE_start: - .ualong .LCIE /* CIE pointer */ - .ualong .LSTART_vsyscall-. /* start address */ + .ualong .LFDE_start-.LCIE /* CIE pointer */ + .ualong .LSTART_vsyscall-. /* PC-relative start address */ .ualong .LEND_vsyscall-.LSTART_vsyscall - .uleb128 0 + .uleb128 0 /* Augmentation */ .align 2 .LFDE_end: .previous -- cgit v1.2.3 From 34f2c0ac111aaf090c7d2432dab532640870733a Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Sun, 1 Apr 2012 16:38:46 -0400 Subject: tile: fix multiple build failures from system.h dismantle Commit bd119c69239322caafdb64517a806037d0d0c70a "Disintegrate asm/system.h for Tile" created the asm/switch_to.h file, but did not add an include of it to all its users. Also, commit b4816afa3986704d1404fc48e931da5135820472 "Move the asm-generic/system.h xchg() implementation to asm-generic/cmpxchg.h" introduced the concept of asm/cmpxchg.h but the tile arch never got one. Fork the cmpxchg content out of the asm/atomic.h file to create one. Acked-by: David Howells Signed-off-by: Paul Gortmaker Signed-off-by: Chris Metcalf --- arch/tile/include/asm/atomic.h | 50 ++-------------------------- arch/tile/include/asm/cmpxchg.h | 73 +++++++++++++++++++++++++++++++++++++++++ arch/tile/kernel/process.c | 1 + arch/tile/kernel/stack.c | 1 + 4 files changed, 77 insertions(+), 48 deletions(-) create mode 100644 arch/tile/include/asm/cmpxchg.h (limited to 'arch') diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h index bb696da5d7cd..f2461429a4a4 100644 --- a/arch/tile/include/asm/atomic.h +++ b/arch/tile/include/asm/atomic.h @@ -17,6 +17,8 @@ #ifndef _ASM_TILE_ATOMIC_H #define _ASM_TILE_ATOMIC_H +#include + #ifndef __ASSEMBLY__ #include @@ -121,54 +123,6 @@ static inline int atomic_read(const atomic_t *v) */ #define atomic_add_negative(i, v) (atomic_add_return((i), (v)) < 0) -/* Nonexistent functions intended to cause link errors. */ -extern unsigned long __xchg_called_with_bad_pointer(void); -extern unsigned long __cmpxchg_called_with_bad_pointer(void); - -#define xchg(ptr, x) \ - ({ \ - typeof(*(ptr)) __x; \ - switch (sizeof(*(ptr))) { \ - case 4: \ - __x = (typeof(__x))(typeof(__x-__x))atomic_xchg( \ - (atomic_t *)(ptr), \ - (u32)(typeof((x)-(x)))(x)); \ - break; \ - case 8: \ - __x = (typeof(__x))(typeof(__x-__x))atomic64_xchg( \ - (atomic64_t *)(ptr), \ - (u64)(typeof((x)-(x)))(x)); \ - break; \ - default: \ - __xchg_called_with_bad_pointer(); \ - } \ - __x; \ - }) - -#define cmpxchg(ptr, o, n) \ - ({ \ - typeof(*(ptr)) __x; \ - switch (sizeof(*(ptr))) { \ - case 4: \ - __x = (typeof(__x))(typeof(__x-__x))atomic_cmpxchg( \ - (atomic_t *)(ptr), \ - (u32)(typeof((o)-(o)))(o), \ - (u32)(typeof((n)-(n)))(n)); \ - break; \ - case 8: \ - __x = (typeof(__x))(typeof(__x-__x))atomic64_cmpxchg( \ - (atomic64_t *)(ptr), \ - (u64)(typeof((o)-(o)))(o), \ - (u64)(typeof((n)-(n)))(n)); \ - break; \ - default: \ - __cmpxchg_called_with_bad_pointer(); \ - } \ - __x; \ - }) - -#define tas(ptr) (xchg((ptr), 1)) - #endif /* __ASSEMBLY__ */ #ifndef __tilegx__ diff --git a/arch/tile/include/asm/cmpxchg.h b/arch/tile/include/asm/cmpxchg.h new file mode 100644 index 000000000000..276f067e3640 --- /dev/null +++ b/arch/tile/include/asm/cmpxchg.h @@ -0,0 +1,73 @@ +/* + * cmpxchg.h -- forked from asm/atomic.h with this copyright: + * + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + */ + +#ifndef _ASM_TILE_CMPXCHG_H +#define _ASM_TILE_CMPXCHG_H + +#ifndef __ASSEMBLY__ + +/* Nonexistent functions intended to cause link errors. */ +extern unsigned long __xchg_called_with_bad_pointer(void); +extern unsigned long __cmpxchg_called_with_bad_pointer(void); + +#define xchg(ptr, x) \ + ({ \ + typeof(*(ptr)) __x; \ + switch (sizeof(*(ptr))) { \ + case 4: \ + __x = (typeof(__x))(typeof(__x-__x))atomic_xchg( \ + (atomic_t *)(ptr), \ + (u32)(typeof((x)-(x)))(x)); \ + break; \ + case 8: \ + __x = (typeof(__x))(typeof(__x-__x))atomic64_xchg( \ + (atomic64_t *)(ptr), \ + (u64)(typeof((x)-(x)))(x)); \ + break; \ + default: \ + __xchg_called_with_bad_pointer(); \ + } \ + __x; \ + }) + +#define cmpxchg(ptr, o, n) \ + ({ \ + typeof(*(ptr)) __x; \ + switch (sizeof(*(ptr))) { \ + case 4: \ + __x = (typeof(__x))(typeof(__x-__x))atomic_cmpxchg( \ + (atomic_t *)(ptr), \ + (u32)(typeof((o)-(o)))(o), \ + (u32)(typeof((n)-(n)))(n)); \ + break; \ + case 8: \ + __x = (typeof(__x))(typeof(__x-__x))atomic64_cmpxchg( \ + (atomic64_t *)(ptr), \ + (u64)(typeof((o)-(o)))(o), \ + (u64)(typeof((n)-(n)))(n)); \ + break; \ + default: \ + __cmpxchg_called_with_bad_pointer(); \ + } \ + __x; \ + }) + +#define tas(ptr) (xchg((ptr), 1)) + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_TILE_CMPXCHG_H */ diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 30caecac94dc..ee01b1c683e4 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index 37ee4d037e0b..0be6b0109ce0 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 8d6951439ef524683057251f1231df232046b6b6 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 27 Mar 2012 13:47:57 -0400 Subject: arch/tile/Kconfig: remove pointless "!M386" test. Looks like a cut and paste bug from the x86 version. Signed-off-by: Chris Metcalf --- arch/tile/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 11270ca22c0a..30413c1d1069 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -12,7 +12,7 @@ config TILE select GENERIC_PENDING_IRQ if SMP select GENERIC_IRQ_SHOW select SYS_HYPERVISOR - select ARCH_HAVE_NMI_SAFE_CMPXCHG if !M386 + select ARCH_HAVE_NMI_SAFE_CMPXCHG # FIXME: investigate whether we need/want these options. # select HAVE_IOREMAP_PROT -- cgit v1.2.3 From 3d1e8a81cc81e62d13731e48c40760e60f31b0d5 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 27 Mar 2012 13:53:30 -0400 Subject: arch/tile/Kconfig: rename tile_defconfig to tilepro_defconfig We switched to using "tilepro" for the 32-bit stuff a while ago, but missed this one usage. Signed-off-by: Chris Metcalf --- arch/tile/Kconfig | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 30413c1d1069..30393aa500ec 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -69,6 +69,9 @@ config ARCH_PHYS_ADDR_T_64BIT config ARCH_DMA_ADDR_T_64BIT def_bool y +config NEED_DMA_MAP_STATE + def_bool y + config LOCKDEP_SUPPORT def_bool y @@ -118,7 +121,7 @@ config 64BIT config ARCH_DEFCONFIG string - default "arch/tile/configs/tile_defconfig" if !TILEGX + default "arch/tile/configs/tilepro_defconfig" if !TILEGX default "arch/tile/configs/tilegx_defconfig" if TILEGX source "init/Kconfig" -- cgit v1.2.3 From 884197f7ea93dc2faf399060dc29cb0dbbda6937 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 27 Mar 2012 13:56:04 -0400 Subject: arch/tile/Kconfig: don't specify CONFIG_PAGE_OFFSET for 64-bit builds It's fixed at half the VA space and there's no point in configuring it. Signed-off-by: Chris Metcalf --- arch/tile/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 30393aa500ec..96033e2d6845 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -243,6 +243,7 @@ endchoice config PAGE_OFFSET hex + depends on !64BIT default 0xF0000000 if VMSPLIT_3_75G default 0xE0000000 if VMSPLIT_3_5G default 0xB0000000 if VMSPLIT_2_75G -- cgit v1.2.3 From 327e8b6b25588ab906856a4e506b28c59422f11b Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 27 Mar 2012 14:04:57 -0400 Subject: arch/tile: fix typo in We aren't yet using this definition in the kernel, but fix it up before someone goes looking for it. Signed-off-by: Chris Metcalf --- arch/tile/include/arch/spr_def.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/tile/include/arch/spr_def.h b/arch/tile/include/arch/spr_def.h index f548efeb2de3..d6ba449b5363 100644 --- a/arch/tile/include/arch/spr_def.h +++ b/arch/tile/include/arch/spr_def.h @@ -60,8 +60,8 @@ _concat4(SPR_IPI_EVENT_, CONFIG_KERNEL_PL,,) #define SPR_IPI_EVENT_RESET_K \ _concat4(SPR_IPI_EVENT_RESET_, CONFIG_KERNEL_PL,,) -#define SPR_IPI_MASK_SET_K \ - _concat4(SPR_IPI_MASK_SET_, CONFIG_KERNEL_PL,,) +#define SPR_IPI_EVENT_SET_K \ + _concat4(SPR_IPI_EVENT_SET_, CONFIG_KERNEL_PL,,) #define INT_IPI_K \ _concat4(INT_IPI_, CONFIG_KERNEL_PL,,) -- cgit v1.2.3 From 07feea877d18453bbe4ad47fe2a365eebf56a7af Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 27 Mar 2012 14:10:03 -0400 Subject: arch/tile: revert comment for atomic64_add_unless(). It still returns whether @v was not @u, not the old value, unlike __atomic_add_unless(). Signed-off-by: Chris Metcalf Acked-by: Arun Sharma --- arch/tile/include/asm/atomic_32.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h index 466dc4a39a4f..54d1da826f93 100644 --- a/arch/tile/include/asm/atomic_32.h +++ b/arch/tile/include/asm/atomic_32.h @@ -200,7 +200,7 @@ static inline u64 atomic64_add_return(u64 i, atomic64_t *v) * @u: ...unless v is equal to u. * * Atomically adds @a to @v, so long as @v was not already @u. - * Returns the old value of @v. + * Returns non-zero if @v was not @u, and zero otherwise. */ static inline u64 atomic64_add_unless(atomic64_t *v, u64 a, u64 u) { -- cgit v1.2.3 From 664c100bce0070148131f8d49518015476c03cae Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 27 Mar 2012 14:17:05 -0400 Subject: arch/tile: fix gcc 4.6 warnings in Fix some signedness and variable usage warnings in change_bit() and test_and_change_bit(). Signed-off-by: Chris Metcalf --- arch/tile/include/asm/bitops_64.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/tile/include/asm/bitops_64.h b/arch/tile/include/asm/bitops_64.h index 58d021a9834f..60b87ee54fb8 100644 --- a/arch/tile/include/asm/bitops_64.h +++ b/arch/tile/include/asm/bitops_64.h @@ -38,10 +38,10 @@ static inline void clear_bit(unsigned nr, volatile unsigned long *addr) static inline void change_bit(unsigned nr, volatile unsigned long *addr) { - unsigned long old, mask = (1UL << (nr % BITS_PER_LONG)); - long guess, oldval; + unsigned long mask = (1UL << (nr % BITS_PER_LONG)); + unsigned long guess, oldval; addr += nr / BITS_PER_LONG; - old = *addr; + oldval = *addr; do { guess = oldval; oldval = atomic64_cmpxchg((atomic64_t *)addr, @@ -85,7 +85,7 @@ static inline int test_and_change_bit(unsigned nr, volatile unsigned long *addr) { unsigned long mask = (1UL << (nr % BITS_PER_LONG)); - long guess, oldval = *addr; + unsigned long guess, oldval; addr += nr / BITS_PER_LONG; oldval = *addr; do { -- cgit v1.2.3 From cbe224705e573cead57c4d4bed0c91efb564a344 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 27 Mar 2012 15:21:00 -0400 Subject: arch/tile: use 0 for IRQ_RESCHEDULE instead of 1 This avoids assigning IRQ 0 to PCI devices, because we've seen that doesn't always work well. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/irq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/tile/include/asm/irq.h b/arch/tile/include/asm/irq.h index f80f8ceabc67..33cff9a3058b 100644 --- a/arch/tile/include/asm/irq.h +++ b/arch/tile/include/asm/irq.h @@ -21,7 +21,7 @@ #define NR_IRQS 32 /* IRQ numbers used for linux IPIs. */ -#define IRQ_RESCHEDULE 1 +#define IRQ_RESCHEDULE 0 #define irq_canonicalize(irq) (irq) -- cgit v1.2.3 From b287f69676a34a9fc341de4d79a9c74e1959dec6 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 29 Mar 2012 14:02:52 -0400 Subject: arch/tile: avoid false corrupt frame warning in early boot With lockstat we can end up trying to get a backtrace before "high_memory" is initialized, so don't worry about range testing if it is zero. Signed-off-by: Chris Metcalf --- arch/tile/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index ee01b1c683e4..2d5ef617bb39 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -286,7 +286,7 @@ struct task_struct *validate_current(void) static struct task_struct corrupt = { .comm = "" }; struct task_struct *tsk = current; if (unlikely((unsigned long)tsk < PAGE_OFFSET || - (void *)tsk > high_memory || + (high_memory && (void *)tsk > high_memory) || ((unsigned long)tsk & (__alignof__(*tsk) - 1)) != 0)) { pr_err("Corrupt 'current' %p (sp %#lx)\n", tsk, stack_pointer); tsk = &corrupt; -- cgit v1.2.3 From efb734d8ed040b053f53fd53589ed5d9c9b5cd04 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 29 Mar 2012 14:05:04 -0400 Subject: arch/tile: make sure to build memcpy_user_64 without frame pointer Add a comment explaining why this is important, and add a CFLAGS_REMOVE clause to the Makefile to make sure it happens. Signed-off-by: Chris Metcalf --- arch/tile/lib/Makefile | 1 + arch/tile/lib/memcpy_user_64.c | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile index 0c26086ecbef..985f59858234 100644 --- a/arch/tile/lib/Makefile +++ b/arch/tile/lib/Makefile @@ -7,6 +7,7 @@ lib-y = cacheflush.o checksum.o cpumask.o delay.o uaccess.o \ strchr_$(BITS).o strlen_$(BITS).o ifeq ($(CONFIG_TILEGX),y) +CFLAGS_REMOVE_memcpy_user_64.o = -fno-omit-frame-pointer lib-y += memcpy_user_64.o else lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o diff --git a/arch/tile/lib/memcpy_user_64.c b/arch/tile/lib/memcpy_user_64.c index 4763b3aff1cc..37440caa7370 100644 --- a/arch/tile/lib/memcpy_user_64.c +++ b/arch/tile/lib/memcpy_user_64.c @@ -14,7 +14,13 @@ * Do memcpy(), but trap and return "n" when a load or store faults. * * Note: this idiom only works when memcpy() compiles to a leaf function. - * If "sp" is updated during memcpy, the "jrp lr" will be incorrect. + * Here leaf function not only means it does not have calls, but also + * requires no stack operations (sp, stack frame pointer) and no + * use of callee-saved registers, else "jrp lr" will be incorrect since + * unwinding stack frame is bypassed. Since memcpy() is not complex so + * these conditions are satisfied here, but we need to be careful when + * modifying this file. This is not a clean solution but is the best + * one so far. * * Also note that we are capturing "n" from the containing scope here. */ -- cgit v1.2.3 From 5f639fdcd8c186c8128c616e94a7e7b159c968ae Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 29 Mar 2012 14:06:14 -0400 Subject: arch/tile: various bugs in stack backtracer Fix a long-standing bug in the stack backtracer where we would print garbage to the console instead of kernel function names, if the kernel wasn't built with symbol support (e.g. mboot). Make sure to tag every line of userspace backtrace output if we actually have the mmap_sem, since that way if there's no tag, we know that it's because we couldn't trylock the semaphore. Stop doing a TLB flush and examining page tables during backtrace. Instead, just trust that __copy_from_user_inatomic() will properly fault and return a failure, which it should do in all cases. Fix a latent bug where the backtracer would directly examine a signal context in user space, rather than copying it safely to kernel memory first. This meant that a race with another thread could potentially have caused a kernel panic. Guard against unaligned sp when trying to restart backtrace at an interrupt or signal handler point in the kernel backtracer. Report kernel symbolic information for the call instruction rather than for the following instruction. We still report the actual numeric address corresponding to the instruction after the call, for the sake of consistency with the normal expectations for stack backtracers. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/stack.h | 1 - arch/tile/kernel/stack.c | 231 ++++++++++++++++++++---------------------- 2 files changed, 112 insertions(+), 120 deletions(-) (limited to 'arch') diff --git a/arch/tile/include/asm/stack.h b/arch/tile/include/asm/stack.h index 4d97a2db932e..0e9d382a2d45 100644 --- a/arch/tile/include/asm/stack.h +++ b/arch/tile/include/asm/stack.h @@ -25,7 +25,6 @@ struct KBacktraceIterator { BacktraceIterator it; struct task_struct *task; /* task we are backtracing */ - pte_t *pgtable; /* page table for user space access */ int end; /* iteration complete. */ int new_context; /* new context is starting */ int profile; /* profiling, so stop on async intrpt */ diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index 0be6b0109ce0..b2f44c28dda6 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -21,9 +21,10 @@ #include #include #include +#include +#include #include #include -#include #include #include #include @@ -45,72 +46,23 @@ static int in_kernel_stack(struct KBacktraceIterator *kbt, unsigned long sp) return sp >= kstack_base && sp < kstack_base + THREAD_SIZE; } -/* Is address valid for reading? */ -static int valid_address(struct KBacktraceIterator *kbt, unsigned long address) -{ - HV_PTE *l1_pgtable = kbt->pgtable; - HV_PTE *l2_pgtable; - unsigned long pfn; - HV_PTE pte; - struct page *page; - - if (l1_pgtable == NULL) - return 0; /* can't read user space in other tasks */ - -#ifdef CONFIG_64BIT - /* Find the real l1_pgtable by looking in the l0_pgtable. */ - pte = l1_pgtable[HV_L0_INDEX(address)]; - if (!hv_pte_get_present(pte)) - return 0; - pfn = hv_pte_get_pfn(pte); - if (pte_huge(pte)) { - if (!pfn_valid(pfn)) { - pr_err("L0 huge page has bad pfn %#lx\n", pfn); - return 0; - } - return hv_pte_get_present(pte) && hv_pte_get_readable(pte); - } - page = pfn_to_page(pfn); - BUG_ON(PageHighMem(page)); /* No HIGHMEM on 64-bit. */ - l1_pgtable = (HV_PTE *)pfn_to_kaddr(pfn); -#endif - pte = l1_pgtable[HV_L1_INDEX(address)]; - if (!hv_pte_get_present(pte)) - return 0; - pfn = hv_pte_get_pfn(pte); - if (pte_huge(pte)) { - if (!pfn_valid(pfn)) { - pr_err("huge page has bad pfn %#lx\n", pfn); - return 0; - } - return hv_pte_get_present(pte) && hv_pte_get_readable(pte); - } - - page = pfn_to_page(pfn); - if (PageHighMem(page)) { - pr_err("L2 page table not in LOWMEM (%#llx)\n", - HV_PFN_TO_CPA(pfn)); - return 0; - } - l2_pgtable = (HV_PTE *)pfn_to_kaddr(pfn); - pte = l2_pgtable[HV_L2_INDEX(address)]; - return hv_pte_get_present(pte) && hv_pte_get_readable(pte); -} - /* Callback for backtracer; basically a glorified memcpy */ static bool read_memory_func(void *result, unsigned long address, unsigned int size, void *vkbt) { int retval; struct KBacktraceIterator *kbt = (struct KBacktraceIterator *)vkbt; + + if (address == 0) + return 0; if (__kernel_text_address(address)) { /* OK to read kernel code. */ } else if (address >= PAGE_OFFSET) { /* We only tolerate kernel-space reads of this task's stack */ if (!in_kernel_stack(kbt, address)) return 0; - } else if (!valid_address(kbt, address)) { - return 0; /* invalid user-space address */ + } else if (!kbt->is_current) { + return 0; /* can't read from other user address spaces */ } pagefault_disable(); retval = __copy_from_user_inatomic(result, @@ -128,6 +80,8 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt) unsigned long sp = kbt->it.sp; struct pt_regs *p; + if (sp % sizeof(long) != 0) + return NULL; if (!in_kernel_stack(kbt, sp)) return NULL; if (!in_kernel_stack(kbt, sp + C_ABI_SAVE_AREA_SIZE + PTREGS_SIZE-1)) @@ -170,27 +124,27 @@ static int is_sigreturn(unsigned long pc) } /* Return a pt_regs pointer for a valid signal handler frame */ -static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt) +static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt, + struct rt_sigframe* kframe) { BacktraceIterator *b = &kbt->it; - if (b->pc == VDSO_BASE) { - struct rt_sigframe *frame; - unsigned long sigframe_top = - b->sp + sizeof(struct rt_sigframe) - 1; - if (!valid_address(kbt, b->sp) || - !valid_address(kbt, sigframe_top)) { - if (kbt->verbose) - pr_err(" (odd signal: sp %#lx?)\n", - (unsigned long)(b->sp)); + if (b->pc == VDSO_BASE && b->sp < PAGE_OFFSET && + b->sp % sizeof(long) == 0) { + int retval; + pagefault_disable(); + retval = __copy_from_user_inatomic( + kframe, (void __user __force *)b->sp, + sizeof(*kframe)); + pagefault_enable(); + if (retval != 0 || + (unsigned int)(kframe->info.si_signo) >= _NSIG) return NULL; - } - frame = (struct rt_sigframe *)b->sp; if (kbt->verbose) { pr_err(" \n", - frame->info.si_signo); + kframe->info.si_signo); } - return (struct pt_regs *)&frame->uc.uc_mcontext; + return (struct pt_regs *)&kframe->uc.uc_mcontext; } return NULL; } @@ -203,10 +157,11 @@ static int KBacktraceIterator_is_sigreturn(struct KBacktraceIterator *kbt) static int KBacktraceIterator_restart(struct KBacktraceIterator *kbt) { struct pt_regs *p; + struct rt_sigframe kframe; p = valid_fault_handler(kbt); if (p == NULL) - p = valid_sigframe(kbt); + p = valid_sigframe(kbt, &kframe); if (p == NULL) return 0; backtrace_init(&kbt->it, read_memory_func, kbt, @@ -266,41 +221,19 @@ void KBacktraceIterator_init(struct KBacktraceIterator *kbt, /* * Set up callback information. We grab the kernel stack base - * so we will allow reads of that address range, and if we're - * asking about the current process we grab the page table - * so we can check user accesses before trying to read them. - * We flush the TLB to avoid any weird skew issues. + * so we will allow reads of that address range. */ - is_current = (t == NULL); + is_current = (t == NULL || t == current); kbt->is_current = is_current; if (is_current) t = validate_current(); kbt->task = t; - kbt->pgtable = NULL; kbt->verbose = 0; /* override in caller if desired */ kbt->profile = 0; /* override in caller if desired */ kbt->end = KBT_ONGOING; - kbt->new_context = 0; - if (is_current) { - HV_PhysAddr pgdir_pa = hv_inquire_context().page_table; - if (pgdir_pa == (unsigned long)swapper_pg_dir - PAGE_OFFSET) { - /* - * Not just an optimization: this also allows - * this to work at all before va/pa mappings - * are set up. - */ - kbt->pgtable = swapper_pg_dir; - } else { - struct page *page = pfn_to_page(PFN_DOWN(pgdir_pa)); - if (!PageHighMem(page)) - kbt->pgtable = __va(pgdir_pa); - else - pr_err("page table not in LOWMEM" - " (%#llx)\n", pgdir_pa); - } - local_flush_tlb_all(); + kbt->new_context = 1; + if (is_current) validate_stack(regs); - } if (regs == NULL) { if (is_current || t->state == TASK_RUNNING) { @@ -346,6 +279,78 @@ void KBacktraceIterator_next(struct KBacktraceIterator *kbt) } EXPORT_SYMBOL(KBacktraceIterator_next); +static void describe_addr(struct KBacktraceIterator *kbt, + unsigned long address, + int have_mmap_sem, char *buf, size_t bufsize) +{ + struct vm_area_struct *vma; + size_t namelen, remaining; + unsigned long size, offset, adjust; + char *p, *modname; + const char *name; + int rc; + + /* + * Look one byte back for every caller frame (i.e. those that + * aren't a new context) so we look up symbol data for the + * call itself, not the following instruction, which may be on + * a different line (or in a different function). + */ + adjust = !kbt->new_context; + address -= adjust; + + if (address >= PAGE_OFFSET) { + /* Handle kernel symbols. */ + BUG_ON(bufsize < KSYM_NAME_LEN); + name = kallsyms_lookup(address, &size, &offset, + &modname, buf); + if (name == NULL) { + buf[0] = '\0'; + return; + } + namelen = strlen(buf); + remaining = (bufsize - 1) - namelen; + p = buf + namelen; + rc = snprintf(p, remaining, "+%#lx/%#lx ", + offset + adjust, size); + if (modname && rc < remaining) + snprintf(p + rc, remaining - rc, "[%s] ", modname); + buf[bufsize-1] = '\0'; + return; + } + + /* If we don't have the mmap_sem, we can't show any more info. */ + buf[0] = '\0'; + if (!have_mmap_sem) + return; + + /* Find vma info. */ + vma = find_vma(kbt->task->mm, address); + if (vma == NULL || address < vma->vm_start) { + snprintf(buf, bufsize, "[unmapped address] "); + return; + } + + if (vma->vm_file) { + char *s; + p = d_path(&vma->vm_file->f_path, buf, bufsize); + if (IS_ERR(p)) + p = "?"; + s = strrchr(p, '/'); + if (s) + p = s+1; + } else { + p = "anon"; + } + + /* Generate a string description of the vma info. */ + namelen = strlen(p); + remaining = (bufsize - 1) - namelen; + memmove(buf, p, namelen); + snprintf(buf + namelen, remaining, "[%lx+%lx] ", + vma->vm_start, vma->vm_end - vma->vm_start); +} + /* * This method wraps the backtracer's more generic support. * It is only invoked from the architecture-specific code; show_stack() @@ -354,6 +359,7 @@ EXPORT_SYMBOL(KBacktraceIterator_next); void tile_show_stack(struct KBacktraceIterator *kbt, int headers) { int i; + int have_mmap_sem = 0; if (headers) { /* @@ -370,31 +376,16 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers) kbt->verbose = 1; i = 0; for (; !KBacktraceIterator_end(kbt); KBacktraceIterator_next(kbt)) { - char *modname; - const char *name; - unsigned long address = kbt->it.pc; - unsigned long offset, size; char namebuf[KSYM_NAME_LEN+100]; + unsigned long address = kbt->it.pc; - if (address >= PAGE_OFFSET) - name = kallsyms_lookup(address, &size, &offset, - &modname, namebuf); - else - name = NULL; - - if (!name) - namebuf[0] = '\0'; - else { - size_t namelen = strlen(namebuf); - size_t remaining = (sizeof(namebuf) - 1) - namelen; - char *p = namebuf + namelen; - int rc = snprintf(p, remaining, "+%#lx/%#lx ", - offset, size); - if (modname && rc < remaining) - snprintf(p + rc, remaining - rc, - "[%s] ", modname); - namebuf[sizeof(namebuf)-1] = '\0'; - } + /* Try to acquire the mmap_sem as we pass into userspace. */ + if (address < PAGE_OFFSET && !have_mmap_sem && kbt->task->mm) + have_mmap_sem = + down_read_trylock(&kbt->task->mm->mmap_sem); + + describe_addr(kbt, address, have_mmap_sem, + namebuf, sizeof(namebuf)); pr_err(" frame %d: 0x%lx %s(sp 0x%lx)\n", i++, address, namebuf, (unsigned long)(kbt->it.sp)); @@ -409,6 +400,8 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers) pr_err("Stack dump stopped; next frame identical to this one\n"); if (headers) pr_err("Stack dump complete\n"); + if (have_mmap_sem) + up_read(&kbt->task->mm->mmap_sem); } EXPORT_SYMBOL(tile_show_stack); -- cgit v1.2.3 From e17235382dbb05f70146e141e4b780fd069050dc Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 29 Mar 2012 14:52:00 -0400 Subject: arch/tile: work around a hardware issue with the return-address stack In certain circumstances we need to do a bunch of jump-and-link instructions to fill the hardware return-address stack with nonzero values. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/traps.h | 6 +++++- arch/tile/kernel/intvec_64.S | 12 ++++++++++++ arch/tile/kernel/traps.c | 6 +++++- 3 files changed, 22 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h index 5f20f920f932..e28c3df4176a 100644 --- a/arch/tile/include/asm/traps.h +++ b/arch/tile/include/asm/traps.h @@ -64,7 +64,11 @@ void do_breakpoint(struct pt_regs *, int fault_num); #ifdef __tilegx__ +/* kernel/single_step.c */ void gx_singlestep_handle(struct pt_regs *, int fault_num); + +/* kernel/intvec_64.S */ +void fill_ra_stack(void); #endif -#endif /* _ASM_TILE_SYSCALLS_H */ +#endif /* _ASM_TILE_TRAPS_H */ diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index 79c93e10ba27..2c181c864ef7 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S @@ -1156,6 +1156,18 @@ int_unalign: push_extra_callee_saves r0 j do_trap +/* Fill the return address stack with nonzero entries. */ +STD_ENTRY(fill_ra_stack) + { + move r0, lr + jal 1f + } +1: jal 2f +2: jal 3f +3: jal 4f +4: jrp r0 + STD_ENDPROC(fill_ra_stack) + /* Include .intrpt1 array of interrupt vectors */ .section ".intrpt1", "ax" diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c index 2bb6602a1ee7..32acfd9e23d0 100644 --- a/arch/tile/kernel/traps.c +++ b/arch/tile/kernel/traps.c @@ -289,7 +289,10 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, address = regs->pc; break; #ifdef __tilegx__ - case INT_ILL_TRANS: + case INT_ILL_TRANS: { + /* Avoid a hardware erratum with the return address stack. */ + fill_ra_stack(); + signo = SIGSEGV; code = SEGV_MAPERR; if (reason & SPR_ILL_TRANS_REASON__I_STREAM_VA_RMASK) @@ -297,6 +300,7 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, else address = 0; /* FIXME: GX: single-step for address */ break; + } #endif default: panic("Unexpected do_trap interrupt number %d", fault_num); -- cgit v1.2.3 From a714ffff36a581756ec3b001f47e8e5e96a9fa0e Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 29 Mar 2012 15:23:54 -0400 Subject: arch/tile: fix up some minor trap handling issues We now respond to MEM_ERROR traps (e.g. an atomic instruction to non-cacheable memory) with a SIGBUS. We also no longer generate a console crash message if a user process die due to a SIGTRAP. Signed-off-by: Chris Metcalf --- arch/tile/kernel/intvec_64.S | 2 +- arch/tile/kernel/traps.c | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index 2c181c864ef7..005535d108c1 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S @@ -1178,7 +1178,7 @@ STD_ENTRY(fill_ra_stack) #define do_hardwall_trap bad_intr #endif - int_hand INT_MEM_ERROR, MEM_ERROR, bad_intr + int_hand INT_MEM_ERROR, MEM_ERROR, do_trap int_hand INT_SINGLE_STEP_3, SINGLE_STEP_3, bad_intr #if CONFIG_KERNEL_PL == 2 int_hand INT_SINGLE_STEP_2, SINGLE_STEP_2, gx_singlestep_handle diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c index 32acfd9e23d0..73cff814ac57 100644 --- a/arch/tile/kernel/traps.c +++ b/arch/tile/kernel/traps.c @@ -200,7 +200,7 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, { siginfo_t info = { 0 }; int signo, code; - unsigned long address; + unsigned long address = 0; bundle_bits instr; /* Re-enable interrupts. */ @@ -223,6 +223,10 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, } switch (fault_num) { + case INT_MEM_ERROR: + signo = SIGBUS; + code = BUS_OBJERR; + break; case INT_ILL: if (copy_from_user(&instr, (void __user *)regs->pc, sizeof(instr))) { @@ -312,7 +316,8 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, info.si_addr = (void __user *)address; if (signo == SIGILL) info.si_trapno = fault_num; - trace_unhandled_signal("trap", regs, address, signo); + if (signo != SIGTRAP) + trace_unhandled_signal("trap", regs, address, signo); force_sig_info(signo, &info, current); } -- cgit v1.2.3 From 51bcdf8879f7920946c90087e6160680812a44bd Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 29 Mar 2012 15:29:28 -0400 Subject: arch/tile: fix a couple of comments that needed updating Not associated with any code changes, so I'm just lumping these comment changes into a commit by themselves. Signed-off-by: Chris Metcalf --- arch/tile/kernel/setup.c | 11 +++++++++-- arch/tile/mm/fault.c | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 5f85d8b34dbb..023e2e1cf7f8 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -913,6 +913,13 @@ void __cpuinit setup_cpu(int boot) #ifdef CONFIG_BLK_DEV_INITRD +/* + * Note that the kernel can potentially support other compression + * techniques than gz, though we don't do so by default. If we ever + * decide to do so we can either look for other filename extensions, + * or just allow a file with this name to be compressed with an + * arbitrary compressor (somewhat counterintuitively). + */ static int __initdata set_initramfs_file; static char __initdata initramfs_file[128] = "initramfs.cpio.gz"; @@ -928,9 +935,9 @@ static int __init setup_initramfs_file(char *str) early_param("initramfs_file", setup_initramfs_file); /* - * We look for an additional "initramfs.cpio.gz" file in the hvfs. + * We look for an "initramfs.cpio.gz" file in the hvfs. * If there is one, we allocate some memory for it and it will be - * unpacked to the initramfs after any built-in initramfs_data. + * unpacked to the initramfs. */ static void __init load_hv_initrd(void) { diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index cba30e9547b4..cac17c4f2ecf 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -130,7 +130,7 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) } /* - * Handle a fault on the vmalloc or module mapping area + * Handle a fault on the vmalloc area. */ static inline int vmalloc_fault(pgd_t *pgd, unsigned long address) { -- cgit v1.2.3 From 6731aa9eae3e94b094a44d2aea02387a39202fbc Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 29 Mar 2012 15:30:19 -0400 Subject: arch/tile/Makefile: use KCFLAGS when figuring out the libgcc path. Signed-off-by: Chris Metcalf --- arch/tile/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/tile/Makefile b/arch/tile/Makefile index 17acce70569b..5e4d3b98d711 100644 --- a/arch/tile/Makefile +++ b/arch/tile/Makefile @@ -30,7 +30,8 @@ ifneq ($(CONFIG_DEBUG_EXTRA_FLAGS),"") KBUILD_CFLAGS += $(CONFIG_DEBUG_EXTRA_FLAGS) endif -LIBGCC_PATH := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) +LIBGCC_PATH := \ + $(shell $(CC) $(KBUILD_CFLAGS) $(KCFLAGS) -print-libgcc-file-name) # Provide the path to use for "make defconfig". KBUILD_DEFCONFIG := $(ARCH)_defconfig -- cgit v1.2.3 From 48292738d06d7b46d861652ef59bd03be931c2c9 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 29 Mar 2012 15:34:52 -0400 Subject: arch/tile: don't wait for migrating PTEs in an NMI handler Doing so raises the possibility of self-deadlock if we are waiting for a backtrace for an oprofile or perf interrupt while we are in the middle of migrating our own stack page. Signed-off-by: Chris Metcalf --- arch/tile/mm/fault.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index cac17c4f2ecf..a1da473c8555 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -203,9 +203,14 @@ static pgd_t *get_current_pgd(void) * interrupt or a critical region, and must do as little as possible. * Similarly, we can't use atomic ops here, since we may be handling a * fault caused by an atomic op access. + * + * If we find a migrating PTE while we're in an NMI context, and we're + * at a PC that has a registered exception handler, we don't wait, + * since this thread may (e.g.) have been interrupted while migrating + * its own stack, which would then cause us to self-deadlock. */ static int handle_migrating_pte(pgd_t *pgd, int fault_num, - unsigned long address, + unsigned long address, unsigned long pc, int is_kernel_mode, int write) { pud_t *pud; @@ -227,6 +232,8 @@ static int handle_migrating_pte(pgd_t *pgd, int fault_num, pte_offset_kernel(pmd, address); pteval = *pte; if (pte_migrating(pteval)) { + if (in_nmi() && search_exception_tables(pc)) + return 0; wait_for_migration(pte); return 1; } @@ -300,7 +307,7 @@ static int handle_page_fault(struct pt_regs *regs, * rather than trying to patch up the existing PTE. */ pgd = get_current_pgd(); - if (handle_migrating_pte(pgd, fault_num, address, + if (handle_migrating_pte(pgd, fault_num, address, regs->pc, is_kernel_mode, write)) return 1; @@ -665,7 +672,7 @@ struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num, */ if (fault_num == INT_DTLB_ACCESS) write = 1; - if (handle_migrating_pte(pgd, fault_num, address, 1, write)) + if (handle_migrating_pte(pgd, fault_num, address, pc, 1, write)) return state; /* Return zero so that we continue on with normal fault handling. */ -- cgit v1.2.3 From 12400f1f22ad7651efa1d3d06dd8b6b178d19ea3 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 29 Mar 2012 15:36:53 -0400 Subject: arch/tile: don't set the homecache of a PTE unless appropriate We make sure not to try to set the home for an MMIO PTE (on tilegx) or a PTE that isn't referencing memory managed by Linux. Signed-off-by: Chris Metcalf --- arch/tile/mm/pgtable.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index 87303693a072..6b9a109c3e31 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c @@ -469,10 +469,18 @@ void __set_pte(pte_t *ptep, pte_t pte) void set_pte(pte_t *ptep, pte_t pte) { - struct page *page = pfn_to_page(pte_pfn(pte)); - - /* Update the home of a PTE if necessary */ - pte = pte_set_home(pte, page_home(page)); + if (pte_present(pte) && + (!CHIP_HAS_MMIO() || hv_pte_get_mode(pte) != HV_PTE_MODE_MMIO)) { + /* The PTE actually references physical memory. */ + unsigned long pfn = pte_pfn(pte); + if (pfn_valid(pfn)) { + /* Update the home of the PTE from the struct page. */ + pte = pte_set_home(pte, page_home(pfn_to_page(pfn))); + } else if (hv_pte_get_mode(pte) == 0) { + /* remap_pfn_range(), etc, must supply PTE mode. */ + panic("set_pte(): out-of-range PFN and mode 0\n"); + } + } __set_pte(ptep, pte); } -- cgit v1.2.3 From b230ff2d5c53bb79e1121554cd3c827e5c1b70fd Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 29 Mar 2012 15:40:50 -0400 Subject: arch/tile: don't enable irqs unconditionally in page fault handler If we took a page fault while we had interrupts disabled, we shouldn't enable them in the page fault handler. Signed-off-by: Chris Metcalf --- arch/tile/mm/fault.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index a1da473c8555..22e58f51ed23 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -342,9 +342,12 @@ static int handle_page_fault(struct pt_regs *regs, /* * If we're trying to touch user-space addresses, we must * be either at PL0, or else with interrupts enabled in the - * kernel, so either way we can re-enable interrupts here. + * kernel, so either way we can re-enable interrupts here + * unless we are doing atomic access to user space with + * interrupts disabled. */ - local_irq_enable(); + if (!(regs->flags & PT_FLAGS_DISABLE_IRQ)) + local_irq_enable(); mm = tsk->mm; -- cgit v1.2.3 From 7a7039ee71811222310b431aee246eb78dd0d401 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 29 Mar 2012 15:42:27 -0400 Subject: arch/tile: fix bug in loading kernels larger than 16 MB Previously we only handled kernels up to a single huge page in size. Now we create additional PTEs appropriately. Signed-off-by: Chris Metcalf --- arch/tile/mm/init.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index 830c4908ea76..8400d3fb9e0a 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -557,6 +557,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) address = MEM_SV_INTRPT; pmd = get_pmd(pgtables, address); + pfn = 0; /* code starts at PA 0 */ if (ktext_small) { /* Allocate an L2 PTE for the kernel text */ int cpu = 0; @@ -579,10 +580,15 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) } BUG_ON(address != (unsigned long)_stext); - pfn = 0; /* code starts at PA 0 */ - pte = alloc_pte(); - for (pte_ofs = 0; address < (unsigned long)_einittext; - pfn++, pte_ofs++, address += PAGE_SIZE) { + pte = NULL; + for (; address < (unsigned long)_einittext; + pfn++, address += PAGE_SIZE) { + pte_ofs = pte_index(address); + if (pte_ofs == 0) { + if (pte) + assign_pte(pmd++, pte); + pte = alloc_pte(); + } if (!ktext_local) { prot = set_remote_cache_cpu(prot, cpu); cpu = cpumask_next(cpu, &ktext_mask); @@ -591,7 +597,8 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) } pte[pte_ofs] = pfn_pte(pfn, prot); } - assign_pte(pmd, pte); + if (pte) + assign_pte(pmd, pte); } else { pte_t pteval = pfn_pte(0, PAGE_KERNEL_EXEC); pteval = pte_mkhuge(pteval); @@ -614,7 +621,9 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) else pteval = hv_pte_set_mode(pteval, HV_PTE_MODE_CACHE_NO_L3); - *(pte_t *)pmd = pteval; + for (; address < (unsigned long)_einittext; + pfn += PFN_DOWN(HPAGE_SIZE), address += HPAGE_SIZE) + *(pte_t *)(pmd++) = pfn_pte(pfn, pteval); } /* Set swapper_pgprot here so it is flushed to memory right away. */ -- cgit v1.2.3 From 444eef1ba40546690a77b2af4cba7d4561e7bba5 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 29 Mar 2012 15:43:20 -0400 Subject: arch/tile: fix bug in delay_backoff() We were carefully computing a value to use for the number of loops to spin for, and then ignoring it. Signed-off-by: Chris Metcalf --- arch/tile/lib/spinlock_common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/tile/lib/spinlock_common.h b/arch/tile/lib/spinlock_common.h index c10109809132..6ac37509faca 100644 --- a/arch/tile/lib/spinlock_common.h +++ b/arch/tile/lib/spinlock_common.h @@ -60,5 +60,5 @@ static void delay_backoff(int iterations) loops += __insn_crc32_32(stack_pointer, get_cycles_low()) & (loops - 1); - relax(1 << exponent); + relax(loops); } -- cgit v1.2.3 From 5f220704127ae70db519fabbda4ece649eadac7f Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 29 Mar 2012 15:44:10 -0400 Subject: arch/tile: don't leak kernel memory when we unload modules We were failing to track the memory when we allocated it. Signed-off-by: Chris Metcalf --- arch/tile/kernel/module.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/tile/kernel/module.c b/arch/tile/kernel/module.c index b90ab9925674..98d476920106 100644 --- a/arch/tile/kernel/module.c +++ b/arch/tile/kernel/module.c @@ -67,6 +67,8 @@ void *module_alloc(unsigned long size) area = __get_vm_area(size, VM_ALLOC, MEM_MODULE_START, MEM_MODULE_END); if (!area) goto error; + area->nr_pages = npages; + area->pages = pages; if (map_vm_area(area, prot_rwx, &pages)) { vunmap(area->addr); -- cgit v1.2.3 From 719ea79e330c5e1a17fb7e4cf352a81e4c84cff5 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 29 Mar 2012 15:50:08 -0400 Subject: arch/tile: fix up locking in pgtable.c slightly We should be holding the init_mm.page_table_lock in shatter_huge_page() since we are modifying the kernel page tables. Then, only if we are walking the other root page tables to update them, do we want to take the pgd_lock. Add a comment about taking the pgd_lock that we always do it with interrupts disabled and therefore are not at risk from the tlbflush IPI deadlock as is seen on x86. Signed-off-by: Chris Metcalf --- arch/tile/mm/pgtable.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index 6b9a109c3e31..2410aa899b3e 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c @@ -177,14 +177,10 @@ void shatter_huge_page(unsigned long addr) if (!pmd_huge_page(*pmd)) return; - /* - * Grab the pgd_lock, since we may need it to walk the pgd_list, - * and since we need some kind of lock here to avoid races. - */ - spin_lock_irqsave(&pgd_lock, flags); + spin_lock_irqsave(&init_mm.page_table_lock, flags); if (!pmd_huge_page(*pmd)) { /* Lost the race to convert the huge page. */ - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock_irqrestore(&init_mm.page_table_lock, flags); return; } @@ -194,6 +190,7 @@ void shatter_huge_page(unsigned long addr) #ifdef __PAGETABLE_PMD_FOLDED /* Walk every pgd on the system and update the pmd there. */ + spin_lock(&pgd_lock); list_for_each(pos, &pgd_list) { pmd_t *copy_pmd; pgd = list_to_pgd(pos) + pgd_index(addr); @@ -201,6 +198,7 @@ void shatter_huge_page(unsigned long addr) copy_pmd = pmd_offset(pud, addr); __set_pmd(copy_pmd, *pmd); } + spin_unlock(&pgd_lock); #endif /* Tell every cpu to notice the change. */ @@ -208,7 +206,7 @@ void shatter_huge_page(unsigned long addr) cpu_possible_mask, NULL, 0); /* Hold the lock until the TLB flush is finished to avoid races. */ - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock_irqrestore(&init_mm.page_table_lock, flags); } /* @@ -217,9 +215,13 @@ void shatter_huge_page(unsigned long addr) * against pageattr.c; it is the unique case in which a valid change * of kernel pagetables can't be lazily synchronized by vmalloc faults. * vmalloc faults work because attached pagetables are never freed. - * The locking scheme was chosen on the basis of manfred's - * recommendations and having no core impact whatsoever. - * -- wli + * + * The lock is always taken with interrupts disabled, unlike on x86 + * and other platforms, because we need to take the lock in + * shatter_huge_page(), which may be called from an interrupt context. + * We are not at risk from the tlbflush IPI deadlock that was seen on + * x86, since we use the flush_remote() API to have the hypervisor do + * the TLB flushes regardless of irq disabling. */ DEFINE_SPINLOCK(pgd_lock); LIST_HEAD(pgd_list); -- cgit v1.2.3 From bfffe79bc29a9c4c817d5f51590961220e26db1a Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 29 Mar 2012 15:56:18 -0400 Subject: arch/tile: use proper memparse() for "maxmem" options This is more standard and avoids having to remember what units the options actually take. Signed-off-by: Chris Metcalf --- arch/tile/kernel/setup.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 023e2e1cf7f8..f3598e7a47fa 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -103,13 +103,11 @@ unsigned long __initdata pci_reserve_end_pfn = -1U; static int __init setup_maxmem(char *str) { - long maxmem_mb; - if (str == NULL || strict_strtol(str, 0, &maxmem_mb) != 0 || - maxmem_mb == 0) + unsigned long long maxmem; + if (str == NULL || (maxmem = memparse(str, NULL)) == 0) return -EINVAL; - maxmem_pfn = (maxmem_mb >> (HPAGE_SHIFT - 20)) << - (HPAGE_SHIFT - PAGE_SHIFT); + maxmem_pfn = (maxmem >> HPAGE_SHIFT) << (HPAGE_SHIFT - PAGE_SHIFT); pr_info("Forcing RAM used to no more than %dMB\n", maxmem_pfn >> (20 - PAGE_SHIFT)); return 0; @@ -119,14 +117,15 @@ early_param("maxmem", setup_maxmem); static int __init setup_maxnodemem(char *str) { char *endp; - long maxnodemem_mb, node; + unsigned long long maxnodemem; + long node; node = str ? simple_strtoul(str, &endp, 0) : INT_MAX; - if (node >= MAX_NUMNODES || *endp != ':' || - strict_strtol(endp+1, 0, &maxnodemem_mb) != 0) + if (node >= MAX_NUMNODES || *endp != ':') return -EINVAL; - maxnodemem_pfn[node] = (maxnodemem_mb >> (HPAGE_SHIFT - 20)) << + maxnodemem = memparse(endp+1, NULL); + maxnodemem_pfn[node] = (maxnodemem >> HPAGE_SHIFT) << (HPAGE_SHIFT - PAGE_SHIFT); pr_info("Forcing RAM used on node %ld to no more than %dMB\n", node, maxnodemem_pfn[node] >> (20 - PAGE_SHIFT)); -- cgit v1.2.3 From 8c92ba6c327ee5089dec1e92eaa82927bee63d6d Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 29 Mar 2012 15:57:18 -0400 Subject: arch/tile: add "nop" after "nap" to help GX idle power draw This avoids the hardware istream prefetcher doing unnecessary work. Signed-off-by: Chris Metcalf --- arch/tile/kernel/entry.S | 2 ++ arch/tile/kernel/smp.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S index 431e9ae60488..ec91568df880 100644 --- a/arch/tile/kernel/entry.S +++ b/arch/tile/kernel/entry.S @@ -85,6 +85,7 @@ STD_ENTRY(cpu_idle_on_new_stack) /* Loop forever on a nap during SMP boot. */ STD_ENTRY(smp_nap) nap + nop /* avoid provoking the icache prefetch with a jump */ j smp_nap /* we are not architecturally guaranteed not to exit nap */ jrp lr /* clue in the backtracer */ STD_ENDPROC(smp_nap) @@ -105,5 +106,6 @@ STD_ENTRY(_cpu_idle) .global _cpu_idle_nap _cpu_idle_nap: nap + nop /* avoid provoking the icache prefetch with a jump */ jrp lr STD_ENDPROC(_cpu_idle) diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c index a44e103c5a63..7b6df8c27709 100644 --- a/arch/tile/kernel/smp.c +++ b/arch/tile/kernel/smp.c @@ -103,7 +103,7 @@ static void smp_stop_cpu_interrupt(void) set_cpu_online(smp_processor_id(), 0); arch_local_irq_disable_all(); for (;;) - asm("nap"); + asm("nap; nop"); } /* This function calls the 'stop' function on all other CPUs in the system. */ -- cgit v1.2.3 From cb210ee3a81afab7c64777635cc18899a2bdd9a5 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 29 Mar 2012 15:59:11 -0400 Subject: arch/tile: implement panic_smp_self_stop() This allows the later-panicking tiles to wait in a lower power state until they get interrupted with an smp_send_stop(). Signed-off-by: Chris Metcalf --- arch/tile/kernel/smp.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c index 7b6df8c27709..91da0f721958 100644 --- a/arch/tile/kernel/smp.c +++ b/arch/tile/kernel/smp.c @@ -113,6 +113,12 @@ void smp_send_stop(void) send_IPI_allbutself(MSG_TAG_STOP_CPU); } +/* On panic, just wait; we may get an smp_send_stop() later on. */ +void panic_smp_self_stop(void) +{ + while (1) + asm("nap; nop"); +} /* * Dispatch code called from hv_message_intr() for HV_MSG_TILE hv messages. -- cgit v1.2.3 From 2858f856021340f3730fa8639dd520a2e4331f7f Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 29 Mar 2012 16:11:09 -0400 Subject: arch/tile: fix single-stepping over swint1 instructions on tilegx If we are single-stepping and make a syscall, we call ptrace_notify() explicitly on the return path back to user space, since we are returning to a pc value set artificially to the next instruction, and otherwise we won't register that we stepped over the syscall instruction (swint1). Signed-off-by: Chris Metcalf --- arch/tile/kernel/intvec_64.S | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index 005535d108c1..fdff17c70cc8 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -1039,11 +1040,25 @@ handle_syscall: /* Do syscall trace again, if requested. */ ld r30, r31 - andi r30, r30, _TIF_SYSCALL_TRACE - beqzt r30, 1f + andi r0, r30, _TIF_SYSCALL_TRACE + { + andi r0, r30, _TIF_SINGLESTEP + beqzt r0, 1f + } jal do_syscall_trace FEEDBACK_REENTER(handle_syscall) -1: j .Lresume_userspace /* jump into middle of interrupt_return */ + andi r0, r30, _TIF_SINGLESTEP + +1: beqzt r0, 2f + + /* Single stepping -- notify ptrace. */ + { + movei r0, SIGTRAP + jal ptrace_notify + } + FEEDBACK_REENTER(handle_syscall) + +2: j .Lresume_userspace /* jump into middle of interrupt_return */ .Lcompat_syscall: /* -- cgit v1.2.3 From 918cbd38aef83de3a2516299bcb230caf59462a0 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 29 Mar 2012 16:14:40 -0400 Subject: arch/tile: fix pointer cast in cacheflush.c Pragmatically it couldn't be wrong to cast pointers to long to compare them (since all kernel addresses are in the top half of VA space), but it's more correct to cast to unsigned long. Signed-off-by: Chris Metcalf --- arch/tile/lib/cacheflush.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c index 8928aace7a64..6af2b97a6886 100644 --- a/arch/tile/lib/cacheflush.c +++ b/arch/tile/lib/cacheflush.c @@ -109,7 +109,7 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) /* Figure out how far back we need to go. */ base = p - (step_size * (load_count - 2)); - if ((long)base < (long)buffer) + if ((unsigned long)base < (unsigned long)buffer) base = buffer; /* -- cgit v1.2.3 From e81510e0c3800dc730e0c544e3949f7bde368c2b Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 29 Mar 2012 16:19:45 -0400 Subject: arch/tile: export the page_home() function. This avois a bug in modules trying to use the function. Signed-off-by: Chris Metcalf --- arch/tile/mm/homecache.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c index 1cc6ae477c98..499f73770b05 100644 --- a/arch/tile/mm/homecache.c +++ b/arch/tile/mm/homecache.c @@ -394,6 +394,7 @@ int page_home(struct page *page) return pte_to_home(*virt_to_pte(NULL, kva)); } } +EXPORT_SYMBOL(page_home); void homecache_change_page_home(struct page *page, int order, int home) { -- cgit v1.2.3 From b14f21906774be181627412fed5b6b5fae2b53a2 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 30 Mar 2012 15:29:40 -0400 Subject: arch/tile: stop mentioning the "kvm" subdirectory It causes "make clean" to fail, for example. Once we have KVM support complete, we'll reinstate the subdir reference. Signed-off-by: Chris Metcalf --- arch/tile/Makefile | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/tile/Makefile b/arch/tile/Makefile index 5e4d3b98d711..9520bc5a4b7f 100644 --- a/arch/tile/Makefile +++ b/arch/tile/Makefile @@ -54,8 +54,6 @@ libs-y += $(LIBGCC_PATH) # See arch/tile/Kbuild for content of core part of the kernel core-y += arch/tile/ -core-$(CONFIG_KVM) += arch/tile/kvm/ - ifdef TILERA_ROOT INSTALL_PATH ?= $(TILERA_ROOT)/tile/boot endif -- cgit v1.2.3 From ab306cae660e524edbeb8889e4e23d3c97717b9c Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 30 Mar 2012 15:46:29 -0400 Subject: arch/tile: use atomic exchange in arch_write_unlock() This idiom is used elsewhere when we do an unlock by writing a zero, but I missed it here. Using an atomic operation avoids waiting on the write buffer for the unlocking write to be sent to the home cache. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/spinlock_64.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/tile/include/asm/spinlock_64.h b/arch/tile/include/asm/spinlock_64.h index 72be5904e020..5f8b6a095fd8 100644 --- a/arch/tile/include/asm/spinlock_64.h +++ b/arch/tile/include/asm/spinlock_64.h @@ -137,7 +137,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) static inline void arch_write_unlock(arch_rwlock_t *rw) { __insn_mf(); - rw->lock = 0; + __insn_exch4(&rw->lock, 0); /* Avoid waiting in the write buffer. */ } static inline int arch_read_trylock(arch_rwlock_t *rw) -- cgit v1.2.3 From 54229ff359250ce7292dbeb59f157a2d3b67e30c Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 30 Mar 2012 15:47:38 -0400 Subject: arch/tile: fix finv_buffer_remote() for tilegx There were some correctness issues with this code that are now fixed with this change. The change is likely less performant than it could be, but it should no longer be vulnerable to any races with memory operations on the memory network while invalidating a range of memory. This code is run infrequently so performance isn't critical, but correctness definitely is. Signed-off-by: Chris Metcalf --- arch/tile/lib/cacheflush.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c index 6af2b97a6886..db4fb89e12d8 100644 --- a/arch/tile/lib/cacheflush.c +++ b/arch/tile/lib/cacheflush.c @@ -39,7 +39,21 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) { char *p, *base; size_t step_size, load_count; + + /* + * On TILEPro the striping granularity is a fixed 8KB; on + * TILE-Gx it is configurable, and we rely on the fact that + * the hypervisor always configures maximum striping, so that + * bits 9 and 10 of the PA are part of the stripe function, so + * every 512 bytes we hit a striping boundary. + * + */ +#ifdef __tilegx__ + const unsigned long STRIPE_WIDTH = 512; +#else const unsigned long STRIPE_WIDTH = 8192; +#endif + #ifdef __tilegx__ /* * On TILE-Gx, we must disable the dstream prefetcher before doing @@ -74,7 +88,7 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) * memory, that one load would be sufficient, but since we may * be, we also need to back up to the last load issued to * another memory controller, which would be the point where - * we crossed an 8KB boundary (the granularity of striping + * we crossed a "striping" boundary (the granularity of striping * across memory controllers). Keep backing up and doing this * until we are before the beginning of the buffer, or have * hit all the controllers. @@ -88,12 +102,22 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) * every cache line on a full memory stripe on each * controller" that we simply do that, to simplify the logic. * - * FIXME: See bug 9535 for some issues with this code. + * On TILE-Gx the hash-for-home function is much more complex, + * with the upshot being we can't readily guarantee we have + * hit both entries in the 128-entry AMT that were hit by any + * load in the entire range, so we just re-load them all. + * With larger buffers, we may want to consider using a hypervisor + * trap to issue loads directly to each hash-for-home tile for + * each controller (doing it from Linux would trash the TLB). */ if (hfh) { step_size = L2_CACHE_BYTES; +#ifdef __tilegx__ + load_count = (size + L2_CACHE_BYTES - 1) / L2_CACHE_BYTES; +#else load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) * (1 << CHIP_LOG_NUM_MSHIMS()); +#endif } else { step_size = STRIPE_WIDTH; load_count = (1 << CHIP_LOG_NUM_MSHIMS()); -- cgit v1.2.3 From cdd8e16feba87a3fc2bb1885d36f895a2a3288bf Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 30 Mar 2012 16:24:41 -0400 Subject: arch/tile: return SIGBUS for addresses that are unaligned AND invalid Previously we were returning SIGSEGV in this case. It seems cleaner to return SIGBUS since the hardware figures out alignment traps before TLB violations, so SIGBUS is the "more correct" signal. Signed-off-by: Chris Metcalf --- arch/tile/kernel/single_step.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c index bc1eb586e24d..9efbc1391b3c 100644 --- a/arch/tile/kernel/single_step.c +++ b/arch/tile/kernel/single_step.c @@ -153,6 +153,25 @@ static tile_bundle_bits rewrite_load_store_unaligned( if (((unsigned long)addr % size) == 0) return bundle; + /* + * Return SIGBUS with the unaligned address, if requested. + * Note that we return SIGBUS even for completely invalid addresses + * as long as they are in fact unaligned; this matches what the + * tilepro hardware would be doing, if it could provide us with the + * actual bad address in an SPR, which it doesn't. + */ + if (unaligned_fixup == 0) { + siginfo_t info = { + .si_signo = SIGBUS, + .si_code = BUS_ADRALN, + .si_addr = addr + }; + trace_unhandled_signal("unaligned trap", regs, + (unsigned long)addr, SIGBUS); + force_sig_info(info.si_signo, &info, current); + return (tilepro_bundle_bits) 0; + } + #ifndef __LITTLE_ENDIAN # error We assume little-endian representation with copy_xx_user size 2 here #endif @@ -192,18 +211,6 @@ static tile_bundle_bits rewrite_load_store_unaligned( return (tile_bundle_bits) 0; } - if (unaligned_fixup == 0) { - siginfo_t info = { - .si_signo = SIGBUS, - .si_code = BUS_ADRALN, - .si_addr = addr - }; - trace_unhandled_signal("unaligned trap", regs, - (unsigned long)addr, SIGBUS); - force_sig_info(info.si_signo, &info, current); - return (tile_bundle_bits) 0; - } - if (unaligned_printk || unaligned_fixup_count == 0) { pr_info("Process %d/%s: PC %#lx: Fixup of" " unaligned %s at %#lx.\n", -- cgit v1.2.3 From b1760c847ff9d04fba7cdbef005a0ad805311c6d Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 30 Mar 2012 16:27:20 -0400 Subject: arch/tile: remove bogus performance optimization We were re-homing the initial task's kernel stack on the boot cpu, but in fact it's better to let it stay globally homed, since that task isn't bound to the boot cpu anyway. This is more of a general cleanup than an actual performance optimization, but it removes code, which is a good thing. :-) Signed-off-by: Chris Metcalf --- arch/tile/mm/init.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch') diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index 8400d3fb9e0a..6a9d20ddc34f 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -254,11 +254,6 @@ static pgprot_t __init init_pgprot(ulong address) return construct_pgprot(PAGE_KERNEL_RO, PAGE_HOME_IMMUTABLE); } - /* As a performance optimization, keep the boot init stack here. */ - if (address >= (ulong)&init_thread_union && - address < (ulong)&init_thread_union + THREAD_SIZE) - return construct_pgprot(PAGE_KERNEL, smp_processor_id()); - #ifndef __tilegx__ #if !ATOMIC_LOCKS_FOUND_VIA_TABLE() /* Force the atomic_locks[] array page to be hash-for-home. */ -- cgit v1.2.3 From e1d5c0195075abaa45cd04ca397dbeaa0d18c490 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 30 Mar 2012 16:29:06 -0400 Subject: arch/tile: avoid accidentally unmasking NMI-type interrupt accidentally The return path as we reload registers and core state requires that r30 hold a boolean indicating whether we are returning from an NMI, but in a couple of cases we weren't setting this properly, with the result that we could accidentally unmask the NMI interrupt(s), which could cause confusion. Now we set r30 in every place where we jump into the interrupt return path. Signed-off-by: Chris Metcalf --- arch/tile/kernel/intvec_32.S | 24 ++++++++++++++++++++---- arch/tile/kernel/intvec_64.S | 19 ++++++++++++++++--- 2 files changed, 36 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index aecc8ed5f39b..5d56a1ef5ba5 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -799,6 +799,10 @@ handle_interrupt: * This routine takes a boolean in r30 indicating if this is an NMI. * If so, we also expect a boolean in r31 indicating whether to * re-enable the oprofile interrupts. + * + * Note that .Lresume_userspace is jumped to directly in several + * places, and we need to make sure r30 is set correctly in those + * callers as well. */ STD_ENTRY(interrupt_return) /* If we're resuming to kernel space, don't check thread flags. */ @@ -1237,7 +1241,10 @@ handle_syscall: bzt r30, 1f jal do_syscall_trace FEEDBACK_REENTER(handle_syscall) -1: j .Lresume_userspace /* jump into middle of interrupt_return */ +1: { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } .Linvalid_syscall: /* Report an invalid syscall back to the user program */ @@ -1246,7 +1253,10 @@ handle_syscall: movei r28, -ENOSYS } sw r29, r28 - j .Lresume_userspace /* jump into middle of interrupt_return */ + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } STD_ENDPROC(handle_syscall) /* Return the address for oprofile to suppress in backtraces. */ @@ -1262,7 +1272,10 @@ STD_ENTRY(ret_from_fork) jal sim_notify_fork jal schedule_tail FEEDBACK_REENTER(ret_from_fork) - j .Lresume_userspace /* jump into middle of interrupt_return */ + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } STD_ENDPROC(ret_from_fork) /* @@ -1376,7 +1389,10 @@ handle_ill: jal send_sigtrap /* issue a SIGTRAP */ FEEDBACK_REENTER(handle_ill) - j .Lresume_userspace /* jump into middle of interrupt_return */ + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } .Ldispatch_normal_ill: { diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index fdff17c70cc8..49d9d6621682 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S @@ -606,6 +606,10 @@ handle_interrupt: * This routine takes a boolean in r30 indicating if this is an NMI. * If so, we also expect a boolean in r31 indicating whether to * re-enable the oprofile interrupts. + * + * Note that .Lresume_userspace is jumped to directly in several + * places, and we need to make sure r30 is set correctly in those + * callers as well. */ STD_ENTRY(interrupt_return) /* If we're resuming to kernel space, don't check thread flags. */ @@ -1058,7 +1062,10 @@ handle_syscall: } FEEDBACK_REENTER(handle_syscall) -2: j .Lresume_userspace /* jump into middle of interrupt_return */ +2: { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } .Lcompat_syscall: /* @@ -1092,7 +1099,10 @@ handle_syscall: movei r28, -ENOSYS } st r29, r28 - j .Lresume_userspace /* jump into middle of interrupt_return */ + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } STD_ENDPROC(handle_syscall) /* Return the address for oprofile to suppress in backtraces. */ @@ -1108,7 +1118,10 @@ STD_ENTRY(ret_from_fork) jal sim_notify_fork jal schedule_tail FEEDBACK_REENTER(ret_from_fork) - j .Lresume_userspace + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } STD_ENDPROC(ret_from_fork) /* Various stub interrupt handlers and syscall handlers */ -- cgit v1.2.3 From b8e6f8ae511d88732247aa2af26bfd1bef21b2f4 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 13 Mar 2012 19:59:39 +0100 Subject: KVM: PPC: Book3S: Compile fix for ppc32 in HIOR access code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We were failing to compile on book3s_32 with the following errors: arch/powerpc/kvm/book3s_pr.c:883:45: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast] arch/powerpc/kvm/book3s_pr.c:898:79: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast] Fix this by explicity casting the u64 to long before we use it as a pointer. Also, on PPC32 we can not use get_user/put_user for 64bit wide variables, as there is no single instruction that could load or store variables that big. So instead, we have to use copy_from/to_user which works everywhere. Reported-by: Jörg Sommer Signed-off-by: Alexander Graf Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_pr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 642d88574b07..a657c44b7fbc 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -881,7 +881,8 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) switch (reg->id) { case KVM_REG_PPC_HIOR: - r = put_user(to_book3s(vcpu)->hior, (u64 __user *)reg->addr); + r = copy_to_user((u64 __user *)(long)reg->addr, + &to_book3s(vcpu)->hior, sizeof(u64)); break; default: break; @@ -896,7 +897,8 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) switch (reg->id) { case KVM_REG_PPC_HIOR: - r = get_user(to_book3s(vcpu)->hior, (u64 __user *)reg->addr); + r = copy_from_user(&to_book3s(vcpu)->hior, + (u64 __user *)(long)reg->addr, sizeof(u64)); if (!r) to_book3s(vcpu)->hior_explicit = true; break; -- cgit v1.2.3 From b4e51229d8a1e499fe65153766437152cca42053 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 3 Feb 2012 00:45:02 +0000 Subject: KVM: PPC: Book3S HV: Fix kvm_alloc_linear in case where no linears exist In kvm_alloc_linear we were using and deferencing ri after the list_for_each_entry had come to the end of the list. In that situation, ri is not really defined and probably points to the list head. This will happen every time if the free_linears list is empty, for instance. This led to a NULL pointer dereference crash in memset on POWER7 while trying to allocate an HPT in the case where no HPTs were preallocated. This fixes it by using a separate variable for the return value from the loop iterator. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_builtin.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index bed1279aa6a8..e1b60f56f2a1 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -173,9 +173,9 @@ static void __init kvm_linear_init_one(ulong size, int count, int type) static struct kvmppc_linear_info *kvm_alloc_linear(int type) { - struct kvmppc_linear_info *ri; + struct kvmppc_linear_info *ri, *ret; - ri = NULL; + ret = NULL; spin_lock(&linear_lock); list_for_each_entry(ri, &free_linears, list) { if (ri->type != type) @@ -183,11 +183,12 @@ static struct kvmppc_linear_info *kvm_alloc_linear(int type) list_del(&ri->list); atomic_inc(&ri->use_count); + memset(ri->base_virt, 0, ri->npages << PAGE_SHIFT); + ret = ri; break; } spin_unlock(&linear_lock); - memset(ri->base_virt, 0, ri->npages << PAGE_SHIFT); - return ri; + return ret; } static void kvm_release_linear(struct kvmppc_linear_info *ri) -- cgit v1.2.3 From a5ddea0e78e76aa8d6354b9b0e51e652e21b8137 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 3 Feb 2012 00:53:21 +0000 Subject: KVM: PPC: Book3S HV: Save and restore CR in __kvmppc_vcore_entry The ABI specifies that CR fields CR2--CR4 are nonvolatile across function calls. Currently __kvmppc_vcore_entry doesn't save and restore the CR, leading to CR2--CR4 getting corrupted with guest values, possibly leading to incorrect behaviour in its caller. This adds instructions to save and restore CR at the points where we save and restore the nonvolatile GPRs. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_interrupts.S | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 3f7b674dd4bf..d3fb4df02c41 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -46,8 +46,10 @@ _GLOBAL(__kvmppc_vcore_entry) /* Save host state to the stack */ stdu r1, -SWITCH_FRAME_SIZE(r1) - /* Save non-volatile registers (r14 - r31) */ + /* Save non-volatile registers (r14 - r31) and CR */ SAVE_NVGPRS(r1) + mfcr r3 + std r3, _CCR(r1) /* Save host DSCR */ BEGIN_FTR_SECTION @@ -157,8 +159,10 @@ kvmppc_handler_highmem: * R13 = PACA */ - /* Restore non-volatile host registers (r14 - r31) */ + /* Restore non-volatile host registers (r14 - r31) and CR */ REST_NVGPRS(r1) + ld r4, _CCR(r1) + mtcr r4 addi r1, r1, SWITCH_FRAME_SIZE ld r0, PPC_LR_STKOFF(r1) -- cgit v1.2.3 From e1f8acf8380abfd52aefbfa524e74af5ce0c8492 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 5 Mar 2012 16:00:28 +0100 Subject: KVM: PPC: Save/Restore CR over vcpu_run On PPC, CR2-CR4 are nonvolatile, thus have to be saved across function calls. We didn't respect that for any architecture until Paul spotted it in his patch for Book3S-HV. This patch saves/restores CR for all KVM capable PPC hosts. Signed-off-by: Alexander Graf Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_interrupts.S | 7 +++++++ arch/powerpc/kvm/booke_interrupts.S | 7 ++++++- 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index 0a8515a5c042..3e35383bdb21 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -84,6 +84,10 @@ kvm_start_entry: /* Save non-volatile registers (r14 - r31) */ SAVE_NVGPRS(r1) + /* Save CR */ + mfcr r14 + stw r14, _CCR(r1) + /* Save LR */ PPC_STL r0, _LINK(r1) @@ -165,6 +169,9 @@ kvm_exit_loop: PPC_LL r4, _LINK(r1) mtlr r4 + lwz r14, _CCR(r1) + mtcr r14 + /* Restore non-volatile host registers (r14 - r31) */ REST_NVGPRS(r1) diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 10d8ef602e5c..c8c4b878795a 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -34,7 +34,8 @@ /* r2 is special: it holds 'current', and it made nonvolatile in the * kernel with the -ffixed-r2 gcc option. */ #define HOST_R2 12 -#define HOST_NV_GPRS 16 +#define HOST_CR 16 +#define HOST_NV_GPRS 20 #define HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * 4)) #define HOST_MIN_STACK_SIZE (HOST_NV_GPR(31) + 4) #define HOST_STACK_SIZE (((HOST_MIN_STACK_SIZE + 15) / 16) * 16) /* Align. */ @@ -296,8 +297,10 @@ heavyweight_exit: /* Return to kvm_vcpu_run(). */ lwz r4, HOST_STACK_LR(r1) + lwz r5, HOST_CR(r1) addi r1, r1, HOST_STACK_SIZE mtlr r4 + mtcr r5 /* r3 still contains the return code from kvmppc_handle_exit(). */ blr @@ -314,6 +317,8 @@ _GLOBAL(__kvmppc_vcpu_run) stw r3, HOST_RUN(r1) mflr r3 stw r3, HOST_STACK_LR(r1) + mfcr r5 + stw r5, HOST_CR(r1) /* Save host non-volatile register state to stack. */ stw r14, HOST_NV_GPR(r14)(r1) -- cgit v1.2.3 From 592f5d87b3feee9d60411f19d583038c0c7670ad Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 13 Mar 2012 23:31:02 +0100 Subject: KVM: PPC: Book3S: PR: Fix preemption We were leaking preemption counters. Fix the code to always toggle between preempt and non-preempt properly. Signed-off-by: Alexander Graf Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_pr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index a657c44b7fbc..7759053d391b 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -777,6 +777,7 @@ program_interrupt: } } + preempt_disable(); if (!(r & RESUME_HOST)) { /* To avoid clobbering exit_reason, only check for signals if * we aren't already exiting to userspace for some other @@ -798,8 +799,6 @@ program_interrupt: run->exit_reason = KVM_EXIT_INTR; r = -EINTR; } else { - preempt_disable(); - /* In case an interrupt came in that was triggered * from userspace (like DEC), we need to check what * to inject now! */ -- cgit v1.2.3 From a052d2c31b7b87e9b4bdee634af666b5e830e56f Mon Sep 17 00:00:00 2001 From: "Shimoda, Yoshihiro" Date: Wed, 4 Apr 2012 11:56:07 +0900 Subject: sh: fix clock-sh7757 for the latest sh_mobile_sdhi driver The commit 996bc8aebd2cd5b6d4c5d85085f171fa2447f364 (mmc: sh_mobile_sdhi: do not manage PM clocks manually) modified the sh_mobile_sdhi driver to remove the clk_enable/clk_disable. So, we need to change the "CLKDEV_CON_ID" to "CLKDEV_DEV_ID". If we don't change this, we will see the following error from the driver: sh_mobile_sdhi sh_mobile_sdhi.0: timeout waiting for hardware interrupt (CMD52) Signed-off-by: Yoshihiro Shimoda Signed-off-by: Paul Mundt --- arch/sh/kernel/cpu/sh4a/clock-sh7757.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7757.c b/arch/sh/kernel/cpu/sh4a/clock-sh7757.c index 5853989586ed..04ab5aeaf920 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7757.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7757.c @@ -113,7 +113,7 @@ static struct clk_lookup lookups[] = { CLKDEV_CON_ID("cpu_clk", &div4_clks[DIV4_I]), /* MSTP32 clocks */ - CLKDEV_CON_ID("sdhi0", &mstp_clks[MSTP004]), + CLKDEV_DEV_ID("sh_mobile_sdhi.0", &mstp_clks[MSTP004]), CLKDEV_CON_ID("riic0", &mstp_clks[MSTP000]), CLKDEV_CON_ID("riic1", &mstp_clks[MSTP000]), CLKDEV_CON_ID("riic2", &mstp_clks[MSTP000]), -- cgit v1.2.3 From 7358e51082b68e2026628220510a29197e2b9933 Mon Sep 17 00:00:00 2001 From: Kautuk Consul Date: Mon, 26 Mar 2012 06:40:49 +0000 Subject: sparc/mm/fault_64.c: Port OOM changes to do_sparc64_fault Commit d065bd810b6deb67d4897a14bfe21f8eb526ba99 (mm: retry page fault when blocking on disk transfer) and commit 37b23e0525d393d48a7d59f870b3bc061a30ccdb (x86,mm: make pagefault killable) The above commits introduced changes into the x86 pagefault handler for making the page fault handler retryable as well as killable. These changes reduce the mmap_sem hold time, which is crucial during OOM killer invocation. Port these changes to 64-bit sparc. Signed-off-by: Kautuk Consul Signed-off-by: David S. Miller --- arch/sparc/mm/fault_64.c | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 504c0622f729..1fe0429b6314 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -279,6 +279,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) unsigned int insn = 0; int si_code, fault_code, fault; unsigned long address, mm_rss; + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; fault_code = get_thread_fault_code(); @@ -333,6 +334,8 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) insn = get_fault_insn(regs, insn); goto handle_kernel_fault; } + +retry: down_read(&mm->mmap_sem); } @@ -423,7 +426,12 @@ good_area: goto bad_area; } - fault = handle_mm_fault(mm, vma, address, (fault_code & FAULT_CODE_WRITE) ? FAULT_FLAG_WRITE : 0); + flags |= ((fault_code & FAULT_CODE_WRITE) ? FAULT_FLAG_WRITE : 0); + fault = handle_mm_fault(mm, vma, address, flags); + + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) + return; + if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; @@ -431,12 +439,27 @@ good_area: goto do_sigbus; BUG(); } - if (fault & VM_FAULT_MAJOR) { - current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); - } else { - current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); + + if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault & VM_FAULT_MAJOR) { + current->maj_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, + 1, regs, address); + } else { + current->min_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, + 1, regs, address); + } + if (fault & VM_FAULT_RETRY) { + flags &= ~FAULT_FLAG_ALLOW_RETRY; + + /* No need to up_read(&mm->mmap_sem) as we would + * have already released it in __lock_page_or_retry + * in mm/filemap.c. + */ + + goto retry; + } } up_read(&mm->mmap_sem); -- cgit v1.2.3 From c29554f53e4679d30f4eb39cad7700023cbaae67 Mon Sep 17 00:00:00 2001 From: Kautuk Consul Date: Mon, 26 Mar 2012 06:47:54 +0000 Subject: sparc/mm/fault_32.c: Port OOM changes to do_sparc_fault Commit d065bd810b6deb67d4897a14bfe21f8eb526ba99 (mm: retry page fault when blocking on disk transfer) and commit 37b23e0525d393d48a7d59f870b3bc061a30ccdb (x86,mm: make pagefault killable) The above commits introduced changes into the x86 pagefault handler for making the page fault handler retryable as well as killable. These changes reduce the mmap_sem hold time, which is crucial during OOM killer invocation. Port these changes to 32-bit sparc. Signed-off-by: Kautuk Consul Signed-off-by: David S. Miller --- arch/sparc/mm/fault_32.c | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index 7705c6731e28..df3155a17991 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -225,6 +225,8 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, unsigned long g2; int from_user = !(regs->psr & PSR_PS); int fault, code; + unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | + (write ? FAULT_FLAG_WRITE : 0)); if(text_fault) address = regs->pc; @@ -251,6 +253,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); +retry: down_read(&mm->mmap_sem); /* @@ -289,7 +292,11 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0); + fault = handle_mm_fault(mm, vma, address, flags); + + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) + return; + if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; @@ -297,13 +304,29 @@ good_area: goto do_sigbus; BUG(); } - if (fault & VM_FAULT_MAJOR) { - current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); - } else { - current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); + + if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault & VM_FAULT_MAJOR) { + current->maj_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, + 1, regs, address); + } else { + current->min_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, + 1, regs, address); + } + if (fault & VM_FAULT_RETRY) { + flags &= ~FAULT_FLAG_ALLOW_RETRY; + + /* No need to up_read(&mm->mmap_sem) as we would + * have already released it in __lock_page_or_retry + * in mm/filemap.c. + */ + + goto retry; + } } + up_read(&mm->mmap_sem); return; -- cgit v1.2.3 From d657784b70ef653350d7aa49da90a8484c29da7d Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Wed, 4 Apr 2012 21:20:01 +0200 Subject: sparc32,leon: fix leon build Minimal fix to allow leon to be built. Signed-off-by: Sam Ravnborg Cc: Konrad Eisele Cc: Daniel Hellstrom Signed-off-by: David S. Miller --- arch/sparc/kernel/leon_pci.c | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/leon_pci.c b/arch/sparc/kernel/leon_pci.c index aba6b958b2a5..19f56058742b 100644 --- a/arch/sparc/kernel/leon_pci.c +++ b/arch/sparc/kernel/leon_pci.c @@ -45,7 +45,6 @@ void leon_pci_init(struct platform_device *ofdev, struct leon_pci_info *info) void __devinit pcibios_fixup_bus(struct pci_bus *pbus) { - struct leon_pci_info *info = pbus->sysdata; struct pci_dev *dev; int i, has_io, has_mem; u16 cmd; @@ -111,18 +110,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) return pci_enable_resources(dev, mask); } -struct device_node *pci_device_to_OF_node(struct pci_dev *pdev) -{ - /* - * Currently the OpenBoot nodes are not connected with the PCI device, - * this is because the LEON PROM does not create PCI nodes. Eventually - * this will change and the same approach as pcic.c can be used to - * match PROM nodes with pci devices. - */ - return NULL; -} -EXPORT_SYMBOL(pci_device_to_OF_node); - void __devinit pcibios_update_irq(struct pci_dev *dev, int irq) { #ifdef CONFIG_PCI_DEBUG -- cgit v1.2.3 From fea5295324ce7ce6ccfe0909cc6740a2d34aa5a3 Mon Sep 17 00:00:00 2001 From: Sasikantha babu Date: Wed, 21 Mar 2012 18:49:00 +0530 Subject: KVM: PMU: Fix integer constant is too large warning in kvm_pmu_set_msr() Signed-off-by: Sasikantha babu Signed-off-by: Avi Kivity --- arch/x86/kvm/pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index a73f0c104813..173df38dbda5 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -369,7 +369,7 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) case MSR_CORE_PERF_FIXED_CTR_CTRL: if (pmu->fixed_ctr_ctrl == data) return 0; - if (!(data & 0xfffffffffffff444)) { + if (!(data & 0xfffffffffffff444ull)) { reprogram_fixed_counters(pmu, data); return 0; } -- cgit v1.2.3 From 7a4f5ad051e02139a9f1c0f7f4b1acb88915852b Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 27 Mar 2012 19:47:26 -0300 Subject: KVM: VMX: vmx_set_cr0 expects kvm->srcu locked vmx_set_cr0 is called from vcpu run context, therefore it expects kvm->srcu to be held (for setting up the real-mode TSS). Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 280751c84724..ad85adfef843 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3906,7 +3906,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */ + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); vmx_set_cr4(&vmx->vcpu, 0); vmx_set_efer(&vmx->vcpu, 0); vmx_fpu_activate(&vmx->vcpu); -- cgit v1.2.3 From e08759215b7dcb7111e94f0f96918dd98e86ca6b Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 4 Apr 2012 15:30:33 +0300 Subject: KVM: Resolve RCU vs. async page fault problem "Page ready" async PF can kick vcpu out of idle state much like IRQ. We need to tell RCU about this. Reported-by: Sasha Levin Signed-off-by: Gleb Natapov Reviewed-by: Paul E. McKenney Signed-off-by: Avi Kivity --- arch/x86/kernel/kvm.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 694d801bf606..b8ba6e4a27e4 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -38,6 +38,7 @@ #include #include #include +#include static int kvmapf = 1; @@ -253,7 +254,10 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) kvm_async_pf_task_wait((u32)read_cr2()); break; case KVM_PV_REASON_PAGE_READY: + rcu_irq_enter(); + exit_idle(); kvm_async_pf_task_wake((u32)read_cr2()); + rcu_irq_exit(); break; } } -- cgit v1.2.3 From 2531d64b6fe2724dc432b67d8dc66bd45621da0b Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 20 Mar 2012 15:04:18 -0400 Subject: xen/x86: Workaround 'x86/ioapic: Add register level checks to detect bogus io-apic entries' The above mentioned patch checks the IOAPIC and if it contains -1, then it unmaps said IOAPIC. But under Xen we get this: BUG: unable to handle kernel NULL pointer dereference at 0000000000000040 IP: [] xen_irq_init+0x1f/0xb0 PGD 0 Oops: 0002 [#1] SMP CPU 0 Modules linked in: Pid: 1, comm: swapper/0 Not tainted 3.2.10-3.fc16.x86_64 #1 Dell Inc. Inspiron 1525 /0U990C RIP: e030:[] [] xen_irq_init+0x1f/0xb0 RSP: e02b: ffff8800d42cbb70 EFLAGS: 00010202 RAX: 0000000000000000 RBX: 00000000ffffffef RCX: 0000000000000001 RDX: 0000000000000040 RSI: 00000000ffffffef RDI: 0000000000000001 RBP: ffff8800d42cbb80 R08: ffff8800d6400000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 00000000ffffffef R13: 0000000000000001 R14: 0000000000000001 R15: 0000000000000010 FS: 0000000000000000(0000) GS:ffff8800df5fe000(0000) knlGS:0000000000000000 CS: e033 DS: 0000 ES: 0000 CR0:000000008005003b CR2: 0000000000000040 CR3: 0000000001a05000 CR4: 0000000000002660 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process swapper/0 (pid: 1, threadinfo ffff8800d42ca000, task ffff8800d42d0000) Stack: 00000000ffffffef 0000000000000010 ffff8800d42cbbe0 ffffffff8134f157 ffffffff8100a9b2 ffffffff8182ffd1 00000000000000a0 00000000829e7384 0000000000000002 0000000000000010 00000000ffffffff 0000000000000000 Call Trace: [] xen_bind_pirq_gsi_to_irq+0x87/0x230 [] ? check_events+0x12+0x20 [] xen_register_pirq+0x82/0xe0 [] xen_register_gsi.part.2+0x4a/0xd0 [] acpi_register_gsi_xen+0x20/0x30 [] acpi_register_gsi+0xf/0x20 [] acpi_pci_irq_enable+0x12e/0x202 [] pcibios_enable_device+0x39/0x40 [] do_pci_enable_device+0x4b/0x70 [] __pci_enable_device_flags+0xa8/0xf0 [] pci_enable_device+0x13/0x20 The reason we are dying is b/c the call acpi_get_override_irq() is used, which returns the polarity and trigger for the IRQs. That function calls mp_find_ioapics to get the 'struct ioapic' structure - which along with the mp_irq[x] is used to figure out the default values and the polarity/trigger overrides. Since the mp_find_ioapics now returns -1 [b/c the IOAPIC is filled with 0xffffffff], the acpi_get_override_irq() stops trying to lookup in the mp_irq[x] the proper INT_SRV_OVR and we can't install the SCI interrupt. The proper fix for this is going in v3.5 and adds an x86_io_apic_ops struct so that platforms can override it. But for v3.4 lets carry this work-around. This patch does that by providing a slightly different variant of the fake IOAPIC entries. Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/mmu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 1a309ee2331e..91dc2871e336 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1859,6 +1859,7 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, #endif /* CONFIG_X86_64 */ static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss; +static unsigned char fake_ioapic_mapping[PAGE_SIZE] __page_aligned_bss; static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) { @@ -1899,7 +1900,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) * We just don't map the IO APIC - all access is via * hypercalls. Keep the address in the pte for reference. */ - pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL); + pte = pfn_pte(PFN_DOWN(__pa(fake_ioapic_mapping)), PAGE_KERNEL); break; #endif @@ -2064,6 +2065,7 @@ void __init xen_init_mmu_ops(void) pv_mmu_ops = xen_mmu_ops; memset(dummy_mapping, 0xff, PAGE_SIZE); + memset(fake_ioapic_mapping, 0xfd, PAGE_SIZE); } /* Protected by xen_reservation_lock. */ -- cgit v1.2.3 From e8c9e788f493d3236809e955c9fc12625a461e09 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Thu, 22 Mar 2012 18:29:24 +0530 Subject: xen/smp: Remove unnecessary call to smp_processor_id() There is an extra and unnecessary call to smp_processor_id() in cpu_bringup(). Remove it. Signed-off-by: Srivatsa S. Bhat Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 240def438dc3..e845555ff486 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -59,7 +59,7 @@ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) static void __cpuinit cpu_bringup(void) { - int cpu = smp_processor_id(); + int cpu; cpu_init(); touch_softlockup_watchdog(); -- cgit v1.2.3 From 3cb42092ff02edec34bf936b7400b1f1efc8ca43 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 9 Apr 2012 13:59:00 -0400 Subject: um: fix linker script generation while we can't just use -U$(SUBARCH), we still need to kill idiotic define (implicit -Di386=1), both for SUBARCH=i386 and SUBARCH=x86/CONFIG_64BIT=n builds. Signed-off-by: Al Viro --- arch/um/kernel/Makefile | 7 ++++--- arch/x86/Makefile.um | 3 +++ 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile index 492bc4c1b62b..65a1c3d690ea 100644 --- a/arch/um/kernel/Makefile +++ b/arch/um/kernel/Makefile @@ -3,9 +3,10 @@ # Licensed under the GPL # -CPPFLAGS_vmlinux.lds := -DSTART=$(LDS_START) \ - -DELF_ARCH=$(LDS_ELF_ARCH) \ - -DELF_FORMAT=$(LDS_ELF_FORMAT) +CPPFLAGS_vmlinux.lds := -DSTART=$(LDS_START) \ + -DELF_ARCH=$(LDS_ELF_ARCH) \ + -DELF_FORMAT=$(LDS_ELF_FORMAT) \ + $(LDS_EXTRA) extra-y := vmlinux.lds clean-files := diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um index 4be406abeefd..36b62bc52638 100644 --- a/arch/x86/Makefile.um +++ b/arch/x86/Makefile.um @@ -14,6 +14,9 @@ LINK-y += $(call cc-option,-m32) export LDFLAGS +LDS_EXTRA := -Ui386 +export LDS_EXTRA + # First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y. include $(srctree)/arch/x86/Makefile_32.cpu -- cgit v1.2.3 From d1640130cda146ed925f12434bfe579ee7d80a1c Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Thu, 22 Mar 2012 16:59:11 +0530 Subject: tile/CPU hotplug: Add missing call to notify_cpu_starting() The scheduler depends on receiving the CPU_STARTING notification, without which we end up into a lot of trouble. So add the missing call to notify_cpu_starting() in the bringup code. Signed-off-by: Srivatsa S. Bhat Signed-off-by: Chris Metcalf --- arch/tile/kernel/smpboot.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c index b949edcec200..172aef7d3159 100644 --- a/arch/tile/kernel/smpboot.c +++ b/arch/tile/kernel/smpboot.c @@ -196,6 +196,8 @@ void __cpuinit online_secondary(void) /* This must be done before setting cpu_online_mask */ wmb(); + notify_cpu_starting(smp_processor_id()); + /* * We need to hold call_lock, so there is no inconsistency * between the time smp_call_function() determines number of -- cgit v1.2.3 From d824d06328904f610b47652dcd488392f2fc62b6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 5 Apr 2012 23:35:03 -0400 Subject: um: switch cow_user.h to htobe{32,64}/betoh{32,64} ... rather than open-coding the 64bit versions. endian.h has those guys. Signed-off-by: Al Viro Signed-off-by: Richard Weinberger --- arch/um/drivers/cow.h | 35 ----------------------------------- arch/um/drivers/cow_user.c | 43 +++++++++++++++++++++---------------------- 2 files changed, 21 insertions(+), 57 deletions(-) (limited to 'arch') diff --git a/arch/um/drivers/cow.h b/arch/um/drivers/cow.h index dc36b222100b..6673508f3426 100644 --- a/arch/um/drivers/cow.h +++ b/arch/um/drivers/cow.h @@ -3,41 +3,6 @@ #include -#if defined(__KERNEL__) - -# include - -# if defined(__BIG_ENDIAN) -# define ntohll(x) (x) -# define htonll(x) (x) -# elif defined(__LITTLE_ENDIAN) -# define ntohll(x) be64_to_cpu(x) -# define htonll(x) cpu_to_be64(x) -# else -# error "Could not determine byte order" -# endif - -#else -/* For the definition of ntohl, htonl and __BYTE_ORDER */ -#include -#include -#if defined(__BYTE_ORDER) - -# if __BYTE_ORDER == __BIG_ENDIAN -# define ntohll(x) (x) -# define htonll(x) (x) -# elif __BYTE_ORDER == __LITTLE_ENDIAN -# define ntohll(x) bswap_64(x) -# define htonll(x) bswap_64(x) -# else -# error "Could not determine byte order: __BYTE_ORDER uncorrectly defined" -# endif - -#else /* ! defined(__BYTE_ORDER) */ -# error "Could not determine byte order: __BYTE_ORDER not defined" -#endif -#endif /* ! defined(__KERNEL__) */ - extern int init_cow_file(int fd, char *cow_file, char *backing_file, int sectorsize, int alignment, int *bitmap_offset_out, unsigned long *bitmap_len_out, int *data_offset_out); diff --git a/arch/um/drivers/cow_user.c b/arch/um/drivers/cow_user.c index 9cbb426c0b91..0ee9cc6cc4c7 100644 --- a/arch/um/drivers/cow_user.c +++ b/arch/um/drivers/cow_user.c @@ -8,11 +8,10 @@ * that. */ #include -#include #include #include #include -#include +#include #include "cow.h" #include "cow_sys.h" @@ -214,8 +213,8 @@ int write_cow_header(char *cow_file, int fd, char *backing_file, "header\n"); goto out; } - header->magic = htonl(COW_MAGIC); - header->version = htonl(COW_VERSION); + header->magic = htobe32(COW_MAGIC); + header->version = htobe32(COW_VERSION); err = -EINVAL; if (strlen(backing_file) > sizeof(header->backing_file) - 1) { @@ -246,10 +245,10 @@ int write_cow_header(char *cow_file, int fd, char *backing_file, goto out_free; } - header->mtime = htonl(modtime); - header->size = htonll(*size); - header->sectorsize = htonl(sectorsize); - header->alignment = htonl(alignment); + header->mtime = htobe32(modtime); + header->size = htobe64(*size); + header->sectorsize = htobe32(sectorsize); + header->alignment = htobe32(alignment); header->cow_format = COW_BITMAP; err = cow_write_file(fd, header, sizeof(*header)); @@ -301,8 +300,8 @@ int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg, magic = header->v1.magic; if (magic == COW_MAGIC) version = header->v1.version; - else if (magic == ntohl(COW_MAGIC)) - version = ntohl(header->v1.version); + else if (magic == be32toh(COW_MAGIC)) + version = be32toh(header->v1.version); /* No error printed because the non-COW case comes through here */ else goto out; @@ -327,9 +326,9 @@ int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg, "header\n"); goto out; } - *mtime_out = ntohl(header->v2.mtime); - *size_out = ntohll(header->v2.size); - *sectorsize_out = ntohl(header->v2.sectorsize); + *mtime_out = be32toh(header->v2.mtime); + *size_out = be64toh(header->v2.size); + *sectorsize_out = be32toh(header->v2.sectorsize); *bitmap_offset_out = sizeof(header->v2); *align_out = *sectorsize_out; file = header->v2.backing_file; @@ -341,10 +340,10 @@ int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg, "header\n"); goto out; } - *mtime_out = ntohl(header->v3.mtime); - *size_out = ntohll(header->v3.size); - *sectorsize_out = ntohl(header->v3.sectorsize); - *align_out = ntohl(header->v3.alignment); + *mtime_out = be32toh(header->v3.mtime); + *size_out = be64toh(header->v3.size); + *sectorsize_out = be32toh(header->v3.sectorsize); + *align_out = be32toh(header->v3.alignment); if (*align_out == 0) { cow_printf("read_cow_header - invalid COW header, " "align == 0\n"); @@ -366,16 +365,16 @@ int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg, * this was used until Dec2005 - 64bits are needed to represent * 2038+. I.e. we can safely do this truncating cast. * - * Additionally, we must use ntohl() instead of ntohll(), since + * Additionally, we must use be32toh() instead of be64toh(), since * the program used to use the former (tested - I got mtime * mismatch "0 vs whatever"). * * Ever heard about bug-to-bug-compatibility ? ;-) */ - *mtime_out = (time32_t) ntohl(header->v3_b.mtime); + *mtime_out = (time32_t) be32toh(header->v3_b.mtime); - *size_out = ntohll(header->v3_b.size); - *sectorsize_out = ntohl(header->v3_b.sectorsize); - *align_out = ntohl(header->v3_b.alignment); + *size_out = be64toh(header->v3_b.size); + *sectorsize_out = be32toh(header->v3_b.sectorsize); + *align_out = be32toh(header->v3_b.alignment); if (*align_out == 0) { cow_printf("read_cow_header - invalid COW header, " "align == 0\n"); -- cgit v1.2.3 From a3a85a763c399c0bf483a30d82d2d613e6f94cd3 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Thu, 29 Mar 2012 18:47:46 +0200 Subject: um: Disintegrate asm/system.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Richard Weinberger Reported-by: Toralf Förster CC: dhowells@redhat.com --- arch/um/drivers/mconsole_kern.c | 1 + arch/um/include/asm/Kbuild | 2 +- arch/x86/um/asm/barrier.h | 75 ++++++++++++++++++++++ arch/x86/um/asm/switch_to.h | 7 +++ arch/x86/um/asm/system.h | 135 ---------------------------------------- 5 files changed, 84 insertions(+), 136 deletions(-) create mode 100644 arch/x86/um/asm/barrier.h create mode 100644 arch/x86/um/asm/switch_to.h delete mode 100644 arch/x86/um/asm/system.h (limited to 'arch') diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index e672bd6d43e3..43b39d61b538 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "init.h" #include "irq_kern.h" diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 8419f5cf2ac7..bb5d6e68829b 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -1,3 +1,3 @@ generic-y += bug.h cputime.h device.h emergency-restart.h futex.h hardirq.h generic-y += hw_irq.h irq_regs.h kdebug.h percpu.h sections.h topology.h xor.h -generic-y += ftrace.h pci.h io.h param.h delay.h mutex.h current.h +generic-y += ftrace.h pci.h io.h param.h delay.h mutex.h current.h exec.h diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h new file mode 100644 index 000000000000..7d01b8c56c00 --- /dev/null +++ b/arch/x86/um/asm/barrier.h @@ -0,0 +1,75 @@ +#ifndef _ASM_UM_BARRIER_H_ +#define _ASM_UM_BARRIER_H_ + +#include +#include +#include +#include +#include + +#include +#include + +/* + * Force strict CPU ordering. + * And yes, this is required on UP too when we're talking + * to devices. + */ +#ifdef CONFIG_X86_32 + +#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) +#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) +#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) + +#else /* CONFIG_X86_32 */ + +#define mb() asm volatile("mfence" : : : "memory") +#define rmb() asm volatile("lfence" : : : "memory") +#define wmb() asm volatile("sfence" : : : "memory") + +#endif /* CONFIG_X86_32 */ + +#define read_barrier_depends() do { } while (0) + +#ifdef CONFIG_SMP + +#define smp_mb() mb() +#ifdef CONFIG_X86_PPRO_FENCE +#define smp_rmb() rmb() +#else /* CONFIG_X86_PPRO_FENCE */ +#define smp_rmb() barrier() +#endif /* CONFIG_X86_PPRO_FENCE */ + +#ifdef CONFIG_X86_OOSTORE +#define smp_wmb() wmb() +#else /* CONFIG_X86_OOSTORE */ +#define smp_wmb() barrier() +#endif /* CONFIG_X86_OOSTORE */ + +#define smp_read_barrier_depends() read_barrier_depends() +#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) + +#else /* CONFIG_SMP */ + +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#define smp_read_barrier_depends() do { } while (0) +#define set_mb(var, value) do { var = value; barrier(); } while (0) + +#endif /* CONFIG_SMP */ + +/* + * Stop RDTSC speculation. This is needed when you need to use RDTSC + * (or get_cycles or vread that possibly accesses the TSC) in a defined + * code region. + * + * (Could use an alternative three way for this if there was one.) + */ +static inline void rdtsc_barrier(void) +{ + alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); + alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); +} + +#endif diff --git a/arch/x86/um/asm/switch_to.h b/arch/x86/um/asm/switch_to.h new file mode 100644 index 000000000000..cf97d20da61f --- /dev/null +++ b/arch/x86/um/asm/switch_to.h @@ -0,0 +1,7 @@ +#ifndef _ASM_UM_SWITCH_TO_H_ +#define _ASM_UM_SWITCH_TO_H_ + +extern void *_switch_to(void *prev, void *next, void *last); +#define switch_to(prev, next, last) prev = _switch_to(prev, next, last) + +#endif diff --git a/arch/x86/um/asm/system.h b/arch/x86/um/asm/system.h deleted file mode 100644 index a459fd9b7598..000000000000 --- a/arch/x86/um/asm/system.h +++ /dev/null @@ -1,135 +0,0 @@ -#ifndef _ASM_X86_SYSTEM_H_ -#define _ASM_X86_SYSTEM_H_ - -#include -#include -#include -#include -#include - -#include -#include - -/* entries in ARCH_DLINFO: */ -#ifdef CONFIG_IA32_EMULATION -# define AT_VECTOR_SIZE_ARCH 2 -#else -# define AT_VECTOR_SIZE_ARCH 1 -#endif - -extern unsigned long arch_align_stack(unsigned long sp); - -void default_idle(void); - -/* - * Force strict CPU ordering. - * And yes, this is required on UP too when we're talking - * to devices. - */ -#ifdef CONFIG_X86_32 -/* - * Some non-Intel clones support out of order store. wmb() ceases to be a - * nop for these. - */ -#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) -#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) -#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) -#else -#define mb() asm volatile("mfence":::"memory") -#define rmb() asm volatile("lfence":::"memory") -#define wmb() asm volatile("sfence" ::: "memory") -#endif - -/** - * read_barrier_depends - Flush all pending reads that subsequents reads - * depend on. - * - * No data-dependent reads from memory-like regions are ever reordered - * over this barrier. All reads preceding this primitive are guaranteed - * to access memory (but not necessarily other CPUs' caches) before any - * reads following this primitive that depend on the data return by - * any of the preceding reads. This primitive is much lighter weight than - * rmb() on most CPUs, and is never heavier weight than is - * rmb(). - * - * These ordering constraints are respected by both the local CPU - * and the compiler. - * - * Ordering is not guaranteed by anything other than these primitives, - * not even by data dependencies. See the documentation for - * memory_barrier() for examples and URLs to more information. - * - * For example, the following code would force ordering (the initial - * value of "a" is zero, "b" is one, and "p" is "&a"): - * - * - * CPU 0 CPU 1 - * - * b = 2; - * memory_barrier(); - * p = &b; q = p; - * read_barrier_depends(); - * d = *q; - * - * - * because the read of "*q" depends on the read of "p" and these - * two reads are separated by a read_barrier_depends(). However, - * the following code, with the same initial values for "a" and "b": - * - * - * CPU 0 CPU 1 - * - * a = 2; - * memory_barrier(); - * b = 3; y = b; - * read_barrier_depends(); - * x = a; - * - * - * does not enforce ordering, since there is no data dependency between - * the read of "a" and the read of "b". Therefore, on some CPUs, such - * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() - * in cases like this where there are no data dependencies. - **/ - -#define read_barrier_depends() do { } while (0) - -#ifdef CONFIG_SMP -#define smp_mb() mb() -#ifdef CONFIG_X86_PPRO_FENCE -# define smp_rmb() rmb() -#else -# define smp_rmb() barrier() -#endif -#ifdef CONFIG_X86_OOSTORE -# define smp_wmb() wmb() -#else -# define smp_wmb() barrier() -#endif -#define smp_read_barrier_depends() read_barrier_depends() -#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) -#else -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while (0) -#define set_mb(var, value) do { var = value; barrier(); } while (0) -#endif - -/* - * Stop RDTSC speculation. This is needed when you need to use RDTSC - * (or get_cycles or vread that possibly accesses the TSC) in a defined - * code region. - * - * (Could use an alternative three way for this if there was one.) - */ -static inline void rdtsc_barrier(void) -{ - alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); - alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); -} - -extern void *_switch_to(void *prev, void *next, void *last); -#define switch_to(prev, next, last) prev = _switch_to(prev, next, last) - -#endif -- cgit v1.2.3 From 76b278edd99fb55525fcf2706095e388bd3d122c Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Thu, 29 Mar 2012 19:10:42 +0200 Subject: um: Use asm-generic/switch_to.h Signed-off-by: Richard Weinberger --- arch/um/include/asm/Kbuild | 1 + arch/um/kernel/process.c | 6 +----- arch/x86/um/asm/switch_to.h | 7 ------- 3 files changed, 2 insertions(+), 12 deletions(-) delete mode 100644 arch/x86/um/asm/switch_to.h (limited to 'arch') diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index bb5d6e68829b..fff24352255d 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -1,3 +1,4 @@ generic-y += bug.h cputime.h device.h emergency-restart.h futex.h hardirq.h generic-y += hw_irq.h irq_regs.h kdebug.h percpu.h sections.h topology.h xor.h generic-y += ftrace.h pci.h io.h param.h delay.h mutex.h current.h exec.h +generic-y += switch_to.h diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index f386d04a84a5..2b73dedb44ca 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -88,11 +88,8 @@ static inline void set_current(struct task_struct *task) extern void arch_switch_to(struct task_struct *to); -void *_switch_to(void *prev, void *next, void *last) +void *__switch_to(struct task_struct *from, struct task_struct *to) { - struct task_struct *from = prev; - struct task_struct *to = next; - to->thread.prev_sched = from; set_current(to); @@ -111,7 +108,6 @@ void *_switch_to(void *prev, void *next, void *last) } while (current->thread.saved_task); return current->thread.prev_sched; - } void interrupt_end(void) diff --git a/arch/x86/um/asm/switch_to.h b/arch/x86/um/asm/switch_to.h deleted file mode 100644 index cf97d20da61f..000000000000 --- a/arch/x86/um/asm/switch_to.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _ASM_UM_SWITCH_TO_H_ -#define _ASM_UM_SWITCH_TO_H_ - -extern void *_switch_to(void *prev, void *next, void *last); -#define switch_to(prev, next, last) prev = _switch_to(prev, next, last) - -#endif -- cgit v1.2.3 From 657b12d3a1508a3f06f7afe21d5dda7252279520 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Mon, 26 Mar 2012 19:18:22 -0700 Subject: um: uml_setup_stubs': warning: unused variable 'pages' Fix the following gcc complain arch/um/kernel/skas/mmu.c: In function 'uml_setup_stubs': arch/um/kernel/skas/mmu.c:106:16: warning: unused variable 'pages' [-Wunused-variable] Signed-Signed-off-by: Boaz Harrosh Signed-off-by: Richard Weinberger --- arch/um/kernel/skas/mmu.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 4947b319f53a..0a49ef0c2bf4 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -103,7 +103,6 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm) void uml_setup_stubs(struct mm_struct *mm) { - struct page **pages; int err, ret; if (!skas_needs_stub) -- cgit v1.2.3 From 08f1ec8a594c60bf3856e3c45b6d15fd691d90bb Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 10 Apr 2012 17:21:35 +1000 Subject: powerpc: Fix page fault with lockdep regression commit a546498f3bf9aac311c66f965186373aee2ca0b0 introduced a regression on 32-bit when irq tracing is enabled by exposing an old bug in our irq tracing code for exception entry. The code would save and restore some GPRs around the calls to the C lockdep code, however, it tries to be too smart for its own good and restores some of the GPRs from the exception frame (as saved there on exception entry). However, for page faults, we do replace those GPRs with arguments to do_page_fault before we call transfer_to_handler and so restoring from the exception frame is plain wrong in this case. This was fine as long as we didn't touch the interrupt state when taking page fault, but when I started doing it, it would trigger the lockdep calls and the bug. This fixes it by cleaning up that code a bit. It did create a small stack frame for the sake of backtraces, so let's make it a bit bigger and use it to save and restore the stuff we care about. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/entry_32.S | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 3e57a00b8cba..ba3aeb4bc06a 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -206,40 +206,43 @@ reenable_mmu: /* re-enable mmu so we can */ andi. r10,r10,MSR_EE /* Did EE change? */ beq 1f - /* Save handler and return address into the 2 unused words - * of the STACK_FRAME_OVERHEAD (sneak sneak sneak). Everything - * else can be recovered from the pt_regs except r3 which for - * normal interrupts has been set to pt_regs and for syscalls - * is an argument, so we temporarily use ORIG_GPR3 to save it - */ - stw r9,8(r1) - stw r11,12(r1) - stw r3,ORIG_GPR3(r1) /* * The trace_hardirqs_off will use CALLER_ADDR0 and CALLER_ADDR1. * If from user mode there is only one stack frame on the stack, and * accessing CALLER_ADDR1 will cause oops. So we need create a dummy * stack frame to make trace_hardirqs_off happy. + * + * This is handy because we also need to save a bunch of GPRs, + * r3 can be different from GPR3(r1) at this point, r9 and r11 + * contains the old MSR and handler address respectively, + * r4 & r5 can contain page fault arguments that need to be passed + * along as well. r12, CCR, CTR, XER etc... are left clobbered as + * they aren't useful past this point (aren't syscall arguments), + * the rest is restored from the exception frame. */ + stwu r1,-32(r1) + stw r9,8(r1) + stw r11,12(r1) + stw r3,16(r1) + stw r4,20(r1) + stw r5,24(r1) andi. r12,r12,MSR_PR - beq 11f - stwu r1,-16(r1) + b 11f bl trace_hardirqs_off - addi r1,r1,16 b 12f - 11: bl trace_hardirqs_off 12: + lwz r5,24(r1) + lwz r4,20(r1) + lwz r3,16(r1) + lwz r11,12(r1) + lwz r9,8(r1) + addi r1,r1,32 lwz r0,GPR0(r1) - lwz r3,ORIG_GPR3(r1) - lwz r4,GPR4(r1) - lwz r5,GPR5(r1) lwz r6,GPR6(r1) lwz r7,GPR7(r1) lwz r8,GPR8(r1) - lwz r9,8(r1) - lwz r11,12(r1) 1: mtctr r11 mtlr r9 bctr /* jump to handler */ -- cgit v1.2.3 From fae2e0fb24c61ca68c98d854a34732549ebc1854 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 11 Apr 2012 10:42:15 +1000 Subject: powerpc: Fix typo in runlatch code Commit fe1952fc0afb9a2e4c79f103c08aef5d13db1873 "powerpc: Rework runlatch code" has a nasty typo where it uses "TLF_RUNLATCH" instead of "_TLF_RUNLATCH" (bit number instead of bit mask), causing some flags to be potentially lost such as _TLF_RESTORE_SIGMASK (Brown paper bag for me ! We should be able to make that break at compile time with a bit of magic, any volunteer ?) Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/process.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index f88698c0f332..4937c9690090 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1235,7 +1235,7 @@ void __ppc64_runlatch_on(void) ctrl |= CTRL_RUNLATCH; mtspr(SPRN_CTRLT, ctrl); - ti->local_flags |= TLF_RUNLATCH; + ti->local_flags |= _TLF_RUNLATCH; } /* Called with hard IRQs off */ @@ -1244,7 +1244,7 @@ void __ppc64_runlatch_off(void) struct thread_info *ti = current_thread_info(); unsigned long ctrl; - ti->local_flags &= ~TLF_RUNLATCH; + ti->local_flags &= ~_TLF_RUNLATCH; ctrl = mfspr(SPRN_CTRLF); ctrl &= ~CTRL_RUNLATCH; -- cgit v1.2.3 From a699e4e49ec3fb62c4a44394357d14081df10bef Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Tue, 3 Apr 2012 07:11:04 -0600 Subject: irq: Kill pointless irqd_to_hw export It makes no sense to export this trivial function. Make it a static inline instead. This patch also drops virq_to_hw from arch/c6x since it is unused by that architecture. v2: Move irq_hw_number_t into types.h to fix ARM build failure Signed-off-by: Grant Likely Acked-by: Thomas Gleixner Cc: Benjamin Herrenschmidt --- arch/c6x/include/asm/irq.h | 4 ---- arch/c6x/kernel/irq.c | 13 ------------- arch/powerpc/include/asm/irq.h | 2 -- arch/powerpc/kernel/irq.c | 6 ------ include/linux/irq.h | 5 +++++ include/linux/irqdomain.h | 6 ------ include/linux/types.h | 6 ++++++ 7 files changed, 11 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/c6x/include/asm/irq.h b/arch/c6x/include/asm/irq.h index f13b78d5e1ca..ab4577f93d96 100644 --- a/arch/c6x/include/asm/irq.h +++ b/arch/c6x/include/asm/irq.h @@ -42,10 +42,6 @@ /* This number is used when no interrupt has been assigned */ #define NO_IRQ 0 -struct irq_data; -extern irq_hw_number_t irqd_to_hwirq(struct irq_data *d); -extern irq_hw_number_t virq_to_hw(unsigned int virq); - extern void __init init_pic_c64xplus(void); extern void init_IRQ(void); diff --git a/arch/c6x/kernel/irq.c b/arch/c6x/kernel/irq.c index 65b8ddf54b44..c90fb5e82ad7 100644 --- a/arch/c6x/kernel/irq.c +++ b/arch/c6x/kernel/irq.c @@ -130,16 +130,3 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count); return 0; } - -irq_hw_number_t irqd_to_hwirq(struct irq_data *d) -{ - return d->hwirq; -} -EXPORT_SYMBOL_GPL(irqd_to_hwirq); - -irq_hw_number_t virq_to_hw(unsigned int virq) -{ - struct irq_data *irq_data = irq_get_irq_data(virq); - return WARN_ON(!irq_data) ? 0 : irq_data->hwirq; -} -EXPORT_SYMBOL_GPL(virq_to_hw); diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index cf417e510736..e648af92ced1 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -33,8 +33,6 @@ extern atomic_t ppc_n_lost_interrupts; /* Same thing, used by the generic IRQ code */ #define NR_IRQS_LEGACY NUM_ISA_INTERRUPTS -struct irq_data; -extern irq_hw_number_t irqd_to_hwirq(struct irq_data *d); extern irq_hw_number_t virq_to_hw(unsigned int virq); /** diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 243dbabfe74d..5ec1b2354ca6 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -560,12 +560,6 @@ void do_softirq(void) local_irq_restore(flags); } -irq_hw_number_t irqd_to_hwirq(struct irq_data *d) -{ - return d->hwirq; -} -EXPORT_SYMBOL_GPL(irqd_to_hwirq); - irq_hw_number_t virq_to_hw(unsigned int virq) { struct irq_data *irq_data = irq_get_irq_data(virq); diff --git a/include/linux/irq.h b/include/linux/irq.h index bff29c58da23..7810406f3d80 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -263,6 +263,11 @@ static inline void irqd_clr_chained_irq_inprogress(struct irq_data *d) d->state_use_accessors &= ~IRQD_IRQ_INPROGRESS; } +static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) +{ + return d->hwirq; +} + /** * struct irq_chip - hardware interrupt chip descriptor * diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index ead4a4215797..ac17b9b2e7be 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -42,12 +42,6 @@ struct of_device_id; /* Number of irqs reserved for a legacy isa controller */ #define NUM_ISA_INTERRUPTS 16 -/* This type is the placeholder for a hardware interrupt number. It has to - * be big enough to enclose whatever representation is used by a given - * platform. - */ -typedef unsigned long irq_hw_number_t; - /** * struct irq_domain_ops - Methods for irq_domain objects * @match: Match an interrupt controller device node to a host, returns diff --git a/include/linux/types.h b/include/linux/types.h index e5fa50345516..7f480db60231 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -210,6 +210,12 @@ typedef u32 phys_addr_t; typedef phys_addr_t resource_size_t; +/* + * This type is the placeholder for a hardware interrupt number. It has to be + * big enough to enclose whatever representation is used by a given platform. + */ +typedef unsigned long irq_hw_number_t; + typedef struct { int counter; } atomic_t; -- cgit v1.2.3 From 92ae03f2ef99fbc23bfa9080d6b58f25227bd7ef Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 6 Apr 2012 14:32:32 -0700 Subject: x86: merge 32/64-bit versions of 'strncpy_from_user()' and speed it up This merges the 32- and 64-bit versions of the x86 strncpy_from_user() by just rewriting it in C rather than the ancient inline asm versions that used lodsb/stosb and had been duplicated for (trivial) differences between the 32-bit and 64-bit versions. While doing that, it also speeds them up by doing the accesses a word at a time. Finally, the new routines also properly handle the case of hitting the end of the address space, which we have never done correctly before (fs/namei.c has a hack around it for that reason). Despite all these improvements, it actually removes more lines than it adds, due to the de-duplication. Also, we no longer export (or define) the legacy __strncpy_from_user() function (that was defined to not do the user permission checks), since it's not actually used anywhere, and the user address space checks are built in to the new code. Other architecture maintainers have been notified that the old hack in fs/namei.c will be going away in the 3.5 merge window, in case they copied the x86 approach of being a bit cavalier about the end of the address space. Cc: linux-arch@vger.kernel.org Cc: Ingo Molnar Cc: Peter Anvin" Signed-off-by: Linus Torvalds --- arch/x86/include/asm/uaccess.h | 2 + arch/x86/include/asm/uaccess_32.h | 5 -- arch/x86/include/asm/uaccess_64.h | 4 -- arch/x86/lib/usercopy.c | 103 ++++++++++++++++++++++++++++++++++++++ arch/x86/lib/usercopy_32.c | 87 -------------------------------- arch/x86/lib/usercopy_64.c | 49 ------------------ 6 files changed, 105 insertions(+), 145 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 8be5f54d9360..e0544597cfe7 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -557,6 +557,8 @@ struct __large_struct { unsigned long buf[100]; }; extern unsigned long copy_from_user_nmi(void *to, const void __user *from, unsigned long n); +extern __must_check long +strncpy_from_user(char *dst, const char __user *src, long count); /* * movsl can be slow when source and dest are not both 8-byte aligned diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 566e803cc602..8084bc73b18c 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -213,11 +213,6 @@ static inline unsigned long __must_check copy_from_user(void *to, return n; } -long __must_check strncpy_from_user(char *dst, const char __user *src, - long count); -long __must_check __strncpy_from_user(char *dst, - const char __user *src, long count); - /** * strlen_user: - Get the size of a string in user space. * @str: The string to measure. diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 1c66d30971ad..fcd4b6f3ef02 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -208,10 +208,6 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) } } -__must_check long -strncpy_from_user(char *dst, const char __user *src, long count); -__must_check long -__strncpy_from_user(char *dst, const char __user *src, long count); __must_check long strnlen_user(const char __user *str, long n); __must_check long __strnlen_user(const char __user *str, long n); __must_check long strlen_user(const char __user *str); diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index 97be9cb54483..57252c928f56 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c @@ -7,6 +7,8 @@ #include #include +#include + /* * best effort, GUP based copy_from_user() that is NMI-safe */ @@ -41,3 +43,104 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) return len; } EXPORT_SYMBOL_GPL(copy_from_user_nmi); + +static inline unsigned long count_bytes(unsigned long mask) +{ + mask = (mask - 1) & ~mask; + mask >>= 7; + return count_masked_bytes(mask); +} + +/* + * Do a strncpy, return length of string without final '\0'. + * 'count' is the user-supplied count (return 'count' if we + * hit it), 'max' is the address space maximum (and we return + * -EFAULT if we hit it). + */ +static inline long do_strncpy_from_user(char *dst, const char __user *src, long count, long max) +{ + long res = 0; + + /* + * Truncate 'max' to the user-specified limit, so that + * we only have one limit we need to check in the loop + */ + if (max > count) + max = count; + + while (max >= sizeof(unsigned long)) { + unsigned long c; + + /* Fall back to byte-at-a-time if we get a page fault */ + if (unlikely(__get_user(c,(unsigned long __user *)(src+res)))) + break; + /* This can write a few bytes past the NUL character, but that's ok */ + *(unsigned long *)(dst+res) = c; + c = has_zero(c); + if (c) + return res + count_bytes(c); + res += sizeof(unsigned long); + max -= sizeof(unsigned long); + } + + while (max) { + char c; + + if (unlikely(__get_user(c,src+res))) + return -EFAULT; + dst[res] = c; + if (!c) + return res; + res++; + max--; + } + + /* + * Uhhuh. We hit 'max'. But was that the user-specified maximum + * too? If so, that's ok - we got as much as the user asked for. + */ + if (res >= count) + return count; + + /* + * Nope: we hit the address space limit, and we still had more + * characters the caller would have wanted. That's an EFAULT. + */ + return -EFAULT; +} + +/** + * strncpy_from_user: - Copy a NUL terminated string from userspace. + * @dst: Destination address, in kernel space. This buffer must be at + * least @count bytes long. + * @src: Source address, in user space. + * @count: Maximum number of bytes to copy, including the trailing NUL. + * + * Copies a NUL-terminated string from userspace to kernel space. + * + * On success, returns the length of the string (not including the trailing + * NUL). + * + * If access to userspace fails, returns -EFAULT (some data may have been + * copied). + * + * If @count is smaller than the length of the string, copies @count bytes + * and returns @count. + */ +long +strncpy_from_user(char *dst, const char __user *src, long count) +{ + unsigned long max_addr, src_addr; + + if (unlikely(count <= 0)) + return 0; + + max_addr = current_thread_info()->addr_limit.seg; + src_addr = (unsigned long)src; + if (likely(src_addr < max_addr)) { + unsigned long max = max_addr - src_addr; + return do_strncpy_from_user(dst, src, count, max); + } + return -EFAULT; +} +EXPORT_SYMBOL(strncpy_from_user); diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index d9b094ca7aaa..ef2a6a5d78e3 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -32,93 +32,6 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon #define movsl_is_ok(a1, a2, n) \ __movsl_is_ok((unsigned long)(a1), (unsigned long)(a2), (n)) -/* - * Copy a null terminated string from userspace. - */ - -#define __do_strncpy_from_user(dst, src, count, res) \ -do { \ - int __d0, __d1, __d2; \ - might_fault(); \ - __asm__ __volatile__( \ - " testl %1,%1\n" \ - " jz 2f\n" \ - "0: lodsb\n" \ - " stosb\n" \ - " testb %%al,%%al\n" \ - " jz 1f\n" \ - " decl %1\n" \ - " jnz 0b\n" \ - "1: subl %1,%0\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl %5,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(0b,3b) \ - : "=&d"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \ - "=&D" (__d2) \ - : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ - : "memory"); \ -} while (0) - -/** - * __strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking. - * @dst: Destination address, in kernel space. This buffer must be at - * least @count bytes long. - * @src: Source address, in user space. - * @count: Maximum number of bytes to copy, including the trailing NUL. - * - * Copies a NUL-terminated string from userspace to kernel space. - * Caller must check the specified block with access_ok() before calling - * this function. - * - * On success, returns the length of the string (not including the trailing - * NUL). - * - * If access to userspace fails, returns -EFAULT (some data may have been - * copied). - * - * If @count is smaller than the length of the string, copies @count bytes - * and returns @count. - */ -long -__strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res; - __do_strncpy_from_user(dst, src, count, res); - return res; -} -EXPORT_SYMBOL(__strncpy_from_user); - -/** - * strncpy_from_user: - Copy a NUL terminated string from userspace. - * @dst: Destination address, in kernel space. This buffer must be at - * least @count bytes long. - * @src: Source address, in user space. - * @count: Maximum number of bytes to copy, including the trailing NUL. - * - * Copies a NUL-terminated string from userspace to kernel space. - * - * On success, returns the length of the string (not including the trailing - * NUL). - * - * If access to userspace fails, returns -EFAULT (some data may have been - * copied). - * - * If @count is smaller than the length of the string, copies @count bytes - * and returns @count. - */ -long -strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res = -EFAULT; - if (access_ok(VERIFY_READ, src, 1)) - __do_strncpy_from_user(dst, src, count, res); - return res; -} -EXPORT_SYMBOL(strncpy_from_user); - /* * Zero Userspace */ diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index b7c2849ffb66..0d0326f388c0 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -8,55 +8,6 @@ #include #include -/* - * Copy a null terminated string from userspace. - */ - -#define __do_strncpy_from_user(dst,src,count,res) \ -do { \ - long __d0, __d1, __d2; \ - might_fault(); \ - __asm__ __volatile__( \ - " testq %1,%1\n" \ - " jz 2f\n" \ - "0: lodsb\n" \ - " stosb\n" \ - " testb %%al,%%al\n" \ - " jz 1f\n" \ - " decq %1\n" \ - " jnz 0b\n" \ - "1: subq %1,%0\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movq %5,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(0b,3b) \ - : "=&r"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \ - "=&D" (__d2) \ - : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ - : "memory"); \ -} while (0) - -long -__strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res; - __do_strncpy_from_user(dst, src, count, res); - return res; -} -EXPORT_SYMBOL(__strncpy_from_user); - -long -strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res = -EFAULT; - if (access_ok(VERIFY_READ, src, 1)) - return __strncpy_from_user(dst, src, count); - return res; -} -EXPORT_SYMBOL(strncpy_from_user); - /* * Zero Userspace */ -- cgit v1.2.3 From e72d5c7e9c831f6e393c71dcd62acafbac2b58d0 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 11 Apr 2012 12:45:20 -0400 Subject: arch/tile: avoid unused variable warning in proc.c for tilegx Until we push the unaligned access support for tilegx, it's silly to have arch/tile/kernel/proc.c generate a warning about an unused variable. Extend the #ifdef to cover all the code and data for now. Signed-off-by: Chris Metcalf --- arch/tile/kernel/proc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/tile/kernel/proc.c b/arch/tile/kernel/proc.c index 7a9327046404..446a7f52cc11 100644 --- a/arch/tile/kernel/proc.c +++ b/arch/tile/kernel/proc.c @@ -146,7 +146,6 @@ static ctl_table unaligned_table[] = { }, {} }; -#endif static struct ctl_path tile_path[] = { { .procname = "tile" }, @@ -155,10 +154,9 @@ static struct ctl_path tile_path[] = { static int __init proc_sys_tile_init(void) { -#ifndef __tilegx__ /* FIXME: GX: no support for unaligned access yet */ register_sysctl_paths(tile_path, unaligned_table); -#endif return 0; } arch_initcall(proc_sys_tile_init); +#endif -- cgit v1.2.3 From 6fa6c8e25e95bdc73e92e4c96b8e3299169b616e Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 15 Feb 2012 15:06:08 -0700 Subject: irq_domain: Move irq_virq_count into NOMAP revmap This patch replaces the old global setting of irq_virq_count that is only used by the NOMAP mapping and instead uses a revmap_data property so that the maximum NOMAP allocation can be set per NOMAP irq_domain. There is exactly one user of irq_virq_count in-tree right now: PS3. Also, irq_virq_count is only useful for the NOMAP mapping. So, instead of having a single global irq_virq_count values, this change drops it entirely and added a max_irq argument to irq_domain_add_nomap(). That makes it a property of an individual nomap irq domain instead of a global system settting. Signed-off-by: Grant Likely Tested-by: Benjamin Herrenschmidt Cc: Thomas Gleixner Cc: Milton Miller --- arch/powerpc/platforms/cell/axon_msi.c | 2 +- arch/powerpc/platforms/cell/beat_interrupt.c | 2 +- arch/powerpc/platforms/powermac/smp.c | 2 +- arch/powerpc/platforms/ps3/interrupt.c | 3 +-- include/linux/irqdomain.h | 6 +++-- kernel/irq/irqdomain.c | 33 ++++++++-------------------- 6 files changed, 17 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index db360fc4cf0e..d09f3e8e6867 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -392,7 +392,7 @@ static int axon_msi_probe(struct platform_device *device) } memset(msic->fifo_virt, 0xff, MSIC_FIFO_SIZE_BYTES); - msic->irq_domain = irq_domain_add_nomap(dn, &msic_host_ops, msic); + msic->irq_domain = irq_domain_add_nomap(dn, 0, &msic_host_ops, msic); if (!msic->irq_domain) { printk(KERN_ERR "axon_msi: couldn't allocate irq_domain for %s\n", dn->full_name); diff --git a/arch/powerpc/platforms/cell/beat_interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c index e5c3a2c6090d..f9a48af335cb 100644 --- a/arch/powerpc/platforms/cell/beat_interrupt.c +++ b/arch/powerpc/platforms/cell/beat_interrupt.c @@ -239,7 +239,7 @@ void __init beatic_init_IRQ(void) ppc_md.get_irq = beatic_get_irq; /* Allocate an irq host */ - beatic_host = irq_domain_add_nomap(NULL, &beatic_pic_host_ops, NULL); + beatic_host = irq_domain_add_nomap(NULL, 0, &beatic_pic_host_ops, NULL); BUG_ON(beatic_host == NULL); irq_set_default_host(beatic_host); } diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index a81e5a88fbdf..b4ddaa3fbb29 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -192,7 +192,7 @@ static int psurge_secondary_ipi_init(void) { int rc = -ENOMEM; - psurge_host = irq_domain_add_nomap(NULL, &psurge_host_ops, NULL); + psurge_host = irq_domain_add_nomap(NULL, 0, &psurge_host_ops, NULL); if (psurge_host) psurge_secondary_virq = irq_create_direct_mapping(psurge_host); diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c index 2a4ff86cc21f..5f3b23220b8e 100644 --- a/arch/powerpc/platforms/ps3/interrupt.c +++ b/arch/powerpc/platforms/ps3/interrupt.c @@ -753,9 +753,8 @@ void __init ps3_init_IRQ(void) unsigned cpu; struct irq_domain *host; - host = irq_domain_add_nomap(NULL, &ps3_host_ops, NULL); + host = irq_domain_add_nomap(NULL, PS3_PLUG_MAX + 1, &ps3_host_ops, NULL); irq_set_default_host(host); - irq_set_virq_count(PS3_PLUG_MAX + 1); for_each_possible_cpu(cpu) { struct ps3_private *pd = &per_cpu(ps3_private, cpu); diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index ac17b9b2e7be..c65740d76e66 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -98,6 +98,9 @@ struct irq_domain { unsigned int size; unsigned int *revmap; } linear; + struct { + unsigned int max_irq; + } nomap; struct radix_tree_root tree; } revmap_data; const struct irq_domain_ops *ops; @@ -120,6 +123,7 @@ struct irq_domain *irq_domain_add_linear(struct device_node *of_node, const struct irq_domain_ops *ops, void *host_data); struct irq_domain *irq_domain_add_nomap(struct device_node *of_node, + unsigned int max_irq, const struct irq_domain_ops *ops, void *host_data); struct irq_domain *irq_domain_add_tree(struct device_node *of_node, @@ -128,7 +132,6 @@ struct irq_domain *irq_domain_add_tree(struct device_node *of_node, extern struct irq_domain *irq_find_host(struct device_node *node); extern void irq_set_default_host(struct irq_domain *host); -extern void irq_set_virq_count(unsigned int count); static inline struct irq_domain *irq_domain_add_legacy_isa( struct device_node *of_node, @@ -140,7 +143,6 @@ static inline struct irq_domain *irq_domain_add_legacy_isa( } extern struct irq_domain *irq_find_host(struct device_node *node); extern void irq_set_default_host(struct irq_domain *host); -extern void irq_set_virq_count(unsigned int count); extern unsigned int irq_create_mapping(struct irq_domain *host, diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index eb05e40f4553..d34413e78628 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -23,7 +23,6 @@ static LIST_HEAD(irq_domain_list); static DEFINE_MUTEX(irq_domain_mutex); static DEFINE_MUTEX(revmap_trees_mutex); -static unsigned int irq_virq_count = NR_IRQS; static struct irq_domain *irq_default_domain; /** @@ -184,13 +183,16 @@ struct irq_domain *irq_domain_add_linear(struct device_node *of_node, } struct irq_domain *irq_domain_add_nomap(struct device_node *of_node, + unsigned int max_irq, const struct irq_domain_ops *ops, void *host_data) { struct irq_domain *domain = irq_domain_alloc(of_node, IRQ_DOMAIN_MAP_NOMAP, ops, host_data); - if (domain) + if (domain) { + domain->revmap_data.nomap.max_irq = max_irq ? max_irq : ~0; irq_domain_add(domain); + } return domain; } @@ -262,22 +264,6 @@ void irq_set_default_host(struct irq_domain *domain) irq_default_domain = domain; } -/** - * irq_set_virq_count() - Set the maximum number of linux irqs - * @count: number of linux irqs, capped with NR_IRQS - * - * This is mainly for use by platforms like iSeries who want to program - * the virtual irq number in the controller to avoid the reverse mapping - */ -void irq_set_virq_count(unsigned int count) -{ - pr_debug("irq: Trying to set virq count to %d\n", count); - - BUG_ON(count < NUM_ISA_INTERRUPTS); - if (count < NR_IRQS) - irq_virq_count = count; -} - static int irq_setup_virq(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq) { @@ -320,13 +306,12 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain) pr_debug("irq: create_direct virq allocation failed\n"); return 0; } - if (virq >= irq_virq_count) { + if (virq >= domain->revmap_data.nomap.max_irq) { pr_err("ERROR: no free irqs available below %i maximum\n", - irq_virq_count); + domain->revmap_data.nomap.max_irq); irq_free_desc(virq); return 0; } - pr_debug("irq: create_direct obtained virq %d\n", virq); if (irq_setup_virq(domain, virq, virq)) { @@ -378,7 +363,7 @@ unsigned int irq_create_mapping(struct irq_domain *domain, return irq_domain_legacy_revmap(domain, hwirq); /* Allocate a virtual interrupt number */ - hint = hwirq % irq_virq_count; + hint = hwirq % nr_irqs; if (hint == 0) hint++; virq = irq_alloc_desc_from(hint, 0); @@ -516,7 +501,7 @@ unsigned int irq_find_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) { unsigned int i; - unsigned int hint = hwirq % irq_virq_count; + unsigned int hint = hwirq % nr_irqs; /* Look for default domain if nececssary */ if (domain == NULL) @@ -537,7 +522,7 @@ unsigned int irq_find_mapping(struct irq_domain *domain, if (data && (data->domain == domain) && (data->hwirq == hwirq)) return i; i++; - if (i >= irq_virq_count) + if (i >= nr_irqs) i = 1; } while(i != hint); return 0; -- cgit v1.2.3