From e33e89ab1a8d295de0500b697f4f31c3ceee9aa2 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Tue, 26 Sep 2006 10:52:27 +0200 Subject: [PATCH] x86: Add abilty to enable/disable nmi watchdog from procfs (update) Adds a new /proc/sys/kernel/nmi_watchdog call that will enable/disable the nmi watchdog. By entering a non-zero value here, a user can enable the nmi watchdog to monitor the online cpus in the system. By entering a zero value here, a user can disable the nmi watchdog and free up a performance counter which could then be utilized by the oprofile subsystem, otherwise oprofile may be short a counter when in use. Signed-off-by: Don Zickus Signed-off-by: Andi Kleen Cc: Andi Kleen Signed-off-by: Andrew Morton --- Documentation/filesystems/proc.txt | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 99902ae6804e..7db71d6fba82 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -1124,11 +1124,15 @@ debugging information is displayed on console. NMI switch that most IA32 servers have fires unknown NMI up, for example. If a system hangs up, try pressing the NMI switch. -[NOTE] - This function and oprofile share a NMI callback. Therefore this function - cannot be enabled when oprofile is activated. - And NMI watchdog will be disabled when the value in this file is set to - non-zero. +nmi_watchdog +------------ + +Enables/Disables the NMI watchdog on x86 systems. When the value is non-zero +the NMI watchdog is enabled and will continuously test all online cpus to +determine whether or not they are still functioning properly. + +Because the NMI watchdog shares registers with oprofile, by disabling the NMI +watchdog, oprofile may have more registers to utilize. 2.4 /proc/sys/vm - The virtual memory subsystem -- cgit v1.2.3 From 538b5b419c7ae39a4b2deb15278da36102e42346 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:28 +0200 Subject: [PATCH] Document backtracer selection options Signed-off-by: Andi Kleen --- Documentation/x86_64/boot-options.txt | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'Documentation') diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt index 6da24e7a56cb..4303e0c12476 100644 --- a/Documentation/x86_64/boot-options.txt +++ b/Documentation/x86_64/boot-options.txt @@ -245,6 +245,13 @@ Debugging newfallback: use new unwinder but fall back to old if it gets stuck (default) + call_trace=[old|both|newfallback|new] + old: use old inexact backtracer + new: use new exact dwarf2 unwinder + both: print entries from both + newfallback: use new unwinder but fall back to old if it gets + stuck (default) + Misc noreplacement Don't replace instructions with more appropriate ones -- cgit v1.2.3 From e2414910f212c52d9d7c64c99a22863488ac5b48 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:30 +0200 Subject: [PATCH] x86: Detect CFI support in the assembler at runtime ... instead of using a CONFIG option. The config option still controls if the resulting executable actually has unwind information. This is useful to prevent compilation errors when users select CONFIG_STACK_UNWIND on old binutils and also allows to use CFI in the future for non kernel debugging applications. Cc: jbeulich@novell.com Cc: sam@ravnborg.org Signed-off-by: Andi Kleen --- Documentation/kbuild/makefiles.txt | 5 +++++ arch/i386/Makefile | 4 ++++ arch/x86_64/Makefile | 3 +++ include/asm-x86_64/dwarf2.h | 2 +- scripts/Kbuild.include | 7 +++++++ 5 files changed, 20 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt index b7d6abb501a6..e2cbd59cf2d0 100644 --- a/Documentation/kbuild/makefiles.txt +++ b/Documentation/kbuild/makefiles.txt @@ -421,6 +421,11 @@ more details, with real examples. The second argument is optional, and if supplied will be used if first argument is not supported. + as-instr + as-instr checks if the assembler reports a specific instruction + and then outputs either option1 or option2 + C escapes are supported in the test instruction + cc-option cc-option is used to check if $(CC) supports a given option, and not supported to use an optional second option. diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 3e4adb1e2244..508cdbeb3a09 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -46,6 +46,10 @@ cflags-y += -ffreestanding # a lot more stack due to the lack of sharing of stacklots: CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then echo $(call cc-option,-fno-unit-at-a-time); fi ;) +# do binutils support CFI? +cflags-y += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,) +AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,) + CFLAGS += $(cflags-y) # Default subarch .c files diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile index 431bb4bc36cd..d6472ddf5f6e 100644 --- a/arch/x86_64/Makefile +++ b/arch/x86_64/Makefile @@ -54,6 +54,9 @@ endif cflags-y += $(call cc-option,-funit-at-a-time) # prevent gcc from generating any FP code by mistake cflags-y += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,) +# do binutils support CFI? +cflags-y += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,) +AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,) CFLAGS += $(cflags-y) CFLAGS_KERNEL += $(cflags-kernel-y) diff --git a/include/asm-x86_64/dwarf2.h b/include/asm-x86_64/dwarf2.h index 0744db777676..2b9368365fad 100644 --- a/include/asm-x86_64/dwarf2.h +++ b/include/asm-x86_64/dwarf2.h @@ -13,7 +13,7 @@ away for older version. */ -#ifdef CONFIG_UNWIND_INFO +#ifdef CONFIG_AS_CFI #define CFI_STARTPROC .cfi_startproc #define CFI_ENDPROC .cfi_endproc diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 3d523899fdc0..7adef12a0c26 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -63,6 +63,13 @@ as-option = $(shell if $(CC) $(CFLAGS) $(1) -Wa,-Z -c -o /dev/null \ -xassembler /dev/null > /dev/null 2>&1; then echo "$(1)"; \ else echo "$(2)"; fi ;) +# as-instr +# Usage: cflags-y += $(call as-instr, instr, option1, option2) + +as-instr = $(shell if echo -e "$(1)" | $(AS) -Z -o astest$$$$.out \ + 2>&1 >/dev/null ; then echo "$(2)"; else echo "$(3)"; fi; \ + rm -f astest$$$$.out) + # cc-option # Usage: cflags-y += $(call cc-option, -march=winchip-c6, -march=i586) -- cgit v1.2.3 From 352f7bae81faa2befa2a3c02b84478dce16b8fd6 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:31 +0200 Subject: [PATCH] Add stack documentation document from Keith Owens Describes the stack organization on x86-64. I changed it a bit and removed some obsolete information and the questions. Cc: kaos@sgi.com Signed-off-by: Andi Kleen --- Documentation/x86_64/kernel-stacks | 99 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 Documentation/x86_64/kernel-stacks (limited to 'Documentation') diff --git a/Documentation/x86_64/kernel-stacks b/Documentation/x86_64/kernel-stacks new file mode 100644 index 000000000000..bddfddd466ab --- /dev/null +++ b/Documentation/x86_64/kernel-stacks @@ -0,0 +1,99 @@ +Most of the text from Keith Owens, hacked by AK + +x86_64 page size (PAGE_SIZE) is 4K. + +Like all other architectures, x86_64 has a kernel stack for every +active thread. These thread stacks are THREAD_SIZE (2*PAGE_SIZE) big. +These stacks contain useful data as long as a thread is alive or a +zombie. While the thread is in user space the kernel stack is empty +except for the thread_info structure at the bottom. + +In addition to the per thread stacks, there are specialized stacks +associated with each cpu. These stacks are only used while the kernel +is in control on that cpu, when a cpu returns to user space the +specialized stacks contain no useful data. The main cpu stacks is + +* Interrupt stack. IRQSTACKSIZE + + Used for external hardware interrupts. If this is the first external + hardware interrupt (i.e. not a nested hardware interrupt) then the + kernel switches from the current task to the interrupt stack. Like + the split thread and interrupt stacks on i386 (with CONFIG_4KSTACKS), + this gives more room for kernel interrupt processing without having + to increase the size of every per thread stack. + + The interrupt stack is also used when processing a softirq. + +Switching to the kernel interrupt stack is done by software based on a +per CPU interrupt nest counter. This is needed because x86-64 "IST" +hardware stacks cannot nest without races. + +x86_64 also has a feature which is not available on i386, the ability +to automatically switch to a new stack for designated events such as +double fault or NMI, which makes it easier to handle these unusual +events on x86_64. This feature is called the Interrupt Stack Table +(IST). There can be up to 7 IST entries per cpu. The IST code is an +index into the Task State Segment (TSS), the IST entries in the TSS +point to dedicated stacks, each stack can be a different size. + +An IST is selected by an non-zero value in the IST field of an +interrupt-gate descriptor. When an interrupt occurs and the hardware +loads such a descriptor, the hardware automatically sets the new stack +pointer based on the IST value, then invokes the interrupt handler. If +software wants to allow nested IST interrupts then the handler must +adjust the IST values on entry to and exit from the interrupt handler. +(this is occasionally done, e.g. for debug exceptions) + +Events with different IST codes (i.e. with different stacks) can be +nested. For example, a debug interrupt can safely be interrupted by an +NMI. arch/x86_64/kernel/entry.S::paranoidentry adjusts the stack +pointers on entry to and exit from all IST events, in theory allowing +IST events with the same code to be nested. However in most cases, the +stack size allocated to an IST assumes no nesting for the same code. +If that assumption is ever broken then the stacks will become corrupt. + +The currently assigned IST stacks are :- + +* STACKFAULT_STACK. EXCEPTION_STKSZ (PAGE_SIZE). + + Used for interrupt 12 - Stack Fault Exception (#SS). + + This allows to recover from invalid stack segments. Rarely + happens. + +* DOUBLEFAULT_STACK. EXCEPTION_STKSZ (PAGE_SIZE). + + Used for interrupt 8 - Double Fault Exception (#DF). + + Invoked when handling a exception causes another exception. Happens + when the kernel is very confused (e.g. kernel stack pointer corrupt) + Using a separate stack allows to recover from it well enough in many + cases to still output an oops. + +* NMI_STACK. EXCEPTION_STKSZ (PAGE_SIZE). + + Used for non-maskable interrupts (NMI). + + NMI can be delivered at any time, including when the kernel is in the + middle of switching stacks. Using IST for NMI events avoids making + assumptions about the previous state of the kernel stack. + +* DEBUG_STACK. DEBUG_STKSZ + + Used for hardware debug interrupts (interrupt 1) and for software + debug interrupts (INT3). + + When debugging a kernel, debug interrupts (both hardware and + software) can occur at any time. Using IST for these interrupts + avoids making assumptions about the previous state of the kernel + stack. + +* MCE_STACK. EXCEPTION_STKSZ (PAGE_SIZE). + + Used for interrupt 18 - Machine Check Exception (#MC). + + MCE can be delivered at any time, including when the kernel is in the + middle of switching stacks. Using IST for MCE events avoids making + assumptions about the previous state of the kernel stack. + +For more details see the Intel IA32 or AMD AMD64 architecture manuals. -- cgit v1.2.3 From 027a51cef330433ba5924fc92fb25ef48bcfc1a3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:38 +0200 Subject: [PATCH] Document my tree in Documentation/HOWTO Signed-off-by: Andi Kleen --- Documentation/HOWTO | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/HOWTO b/Documentation/HOWTO index 915ae8c986c6..1d6560413cc5 100644 --- a/Documentation/HOWTO +++ b/Documentation/HOWTO @@ -358,7 +358,8 @@ Here is a list of some of the different kernel trees available: quilt trees: - USB, PCI, Driver Core, and I2C, Greg Kroah-Hartman kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/ - + - x86-64, partly i386, Andi Kleen + ftp.firstfloor.org:/pub/ak/x86_64/quilt/ Bug Reporting ------------- -- cgit v1.2.3 From 0637a70a5db98182d9ad3d6ae1ee30acf20afde9 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:41 +0200 Subject: [PATCH] x86: Allow disabling early pci scans with pci=noearly or disallowing conf1 Some buggy systems can machine check when config space accesses happen for some non existent devices. i386/x86-64 do some early device scans that might trigger this. Allow pci=noearly to disable this. Also when type 1 is disabling also don't do any early accesses which are always type1. This moves the pci= configuration parsing to be a early parameter. I don't think this can break anything because it only changes a single global that is only used by PCI. Cc: gregkh@suse.de Cc: Trammell Hudson Signed-off-by: Andi Kleen --- Documentation/kernel-parameters.txt | 6 +++++- arch/i386/kernel/acpi/earlyquirk.c | 6 +++++- arch/i386/pci/common.c | 4 ++++ arch/i386/pci/early.c | 8 ++++++++ arch/i386/pci/pci.h | 1 + arch/x86_64/kernel/aperture.c | 2 +- arch/x86_64/kernel/early-quirks.c | 4 ++++ arch/x86_64/kernel/pci-calgary.c | 3 +++ arch/x86_64/kernel/vsmp.c | 3 +++ arch/x86_64/mm/k8topology.c | 3 +++ drivers/pci/pci.c | 5 ++--- include/asm-x86_64/pci-direct.h | 2 ++ 12 files changed, 41 insertions(+), 6 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 71d05f481727..4ae99f6e9938 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1240,7 +1240,11 @@ running once the system is up. bootloader. This is currently used on IXP2000 systems where the bus has to be configured a certain way for adjunct CPUs. - + noearly [X86] Don't do any early type 1 scanning. + This might help on some broken boards which + machine check when some devices' config space + is read. But various workarounds are disabled + and some IOMMU drivers will not work. pcmv= [HW,PCMCIA] BadgePAD 4 pd. [PARIDE] diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c index 1649a175a206..fe799b11ac0a 100644 --- a/arch/i386/kernel/acpi/earlyquirk.c +++ b/arch/i386/kernel/acpi/earlyquirk.c @@ -48,7 +48,11 @@ void __init check_acpi_pci(void) int num, slot, func; /* Assume the machine supports type 1. If not it will - always read ffffffff and should not have any side effect. */ + always read ffffffff and should not have any side effect. + Actually a few buggy systems can machine check. Allow the user + to disable it by command line option at least -AK */ + if (!early_pci_allowed()) + return; /* Poor man's PCI discovery */ for (num = 0; num < 32; num++) { diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c index 0a362e3aeac5..68bce194e688 100644 --- a/arch/i386/pci/common.c +++ b/arch/i386/pci/common.c @@ -242,6 +242,10 @@ char * __devinit pcibios_setup(char *str) acpi_noirq_set(); return NULL; } + else if (!strcmp(str, "noearly")) { + pci_probe |= PCI_PROBE_NOEARLY; + return NULL; + } #ifndef CONFIG_X86_VISWS else if (!strcmp(str, "usepirqmask")) { pci_probe |= PCI_USE_PIRQ_MASK; diff --git a/arch/i386/pci/early.c b/arch/i386/pci/early.c index b1f7f40d809b..713d6c866cae 100644 --- a/arch/i386/pci/early.c +++ b/arch/i386/pci/early.c @@ -1,6 +1,8 @@ #include +#include #include #include +#include "pci.h" /* Direct PCI access. This is used for PCI accesses in early boot before the PCI subsystem works. */ @@ -42,3 +44,9 @@ void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); outl(val, 0xcfc); } + +int early_pci_allowed(void) +{ + return (pci_probe & (PCI_PROBE_CONF1|PCI_PROBE_NOEARLY)) == + PCI_PROBE_CONF1; +} diff --git a/arch/i386/pci/pci.h b/arch/i386/pci/pci.h index 8a7cf1f23684..1814f74569c6 100644 --- a/arch/i386/pci/pci.h +++ b/arch/i386/pci/pci.h @@ -17,6 +17,7 @@ #define PCI_PROBE_CONF2 0x0004 #define PCI_PROBE_MMCONF 0x0008 #define PCI_PROBE_MASK 0x000f +#define PCI_PROBE_NOEARLY 0x0010 #define PCI_NO_SORT 0x0100 #define PCI_BIOS_SORT 0x0200 diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c index f851e1f9c82a..b487396c4c5b 100644 --- a/arch/x86_64/kernel/aperture.c +++ b/arch/x86_64/kernel/aperture.c @@ -212,7 +212,7 @@ void __init iommu_hole_init(void) u64 aper_base, last_aper_base = 0; int valid_agp = 0; - if (iommu_aperture_disabled || !fix_aperture) + if (iommu_aperture_disabled || !fix_aperture || !early_pci_allowed()) return; printk("Checking aperture...\n"); diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c index d637cff1c4b1..208e38a372c1 100644 --- a/arch/x86_64/kernel/early-quirks.c +++ b/arch/x86_64/kernel/early-quirks.c @@ -82,6 +82,10 @@ static struct chipset early_qrk[] = { void __init early_quirks(void) { int num, slot, func; + + if (!early_pci_allowed()) + return; + /* Poor man's PCI discovery */ for (num = 0; num < 32; num++) { for (slot = 0; slot < 32; slot++) { diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c index 466588f95601..cfb09b07ae99 100644 --- a/arch/x86_64/kernel/pci-calgary.c +++ b/arch/x86_64/kernel/pci-calgary.c @@ -924,6 +924,9 @@ void __init detect_calgary(void) if (swiotlb || no_iommu || iommu_detected) return; + if (!early_pci_allowed()) + return; + specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE); for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { diff --git a/arch/x86_64/kernel/vsmp.c b/arch/x86_64/kernel/vsmp.c index 92f70c74965f..044e852bd25e 100644 --- a/arch/x86_64/kernel/vsmp.c +++ b/arch/x86_64/kernel/vsmp.c @@ -20,6 +20,9 @@ static int __init vsmp_init(void) void *address; unsigned int cap, ctl; + if (!early_pci_allowed()) + return 0; + /* Check if we are running on a ScaleMP vSMP box */ if ((read_pci_config_16(0, 0x1f, 0, PCI_VENDOR_ID) != PCI_VENDOR_ID_SCALEMP) || (read_pci_config_16(0, 0x1f, 0, PCI_DEVICE_ID) != PCI_DEVICE_ID_SCALEMP_VSMP_CTL)) diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c index 7c45c2d2b8b2..5cf594f9230d 100644 --- a/arch/x86_64/mm/k8topology.c +++ b/arch/x86_64/mm/k8topology.c @@ -54,6 +54,9 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) nodes_clear(nodes_parsed); + if (!early_pci_allowed()) + return -1; + nb = find_northbridge(); if (nb < 0) return nb; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 9f79dd6d51ab..684deb6b03aa 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -953,13 +953,12 @@ static int __devinit pci_setup(char *str) } str = k; } - return 1; + return 0; } +early_param("pci", pci_setup); device_initcall(pci_init); -__setup("pci=", pci_setup); - #if defined(CONFIG_ISA) || defined(CONFIG_EISA) /* FIXME: Some boxes have multiple ISA bridges! */ struct pci_dev *isa_bridge; diff --git a/include/asm-x86_64/pci-direct.h b/include/asm-x86_64/pci-direct.h index 9d916cdaa18e..eba9cb471df3 100644 --- a/include/asm-x86_64/pci-direct.h +++ b/include/asm-x86_64/pci-direct.h @@ -11,4 +11,6 @@ extern u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset); extern u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset); extern void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, u32 val); +extern int early_pci_allowed(void); + #endif -- cgit v1.2.3