From be274eeaf20b4c7155242645d5e2c48b023e609b Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Thu, 7 Dec 2006 02:14:04 +0100 Subject: [PATCH] i386: extend bzImage protocol for relocatable protected mode kernel Extend bzImage protocol to enable bootloaders to load a completely relocatable bzImage. Now protected mode component of kernel is also relocatable and a boot-loader can load the protected mode component at a different physical address than 1MB. (If kernel was built with CONFIG_RELOCATABLE) Kexec can make use of it to load this kernel at a different physical address to capture kernel crash dumps. Signed-off-by: Vivek Goyal Signed-off-by: Andi Kleen Cc: Andi Kleen Signed-off-by: Andrew Morton --- Documentation/i386/boot.txt | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'Documentation') diff --git a/Documentation/i386/boot.txt b/Documentation/i386/boot.txt index c51314b1a463..cb28254f1550 100644 --- a/Documentation/i386/boot.txt +++ b/Documentation/i386/boot.txt @@ -35,6 +35,8 @@ Protocol 2.03: (Kernel 2.4.18-pre1) Explicitly makes the highest possible initrd address available to the bootloader. Protocol 2.04: (Kernel 2.6.14) Extend the syssize field to four bytes. +Protocol 2.05: (Kernel 2.6.20) Make protected mode kernel relocatable. + Introduce relocatable_kernel and kernel_alignment fields. **** MEMORY LAYOUT @@ -129,6 +131,8 @@ Offset Proto Name Meaning 0226/2 N/A pad1 Unused 0228/4 2.02+ cmd_line_ptr 32-bit pointer to the kernel command line 022C/4 2.03+ initrd_addr_max Highest legal initrd address +0230/4 2.04+ kernel_alignment Physical addr alignment required for kernel +0234/1 2.04+ relocatable_kernel Whether kernel is relocatable or not (1) For backwards compatibility, if the setup_sects field contains 0, the real value is 4. 
-- cgit v1.2.3 From b026872601976f666bae77b609dc490d1834bf77 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:06 +0100 Subject: [PATCH] x86-64: Try multiple timer variants in check_timer Instead of adding all kinds of more quirks try various timer routing variants in check_timer. In particular this tries to handle quirks from: - Nvidia NF2-4 reference BIOS: wrong timer override - Asus: Wrong timer override but no HPET table - ATI: require timer disabled in 8259 - Some boards: require timer enabled in 8259 We just try many of the known variants in the hopefully right order in check_timer. Trying pin 0/2 on Nvidia suggested by Tim Hockin. TBD Experimental. Needs a lot of testing Signed-off-by: Andi Kleen --- Documentation/x86_64/boot-options.txt | 4 -- arch/x86_64/kernel/early-quirks.c | 5 -- arch/x86_64/kernel/io_apic.c | 124 ++++++++++++++-------------------- 3 files changed, 51 insertions(+), 82 deletions(-) (limited to 'Documentation') diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt index f3c57f43ba64..ab9b1c046c00 100644 --- a/Documentation/x86_64/boot-options.txt +++ b/Documentation/x86_64/boot-options.txt @@ -52,10 +52,6 @@ APICs apicmaintimer. Useful when your PIT timer is totally broken. - disable_8254_timer / enable_8254_timer - Enable interrupt 0 timer routing over the 8254 in addition to over - the IO-APIC. The kernel tries to set a sensible default. - Early Console syntax: earlyprintk=vga diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c index 68273bff58cc..fb0c6da41b7e 100644 --- a/arch/x86_64/kernel/early-quirks.c +++ b/arch/x86_64/kernel/early-quirks.c @@ -69,11 +69,6 @@ static void nvidia_bugs(void) static void ati_bugs(void) { - if (timer_over_8254 == 1) { - timer_over_8254 = 0; - printk(KERN_INFO - "ATI board detected. 
Disabling timer routing over 8254.\n"); - } } struct chipset { diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index f71461b1f03d..88fcc4ebbf6e 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -55,10 +55,6 @@ int sis_apic_bug; /* not actually supported, dummy for compile */ static int no_timer_check; -static int disable_timer_pin_1 __initdata; - -int timer_over_8254 __initdata = 1; - /* Where if anywhere is the i8259 connect in external int mode */ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; @@ -348,29 +344,6 @@ static int __init disable_ioapic_setup(char *str) } early_param("noapic", disable_ioapic_setup); -/* Actually the next is obsolete, but keep it for paranoid reasons -AK */ -static int __init disable_timer_pin_setup(char *arg) -{ - disable_timer_pin_1 = 1; - return 1; -} -__setup("disable_timer_pin_1", disable_timer_pin_setup); - -static int __init setup_disable_8254_timer(char *s) -{ - timer_over_8254 = -1; - return 1; -} -static int __init setup_enable_8254_timer(char *s) -{ - timer_over_8254 = 2; - return 1; -} - -__setup("disable_8254_timer", setup_disable_8254_timer); -__setup("enable_8254_timer", setup_enable_8254_timer); - - /* * Find the IRQ entry number of a certain pin. */ @@ -1579,10 +1552,33 @@ static inline void unlock_ExtINT_logic(void) * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ * is so screwy. Thanks to Brian Perkins for testing/hacking this beast * fanatically on his truly buggy board. - * - * FIXME: really need to revamp this for modern platforms only. */ -static inline void check_timer(void) + +static int try_apic_pin(int apic, int pin, char *msg) +{ + apic_printk(APIC_VERBOSE, KERN_INFO + "..TIMER: trying IO-APIC=%d PIN=%d %s", + apic, pin, msg); + + /* + * Ok, does IRQ0 through the IOAPIC work? 
+ */ + if (!no_timer_check && timer_irq_works()) { + nmi_watchdog_default(); + if (nmi_watchdog == NMI_IO_APIC) { + disable_8259A_irq(0); + setup_nmi(); + enable_8259A_irq(0); + } + return 1; + } + clear_IO_APIC_pin(apic, pin); + apic_printk(APIC_QUIET, KERN_ERR " .. failed\n"); + return 0; +} + +/* The function from hell */ +static void check_timer(void) { int apic1, pin1, apic2, pin2; int vector; @@ -1603,61 +1599,43 @@ static inline void check_timer(void) */ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); - if (timer_over_8254 > 0) - enable_8259A_irq(0); pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); pin2 = ioapic_i8259.pin; apic2 = ioapic_i8259.apic; - apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", - vector, apic1, pin1, apic2, pin2); + /* Do this first, otherwise we get double interrupts on ATI boards */ + if ((pin1 != -1) && try_apic_pin(apic1, pin1,"with 8259 IRQ0 disabled")) + return; - if (pin1 != -1) { - /* - * Ok, does IRQ0 through the IOAPIC work? - */ - unmask_IO_APIC_irq(0); - if (!no_timer_check && timer_irq_works()) { - nmi_watchdog_default(); - if (nmi_watchdog == NMI_IO_APIC) { - disable_8259A_irq(0); - setup_nmi(); - enable_8259A_irq(0); - } - if (disable_timer_pin_1 > 0) - clear_IO_APIC_pin(0, pin1); - return; - } - clear_IO_APIC_pin(apic1, pin1); - apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not " - "connected to IO-APIC\n"); - } + /* Now try again with IRQ0 8259A enabled. + Assumes timer is on IO-APIC 0 ?!? */ + enable_8259A_irq(0); + unmask_IO_APIC_irq(0); + if (try_apic_pin(apic1, pin1, "with 8259 IRQ0 enabled")) + return; + disable_8259A_irq(0); - apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) " - "through the 8259A ... 
"); + /* Always try pin0 and pin2 on APIC 0 to handle buggy timer overrides + on Nvidia boards */ + if (!(apic1 == 0 && pin1 == 0) && + try_apic_pin(0, 0, "fallback with 8259 IRQ0 disabled")) + return; + if (!(apic1 == 0 && pin1 == 2) && + try_apic_pin(0, 2, "fallback with 8259 IRQ0 disabled")) + return; + + /* Then try pure 8259A routing on the 8259 as reported by BIOS*/ + enable_8259A_irq(0); if (pin2 != -1) { - apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...", - apic2, pin2); - /* - * legacy devices should be connected to IO APIC #0 - */ setup_ExtINT_IRQ0_pin(apic2, pin2, vector); - if (timer_irq_works()) { - apic_printk(APIC_VERBOSE," works.\n"); - nmi_watchdog_default(); - if (nmi_watchdog == NMI_IO_APIC) { - setup_nmi(); - } + if (try_apic_pin(apic2,pin2,"8259A broadcast ExtINT from BIOS")) return; - } - /* - * Cleanup, just in case ... - */ - clear_IO_APIC_pin(apic2, pin2); } - apic_printk(APIC_VERBOSE," failed.\n"); + + /* Tried all possibilities to go through the IO-APIC. Now come the + really cheesy fallbacks. */ if (nmi_watchdog == NMI_IO_APIC) { printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); -- cgit v1.2.3 From bff6547bb6a4e82c399d74e7fba78b12d2f162ed Mon Sep 17 00:00:00 2001 From: Muli Ben-Yehuda Date: Thu, 7 Dec 2006 02:14:07 +0100 Subject: [PATCH] Calgary: allow compiling Calgary in but not using it by default This patch makes it possible to compile Calgary in but not use it by default. In this mode, use 'iommu=calgary' to activate it. 
Signed-off-by: Muli Ben-Yehuda Signed-off-by: Jon Mason Signed-off-by: Andi Kleen --- Documentation/x86_64/boot-options.txt | 3 ++- arch/x86_64/Kconfig | 11 +++++++++++ arch/x86_64/kernel/pci-calgary.c | 9 +++++++++ arch/x86_64/kernel/pci-dma.c | 5 +++++ include/asm-x86_64/calgary.h | 2 ++ 5 files changed, 29 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt index ab9b1c046c00..dbdcaf68e3ea 100644 --- a/Documentation/x86_64/boot-options.txt +++ b/Documentation/x86_64/boot-options.txt @@ -179,7 +179,7 @@ PCI IOMMU iommu=[size][,noagp][,off][,force][,noforce][,leak][,memaper[=order]][,merge] - [,forcesac][,fullflush][,nomerge][,noaperture] + [,forcesac][,fullflush][,nomerge][,noaperture][,calgary] size set size of iommu (in bytes) noagp don't initialize the AGP driver and use full aperture. off don't use the IOMMU @@ -200,6 +200,7 @@ IOMMU buffering. nodac Forbid DMA >4GB panic Always panic when IOMMU overflows + calgary Use the Calgary IOMMU if it is available swiotlb=pages[,force] diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 010d2265f1cf..5cb509dbffe4 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -455,6 +455,17 @@ config CALGARY_IOMMU Normally the kernel will make the right choice by itself. If unsure, say Y. +config CALGARY_IOMMU_ENABLED_BY_DEFAULT + bool "Should Calgary be enabled by default?" + default y + depends on CALGARY_IOMMU + help + Should Calgary be enabled by default? if you choose 'y', Calgary + will be used (if it exists). If you choose 'n', Calgary will not be + used even if it exists. If you choose 'n' and would like to use + Calgary anyway, pass 'iommu=calgary' on the kernel command line. + If unsure, say Y. 
+ # need this always selected by IOMMU for the VIA workaround config SWIOTLB bool diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c index 8a1e4f35bc3c..0ddf29dae7e0 100644 --- a/arch/x86_64/kernel/pci-calgary.c +++ b/arch/x86_64/kernel/pci-calgary.c @@ -43,6 +43,12 @@ #include #include +#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT +int use_calgary __read_mostly = 1; +#else +int use_calgary __read_mostly = 0; +#endif /* CONFIG_CALGARY_DEFAULT_ENABLED */ + #define PCI_DEVICE_ID_IBM_CALGARY 0x02a1 #define PCI_VENDOR_DEVICE_ID_CALGARY \ (PCI_VENDOR_ID_IBM | PCI_DEVICE_ID_IBM_CALGARY << 16) @@ -1061,6 +1067,9 @@ void __init detect_calgary(void) if (swiotlb || no_iommu || iommu_detected) return; + if (!use_calgary) + return; + if (!early_pci_allowed()) return; diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c index f8d857453f8a..683b7a5c1ab3 100644 --- a/arch/x86_64/kernel/pci-dma.c +++ b/arch/x86_64/kernel/pci-dma.c @@ -296,6 +296,11 @@ __init int iommu_setup(char *p) gart_parse_options(p); #endif +#ifdef CONFIG_CALGARY_IOMMU + if (!strncmp(p, "calgary", 7)) + use_calgary = 1; +#endif /* CONFIG_CALGARY_IOMMU */ + p += strcspn(p, ","); if (*p == ',') ++p; diff --git a/include/asm-x86_64/calgary.h b/include/asm-x86_64/calgary.h index 6b93f5a3a5c8..7ee900645719 100644 --- a/include/asm-x86_64/calgary.h +++ b/include/asm-x86_64/calgary.h @@ -51,6 +51,8 @@ struct iommu_table { #define TCE_TABLE_SIZE_4M 6 #define TCE_TABLE_SIZE_8M 7 +extern int use_calgary; + #ifdef CONFIG_CALGARY_IOMMU extern int calgary_iommu_init(void); extern void detect_calgary(void); -- cgit v1.2.3 From 8542b200cbe5609edd7aae0c304c091a1c290452 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Thu, 7 Dec 2006 02:14:09 +0100 Subject: [PATCH] paravirt: Add option to allow skipping the timer check Add a way to disable the timer IRQ routing check via a boot option. 
The VMI timer code uses this to avoid triggering the pester Mingo code, which probes for some very unusual and broken motherboard routings. It fires 100% of the time when using a paravirtual delay mechanism instead of using a realtime delay, since there is no elapsed real time, and the 4 timer IRQs have not yet been delivered. In addition, it is entirely possible, though improbable, that this bug could surface on real hardware which picks a particularly bad time to enter SMM mode, causing a long latency during one of the timer IRQs. While here, make check_timer be __init. Signed-off-by: Zachary Amsden Signed-off-by: Andi Kleen [chrisw: use no_timer_check to bring inline with x86_64 as per Andi's request] Signed-off-by: Chris Wright Cc: Andi Kleen Signed-off-by: Andrew Morton --- Documentation/kernel-parameters.txt | 7 +++++-- arch/i386/kernel/io_apic.c | 16 ++++++++++++++-- 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 2e1898e4e8fd..4e90aa427aea 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -599,8 +599,6 @@ and is between 256 and 4096 characters. It is defined in the file hugepages= [HW,IA-32,IA-64] Maximal number of HugeTLB pages. - noirqbalance [IA-32,SMP,KNL] Disable kernel irq balancing - i8042.direct [HW] Put keyboard port into non-translated mode i8042.dumbkbd [HW] Pretend that controller can only read data from keyboard and cannot control its state @@ -1052,9 +1050,14 @@ and is between 256 and 4096 characters. It is defined in the file in certain environments such as networked servers or real-time systems. + noirqbalance [IA-32,SMP,KNL] Disable kernel irq balancing + noirqdebug [IA-32] Disables the code which attempts to detect and disable unhandled interrupt sources. + no_timer_check [IA-32,X86_64,APIC] Disables the code which tests for + broken timer IRQ sources. 
+ noisapnp [ISAPNP] Disables ISA PnP code. noinitrd [RAM] Tells the kernel not to load any configured diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index e33b7a845299..993150f206ed 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -1932,6 +1932,15 @@ static void __init setup_ioapic_ids_from_mpc(void) static void __init setup_ioapic_ids_from_mpc(void) { } #endif +static int no_timer_check __initdata; + +static int __init notimercheck(char *s) +{ + no_timer_check = 1; + return 1; +} +__setup("no_timer_check", notimercheck); + /* * There is a nasty bug in some older SMP boards, their mptable lies * about the timer IRQ. We do the following to work around the situation: @@ -1940,10 +1949,13 @@ static void __init setup_ioapic_ids_from_mpc(void) { } * - if this function detects that timer IRQs are defunct, then we fall * back to ISA timer IRQs */ -static int __init timer_irq_works(void) +int __init timer_irq_works(void) { unsigned long t1 = jiffies; + if (no_timer_check) + return 1; + local_irq_enable(); /* Let ten ticks pass... */ mdelay((10 * 1000) / HZ); @@ -2214,7 +2226,7 @@ int timer_uses_ioapic_pin_0; * is so screwy. Thanks to Brian Perkins for testing/hacking this beast * fanatically on his truly buggy board. */ -static inline void check_timer(void) +static inline void __init check_timer(void) { int apic1, pin1, apic2, pin2; int vector; -- cgit v1.2.3 From 0741f4d207a644482d7a040f05cd264c98cf7ee8 Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Thu, 7 Dec 2006 02:14:11 +0100 Subject: [PATCH] x86: add sysctl for kstack_depth_to_print Add sysctl for kstack_depth_to_print. This lets users change the amount of raw stack data printed in dump_stack() without having to reboot. 
Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Andi Kleen --- Documentation/sysctl/kernel.txt | 8 ++++++++ arch/i386/kernel/traps.c | 2 +- arch/x86_64/kernel/traps.c | 2 +- include/asm-x86_64/stacktrace.h | 2 ++ kernel/sysctl.c | 9 +++++++++ 5 files changed, 21 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 0bc7f1e3c9e6..5922e84d9133 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -27,6 +27,7 @@ show up in /proc/sys/kernel: - hotplug - java-appletviewer [ binfmt_java, obsolete ] - java-interpreter [ binfmt_java, obsolete ] +- kstack_depth_to_print [ X86 only ] - l2cr [ PPC only ] - modprobe ==> Documentation/kmod.txt - msgmax @@ -170,6 +171,13 @@ This flag controls the L2 cache of G3 processor boards. If ============================================================== +kstack_depth_to_print: (X86 only) + +Controls the number of words to print when dumping the raw +kernel stack. 
+ +============================================================== + osrelease, ostype & version: # cat osrelease diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 7b2f9f022089..1d48a75fa338 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -91,7 +91,7 @@ asmlinkage void alignment_check(void); asmlinkage void spurious_interrupt_bug(void); asmlinkage void machine_check(void); -static int kstack_depth_to_print = 24; +int kstack_depth_to_print = 24; #ifdef CONFIG_STACK_UNWIND static int call_trace = 1; #else diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 264db33476ab..75ceccee178c 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -108,7 +108,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) preempt_enable_no_resched(); } -static int kstack_depth_to_print = 12; +int kstack_depth_to_print = 12; #ifdef CONFIG_STACK_UNWIND static int call_trace = 1; #else diff --git a/include/asm-x86_64/stacktrace.h b/include/asm-x86_64/stacktrace.h index 5eb9799bef76..6f0b54594307 100644 --- a/include/asm-x86_64/stacktrace.h +++ b/include/asm-x86_64/stacktrace.h @@ -1,6 +1,8 @@ #ifndef _ASM_STACKTRACE_H #define _ASM_STACKTRACE_H 1 +extern int kstack_depth_to_print; + /* Generic stack tracer with callbacks */ struct stacktrace_ops { diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 09e569f4792b..6fc5e17086f4 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -54,6 +54,7 @@ extern int proc_nr_files(ctl_table *table, int write, struct file *filp, #ifdef CONFIG_X86 #include +#include #endif #if defined(CONFIG_SYSCTL) @@ -707,6 +708,14 @@ static ctl_table kern_table[] = { .mode = 0444, .proc_handler = &proc_dointvec, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "kstack_depth_to_print", + .data = &kstack_depth_to_print, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #endif #if defined(CONFIG_MMU) { -- cgit v1.2.3 From 
a1a70c25bed75ed36ed48bbe18b9029428d2452d Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 7 Dec 2006 02:14:12 +0100 Subject: [PATCH] i386: always enable regparm -mregparm=3 has been enabled by default for some time on i386, and AFAIK there aren't any problems with it left. This patch removes the REGPARM config option and sets -mregparm=3 unconditionally. Signed-off-by: Adrian Bunk Signed-off-by: Andi Kleen --- Documentation/stable_api_nonsense.txt | 3 --- arch/i386/Kconfig | 14 -------------- arch/i386/Makefile | 4 +--- include/asm-i386/module.h | 8 +------- 4 files changed, 2 insertions(+), 27 deletions(-) (limited to 'Documentation') diff --git a/Documentation/stable_api_nonsense.txt b/Documentation/stable_api_nonsense.txt index f39c9d714db3..a2afca3b2bab 100644 --- a/Documentation/stable_api_nonsense.txt +++ b/Documentation/stable_api_nonsense.txt @@ -62,9 +62,6 @@ consider the following facts about the Linux kernel: - different structures can contain different fields - Some functions may not be implemented at all, (i.e. some locks compile away to nothing for non-SMP builds.) - - Parameter passing of variables from function to function can be - done in different ways (the CONFIG_REGPARM option controls - this.) - Memory within the kernel can be aligned in different ways, depending on the build options. - Linux runs on a wide range of different processor architectures. diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index bb1fa061c6cf..b6b2df40ca78 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -721,20 +721,6 @@ config BOOT_IOREMAP depends on (((X86_SUMMIT || X86_GENERICARCH) && NUMA) || (X86 && EFI)) default y -config REGPARM - bool "Use register arguments" - default y - help - Compile the kernel with -mregparm=3. This instructs gcc to use - a more efficient function call ABI which passes the first three - arguments of a function call via registers, which results in denser - and faster code. 
- - If this option is disabled, then the default ABI of passing - arguments via the stack is used. - - If unsure, say Y. - config SECCOMP bool "Enable seccomp to safely compute untrusted bytecode" depends on PROC_FS diff --git a/arch/i386/Makefile b/arch/i386/Makefile index d1aca52bf690..f7ac1aea1d8a 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -31,7 +31,7 @@ LDFLAGS_vmlinux := --emit-relocs endif CHECKFLAGS += -D__i386__ -CFLAGS += -pipe -msoft-float +CFLAGS += -pipe -msoft-float -mregparm=3 # prevent gcc from keeping the stack 16 byte aligned CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2) @@ -39,8 +39,6 @@ CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2) # CPU-specific tuning. Anything which can be shared with UML should go here. include $(srctree)/arch/i386/Makefile.cpu -cflags-$(CONFIG_REGPARM) += -mregparm=3 - # temporary until string.h is fixed cflags-y += -ffreestanding diff --git a/include/asm-i386/module.h b/include/asm-i386/module.h index fe5ae42e0273..02f8f541cbe0 100644 --- a/include/asm-i386/module.h +++ b/include/asm-i386/module.h @@ -62,18 +62,12 @@ struct mod_arch_specific #error unknown processor family #endif -#ifdef CONFIG_REGPARM -#define MODULE_REGPARM "REGPARM " -#else -#define MODULE_REGPARM "" -#endif - #ifdef CONFIG_4KSTACKS #define MODULE_STACKSIZE "4KSTACKS " #else #define MODULE_STACKSIZE "" #endif -#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_REGPARM MODULE_STACKSIZE +#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE #endif /* _ASM_I386_MODULE_H */ -- cgit v1.2.3 From 6d0185ea611276fdf81991d7774d396bdc1ae392 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 7 Dec 2006 02:14:13 +0100 Subject: [PATCH] unwinder: Add debugging output to the Dwarf2 unwinder Add debugging printks to the unwinder to allow easier debugging when something goes wrong with it. 
This can be controlled with the new unwind_debug=N option Most output is given by N=1 AK: Added documentation of unwind_debug= Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- Documentation/kernel-parameters.txt | 3 + kernel/unwind.c | 113 ++++++++++++++++++++++++++++++------ 2 files changed, 99 insertions(+), 17 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4e90aa427aea..d34fd6a28faa 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1735,6 +1735,9 @@ and is between 256 and 4096 characters. It is defined in the file norandmaps Don't use address space randomization Equivalent to echo 0 > /proc/sys/kernel/randomize_va_space + unwind_debug=N N > 0 will enable dwarf2 unwinder debugging + This is useful to get more information why + you got a "dwarf2 unwinder stuck" ______________________________________________________________________ diff --git a/kernel/unwind.c b/kernel/unwind.c index 7e721f104105..209e248517db 100644 --- a/kernel/unwind.c +++ b/kernel/unwind.c @@ -137,6 +137,17 @@ struct unwind_state { static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 }; +static unsigned unwind_debug; +static int __init unwind_debug_setup(char *s) +{ + unwind_debug = simple_strtoul(s, NULL, 0); + return 1; +} +__setup("unwind_debug=", unwind_debug_setup); +#define dprintk(lvl, fmt, args...) 
\ + ((void)(lvl > unwind_debug \ + || printk(KERN_DEBUG "unwind: " fmt "\n", ##args))) + static struct unwind_table *find_table(unsigned long pc) { struct unwind_table *table; @@ -281,6 +292,7 @@ static void __init setup_unwind_table(struct unwind_table *table, hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int) + 2 * n * sizeof(unsigned long); + dprintk(2, "Binary lookup table size for %s: %lu bytes", table->name, hdrSize); header = alloc(hdrSize); if (!header) return; @@ -500,13 +512,17 @@ static unsigned long read_pointer(const u8 **pLoc, const unsigned long *pul; } ptr; - if (ptrType < 0 || ptrType == DW_EH_PE_omit) + if (ptrType < 0 || ptrType == DW_EH_PE_omit) { + dprintk(1, "Invalid pointer encoding %02X (%p,%p).", ptrType, *pLoc, end); return 0; + } ptr.p8 = *pLoc; switch(ptrType & DW_EH_PE_FORM) { case DW_EH_PE_data2: - if (end < (const void *)(ptr.p16u + 1)) + if (end < (const void *)(ptr.p16u + 1)) { + dprintk(1, "Data16 overrun (%p,%p).", ptr.p8, end); return 0; + } if(ptrType & DW_EH_PE_signed) value = get_unaligned(ptr.p16s++); else @@ -514,8 +530,10 @@ static unsigned long read_pointer(const u8 **pLoc, break; case DW_EH_PE_data4: #ifdef CONFIG_64BIT - if (end < (const void *)(ptr.p32u + 1)) + if (end < (const void *)(ptr.p32u + 1)) { + dprintk(1, "Data32 overrun (%p,%p).", ptr.p8, end); return 0; + } if(ptrType & DW_EH_PE_signed) value = get_unaligned(ptr.p32s++); else @@ -527,8 +545,10 @@ static unsigned long read_pointer(const u8 **pLoc, BUILD_BUG_ON(sizeof(u32) != sizeof(value)); #endif case DW_EH_PE_native: - if (end < (const void *)(ptr.pul + 1)) + if (end < (const void *)(ptr.pul + 1)) { + dprintk(1, "DataUL overrun (%p,%p).", ptr.p8, end); return 0; + } value = get_unaligned(ptr.pul++); break; case DW_EH_PE_leb128: @@ -536,10 +556,14 @@ static unsigned long read_pointer(const u8 **pLoc, value = ptrType & DW_EH_PE_signed ? 
get_sleb128(&ptr.p8, end) : get_uleb128(&ptr.p8, end); - if ((const void *)ptr.p8 > end) + if ((const void *)ptr.p8 > end) { + dprintk(1, "DataLEB overrun (%p,%p).", ptr.p8, end); return 0; + } break; default: + dprintk(2, "Cannot decode pointer type %02X (%p,%p).", + ptrType, ptr.p8, end); return 0; } switch(ptrType & DW_EH_PE_ADJUST) { @@ -549,11 +573,16 @@ static unsigned long read_pointer(const u8 **pLoc, value += (unsigned long)*pLoc; break; default: + dprintk(2, "Cannot adjust pointer type %02X (%p,%p).", + ptrType, *pLoc, end); return 0; } if ((ptrType & DW_EH_PE_indirect) - && probe_kernel_address((unsigned long *)value, value)) + && probe_kernel_address((unsigned long *)value, value)) { + dprintk(1, "Cannot read indirect value %lx (%p,%p).", + value, *pLoc, end); return 0; + } *pLoc = ptr.p8; return value; @@ -702,8 +731,10 @@ static int processCFI(const u8 *start, state->label = NULL; return 1; } - if (state->stackDepth >= MAX_STACK_DEPTH) + if (state->stackDepth >= MAX_STACK_DEPTH) { + dprintk(1, "State stack overflow (%p,%p).", ptr.p8, end); return 0; + } state->stack[state->stackDepth++] = ptr.p8; break; case DW_CFA_restore_state: @@ -718,8 +749,10 @@ static int processCFI(const u8 *start, result = processCFI(start, end, 0, ptrType, state); state->loc = loc; state->label = label; - } else + } else { + dprintk(1, "State stack underflow (%p,%p).", ptr.p8, end); return 0; + } break; case DW_CFA_def_cfa: state->cfa.reg = get_uleb128(&ptr.p8, end); @@ -751,6 +784,7 @@ static int processCFI(const u8 *start, break; case DW_CFA_GNU_window_save: default: + dprintk(1, "Unrecognized CFI op %02X (%p,%p).", ptr.p8[-1], ptr.p8 - 1, end); result = 0; break; } @@ -766,12 +800,17 @@ static int processCFI(const u8 *start, set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state); break; } - if (ptr.p8 > end) + if (ptr.p8 > end) { + dprintk(1, "Data overrun (%p,%p).", ptr.p8, end); result = 0; + } if (result && targetLoc != 0 && targetLoc < state->loc) return 1; } + if (result && 
ptr.p8 < end) + dprintk(1, "Data underrun (%p,%p).", ptr.p8, end); + return result && ptr.p8 == end && (targetLoc == 0 @@ -843,6 +882,8 @@ int unwind(struct unwind_frame_info *frame) hdr[3]); } } + if(hdr && !fde) + dprintk(3, "Binary lookup for %lx failed.", pc); if (fde != NULL) { cie = cie_for_fde(fde, table); @@ -864,6 +905,8 @@ int unwind(struct unwind_frame_info *frame) fde = NULL; } else fde = NULL; + if(!fde) + dprintk(1, "Binary lookup result for %lx discarded.", pc); } if (fde == NULL) { for (fde = table->address, tableSize = table->size; @@ -895,6 +938,8 @@ int unwind(struct unwind_frame_info *frame) if (pc >= startLoc && pc < endLoc) break; } + if(!fde) + dprintk(3, "Linear lookup for %lx failed.", pc); } } if (cie != NULL) { @@ -928,6 +973,8 @@ int unwind(struct unwind_frame_info *frame) if (ptr >= end || *ptr) cie = NULL; } + if(!cie) + dprintk(1, "CIE unusable (%p,%p).", ptr, end); ++ptr; } if (cie != NULL) { @@ -938,9 +985,11 @@ int unwind(struct unwind_frame_info *frame) if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end) cie = NULL; else if (UNW_PC(frame) % state.codeAlign - || UNW_SP(frame) % sleb128abs(state.dataAlign)) + || UNW_SP(frame) % sleb128abs(state.dataAlign)) { + dprintk(1, "Input pointer(s) misaligned (%lx,%lx).", + UNW_PC(frame), UNW_SP(frame)); return -EPERM; - else { + } else { retAddrReg = state.version <= 1 ? 
*ptr++ : get_uleb128(&ptr, end); /* skip augmentation */ if (((const char *)(cie + 2))[1] == 'z') { @@ -954,6 +1003,8 @@ int unwind(struct unwind_frame_info *frame) || reg_info[retAddrReg].width != sizeof(unsigned long)) cie = NULL; } + if(!cie) + dprintk(1, "CIE validation failed (%p,%p).", ptr, end); } if (cie != NULL) { state.cieStart = ptr; @@ -967,6 +1018,8 @@ int unwind(struct unwind_frame_info *frame) if ((ptr += augSize) > end) fde = NULL; } + if(!fde) + dprintk(1, "FDE validation failed (%p,%p).", ptr, end); } if (cie == NULL || fde == NULL) { #ifdef CONFIG_FRAME_POINTER @@ -1025,8 +1078,10 @@ int unwind(struct unwind_frame_info *frame) || state.cfa.reg >= ARRAY_SIZE(reg_info) || reg_info[state.cfa.reg].width != sizeof(unsigned long) || FRAME_REG(state.cfa.reg, unsigned long) % sizeof(unsigned long) - || state.cfa.offs % sizeof(unsigned long)) + || state.cfa.offs % sizeof(unsigned long)) { + dprintk(1, "Unusable unwind info (%p,%p).", ptr, end); return -EIO; + } /* update frame */ #ifndef CONFIG_AS_CFI_SIGNAL_FRAME if(frame->call_frame @@ -1051,6 +1106,8 @@ int unwind(struct unwind_frame_info *frame) if (REG_INVALID(i)) { if (state.regs[i].where == Nowhere) continue; + dprintk(1, "Cannot restore register %u (%d).", + i, state.regs[i].where); return -EIO; } switch(state.regs[i].where) { @@ -1059,8 +1116,11 @@ int unwind(struct unwind_frame_info *frame) case Register: if (state.regs[i].value >= ARRAY_SIZE(reg_info) || REG_INVALID(state.regs[i].value) - || reg_info[i].width > reg_info[state.regs[i].value].width) + || reg_info[i].width > reg_info[state.regs[i].value].width) { + dprintk(1, "Cannot restore register %u from register %lu.", + i, state.regs[i].value); return -EIO; + } switch(reg_info[state.regs[i].value].width) { #define CASE(n) \ case sizeof(u##n): \ @@ -1070,6 +1130,9 @@ int unwind(struct unwind_frame_info *frame) CASES; #undef CASE default: + dprintk(1, "Unsupported register size %u (%lu).", + reg_info[state.regs[i].value].width, + 
state.regs[i].value); return -EIO; } break; @@ -1094,12 +1157,17 @@ int unwind(struct unwind_frame_info *frame) CASES; #undef CASE default: + dprintk(1, "Unsupported register size %u (%u).", + reg_info[i].width, i); return -EIO; } break; case Value: - if (reg_info[i].width != sizeof(unsigned long)) + if (reg_info[i].width != sizeof(unsigned long)) { + dprintk(1, "Unsupported value size %u (%u).", + reg_info[i].width, i); return -EIO; + } FRAME_REG(i, unsigned long) = cfa + state.regs[i].value * state.dataAlign; break; @@ -1111,8 +1179,11 @@ int unwind(struct unwind_frame_info *frame) % sizeof(unsigned long) || addr < startLoc || addr + sizeof(unsigned long) < addr - || addr + sizeof(unsigned long) > endLoc) + || addr + sizeof(unsigned long) > endLoc) { + dprintk(1, "Bad memory location %lx (%lx).", + addr, state.regs[i].value); return -EIO; + } switch(reg_info[i].width) { #define CASE(n) case sizeof(u##n): \ probe_kernel_address((u##n *)addr, FRAME_REG(i, u##n)); \ @@ -1120,6 +1191,8 @@ int unwind(struct unwind_frame_info *frame) CASES; #undef CASE default: + dprintk(1, "Unsupported memory size %u (%u).", + reg_info[i].width, i); return -EIO; } } @@ -1128,9 +1201,15 @@ int unwind(struct unwind_frame_info *frame) } if (UNW_PC(frame) % state.codeAlign - || UNW_SP(frame) % sleb128abs(state.dataAlign) - || (pc == UNW_PC(frame) && sp == UNW_SP(frame))) + || UNW_SP(frame) % sleb128abs(state.dataAlign)) { + dprintk(1, "Output pointer(s) misaligned (%lx,%lx).", + UNW_PC(frame), UNW_SP(frame)); return -EIO; + } + if (pc == UNW_PC(frame) && sp == UNW_SP(frame)) { + dprintk(1, "No progress (%lx,%lx).", pc, sp); + return -EIO; + } return 0; #undef CASES -- cgit v1.2.3 From d263b213577a1e8f166b0a7212d85175e36d6c19 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Thu, 7 Dec 2006 02:14:13 +0100 Subject: [PATCH] x86-64: Correct documentation for bzImage protocol v2.05 Correct the documentation for bzImage protocol extension due to relocatable bzImage. 
Signed-off-by: Vivek Goyal Signed-off-by: Andi Kleen Cc: Andi Kleen Acked-by: "H. Peter Anvin" Signed-off-by: Andrew Morton --- Documentation/i386/boot.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/i386/boot.txt b/Documentation/i386/boot.txt index cb28254f1550..9575de300a61 100644 --- a/Documentation/i386/boot.txt +++ b/Documentation/i386/boot.txt @@ -2,7 +2,7 @@ ---------------------------- H. Peter Anvin - Last update 2005-09-02 + Last update 2006-11-17 On the i386 platform, the Linux kernel uses a rather complicated boot convention. This has evolved partially due to historical aspects, as @@ -131,8 +131,8 @@ Offset Proto Name Meaning 0226/2 N/A pad1 Unused 0228/4 2.02+ cmd_line_ptr 32-bit pointer to the kernel command line 022C/4 2.03+ initrd_addr_max Highest legal initrd address -0230/4 2.04+ kernel_alignment Physical addr alignment required for kernel -0234/1 2.04+ relocatable_kernel Whether kernel is relocatable or not +0230/4 2.05+ kernel_alignment Physical addr alignment required for kernel +0234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not (1) For backwards compatibility, if the setup_sects field contains 0, the real value is 4. -- cgit v1.2.3