Diffstat (limited to 'arch/i386/kernel')
29 files changed, 710 insertions, 479 deletions
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index fb3e72328a5a..9ea5b8ecc7e1 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -850,10 +850,9 @@ static inline int acpi_parse_madt_ioapic_entries(void)
 static void __init acpi_process_madt(void)
 {
 #ifdef CONFIG_X86_LOCAL_APIC
-	int count, error;
+	int error;
 
-	count = acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt);
-	if (count >= 1) {
+	if (!acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) {
 
 		/*
 		 * Parse MADT LAPIC entries
@@ -1073,7 +1072,28 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
 			  "ASUS A7V ACPI BIOS Revision 1007"),
 		},
 	},
-
+	{
+		/*
+		 * Latest BIOS for IBM 600E (1.16) has bad pcinum
+		 * for LPC bridge, which is needed for the PCI
+		 * interrupt links to work. DSDT fix is in bug 5966.
+		 * 2645, 2646 model numbers are shared with 600/600E/600X
+		 */
+		.callback = disable_acpi_irq,
+		.ident = "IBM Thinkpad 600 Series 2645",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+			DMI_MATCH(DMI_BOARD_NAME, "2645"),
+		},
+	},
+	{
+		.callback = disable_acpi_irq,
+		.ident = "IBM Thinkpad 600 Series 2646",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+			DMI_MATCH(DMI_BOARD_NAME, "2646"),
+		},
+	},
 	/*
 	 * Boxes that need ACPI PCI IRQ routing and PCI scan disabled
 	 */
diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c
index bf86f7662d8b..8f7efd38254d 100644
--- a/arch/i386/kernel/acpi/earlyquirk.c
+++ b/arch/i386/kernel/acpi/earlyquirk.c
@@ -14,11 +14,8 @@
 
 #ifdef CONFIG_ACPI
 
-static int nvidia_hpet_detected __initdata;
-
 static int __init nvidia_hpet_check(struct acpi_table_header *header)
 {
-	nvidia_hpet_detected = 1;
 	return 0;
 }
 #endif
@@ -26,12 +23,13 @@ static int __init nvidia_hpet_check(struct acpi_table_header *header)
 static int __init check_bridge(int vendor, int device)
 {
 #ifdef CONFIG_ACPI
+	static int warned;
 	/* According to Nvidia all timer overrides are bogus unless HPET
 	   is enabled. */
 	if (!acpi_use_timer_override && vendor == PCI_VENDOR_ID_NVIDIA) {
-		nvidia_hpet_detected = 0;
-		acpi_table_parse(ACPI_SIG_HPET, nvidia_hpet_check);
-		if (nvidia_hpet_detected == 0) {
+		if (!warned && acpi_table_parse(ACPI_SIG_HPET,
+						nvidia_hpet_check)) {
+			warned = 1;
 			acpi_skip_timer_override = 1;
 			printk(KERN_INFO "Nvidia board "
 			       "detected. Ignoring ACPI "
diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c
index 9eca21b49f6b..426f59b0106b 100644
--- a/arch/i386/kernel/alternative.c
+++ b/arch/i386/kernel/alternative.c
@@ -5,15 +5,9 @@
 #include <asm/alternative.h>
 #include <asm/sections.h>
 
-static int no_replacement = 0;
 static int smp_alt_once = 0;
 static int debug_alternative = 0;
 
-static int __init noreplacement_setup(char *s)
-{
-	no_replacement = 1;
-	return 1;
-}
 static int __init bootonly(char *str)
 {
 	smp_alt_once = 1;
@@ -25,7 +19,6 @@ static int __init debug_alt(char *str)
 	return 1;
 }
 
-__setup("noreplacement", noreplacement_setup);
 __setup("smp-alt-boot", bootonly);
 __setup("debug-alternative", debug_alt);
 
@@ -252,9 +245,6 @@ void alternatives_smp_module_add(struct module *mod, char *name,
 	struct smp_alt_module *smp;
 	unsigned long flags;
 
-	if (no_replacement)
-		return;
-
 	if (smp_alt_once) {
 		if (boot_cpu_has(X86_FEATURE_UP))
 			alternatives_smp_unlock(locks, locks_end,
@@ -289,7 +279,7 @@ void alternatives_smp_module_del(struct module *mod)
 	struct smp_alt_module *item;
 	unsigned long flags;
 
-	if (no_replacement || smp_alt_once)
+	if (smp_alt_once)
 		return;
 
 	spin_lock_irqsave(&smp_alt, flags);
@@ -320,7 +310,7 @@ void alternatives_smp_switch(int smp)
 		return;
 #endif
 
-	if (no_replacement || smp_alt_once)
+	if (smp_alt_once)
 		return;
 	BUG_ON(!smp && (num_online_cpus() > 1));
 
@@ -386,13 +376,6 @@ extern struct paravirt_patch __start_parainstructions[],
 void __init alternative_instructions(void)
 {
 	unsigned long flags;
-	if (no_replacement) {
-		printk(KERN_INFO "(SMP-)alternatives turned off\n");
-		free_init_pages("SMP alternatives",
-				(unsigned long)__smp_alt_begin,
-				(unsigned long)__smp_alt_end);
-		return;
-	}
 
 	local_irq_save(flags);
 	apply_alternatives(__alt_instructions, __alt_instructions_end);
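For illustration of the DMI quirk mechanism the boot.c hunk above extends: the kernel walks a table of vendor/board substrings and fires a callback on the first full match. The following is a standalone userspace sketch of that matching loop; struct quirk and its fields are simplified stand-ins for the kernel's struct dmi_system_id, not the real API.

    #include <stdio.h>
    #include <string.h>

    struct quirk {                    /* simplified stand-in for struct dmi_system_id */
        const char *ident;
        const char *board_vendor;     /* DMI_BOARD_VENDOR substring to match */
        const char *board_name;       /* DMI_BOARD_NAME substring to match */
    };

    static const struct quirk quirks[] = {
        { "IBM Thinkpad 600 Series 2645", "IBM", "2645" },
        { "IBM Thinkpad 600 Series 2646", "IBM", "2646" },
        { NULL, NULL, NULL }
    };

    /* Return the first quirk whose fields are all substrings of the firmware strings. */
    static const struct quirk *dmi_scan(const char *vendor, const char *name)
    {
        const struct quirk *q;
        for (q = quirks; q->ident; q++)
            if (strstr(vendor, q->board_vendor) && strstr(name, q->board_name))
                return q;
        return NULL;
    }

    int main(void)
    {
        const struct quirk *q = dmi_scan("IBM", "2645");
        printf("%s\n", q ? q->ident : "no quirk");
        return 0;
    }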
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 9655c233e6f1..93aa911646ad 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -28,6 +28,7 @@
 #include <linux/clockchips.h>
 #include <linux/acpi_pmtmr.h>
 #include <linux/module.h>
+#include <linux/dmi.h>
 
 #include <asm/atomic.h>
 #include <asm/smp.h>
@@ -38,7 +39,6 @@
 #include <asm/hpet.h>
 #include <asm/i8253.h>
 #include <asm/nmi.h>
-#include <asm/idle.h>
 
 #include <mach_apic.h>
 #include <mach_apicdef.h>
@@ -62,6 +62,11 @@ static int enable_local_apic __initdata = 0;
 
 /* Local APIC timer verification ok */
 static int local_apic_timer_verify_ok;
+/* Disable local APIC timer from the kernel commandline or via dmi quirk */
+static int local_apic_timer_disabled;
+/* Local APIC timer works in C2 */
+int local_apic_timer_c2_ok;
+EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
 
 /*
  * Debug level, exported for io_apic.c
@@ -339,6 +344,23 @@ void __init setup_boot_APIC_clock(void)
 	void (*real_handler)(struct clock_event_device *dev);
 	unsigned long deltaj;
 	long delta, deltapm;
+	int pm_referenced = 0;
+
+	if (boot_cpu_has(X86_FEATURE_LAPIC_TIMER_BROKEN))
+		local_apic_timer_disabled = 1;
+
+	/*
+	 * The local apic timer can be disabled via the kernel
+	 * commandline or from the test above. Register the lapic
+	 * timer as a dummy clock event source on SMP systems, so the
+	 * broadcast mechanism is used. On UP systems simply ignore it.
+	 */
+	if (local_apic_timer_disabled) {
+		/* No broadcast on UP ! */
+		if (num_possible_cpus() > 1)
+			setup_APIC_timer();
+		return;
+	}
 
 	apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
 		    "calibrating APIC timer ...\n");
@@ -358,7 +380,8 @@ void __init setup_boot_APIC_clock(void)
 	/* Let the interrupts run */
 	local_irq_enable();
 
-	while(lapic_cal_loops <= LAPIC_CAL_LOOPS);
+	while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
+		cpu_relax();
 
 	local_irq_disable();
 
@@ -395,6 +418,7 @@ void __init setup_boot_APIC_clock(void)
 				    "%lu (%ld)\n", (unsigned long) res, delta);
 			delta = (long) res;
 		}
+		pm_referenced = 1;
 	}
 
 	/* Calculate the scaled math multiplication factor */
@@ -424,69 +448,43 @@ void __init setup_boot_APIC_clock(void)
 		    calibration_result / (1000000 / HZ),
 		    calibration_result % (1000000 / HZ));
 
-
-	apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
-
-	/*
-	 * Setup the apic timer manually
-	 */
 	local_apic_timer_verify_ok = 1;
-	levt->event_handler = lapic_cal_handler;
-	lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt);
-	lapic_cal_loops = -1;
 
-	/* Let the interrupts run */
-	local_irq_enable();
+	/* We trust the pm timer based calibration */
+	if (!pm_referenced) {
+		apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
 
-	while(lapic_cal_loops <= LAPIC_CAL_LOOPS);
+		/*
+		 * Setup the apic timer manually
+		 */
+		levt->event_handler = lapic_cal_handler;
+		lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt);
+		lapic_cal_loops = -1;
 
-	local_irq_disable();
+		/* Let the interrupts run */
+		local_irq_enable();
 
-	/* Stop the lapic timer */
-	lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt);
+		while(lapic_cal_loops <= LAPIC_CAL_LOOPS)
+			cpu_relax();
 
-	local_irq_enable();
+		local_irq_disable();
 
-	/* Jiffies delta */
-	deltaj = lapic_cal_j2 - lapic_cal_j1;
-	apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
+		/* Stop the lapic timer */
+		lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt);
 
-	/* Check, if the PM timer is available */
-	deltapm = lapic_cal_pm2 - lapic_cal_pm1;
-	apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
+		local_irq_enable();
 
-	local_apic_timer_verify_ok = 0;
+		/* Jiffies delta */
+		deltaj = lapic_cal_j2 - lapic_cal_j1;
+		apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
 
-	if (deltapm) {
-		if (deltapm > (pm_100ms - pm_thresh) &&
-		    deltapm < (pm_100ms + pm_thresh)) {
-			apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
-			/* Check, if the jiffies result is consistent */
-			if (deltaj < LAPIC_CAL_LOOPS-2 ||
-			    deltaj > LAPIC_CAL_LOOPS+2) {
-				/*
-				 * Not sure, what we can do about this one.
-				 * When high resultion timers are active
-				 * and the lapic timer does not stop in C3
-				 * we are fine. Otherwise more trouble might
-				 * be waiting. -- tglx
-				 */
-				printk(KERN_WARNING "Global event device %s "
-				       "has wrong frequency "
-				       "(%lu ticks instead of %d)\n",
-				       global_clock_event->name, deltaj,
-				       LAPIC_CAL_LOOPS);
-			}
-			local_apic_timer_verify_ok = 1;
-		}
-	} else {
 		/* Check, if the jiffies result is consistent */
-		if (deltaj >= LAPIC_CAL_LOOPS-2 &&
-		    deltaj <= LAPIC_CAL_LOOPS+2) {
+		if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
 			apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
-			local_apic_timer_verify_ok = 1;
-		}
-	}
+		else
+			local_apic_timer_verify_ok = 0;
+	} else
+		local_irq_enable();
 
 	if (!local_apic_timer_verify_ok) {
 		printk(KERN_WARNING
@@ -494,8 +492,15 @@ void __init setup_boot_APIC_clock(void)
 		/* No broadcast on UP ! */
 		if (num_possible_cpus() == 1)
 			return;
-	} else
-		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
+	} else {
+		/*
+		 * If nmi_watchdog is set to IO_APIC, we need the
+		 * PIT/HPET going.  Otherwise register lapic as a dummy
+		 * device.
+		 */
+		if (nmi_watchdog != NMI_IO_APIC)
+			lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
+	}
 
 	/* Setup the lapic or request the broadcast */
 	setup_APIC_timer();
@@ -561,7 +566,6 @@ void fastcall smp_apic_timer_interrupt(struct pt_regs *regs)
 	 * Besides, if we don't timer interrupts ignore the global
 	 * interrupt lock, which is the WrongThing (tm) to do.
 	 */
-	exit_idle();
 	irq_enter();
 	local_apic_timer_interrupt();
 	irq_exit();
@@ -1198,6 +1202,20 @@ static int __init parse_nolapic(char *arg)
 }
 early_param("nolapic", parse_nolapic);
 
+static int __init parse_disable_lapic_timer(char *arg)
+{
+	local_apic_timer_disabled = 1;
+	return 0;
+}
+early_param("nolapic_timer", parse_disable_lapic_timer);
+
+static int __init parse_lapic_timer_c2_ok(char *arg)
+{
+	local_apic_timer_c2_ok = 1;
+	return 0;
+}
+early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
+
 static int __init apic_set_verbosity(char *str)
 {
 	if (strcmp("debug", str) == 0)
@@ -1221,7 +1239,6 @@ void smp_spurious_interrupt(struct pt_regs *regs)
 {
 	unsigned long v;
 
-	exit_idle();
 	irq_enter();
 	/*
 	 * Check if this really is a spurious interrupt and ACK it
@@ -1245,7 +1262,6 @@ void smp_error_interrupt(struct pt_regs *regs)
 {
 	unsigned long v, v1;
 
-	exit_idle();
 	irq_enter();
 	/* First tickle the hardware, only then report what went on. -- REW */
 	v = apic_read(APIC_ESR);
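The apic.c hunks above add two boot parameters via early_param(). As a rough userspace model of what such a parameter hook does — scan the command line once and flip a flag — consider this sketch; parse_cmdline and its argument are illustrative stand-ins, not the kernel's parser.

    #include <stdio.h>
    #include <string.h>

    static int local_apic_timer_disabled;  /* mirrors the flag added in apic.c */

    /* Toy model of early_param("nolapic_timer", ...): look for the token. */
    static void parse_cmdline(const char *cmdline)
    {
        if (strstr(cmdline, "nolapic_timer"))
            local_apic_timer_disabled = 1;
    }

    int main(void)
    {
        parse_cmdline("root=/dev/sda1 nolapic_timer quiet");
        printf("lapic timer disabled: %d\n", local_apic_timer_disabled);
        return 0;
    }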
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index 41cfea57232b..2d47db482972 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -22,6 +22,37 @@
 extern void vide(void);
 __asm__(".align 4\nvide: ret");
 
+#define ENABLE_C1E_MASK		0x18000000
+#define CPUID_PROCESSOR_SIGNATURE	1
+#define CPUID_XFAM		0x0ff00000
+#define CPUID_XFAM_K8		0x00000000
+#define CPUID_XFAM_10H		0x00100000
+#define CPUID_XFAM_11H		0x00200000
+#define CPUID_XMOD		0x000f0000
+#define CPUID_XMOD_REV_F	0x00040000
+
+/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */
+static __cpuinit int amd_apic_timer_broken(void)
+{
+	u32 lo, hi;
+	u32 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
+	switch (eax & CPUID_XFAM) {
+	case CPUID_XFAM_K8:
+		if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F)
+			break;
+	case CPUID_XFAM_10H:
+	case CPUID_XFAM_11H:
+		rdmsr(MSR_K8_ENABLE_C1E, lo, hi);
+		if (lo & ENABLE_C1E_MASK)
+			return 1;
+		break;
+	default:
+		/* err on the side of caution */
+		return 1;
+	}
+	return 0;
+}
+
 static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
@@ -241,6 +272,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 
 	if (cpuid_eax(0x80000000) >= 0x80000006)
 		num_cache_leaves = 3;
+
+	if (amd_apic_timer_broken())
+		set_bit(X86_FEATURE_LAPIC_TIMER_BROKEN, c->x86_capability);
 }
 
 static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
diff --git a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig
index 6c52182ca323..e912aae9473c 100644
--- a/arch/i386/kernel/cpu/cpufreq/Kconfig
+++ b/arch/i386/kernel/cpu/cpufreq/Kconfig
@@ -125,7 +125,6 @@ config X86_SPEEDSTEP_CENTRINO_ACPI
 	bool "Use ACPI tables to decode valid frequency/voltage (deprecated)"
 	depends on X86_SPEEDSTEP_CENTRINO && ACPI_PROCESSOR
 	depends on !(X86_SPEEDSTEP_CENTRINO = y && ACPI_PROCESSOR = m)
-	default y
 	help
 	  This is deprecated and this functionality is now merged into
 	  acpi_cpufreq (X86_ACPI_CPUFREQ). Use that driver instead of
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c
index b59878a0d9b3..2b030d6ccbf7 100644
--- a/arch/i386/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c
@@ -61,8 +61,8 @@ static int cpu_model;
 static unsigned int numscales=16;
 static unsigned int fsb;
 
-static struct mV_pos *vrm_mV_table;
-static unsigned char *mV_vrm_table;
+static const struct mV_pos *vrm_mV_table;
+static const unsigned char *mV_vrm_table;
 struct f_msr {
 	u8 vrm;
 	u8 pos;
@@ -758,7 +758,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
 				NULL, (void *)&pr);
 
 	/* Check ACPI support for C3 state */
-	if (pr != NULL && longhaul_version != TYPE_LONGHAUL_V1) {
+	if (pr != NULL && longhaul_version == TYPE_POWERSAVER) {
 		cx = &pr->power.states[ACPI_STATE_C3];
 		if (cx->address > 0 && cx->latency <= 1000) {
 			longhaul_flags |= USE_ACPI_C3;
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.h b/arch/i386/kernel/cpu/cpufreq/longhaul.h
index bb0a04b1d1ab..102548f12842 100644
--- a/arch/i386/kernel/cpu/cpufreq/longhaul.h
+++ b/arch/i386/kernel/cpu/cpufreq/longhaul.h
@@ -56,7 +56,7 @@ union msr_longhaul {
 /*
  * VIA C3 Samuel 1  & Samuel 2 (stepping 0)
  */
-static int __initdata samuel1_clock_ratio[16] = {
+static const int __initdata samuel1_clock_ratio[16] = {
 	-1, /* 0000 -> RESERVED */
 	30, /* 0001 -> 3.0x */
 	40, /* 0010 -> 4.0x */
@@ -75,7 +75,7 @@ static int __initdata samuel1_clock_ratio[16] = {
 	-1, /* 1111 -> RESERVED */
 };
 
-static int __initdata samuel1_eblcr[16] = {
+static const int __initdata samuel1_eblcr[16] = {
 	50, /* 0000 -> RESERVED */
 	30, /* 0001 -> 3.0x */
 	40, /* 0010 -> 4.0x */
@@ -97,7 +97,7 @@
 /*
  * VIA C3 Samuel2 Stepping 1->15
  */
-static int __initdata samuel2_eblcr[16] = {
+static const int __initdata samuel2_eblcr[16] = {
 	50, /* 0000 -> 5.0x */
 	30, /* 0001 -> 3.0x */
 	40, /* 0010 -> 4.0x */
@@ -119,7 +119,7 @@
 /*
  * VIA C3 Ezra
  */
-static int __initdata ezra_clock_ratio[16] = {
+static const int __initdata ezra_clock_ratio[16] = {
 	100, /* 0000 -> 10.0x */
 	30, /* 0001 -> 3.0x */
 	40, /* 0010 -> 4.0x */
@@ -138,7 +138,7 @@ static int __initdata ezra_clock_ratio[16] = {
 	120, /* 1111 -> 12.0x */
 };
 
-static int __initdata ezra_eblcr[16] = {
+static const int __initdata ezra_eblcr[16] = {
 	50, /* 0000 -> 5.0x */
 	30, /* 0001 -> 3.0x */
 	40, /* 0010 -> 4.0x */
@@ -160,7 +160,7 @@
 /*
  * VIA C3 (Ezra-T) [C5M].
  */
-static int __initdata ezrat_clock_ratio[32] = {
+static const int __initdata ezrat_clock_ratio[32] = {
 	100, /* 0000 -> 10.0x */
 	30, /* 0001 -> 3.0x */
 	40, /* 0010 -> 4.0x */
@@ -196,7 +196,7 @@ static int __initdata ezrat_clock_ratio[32] = {
 	-1, /* 1111 -> RESERVED (12.0x) */
 };
 
-static int __initdata ezrat_eblcr[32] = {
+static const int __initdata ezrat_eblcr[32] = {
 	50, /* 0000 -> 5.0x */
 	30, /* 0001 -> 3.0x */
 	40, /* 0010 -> 4.0x */
@@ -235,7 +235,7 @@
 /*
  * VIA C3 Nehemiah
  */
-static int __initdata nehemiah_clock_ratio[32] = {
+static const int __initdata nehemiah_clock_ratio[32] = {
 	100, /* 0000 -> 10.0x */
 	160, /* 0001 -> 16.0x */
 	40, /* 0010 -> 4.0x */
@@ -270,7 +270,7 @@ static int __initdata nehemiah_clock_ratio[32] = {
 	120, /* 1111 -> 12.0x */
 };
 
-static int __initdata nehemiah_eblcr[32] = {
+static const int __initdata nehemiah_eblcr[32] = {
 	50, /* 0000 -> 5.0x */
 	160, /* 0001 -> 16.0x */
 	40, /* 0010 -> 4.0x */
@@ -315,7 +315,7 @@ struct mV_pos {
 	unsigned short pos;
 };
 
-static struct mV_pos __initdata vrm85_mV[32] = {
+static const struct mV_pos __initdata vrm85_mV[32] = {
 	{1250, 8},  {1200, 6},  {1150, 4},  {1100, 2},
 	{1050, 0},  {1800, 30}, {1750, 28}, {1700, 26},
 	{1650, 24}, {1600, 22}, {1550, 20}, {1500, 18},
@@ -326,14 +326,14 @@ static struct mV_pos __initdata vrm85_mV[32] = {
 	{1475, 17}, {1425, 15}, {1375, 13}, {1325, 11}
 };
 
-static unsigned char __initdata mV_vrm85[32] = {
+static const unsigned char __initdata mV_vrm85[32] = {
 	0x04, 0x14, 0x03, 0x13, 0x02, 0x12, 0x01, 0x11,
 	0x00, 0x10, 0x0f, 0x1f, 0x0e, 0x1e, 0x0d, 0x1d,
 	0x0c, 0x1c, 0x0b, 0x1b, 0x0a, 0x1a, 0x09, 0x19,
 	0x08, 0x18, 0x07, 0x17, 0x06, 0x16, 0x05, 0x15
 };
 
-static struct mV_pos __initdata mobilevrm_mV[32] = {
+static const struct mV_pos __initdata mobilevrm_mV[32] = {
 	{1750, 31}, {1700, 30}, {1650, 29}, {1600, 28},
 	{1550, 27}, {1500, 26}, {1450, 25}, {1400, 24},
 	{1350, 23}, {1300, 22}, {1250, 21}, {1200, 20},
@@ -344,7 +344,7 @@ static struct mV_pos __initdata mobilevrm_mV[32] = {
 	{675, 3},   {650, 2},   {625, 1},   {600, 0}
 };
 
-static unsigned char __initdata mV_mobilevrm[32] = {
+static const unsigned char __initdata mV_mobilevrm[32] = {
 	0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18,
 	0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10,
 	0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08,
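The longhaul.h hunks above only add const qualifiers; the tables themselves are unchanged. The decoding pattern such tables support looks roughly like this userspace sketch (table values abbreviated, with -1 marking reserved encodings, in the style of samuel1_clock_ratio[]):

    #include <stdio.h>

    /* Abbreviated clock-ratio table: index = 4-bit field read from the MSR,
     * value = FSB multiplier * 10, -1 = reserved encoding. */
    static const int clock_ratio[16] = {
        -1, 30, 40, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1,
    };

    int main(void)
    {
        unsigned int field = 2;              /* pretend this came from the MSR */
        int ratio = clock_ratio[field & 0xf];
        if (ratio < 0)
            printf("reserved encoding\n");
        else
            printf("%d.%dx\n", ratio / 10, ratio % 10);
        return 0;
    }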
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
index 54382760983a..837b04166a47 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
@@ -68,7 +68,7 @@ union powernow_acpi_control_t {
 #ifdef CONFIG_CPU_FREQ_DEBUG
 /* divide by 1000 to get VCore voltage in V. */
-static int mobile_vid_table[32] = {
+static const int mobile_vid_table[32] = {
     2000, 1950, 1900, 1850,
     1800, 1750, 1700, 1650,
     1600, 1550, 1500, 1450,
     1400, 1350, 1300, 0,
     1275, 1250, 1225, 1200,
     1175, 1150, 1125, 1100,
@@ -77,7 +77,7 @@ union powernow_acpi_control_t {
 #endif
 
 /* divide by 10 to get FID. */
-static int fid_codes[32] = {
+static const int fid_codes[32] = {
     110, 115, 120, 125, 50, 55, 60, 65,
     70, 75, 80, 85, 90, 95, 100, 105,
     30, 190, 40, 200, 130, 135, 140, 210,
diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c
index 8359c19d3a23..504434a46011 100644
--- a/arch/i386/kernel/cpu/mcheck/p4.c
+++ b/arch/i386/kernel/cpu/mcheck/p4.c
@@ -12,7 +12,6 @@
 #include <asm/system.h>
 #include <asm/msr.h>
 #include <asm/apic.h>
-#include <asm/idle.h>
 
 #include <asm/therm_throt.h>
 
@@ -60,7 +59,6 @@ static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_therm
 
 fastcall void smp_thermal_interrupt(struct pt_regs *regs)
 {
-	exit_idle();
 	irq_enter();
 	vendor_thermal_interrupt(regs);
 	irq_exit();
diff --git a/arch/i386/kernel/hpet.c b/arch/i386/kernel/hpet.c
index e1006b7acc9e..17d73459fc5f 100644
--- a/arch/i386/kernel/hpet.c
+++ b/arch/i386/kernel/hpet.c
@@ -3,6 +3,8 @@
 #include <linux/errno.h>
 #include <linux/hpet.h>
 #include <linux/init.h>
+#include <linux/sysdev.h>
+#include <linux/pm.h>
 
 #include <asm/hpet.h>
 #include <asm/io.h>
@@ -197,16 +199,34 @@ static int hpet_next_event(unsigned long delta,
 	cnt += delta;
 	hpet_writel(cnt, HPET_T0_CMP);
 
-	return ((long)(hpet_readl(HPET_COUNTER) - cnt ) > 0);
+	return ((long)(hpet_readl(HPET_COUNTER) - cnt ) > 0) ? -ETIME : 0;
 }
 
 /*
+ * Clock source related code
+ */
+static cycle_t read_hpet(void)
+{
+	return (cycle_t)hpet_readl(HPET_COUNTER);
+}
+
+static struct clocksource clocksource_hpet = {
+	.name		= "hpet",
+	.rating		= 250,
+	.read		= read_hpet,
+	.mask		= HPET_MASK,
+	.shift		= HPET_SHIFT,
+	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+/*
  * Try to setup the HPET timer
  */
 int __init hpet_enable(void)
 {
 	unsigned long id;
 	uint64_t hpet_freq;
+	u64 tmp;
 
 	if (!is_hpet_capable())
 		return 0;
@@ -253,6 +273,25 @@ int __init hpet_enable(void)
 	/* Start the counter */
 	hpet_start_counter();
 
+	/* Initialize and register HPET clocksource
+	 *
+	 * hpet period is in femto seconds per cycle
+	 * so we need to convert this to ns/cyc units
+	 * aproximated by mult/2^shift
+	 *
+	 *  fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift
+	 *  fsec/cyc * 1ns/1000000fsec * 2^shift = mult
+	 *  fsec/cyc * 2^shift * 1nsec/1000000fsec = mult
+	 *  (fsec/cyc << shift)/1000000 = mult
+	 *  (hpet_period << shift)/FSEC_PER_NSEC = mult
+	 */
+	tmp = (u64)hpet_period << HPET_SHIFT;
+	do_div(tmp, FSEC_PER_NSEC);
+	clocksource_hpet.mult = (u32)tmp;
+
+	clocksource_register(&clocksource_hpet);
+
 	if (id & HPET_ID_LEGSUP) {
 		hpet_enable_int();
 		hpet_reserve_platform_timers(id);
@@ -270,52 +309,10 @@ int __init hpet_enable(void)
 out_nohpet:
 	iounmap(hpet_virt_address);
 	hpet_virt_address = NULL;
+	boot_hpet_disable = 1;
 	return 0;
 }
 
-/*
- * Clock source related code
- */
-static cycle_t read_hpet(void)
-{
-	return (cycle_t)hpet_readl(HPET_COUNTER);
-}
-
-static struct clocksource clocksource_hpet = {
-	.name		= "hpet",
-	.rating		= 250,
-	.read		= read_hpet,
-	.mask		= HPET_MASK,
-	.shift		= HPET_SHIFT,
-	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
-};
-
-static int __init init_hpet_clocksource(void)
-{
-	u64 tmp;
-
-	if (!hpet_virt_address)
-		return -ENODEV;
-
-	/*
-	 * hpet period is in femto seconds per cycle
-	 * so we need to convert this to ns/cyc units
-	 * aproximated by mult/2^shift
-	 *
-	 *  fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift
-	 *  fsec/cyc * 1ns/1000000fsec * 2^shift = mult
-	 *  fsec/cyc * 2^shift * 1nsec/1000000fsec = mult
-	 *  (fsec/cyc << shift)/1000000 = mult
-	 *  (hpet_period << shift)/FSEC_PER_NSEC = mult
-	 */
-	tmp = (u64)hpet_period << HPET_SHIFT;
-	do_div(tmp, FSEC_PER_NSEC);
-	clocksource_hpet.mult = (u32)tmp;
-
-	return clocksource_register(&clocksource_hpet);
-}
-
-module_init(init_hpet_clocksource);
 
 #ifdef CONFIG_HPET_EMULATE_RTC
@@ -527,3 +524,68 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 #endif
+
+
+/*
+ * Suspend/resume part
+ */
+#ifdef CONFIG_PM
+
+static int hpet_suspend(struct sys_device *sys_device, pm_message_t state)
+{
+	unsigned long cfg = hpet_readl(HPET_CFG);
+
+	cfg &= ~(HPET_CFG_ENABLE|HPET_CFG_LEGACY);
+	hpet_writel(cfg, HPET_CFG);
+
+	return 0;
+}
+
+static int hpet_resume(struct sys_device *sys_device)
+{
+	unsigned int id;
+
+	hpet_start_counter();
+
+	id = hpet_readl(HPET_ID);
+
+	if (id & HPET_ID_LEGSUP)
+		hpet_enable_int();
+
+	return 0;
+}
+
+static struct sysdev_class hpet_class = {
+	set_kset_name("hpet"),
+	.suspend	= hpet_suspend,
+	.resume		= hpet_resume,
+};
+
+static struct sys_device hpet_device = {
+	.id		= 0,
+	.cls		= &hpet_class,
+};
+
+
+static __init int hpet_register_sysfs(void)
+{
+	int err;
+
+	if (!is_hpet_capable())
+		return 0;
+
+	err = sysdev_class_register(&hpet_class);
+
+	if (!err) {
+		err = sysdev_register(&hpet_device);
+		if (err)
+			sysdev_class_unregister(&hpet_class);
+	}
+
+	return err;
+}
+
+device_initcall(hpet_register_sysfs);
+
+#endif
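The comment in the hpet_enable() hunk above derives mult = (hpet_period << shift) / FSEC_PER_NSEC. Here is a standalone check of that arithmetic using plain 64-bit division in place of do_div(); the period value is an assumed example (roughly a 14.318 MHz HPET), and HPET_SHIFT = 22 matches the i386 header of this era.

    #include <stdio.h>
    #include <stdint.h>

    #define FSEC_PER_NSEC 1000000ULL
    #define HPET_SHIFT 22

    int main(void)
    {
        uint64_t hpet_period = 69841279;   /* fsec per cycle, assumed example */
        uint64_t mult = (hpet_period << HPET_SHIFT) / FSEC_PER_NSEC;

        /* Convert a cycle delta to nanoseconds the way the clocksource core does. */
        uint64_t cycles = 14318180;        /* about one second worth of cycles */
        uint64_t ns = (cycles * mult) >> HPET_SHIFT;

        printf("mult = %llu, %llu cycles = %llu ns\n",
               (unsigned long long)mult, (unsigned long long)cycles,
               (unsigned long long)ns);
        return 0;
    }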
diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
index e3d4b73bfdb0..4afe26e86260 100644
--- a/arch/i386/kernel/i386_ksyms.c
+++ b/arch/i386/kernel/i386_ksyms.c
@@ -28,3 +28,5 @@ EXPORT_SYMBOL(__read_lock_failed);
 #endif
 
 EXPORT_SYMBOL(csum_partial);
+
+EXPORT_SYMBOL(_proxy_pda);
diff --git a/arch/i386/kernel/i8253.c b/arch/i386/kernel/i8253.c
index a6bc7bb38834..10cef5ca8a5b 100644
--- a/arch/i386/kernel/i8253.c
+++ b/arch/i386/kernel/i8253.c
@@ -47,9 +47,17 @@ static void init_pit_timer(enum clock_event_mode mode,
 		outb(LATCH >> 8 , PIT_CH0);	/* MSB */
 		break;
 
-	case CLOCK_EVT_MODE_ONESHOT:
+	/*
+	 * Avoid unnecessary state transitions, as it confuses
+	 * Geode / Cyrix based boxen.
+	 */
 	case CLOCK_EVT_MODE_SHUTDOWN:
+		if (evt->mode == CLOCK_EVT_MODE_UNUSED)
+			break;
 	case CLOCK_EVT_MODE_UNUSED:
+		if (evt->mode == CLOCK_EVT_MODE_SHUTDOWN)
+			break;
+	case CLOCK_EVT_MODE_ONESHOT:
 		/* One shot setup */
 		outb_p(0x38, PIT_MODE);
 		udelay(10);
@@ -195,4 +203,4 @@ static int __init init_pit_clocksource(void)
 	clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20);
 	return clocksource_register(&clocksource_pit);
 }
-module_init(init_pit_clocksource);
+arch_initcall(init_pit_clocksource);
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index 4ccebd454e25..b3ab8ffebd27 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -343,7 +343,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
 			break;
 		entry = irq_2_pin + entry->next;
 	}
-	set_native_irq_info(irq, cpumask);
+	irq_desc[irq].affinity = cpumask;
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
@@ -736,7 +736,7 @@ failed:
 	return 0;
 }
 
-int __init irqbalance_disable(char *str)
+int __devinit irqbalance_disable(char *str)
 {
 	irqbalance_disabled = 1;
 	return 1;
@@ -1354,7 +1354,6 @@ static void __init setup_IO_APIC_irqs(void)
 		}
 		spin_lock_irqsave(&ioapic_lock, flags);
 		__ioapic_write_entry(apic, pin, entry);
-		set_native_irq_info(irq, TARGET_CPUS);
 		spin_unlock_irqrestore(&ioapic_lock, flags);
 	}
 }
@@ -2585,7 +2584,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	write_msi_msg(irq, &msg);
-	set_native_irq_info(irq, mask);
+	irq_desc[irq].affinity = mask;
 }
 #endif /* CONFIG_SMP */
 
@@ -2669,7 +2668,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 	dest = cpu_mask_to_apicid(mask);
 
 	target_ht_irq(irq, dest);
-	set_native_irq_info(irq, mask);
+	irq_desc[irq].affinity = mask;
 }
 #endif
 
@@ -2875,7 +2874,6 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
 
 	spin_lock_irqsave(&ioapic_lock, flags);
 	__ioapic_write_entry(ioapic, pin, entry);
-	set_native_irq_info(irq, TARGET_CPUS);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	return 0;
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 0f2ca590bf23..8db8d514c9c0 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -18,8 +18,6 @@
 #include <linux/cpu.h>
 #include <linux/delay.h>
 
-#include <asm/idle.h>
-
 #include <asm/apic.h>
 #include <asm/uaccess.h>
 
@@ -77,7 +75,6 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs)
 	union irq_ctx *curctx, *irqctx;
 	u32 *isp;
 #endif
-	exit_idle();
 
 	if (unlikely((unsigned)irq >= NR_IRQS)) {
 		printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c
index b8f16633a6ec..cbe7ec8dbb9f 100644
--- a/arch/i386/kernel/microcode.c
+++ b/arch/i386/kernel/microcode.c
@@ -567,6 +567,53 @@ static int cpu_request_microcode(int cpu)
 	return error;
 }
 
+static int apply_microcode_on_cpu(int cpu)
+{
+	struct cpuinfo_x86 *c = cpu_data + cpu;
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	cpumask_t old;
+	unsigned int val[2];
+	int err = 0;
+
+	if (!uci->mc)
+		return -EINVAL;
+
+	old = current->cpus_allowed;
+	set_cpus_allowed(current, cpumask_of_cpu(cpu));
+
+	/* Check if the microcode we have in memory matches the CPU */
+	if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
+	    cpu_has(c, X86_FEATURE_IA64) || uci->sig != cpuid_eax(0x00000001))
+		err = -EINVAL;
+
+	if (!err && ((c->x86_model >= 5) || (c->x86 > 6))) {
+		/* get processor flags from MSR 0x17 */
+		rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
+		if (uci->pf != (1 << ((val[1] >> 18) & 7)))
+			err = -EINVAL;
+	}
+
+	if (!err) {
+		wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+		/* see notes above for revision 1.07.  Apparent chip bug */
+		sync_core();
+		/* get the current revision from MSR 0x8B */
+		rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
+		if (uci->rev != val[1])
+			err = -EINVAL;
+	}
+
+	if (!err)
+		apply_microcode(cpu);
+	else
+		printk(KERN_ERR "microcode: Could not apply microcode to CPU%d:"
+			" sig=0x%x, pf=0x%x, rev=0x%x\n",
+			cpu, uci->sig, uci->pf, uci->rev);
+
+	set_cpus_allowed(current, old);
+	return err;
+}
+
 static void microcode_init_cpu(int cpu)
 {
 	cpumask_t old;
@@ -577,7 +624,8 @@ static void microcode_init_cpu(int cpu)
 	set_cpus_allowed(current, cpumask_of_cpu(cpu));
 	mutex_lock(&microcode_mutex);
 	collect_cpu_info(cpu);
-	if (uci->valid && system_state == SYSTEM_RUNNING)
+	if (uci->valid && system_state == SYSTEM_RUNNING &&
+	    !suspend_cpu_hotplug)
 		cpu_request_microcode(cpu);
 	mutex_unlock(&microcode_mutex);
 	set_cpus_allowed(current, old);
@@ -663,13 +711,24 @@ static int mc_sysdev_add(struct sys_device *sys_dev)
 		return 0;
 
 	pr_debug("Microcode:CPU %d added\n", cpu);
-	memset(uci, 0, sizeof(*uci));
+	/* If suspend_cpu_hotplug is set, the system is resuming and we should
+	 * use the data from before the suspend.
+	 */
+	if (suspend_cpu_hotplug) {
+		err = apply_microcode_on_cpu(cpu);
+		if (err)
+			microcode_fini_cpu(cpu);
+	}
+	if (!uci->valid)
+		memset(uci, 0, sizeof(*uci));
 
 	err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group);
 	if (err)
 		return err;
 
-	microcode_init_cpu(cpu);
+	if (!uci->valid)
+		microcode_init_cpu(cpu);
+
 	return 0;
 }
 
@@ -680,7 +739,11 @@ static int mc_sysdev_remove(struct sys_device *sys_dev)
 	if (!cpu_online(cpu))
 		return 0;
 
 	pr_debug("Microcode:CPU %d removed\n", cpu);
-	microcode_fini_cpu(cpu);
+	/* If suspend_cpu_hotplug is set, the system is suspending and we should
+	 * keep the microcode in memory for the resume.
+	 */
+	if (!suspend_cpu_hotplug)
+		microcode_fini_cpu(cpu);
 	sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
 	return 0;
 }
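apply_microcode_on_cpu() above uses the save/pin/restore affinity pattern so the MSR reads hit the right CPU. A userspace analogue of that pattern, using the Linux sched_setaffinity()/sched_getaffinity() calls (the probe function is a hypothetical placeholder for the CPU-local work):

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>

    /* Run a CPU-local operation on one CPU, then restore the old affinity. */
    static int run_on_cpu(int cpu, void (*fn)(void))
    {
        cpu_set_t old, pinned;

        if (sched_getaffinity(0, sizeof(old), &old))
            return -1;

        CPU_ZERO(&pinned);
        CPU_SET(cpu, &pinned);
        if (sched_setaffinity(0, sizeof(pinned), &pinned))
            return -1;

        fn();   /* e.g. read a per-CPU MSR via /dev/cpu/N/msr */

        return sched_setaffinity(0, sizeof(old), &old);
    }

    static void probe(void) { printf("running on cpu %d\n", sched_getcpu()); }

    int main(void) { return run_on_cpu(0, probe); }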
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index 821df34d2b3a..84c3497efb60 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -41,16 +41,17 @@ int nmi_watchdog_enabled;
 * different subsystems this reservation system just tries to coordinate
 * things a little
 */
-static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner);
-static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]);
-
-static cpumask_t backtrace_mask = CPU_MASK_NONE;
 
 /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
 * offset from MSR_P4_BSU_ESCR0.  It will be the max for all platforms (for now)
 */
 #define NMI_MAX_COUNTER_BITS 66
+#define NMI_MAX_COUNTER_LONGS BITS_TO_LONGS(NMI_MAX_COUNTER_BITS)
+
+static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner[NMI_MAX_COUNTER_LONGS]);
+static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[NMI_MAX_COUNTER_LONGS]);
+static cpumask_t backtrace_mask = CPU_MASK_NONE;
 
 /* nmi_active:
 * >0: the lapic NMI watchdog is active, but can be disabled
 * <0: the lapic NMI watchdog has not been set up, and cannot
@@ -122,64 +123,129 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
 /* checks for a bit availability (hack for oprofile) */
 int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
 {
+	int cpu;
 	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
-
-	return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
+	for_each_possible_cpu (cpu) {
+		if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]))
+			return 0;
+	}
+	return 1;
 }
 
 /* checks the an msr for availability */
 int avail_to_resrv_perfctr_nmi(unsigned int msr)
 {
 	unsigned int counter;
+	int cpu;
 
 	counter = nmi_perfctr_msr_to_bit(msr);
 	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 
-	return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
+	for_each_possible_cpu (cpu) {
+		if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]))
+			return 0;
+	}
+	return 1;
 }
 
-int reserve_perfctr_nmi(unsigned int msr)
+static int __reserve_perfctr_nmi(int cpu, unsigned int msr)
 {
 	unsigned int counter;
+	if (cpu < 0)
+		cpu = smp_processor_id();
 
 	counter = nmi_perfctr_msr_to_bit(msr);
 	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 
-	if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
+	if (!test_and_set_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]))
 		return 1;
 	return 0;
 }
 
-void release_perfctr_nmi(unsigned int msr)
+static void __release_perfctr_nmi(int cpu, unsigned int msr)
 {
 	unsigned int counter;
+	if (cpu < 0)
+		cpu = smp_processor_id();
 
 	counter = nmi_perfctr_msr_to_bit(msr);
 	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 
-	clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
+	clear_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]);
 }
 
-int reserve_evntsel_nmi(unsigned int msr)
+int reserve_perfctr_nmi(unsigned int msr)
+{
+	int cpu, i;
+	for_each_possible_cpu (cpu) {
+		if (!__reserve_perfctr_nmi(cpu, msr)) {
+			for_each_possible_cpu (i) {
+				if (i >= cpu)
+					break;
+				__release_perfctr_nmi(i, msr);
+			}
+			return 0;
+		}
+	}
+	return 1;
+}
+
+void release_perfctr_nmi(unsigned int msr)
+{
+	int cpu;
+	for_each_possible_cpu (cpu) {
+		__release_perfctr_nmi(cpu, msr);
+	}
+}
+
+int __reserve_evntsel_nmi(int cpu, unsigned int msr)
 {
 	unsigned int counter;
+	if (cpu < 0)
+		cpu = smp_processor_id();
 
 	counter = nmi_evntsel_msr_to_bit(msr);
 	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 
-	if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]))
+	if (!test_and_set_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]))
 		return 1;
 	return 0;
 }
 
-void release_evntsel_nmi(unsigned int msr)
+static void __release_evntsel_nmi(int cpu, unsigned int msr)
 {
 	unsigned int counter;
+	if (cpu < 0)
+		cpu = smp_processor_id();
 
 	counter = nmi_evntsel_msr_to_bit(msr);
 	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 
-	clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]);
+	clear_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]);
+}
+
+int reserve_evntsel_nmi(unsigned int msr)
+{
+	int cpu, i;
+	for_each_possible_cpu (cpu) {
+		if (!__reserve_evntsel_nmi(cpu, msr)) {
+			for_each_possible_cpu (i) {
+				if (i >= cpu)
+					break;
+				__release_evntsel_nmi(i, msr);
+			}
+			return 0;
+		}
+	}
+	return 1;
+}
+
+void release_evntsel_nmi(unsigned int msr)
+{
+	int cpu;
+	for_each_possible_cpu (cpu) {
+		__release_evntsel_nmi(cpu, msr);
+	}
 }
 
 static __cpuinit inline int nmi_known_cpu(void)
@@ -245,14 +311,6 @@ static int __init check_nmi_watchdog(void)
 	unsigned int *prev_nmi_count;
 	int cpu;
 
-	/* Enable NMI watchdog for newer systems.
-	   Probably safe on most older systems too, but let's be careful.
-	   IBM ThinkPads use INT10 inside SMM and that allows early NMI inside SMM
-	   which hangs the system. Disable watchdog for all thinkpads */
-	if (nmi_watchdog == NMI_DEFAULT && dmi_get_year(DMI_BIOS_DATE) >= 2004 &&
-		!dmi_name_in_vendors("ThinkPad"))
-		nmi_watchdog = NMI_LOCAL_APIC;
-
 	if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
 		return 0;
 
@@ -271,7 +329,7 @@ static int __init check_nmi_watchdog(void)
 	for_each_possible_cpu(cpu)
 		prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
 	local_irq_enable();
-	mdelay((10*1000)/nmi_hz); // wait 10 ticks
+	mdelay((20*1000)/nmi_hz); // wait 20 ticks
 
 	for_each_possible_cpu(cpu) {
 #ifdef CONFIG_SMP
@@ -515,10 +573,10 @@ static int setup_k7_watchdog(void)
 	perfctr_msr = MSR_K7_PERFCTR0;
 	evntsel_msr = MSR_K7_EVNTSEL0;
 
-	if (!reserve_perfctr_nmi(perfctr_msr))
+	if (!__reserve_perfctr_nmi(-1, perfctr_msr))
 		goto fail;
 
-	if (!reserve_evntsel_nmi(evntsel_msr))
+	if (!__reserve_evntsel_nmi(-1, evntsel_msr))
 		goto fail1;
 
 	wrmsrl(perfctr_msr, 0UL);
@@ -541,7 +599,7 @@ static int setup_k7_watchdog(void)
 	wd->check_bit = 1ULL<<63;
 	return 1;
 fail1:
-	release_perfctr_nmi(perfctr_msr);
+	__release_perfctr_nmi(-1, perfctr_msr);
 fail:
	return 0;
 }
 
@@ -552,8 +610,8 @@ static void stop_k7_watchdog(void)
 
 	wrmsr(wd->evntsel_msr, 0, 0);
 
-	release_evntsel_nmi(wd->evntsel_msr);
-	release_perfctr_nmi(wd->perfctr_msr);
+	__release_evntsel_nmi(-1, wd->evntsel_msr);
+	__release_perfctr_nmi(-1, wd->perfctr_msr);
 }
 
 #define P6_EVNTSEL0_ENABLE	(1 << 22)
@@ -571,10 +629,10 @@ static int setup_p6_watchdog(void)
 	perfctr_msr = MSR_P6_PERFCTR0;
 	evntsel_msr = MSR_P6_EVNTSEL0;
 
-	if (!reserve_perfctr_nmi(perfctr_msr))
+	if (!__reserve_perfctr_nmi(-1, perfctr_msr))
 		goto fail;
 
-	if (!reserve_evntsel_nmi(evntsel_msr))
+	if (!__reserve_evntsel_nmi(-1, evntsel_msr))
 		goto fail1;
 
 	wrmsrl(perfctr_msr, 0UL);
@@ -598,7 +656,7 @@ static int setup_p6_watchdog(void)
 	wd->check_bit = 1ULL<<39;
 	return 1;
 fail1:
-	release_perfctr_nmi(perfctr_msr);
+	__release_perfctr_nmi(-1, perfctr_msr);
 fail:
	return 0;
 }
 
@@ -609,8 +667,8 @@ static void stop_p6_watchdog(void)
 
 	wrmsr(wd->evntsel_msr, 0, 0);
 
-	release_evntsel_nmi(wd->evntsel_msr);
-	release_perfctr_nmi(wd->perfctr_msr);
+	__release_evntsel_nmi(-1, wd->evntsel_msr);
+	__release_perfctr_nmi(-1, wd->perfctr_msr);
 }
 
 /* Note that these events don't tick when the CPU idles. This means
@@ -676,10 +734,10 @@ static int setup_p4_watchdog(void)
 		cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
 	}
 
-	if (!reserve_perfctr_nmi(perfctr_msr))
+	if (!__reserve_perfctr_nmi(-1, perfctr_msr))
 		goto fail;
 
-	if (!reserve_evntsel_nmi(evntsel_msr))
+	if (!__reserve_evntsel_nmi(-1, evntsel_msr))
 		goto fail1;
 
 	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
@@ -703,7 +761,7 @@ static int setup_p4_watchdog(void)
 	wd->check_bit = 1ULL<<39;
 	return 1;
 fail1:
-	release_perfctr_nmi(perfctr_msr);
+	__release_perfctr_nmi(-1, perfctr_msr);
 fail:
	return 0;
 }
 
@@ -715,8 +773,8 @@ static void stop_p4_watchdog(void)
 	wrmsr(wd->cccr_msr, 0, 0);
 	wrmsr(wd->evntsel_msr, 0, 0);
 
-	release_evntsel_nmi(wd->evntsel_msr);
-	release_perfctr_nmi(wd->perfctr_msr);
+	__release_evntsel_nmi(-1, wd->evntsel_msr);
+	__release_perfctr_nmi(-1, wd->perfctr_msr);
 }
 
 #define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
@@ -744,10 +802,10 @@ static int setup_intel_arch_watchdog(void)
 	perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
 	evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;
 
-	if (!reserve_perfctr_nmi(perfctr_msr))
+	if (!__reserve_perfctr_nmi(-1, perfctr_msr))
 		goto fail;
 
-	if (!reserve_evntsel_nmi(evntsel_msr))
+	if (!__reserve_evntsel_nmi(-1, evntsel_msr))
 		goto fail1;
 
 	wrmsrl(perfctr_msr, 0UL);
@@ -772,7 +830,7 @@ static int setup_intel_arch_watchdog(void)
 	wd->check_bit = 1ULL << (eax.split.bit_width - 1);
 	return 1;
 fail1:
-	release_perfctr_nmi(perfctr_msr);
+	__release_perfctr_nmi(-1, perfctr_msr);
 fail:
	return 0;
 }
 
@@ -795,8 +853,8 @@ static void stop_intel_arch_watchdog(void)
 		return;
 
 	wrmsr(wd->evntsel_msr, 0, 0);
 
-	release_evntsel_nmi(wd->evntsel_msr);
-	release_perfctr_nmi(wd->perfctr_msr);
+	__release_evntsel_nmi(-1, wd->evntsel_msr);
+	__release_perfctr_nmi(-1, wd->perfctr_msr);
 }
 
 void setup_apic_nmi_watchdog (void *unused)
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c
index c156ecfa3872..2ec331e03fa9 100644
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -32,6 +32,7 @@
 #include <asm/fixmap.h>
 #include <asm/apic.h>
 #include <asm/tlbflush.h>
+#include <asm/timer.h>
 
 /* nop stub */
 static void native_nop(void)
@@ -493,7 +494,7 @@ struct paravirt_ops paravirt_ops = {
 	.memory_setup = machine_specific_memory_setup,
 	.get_wallclock = native_get_wallclock,
 	.set_wallclock = native_set_wallclock,
-	.time_init = time_init_hook,
+	.time_init = hpet_time_init,
 	.init_IRQ = native_init_IRQ,
 
 	.cpuid = native_cpuid,
@@ -520,6 +521,8 @@ struct paravirt_ops paravirt_ops = {
 	.write_msr = native_write_msr,
 	.read_tsc = native_read_tsc,
 	.read_pmc = native_read_pmc,
+	.get_scheduled_cycles = native_read_tsc,
+	.get_cpu_khz = native_calculate_cpu_khz,
 	.load_tr_desc = native_load_tr_desc,
 	.set_ldt = native_set_ldt,
 	.load_gdt = native_load_gdt,
@@ -535,7 +538,6 @@ struct paravirt_ops paravirt_ops = {
 
 	.set_iopl_mask = native_set_iopl_mask,
 	.io_delay = native_io_delay,
-	.const_udelay = __const_udelay,
 
 #ifdef CONFIG_X86_LOCAL_APIC
 	.apic_write = native_apic_write,
@@ -550,6 +552,8 @@ struct paravirt_ops paravirt_ops = {
 	.flush_tlb_kernel = native_flush_tlb_global,
 	.flush_tlb_single = native_flush_tlb_single,
 
+	.map_pt_hook = (void *)native_nop,
+
 	.alloc_pt = (void *)native_nop,
 	.alloc_pd = (void *)native_nop,
 	.alloc_pd_clone = (void *)native_nop,
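The nmi.c rework above changes counter reservation from per-CPU to all-CPUs-or-nothing: reserve on every possible CPU, and roll back the partial reservation if any CPU already owns the bit. A single-threaded userspace sketch of that rollback shape (NCPUS and the bitmap are illustrative; the kernel uses atomic test_and_set_bit on per-CPU data):

    #include <stdio.h>

    #define NCPUS 4

    /* One "owner" bit per counter per CPU, like perfctr_nmi_owner. */
    static unsigned long owner[NCPUS];

    static int test_and_set(unsigned long *word, int bit)
    {
        unsigned long mask = 1UL << bit;
        int was_set = (*word & mask) != 0;
        *word |= mask;
        return was_set;
    }

    /* Reserve a counter on all CPUs, rolling back on partial failure. */
    static int reserve_all(int bit)
    {
        int cpu, i;
        for (cpu = 0; cpu < NCPUS; cpu++) {
            if (test_and_set(&owner[cpu], bit)) {
                for (i = 0; i < cpu; i++)
                    owner[i] &= ~(1UL << bit);
                return 0;   /* someone already owned it */
            }
        }
        return 1;
    }

    int main(void)
    {
        printf("first reserve: %d\n", reserve_all(0));  /* 1: success */
        printf("second reserve: %d\n", reserve_all(0)); /* 0: already owned */
        return 0;
    }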
diff --git a/arch/i386/kernel/pci-dma.c b/arch/i386/kernel/pci-dma.c
index 41af692c1584..3ebcea033623 100644
--- a/arch/i386/kernel/pci-dma.c
+++ b/arch/i386/kernel/pci-dma.c
@@ -110,7 +110,7 @@ int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
 	return DMA_MEMORY_IO;
 
  free1_out:
-	kfree(dev->dma_mem->bitmap);
+	kfree(dev->dma_mem);
  out:
 	if (mem_base)
 		iounmap(mem_base);
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index bea304d48cdb..393a67d5d943 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -49,7 +49,6 @@
 #include <asm/i387.h>
 #include <asm/desc.h>
 #include <asm/vm86.h>
-#include <asm/idle.h>
 #ifdef CONFIG_MATH_EMULATION
 #include <asm/math_emu.h>
 #endif
@@ -82,42 +81,6 @@ void (*pm_idle)(void);
 EXPORT_SYMBOL(pm_idle);
 static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
 
-static ATOMIC_NOTIFIER_HEAD(idle_notifier);
-
-void idle_notifier_register(struct notifier_block *n)
-{
-	atomic_notifier_chain_register(&idle_notifier, n);
-}
-
-void idle_notifier_unregister(struct notifier_block *n)
-{
-	atomic_notifier_chain_unregister(&idle_notifier, n);
-}
-
-static DEFINE_PER_CPU(volatile unsigned long, idle_state);
-
-void enter_idle(void)
-{
-	/* needs to be atomic w.r.t. interrupts, not against other CPUs */
-	__set_bit(0, &__get_cpu_var(idle_state));
-	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
-}
-
-static void __exit_idle(void)
-{
-	/* needs to be atomic w.r.t. interrupts, not against other CPUs */
-	if (__test_and_clear_bit(0, &__get_cpu_var(idle_state)) == 0)
-		return;
-	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
-}
-
-void exit_idle(void)
-{
-	if (current->pid)
-		return;
-	__exit_idle();
-}
-
 void disable_hlt(void)
 {
 	hlt_counter++;
@@ -168,7 +131,6 @@ EXPORT_SYMBOL(default_idle);
 */
 static void poll_idle (void)
 {
-	local_irq_enable();
 	cpu_relax();
 }
 
@@ -229,16 +191,7 @@ void cpu_idle(void)
 				play_dead();
 
 			__get_cpu_var(irq_stat).idle_timestamp = jiffies;
-
-			/*
-			 * Idle routines should keep interrupts disabled
-			 * from here on, until they go to idle.
-			 * Otherwise, idle callbacks can misfire.
-			 */
-			local_irq_disable();
-			enter_idle();
 			idle();
-			__exit_idle();
 		}
 		tick_nohz_restart_sched_tick();
 		preempt_enable_no_resched();
@@ -293,11 +246,7 @@ void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
 		smp_mb();
 		if (!need_resched())
-			__sti_mwait(eax, ecx);
-		else
-			local_irq_enable();
-	} else {
-		local_irq_enable();
+			__mwait(eax, ecx);
 	}
 }
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 122623dcc6e1..698c24fe482e 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -657,5 +657,4 @@ void __init setup_arch(char **cmdline_p)
 		conswitchp = &dummy_con;
 #endif
 #endif
-	tsc_init();
 }
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index 9bd9637ae692..0e8977871b1f 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -23,7 +23,6 @@
 
 #include <asm/mtrr.h>
 #include <asm/tlbflush.h>
-#include <asm/idle.h>
 #include <mach_apic.h>
 
 /*
@@ -624,7 +623,6 @@ fastcall void smp_call_function_interrupt(struct pt_regs *regs)
 	/*
 	 * At this point the info structure may be out of scope unless wait==1
 	 */
-	exit_idle();
 	irq_enter();
 	(*func)(info);
 	irq_exit();
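The pci-dma.c one-liner above fixes an error path: when the bitmap allocation fails, the container struct itself must be freed, not the (never-allocated) bitmap inside it, or the container leaks. The generic shape of that unwind, as a runnable userspace sketch with hypothetical names:

    #include <stdlib.h>

    struct dma_mem {
        unsigned long *bitmap;
        int size;
    };

    /* On failure, free the container (mem), not mem->bitmap. */
    static struct dma_mem *dma_mem_alloc(int pages)
    {
        struct dma_mem *mem = calloc(1, sizeof(*mem));
        if (!mem)
            return NULL;

        mem->bitmap = calloc(pages / 8 + 1, 1);
        if (!mem->bitmap)
            goto free1_out;

        mem->size = pages;
        return mem;

    free1_out:
        free(mem);  /* the fix: release the struct; bitmap was never allocated */
        return NULL;
    }

    int main(void)
    {
        struct dma_mem *m = dma_mem_alloc(1024);
        if (m) {
            free(m->bitmap);
            free(m);
        }
        return 0;
    }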
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 48bfcaa13ecc..4ff55e675576 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -33,11 +33,6 @@
 *		Dave Jones	:	Report invalid combinations of Athlon CPUs.
 *		Rusty Russell	:	Hacked into shape for new "hotplug" boot process.
 */
-
-/* SMP boot always wants to use real time delay to allow sufficient time for
- * the APs to come online */
-#define USE_REAL_TIME_DELAY
-
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -50,6 +45,7 @@
 #include <linux/notifier.h>
 #include <linux/cpu.h>
 #include <linux/percpu.h>
+#include <linux/nmi.h>
 
 #include <linux/delay.h>
 #include <linux/mc146818rtc.h>
@@ -1283,8 +1279,9 @@ void __cpu_die(unsigned int cpu)
 
 int __cpuinit __cpu_up(unsigned int cpu)
 {
+	unsigned long flags;
 #ifdef CONFIG_HOTPLUG_CPU
-	int ret=0;
+	int ret = 0;
 
 	/*
 	 * We do warm boot only on cpus that had booted earlier
@@ -1302,23 +1299,25 @@ int __cpuinit __cpu_up(unsigned int cpu)
 	/* In case one didn't come up */
 	if (!cpu_isset(cpu, cpu_callin_map)) {
 		printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
-		local_irq_enable();
 		return -EIO;
 	}
 
-	local_irq_enable();
-
 	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
 	/* Unleash the CPU! */
 	cpu_set(cpu, smp_commenced_mask);
 
 	/*
-	 * Check TSC synchronization with the AP:
+	 * Check TSC synchronization with the AP (keep irqs disabled
+	 * while doing so):
 	 */
+	local_irq_save(flags);
 	check_tsc_sync_source(cpu);
+	local_irq_restore(flags);
 
-	while (!cpu_isset(cpu, cpu_online_map))
+	while (!cpu_isset(cpu, cpu_online_map)) {
 		cpu_relax();
+		touch_nmi_watchdog();
+	}
 
 #ifdef CONFIG_X86_GENERICARCH
 	if (num_online_cpus() > 8 && genapic == &apic_default)
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index a5350059557a..94e5cb091104 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -262,14 +262,23 @@ void notify_arch_cmos_timer(void)
 extern void (*late_time_init)(void);
 /* Duplicate of time_init() below, with hpet_enable part added */
-static void __init hpet_time_init(void)
+void __init hpet_time_init(void)
 {
 	if (!hpet_enable())
 		setup_pit_timer();
-	do_time_init();
+	time_init_hook();
 }
 
+/*
+ * This is called directly from init code; we must delay timer setup in the
+ * HPET case as we can't make the decision to turn on HPET this early in the
+ * boot process.
+ *
+ * The chosen time_init function will usually be hpet_time_init, above, but
+ * in the case of virtual hardware, an alternative function may be substituted.
+ */
 void __init time_init(void)
 {
-	late_time_init = hpet_time_init;
+	tsc_init();
+	late_time_init = choose_time_init();
 }
diff --git a/arch/i386/kernel/topology.c b/arch/i386/kernel/topology.c
index 79cf608e14ca..45782356a618 100644
--- a/arch/i386/kernel/topology.c
+++ b/arch/i386/kernel/topology.c
@@ -1,5 +1,5 @@
 /*
- * arch/i386/kernel/topology.c - Populate driverfs with topology information
+ * arch/i386/kernel/topology.c - Populate sysfs with topology information
 *
 * Written by: Matthew Dobson, IBM Corporation
 * Original Code: Paul Dorwin, IBM Corporation, Patrick Mochel, OSDL
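The __cpu_up() change above turns a bare spin on cpu_online_map into a polite wait that also pets the NMI watchdog, so a slow AP bring-up is not misreported as a lockup. A minimal compilable model of that loop (the two helpers are stand-ins for the kernel's cpu_relax() and touch_nmi_watchdog(); x86 only, because of the pause instruction):

    #include <stdio.h>

    static volatile int cpu_online;   /* set by the AP in the real code */

    static void cpu_relax(void) { __asm__ __volatile__("pause"); }
    static void touch_nmi_watchdog(void) { /* would reset the watchdog timestamp */ }

    int main(void)
    {
        cpu_online = 1;   /* pretend the AP already came up */

        /* Spin politely and keep the NMI watchdog quiet while waiting. */
        while (!cpu_online) {
            cpu_relax();
            touch_nmi_watchdog();
        }
        printf("cpu online\n");
        return 0;
    }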
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index 3082a418635c..6cb8f5336732 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -14,16 +14,18 @@
 #include <asm/delay.h>
 #include <asm/tsc.h>
 #include <asm/io.h>
+#include <asm/timer.h>
 
 #include "mach_timer.h"
 
+static int tsc_enabled;
+
 /*
 * On some systems the TSC frequency does not
 * change with the cpu frequency. So we need
 * an extra value to store the TSC freq
 */
 unsigned int tsc_khz;
 
-unsigned long long (*custom_sched_clock)(void);
-
 int tsc_disable;
 
@@ -102,24 +104,21 @@ unsigned long long sched_clock(void)
 {
 	unsigned long long this_offset;
 
-	if (unlikely(custom_sched_clock))
-		return (*custom_sched_clock)();
-
 	/*
	 * Fall back to jiffies if there's no TSC available:
	 */
-	if (unlikely(tsc_disable))
+	if (unlikely(!tsc_enabled))
 		/* No locking but a rare wrong value is not a big deal: */
 		return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
 
 	/* read the Time Stamp Counter: */
-	rdtscll(this_offset);
+	get_scheduled_cycles(this_offset);
 
 	/* return the value in ns */
 	return cycles_2_ns(this_offset);
 }
 
-static unsigned long calculate_cpu_khz(void)
+unsigned long native_calculate_cpu_khz(void)
 {
 	unsigned long long start, end;
 	unsigned long count;
@@ -186,34 +185,6 @@ int recalibrate_cpu_khz(void)
 
 EXPORT_SYMBOL(recalibrate_cpu_khz);
 
-void __init tsc_init(void)
-{
-	if (!cpu_has_tsc || tsc_disable)
-		goto out_no_tsc;
-
-	cpu_khz = calculate_cpu_khz();
-	tsc_khz = cpu_khz;
-
-	if (!cpu_khz)
-		goto out_no_tsc;
-
-	printk("Detected %lu.%03lu MHz processor.\n",
-	       (unsigned long)cpu_khz / 1000,
-	       (unsigned long)cpu_khz % 1000);
-
-	set_cyc2ns_scale(cpu_khz);
-	use_tsc_delay();
-	return;
-
-out_no_tsc:
-	/*
-	 * Set the tsc_disable flag if there's no TSC support, this
-	 * makes it a fast flag for the kernel to see whether it
-	 * should be using the TSC.
-	 */
-	tsc_disable = 1;
-}
-
 #ifdef CONFIG_CPU_FREQ
 
 /*
@@ -314,6 +285,7 @@ void mark_tsc_unstable(void)
 {
 	if (!tsc_unstable) {
 		tsc_unstable = 1;
+		tsc_enabled = 0;
 		/* Can be called before registration */
 		if (clocksource_tsc.mult)
 			clocksource_change_rating(&clocksource_tsc, 0);
@@ -383,28 +355,49 @@ static void __init check_geode_tsc_reliable(void)
 static inline void check_geode_tsc_reliable(void) { }
 #endif
 
-static int __init init_tsc_clocksource(void)
+
+void __init tsc_init(void)
 {
+	if (!cpu_has_tsc || tsc_disable)
+		goto out_no_tsc;
+
+	cpu_khz = calculate_cpu_khz();
+	tsc_khz = cpu_khz;
+
+	if (!cpu_khz)
+		goto out_no_tsc;
+
+	printk("Detected %lu.%03lu MHz processor.\n",
+	       (unsigned long)cpu_khz / 1000,
+	       (unsigned long)cpu_khz % 1000);
+
+	set_cyc2ns_scale(cpu_khz);
+	use_tsc_delay();
 
-	if (cpu_has_tsc && tsc_khz && !tsc_disable) {
-		/* check blacklist */
-		dmi_check_system(bad_tsc_dmi_table);
+	/* Check and install the TSC clocksource */
+	dmi_check_system(bad_tsc_dmi_table);
 
-		unsynchronized_tsc();
-		check_geode_tsc_reliable();
-		current_tsc_khz = tsc_khz;
-		clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
+	unsynchronized_tsc();
+	check_geode_tsc_reliable();
+	current_tsc_khz = tsc_khz;
+	clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
 							clocksource_tsc.shift);
-		/* lower the rating if we already know its unstable: */
-		if (check_tsc_unstable()) {
-			clocksource_tsc.rating = 0;
-			clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
-		}
+	/* lower the rating if we already know its unstable: */
+	if (check_tsc_unstable()) {
+		clocksource_tsc.rating = 0;
+		clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
+	} else
+		tsc_enabled = 1;
 
-		return clocksource_register(&clocksource_tsc);
-	}
+	clocksource_register(&clocksource_tsc);
 
-	return 0;
-}
+	return;
 
-module_init(init_tsc_clocksource);
+out_no_tsc:
+	/*
+	 * Set the tsc_disable flag if there's no TSC support, this
+	 * makes it a fast flag for the kernel to see whether it
+	 * should be using the TSC.
+	 */
+	tsc_disable = 1;
+}
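sched_clock() above converts a TSC reading to nanoseconds with cycles_2_ns(). The i386 code of this era precomputes a fixed-point ns-per-cycle factor with a 2^10 scale (CYC2NS_SCALE_FACTOR = 10); here is that arithmetic as a standalone check, assuming a 2 GHz TSC for the example:

    #include <stdio.h>
    #include <stdint.h>

    #define CYC2NS_SCALE_FACTOR 10  /* same 2^10 scale the i386 tsc code uses */

    static uint64_t cyc2ns_scale;

    /* Precompute ns-per-cycle as a fixed point value: (10^6 << 10) / cpu_khz. */
    static void set_cyc2ns_scale(unsigned long cpu_khz)
    {
        cyc2ns_scale = (1000000ULL << CYC2NS_SCALE_FACTOR) / cpu_khz;
    }

    static uint64_t cycles_2_ns(uint64_t cyc)
    {
        return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
    }

    int main(void)
    {
        set_cyc2ns_scale(2000000);  /* assume a 2 GHz TSC */
        printf("1000000 cycles = %llu ns\n",
               (unsigned long long)cycles_2_ns(1000000)); /* prints 500000 */
        return 0;
    }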
*/ ap.cr0 = 0x80050023; ap.cr4 = mmu_cr4_features; - vmi_ops.set_initial_ap_state(__pa(&ap), phys_apicid); + vmi_ops.set_initial_ap_state((u32)&ap, phys_apicid); } #endif +static void vmi_set_lazy_mode(int mode) +{ + static DEFINE_PER_CPU(int, lazy_mode); + + if (!vmi_ops.set_lazy_mode) + return; + + /* Modes should never nest or overlap */ + BUG_ON(__get_cpu_var(lazy_mode) && !(mode == PARAVIRT_LAZY_NONE || + mode == PARAVIRT_LAZY_FLUSH)); + + if (mode == PARAVIRT_LAZY_FLUSH) { + vmi_ops.set_lazy_mode(0); + vmi_ops.set_lazy_mode(__get_cpu_var(lazy_mode)); + } else { + vmi_ops.set_lazy_mode(mode); + __get_cpu_var(lazy_mode) = mode; + } +} + static inline int __init check_vmi_rom(struct vrom_header *rom) { struct pci_header *pci; @@ -610,13 +648,14 @@ static inline int __init check_vmi_rom(struct vrom_header *rom) rom->api_version_maj, rom->api_version_min, pci->rom_version_maj, pci->rom_version_min); - license_gplok = license_is_gpl_compatible(license); - if (!license_gplok) { - printk(KERN_WARNING "VMI: ROM license '%s' taints kernel... " - "inlining disabled\n", - license); - add_taint(TAINT_PROPRIETARY_MODULE); - } + /* Don't allow BSD/MIT here for now because we don't want to end up + with any binary only shim layers */ + if (strcmp(license, "GPL") && strcmp(license, "GPL v2")) { + printk(KERN_WARNING "VMI: Non GPL license `%s' found for ROM. Not used.\n", + license); + return 0; + } + return 1; } @@ -645,12 +684,12 @@ static inline int __init probe_vmi_rom(void) void vmi_bringup(void) { /* We must establish the lowmem mapping for MMU ops to work */ - if (vmi_rom) + if (vmi_ops.set_linear_mapping) vmi_ops.set_linear_mapping(0, __PAGE_OFFSET, max_low_pfn, 0); } /* - * Return a pointer to the VMI function or a NOP stub + * Return a pointer to a VMI function or NULL if unimplemented */ static void *vmi_get_function(int vmicall) { @@ -661,23 +700,47 @@ static void *vmi_get_function(int vmicall) if (rel->type == VMI_RELOCATION_CALL_REL) return (void *)rel->eip; else - return (void *)vmi_nop; + return NULL; } /* * Helper macro for making the VMI paravirt-ops fill code readable. - * For unimplemented operations, fall back to default. + * For unimplemented operations, fall back to default, unless nop + * is returned by the ROM. */ #define para_fill(opname, vmicall) \ do { \ reloc = call_vrom_long_func(vmi_rom, get_reloc, \ VMI_CALL_##vmicall); \ - if (rel->type != VMI_RELOCATION_NONE) { \ - BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); \ + if (rel->type == VMI_RELOCATION_CALL_REL) \ paravirt_ops.opname = (void *)rel->eip; \ + else if (rel->type == VMI_RELOCATION_NOP) \ + paravirt_ops.opname = (void *)vmi_nop; \ + else if (rel->type != VMI_RELOCATION_NONE) \ + printk(KERN_WARNING "VMI: Unknown relocation " \ + "type %d for " #vmicall"\n",\ + rel->type); \ +} while (0) + +/* + * Helper macro for making the VMI paravirt-ops fill code readable. + * For cached operations which do not match the VMI ROM ABI and must + * go through a tranlation stub. Ignore NOPs, since it is not clear + * a NOP * VMI function corresponds to a NOP paravirt-op when the + * functions are not in 1-1 correspondence. 
+ */ +#define para_wrap(opname, wrapper, cache, vmicall) \ +do { \ + reloc = call_vrom_long_func(vmi_rom, get_reloc, \ + VMI_CALL_##vmicall); \ + BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL); \ + if (rel->type == VMI_RELOCATION_CALL_REL) { \ + paravirt_ops.opname = wrapper; \ + vmi_ops.cache = (void *)rel->eip; \ } \ } while (0) + /* * Activate the VMI interface and switch into paravirtualized mode */ @@ -714,13 +777,8 @@ static inline int __init activate_vmi(void) * rdpmc is not yet used in Linux */ - /* CPUID is special, so very special */ - reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_CPUID); - if (rel->type != VMI_RELOCATION_NONE) { - BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); - vmi_ops.cpuid = (void *)rel->eip; - paravirt_ops.cpuid = vmi_cpuid; - } + /* CPUID is special, so very special it gets wrapped like a present */ + para_wrap(cpuid, vmi_cpuid, cpuid, CPUID); para_fill(clts, CLTS); para_fill(get_debugreg, GetDR); @@ -737,38 +795,26 @@ static inline int __init activate_vmi(void) para_fill(restore_fl, SetInterruptMask); para_fill(irq_disable, DisableInterrupts); para_fill(irq_enable, EnableInterrupts); + /* irq_save_disable !!! sheer pain */ patch_offset(&irq_save_disable_callout[IRQ_PATCH_INT_MASK], (char *)paravirt_ops.save_fl); patch_offset(&irq_save_disable_callout[IRQ_PATCH_DISABLE], (char *)paravirt_ops.irq_disable); -#ifndef CONFIG_NO_IDLE_HZ - para_fill(safe_halt, Halt); -#else - vmi_ops.halt = vmi_get_function(VMI_CALL_Halt); - paravirt_ops.safe_halt = vmi_safe_halt; -#endif + para_fill(wbinvd, WBINVD); + para_fill(read_tsc, RDTSC); + + /* The following we emulate with trap and emulate for now */ /* paravirt_ops.read_msr = vmi_rdmsr */ /* paravirt_ops.write_msr = vmi_wrmsr */ - para_fill(read_tsc, RDTSC); /* paravirt_ops.rdpmc = vmi_rdpmc */ - /* TR interface doesn't pass TR value */ - reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_SetTR); - if (rel->type != VMI_RELOCATION_NONE) { - BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); - vmi_ops.set_tr = (void *)rel->eip; - paravirt_ops.load_tr_desc = vmi_set_tr; - } + /* TR interface doesn't pass TR value, wrap */ + para_wrap(load_tr_desc, vmi_set_tr, set_tr, SetTR); /* LDT is special, too */ - reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_SetLDT); - if (rel->type != VMI_RELOCATION_NONE) { - BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); - vmi_ops._set_ldt = (void *)rel->eip; - paravirt_ops.set_ldt = vmi_set_ldt; - } + para_wrap(set_ldt, vmi_set_ldt, _set_ldt, SetLDT); para_fill(load_gdt, SetGDT); para_fill(load_idt, SetIDT); @@ -779,28 +825,14 @@ static inline int __init activate_vmi(void) para_fill(write_ldt_entry, WriteLDTEntry); para_fill(write_gdt_entry, WriteGDTEntry); para_fill(write_idt_entry, WriteIDTEntry); - reloc = call_vrom_long_func(vmi_rom, get_reloc, - VMI_CALL_UpdateKernelStack); - if (rel->type != VMI_RELOCATION_NONE) { - BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); - vmi_ops.set_kernel_stack = (void *)rel->eip; - paravirt_ops.load_esp0 = vmi_load_esp0; - } - + para_wrap(load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack); para_fill(set_iopl_mask, SetIOPLMask); - paravirt_ops.io_delay = (void *)vmi_nop; - if (!disable_nodelay) { - paravirt_ops.const_udelay = (void *)vmi_nop; - } - - para_fill(set_lazy_mode, SetLazyMode); + para_fill(io_delay, IODelay); + para_wrap(set_lazy_mode, vmi_set_lazy_mode, set_lazy_mode, SetLazyMode); - reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_FlushTLB); - if (rel->type != VMI_RELOCATION_NONE) { - vmi_ops.flush_tlb = (void 
-	reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_FlushTLB);
-	if (rel->type != VMI_RELOCATION_NONE) {
-		vmi_ops.flush_tlb = (void *)rel->eip;
-		paravirt_ops.flush_tlb_user = vmi_flush_tlb_user;
-		paravirt_ops.flush_tlb_kernel = vmi_flush_tlb_kernel;
-	}
+	/* user and kernel flush are just handled with different flags to FlushTLB */
+	para_wrap(flush_tlb_user, vmi_flush_tlb_user, flush_tlb, FlushTLB);
+	para_wrap(flush_tlb_kernel, vmi_flush_tlb_kernel, flush_tlb, FlushTLB);
 	para_fill(flush_tlb_single, InvalPage);
 
 	/*
@@ -815,27 +847,40 @@ static inline int __init activate_vmi(void)
 	vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxE);
 	vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxE);
 #endif
-	vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping);
-	vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage);
-	vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage);
-	paravirt_ops.alloc_pt = vmi_allocate_pt;
-	paravirt_ops.alloc_pd = vmi_allocate_pd;
-	paravirt_ops.alloc_pd_clone = vmi_allocate_pd_clone;
-	paravirt_ops.release_pt = vmi_release_pt;
-	paravirt_ops.release_pd = vmi_release_pd;
-	paravirt_ops.set_pte = vmi_set_pte;
-	paravirt_ops.set_pte_at = vmi_set_pte_at;
-	paravirt_ops.set_pmd = vmi_set_pmd;
-	paravirt_ops.pte_update = vmi_update_pte;
-	paravirt_ops.pte_update_defer = vmi_update_pte_defer;
+	if (vmi_ops.set_pte) {
+		paravirt_ops.set_pte = vmi_set_pte;
+		paravirt_ops.set_pte_at = vmi_set_pte_at;
+		paravirt_ops.set_pmd = vmi_set_pmd;
 #ifdef CONFIG_X86_PAE
-	paravirt_ops.set_pte_atomic = vmi_set_pte_atomic;
-	paravirt_ops.set_pte_present = vmi_set_pte_present;
-	paravirt_ops.set_pud = vmi_set_pud;
-	paravirt_ops.pte_clear = vmi_pte_clear;
-	paravirt_ops.pmd_clear = vmi_pmd_clear;
+		paravirt_ops.set_pte_atomic = vmi_set_pte_atomic;
+		paravirt_ops.set_pte_present = vmi_set_pte_present;
+		paravirt_ops.set_pud = vmi_set_pud;
+		paravirt_ops.pte_clear = vmi_pte_clear;
+		paravirt_ops.pmd_clear = vmi_pmd_clear;
 #endif
+	}
+
+	if (vmi_ops.update_pte) {
+		paravirt_ops.pte_update = vmi_update_pte;
+		paravirt_ops.pte_update_defer = vmi_update_pte_defer;
+	}
+
+	vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage);
+	if (vmi_ops.allocate_page) {
+		paravirt_ops.alloc_pt = vmi_allocate_pt;
+		paravirt_ops.alloc_pd = vmi_allocate_pd;
+		paravirt_ops.alloc_pd_clone = vmi_allocate_pd_clone;
+	}
+
+	vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage);
+	if (vmi_ops.release_page) {
+		paravirt_ops.release_pt = vmi_release_pt;
+		paravirt_ops.release_pd = vmi_release_pd;
+	}
+
+	para_wrap(map_pt_hook, vmi_map_pt_hook, set_linear_mapping,
+		  SetLinearMapping);
+
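Because vmi_get_function() now returns NULL instead of a NOP stub, the MMU section above installs each group of high-level handlers only when the ROM really provides the underlying call. A reduced sketch of that guarded-wiring pattern, with both ops structures invented for the example:

    #include <stddef.h>

    struct backend_ops  { void (*alloc_page)(unsigned int pfn); };
    struct frontend_ops { void (*alloc_pt)(unsigned int pfn); };

    static struct backend_ops  backend;
    static struct frontend_ops frontend;

    static void native_alloc_pt(unsigned int pfn)  { /* native code path */ }
    static void wrapped_alloc_pt(unsigned int pfn) { backend.alloc_page(pfn); }

    /* 'probed' may be NULL when the backend leaves the call unimplemented. */
    static void wire_mmu_ops(void (*probed)(unsigned int))
    {
        frontend.alloc_pt = native_alloc_pt;    /* safe default first */
        backend.alloc_page = probed;
        if (backend.alloc_page)                 /* take the wrapper only if backed */
            frontend.alloc_pt = wrapped_alloc_pt;
    }

The same shape repeats for set_pte, update_pte and the release hooks: probe once, then wire the whole dependent group behind a single NULL check.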
 	/*
 	 * These MUST always be patched.  Don't support indirect jumps
 	 * through these operations, as the VMI interface may use either
@@ -847,21 +892,20 @@ static inline int __init activate_vmi(void)
 	paravirt_ops.iret = (void *)0xbadbab0;
 
 #ifdef CONFIG_SMP
-	paravirt_ops.startup_ipi_hook = vmi_startup_ipi_hook;
-	vmi_ops.set_initial_ap_state = vmi_get_function(VMI_CALL_SetInitialAPState);
+	para_wrap(startup_ipi_hook, vmi_startup_ipi_hook, set_initial_ap_state, SetInitialAPState);
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
-	paravirt_ops.apic_read = vmi_get_function(VMI_CALL_APICRead);
-	paravirt_ops.apic_write = vmi_get_function(VMI_CALL_APICWrite);
-	paravirt_ops.apic_write_atomic = vmi_get_function(VMI_CALL_APICWrite);
+	para_fill(apic_read, APICRead);
+	para_fill(apic_write, APICWrite);
+	para_fill(apic_write_atomic, APICWrite);
 #endif
 
 	/*
 	 * Check for VMI timer functionality by probing for a cycle frequency method
 	 */
 	reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_GetCycleFrequency);
-	if (rel->type != VMI_RELOCATION_NONE) {
+	if (!disable_vmi_timer && rel->type != VMI_RELOCATION_NONE) {
 		vmi_timer_ops.get_cycle_frequency = (void *)rel->eip;
 		vmi_timer_ops.get_cycle_counter =
 			vmi_get_function(VMI_CALL_GetCycleCounter);
@@ -879,9 +923,22 @@ static inline int __init activate_vmi(void)
 		paravirt_ops.setup_boot_clock = vmi_timer_setup_boot_alarm;
 		paravirt_ops.setup_secondary_clock = vmi_timer_setup_secondary_alarm;
 #endif
-		custom_sched_clock = vmi_sched_clock;
+		paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles;
+		paravirt_ops.get_cpu_khz = vmi_cpu_khz;
+
+		/* We have true wallclock functions; disable CMOS clock sync */
+		no_sync_cmos_clock = 1;
+	} else {
+		disable_noidle = 1;
+		disable_vmi_timer = 1;
 	}
 
+	/* No idle HZ mode only works if VMI timer and no idle is enabled */
+	if (disable_noidle || disable_vmi_timer)
+		para_fill(safe_halt, Halt);
+	else
+		para_wrap(safe_halt, vmi_safe_halt, halt, Halt);
+
 	/*
 	 * Alternative instruction rewriting doesn't happen soon enough
 	 * to convert VMI_IRET to a call instead of a jump; so we have
@@ -914,7 +971,9 @@ void __init vmi_init(void)
 	local_irq_save(flags);
 	activate_vmi();
 
-#ifdef CONFIG_SMP
+
+#ifdef CONFIG_X86_IO_APIC
+	/* This is virtual hardware; timer routing is wired correctly */
 	no_timer_check = 1;
 #endif
 	local_irq_restore(flags & X86_EFLAGS_IF);
@@ -925,9 +984,7 @@ static int __init parse_vmi(char *arg)
 	if (!arg)
 		return -EINVAL;
 
-	if (!strcmp(arg, "disable_nodelay"))
-		disable_nodelay = 1;
-	else if (!strcmp(arg, "disable_pge")) {
+	if (!strcmp(arg, "disable_pge")) {
 		clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
 		disable_pge = 1;
 	} else if (!strcmp(arg, "disable_pse")) {
@@ -942,7 +999,11 @@ static int __init parse_vmi(char *arg)
 	} else if (!strcmp(arg, "disable_mtrr")) {
 		clear_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability);
 		disable_mtrr = 1;
-	}
+	} else if (!strcmp(arg, "disable_timer")) {
+		disable_vmi_timer = 1;
+		disable_noidle = 1;
+	} else if (!strcmp(arg, "disable_noidle"))
+		disable_noidle = 1;
 
 	return 0;
 }
diff --git a/arch/i386/kernel/vmitime.c b/arch/i386/kernel/vmitime.c
index 76d2adcae5a3..9dfb17739b67 100644
--- a/arch/i386/kernel/vmitime.c
+++ b/arch/i386/kernel/vmitime.c
@@ -123,12 +123,10 @@ static struct clocksource clocksource_vmi = {
 static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id);
 
 static struct irqaction vmi_timer_irq = {
-	vmi_timer_interrupt,
-	SA_INTERRUPT,
-	CPU_MASK_NONE,
-	"VMI-alarm",
-	NULL,
-	NULL
+	.handler = vmi_timer_interrupt,
+	.flags = IRQF_DISABLED,
+	.mask = CPU_MASK_NONE,
+	.name = "VMI-alarm",
 };
 
 /* Alarm rate */
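The vmi_timer_irq hunk just above replaces positional struct initialization with C99 designated initializers; members that are not named, like the two trailing NULLs, are implicitly zero. A standalone illustration with a made-up struct:

    struct action {
        int (*handler)(int irq);
        unsigned long flags;
        const char *name;
        void *next;
    };

    static int on_alarm(int irq) { return 0; }

    /* Positional form: values must track the declaration order and
     * silently pair up wrongly if the struct is ever reordered. */
    static struct action old_style = { on_alarm, 0x20UL, "alarm", 0 };

    /* Designated form: order-independent and self-documenting;
     * the omitted .next member is zero-initialized. */
    static struct action new_style = {
        .handler = on_alarm,
        .flags   = 0x20UL,
        .name    = "alarm",
    };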
@@ -153,13 +151,6 @@ static void vmi_get_wallclock_ts(struct timespec *ts)
 	ts->tv_sec = wallclock;
 }
 
-static void update_xtime_from_wallclock(void)
-{
-	struct timespec ts;
-	vmi_get_wallclock_ts(&ts);
-	do_settimeofday(&ts);
-}
-
 unsigned long vmi_get_wallclock(void)
 {
 	struct timespec ts;
@@ -172,11 +163,20 @@ int vmi_set_wallclock(unsigned long now)
 	return -1;
 }
 
-unsigned long long vmi_sched_clock(void)
+unsigned long long vmi_get_sched_cycles(void)
 {
 	return read_available_cycles();
 }
 
+unsigned long vmi_cpu_khz(void)
+{
+	unsigned long long khz;
+
+	khz = vmi_timer_ops.get_cycle_frequency();
+	(void)do_div(khz, 1000);
+	return khz;
+}
+
 void __init vmi_time_init(void)
 {
 	unsigned long long cycles_per_sec, cycles_per_msec;
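vmi_cpu_khz() above relies on the kernel's do_div(), a macro that divides a 64-bit value in place by a 32-bit divisor and evaluates to the remainder (hence the (void) cast); it exists because 32-bit kernels avoid the compiler's 64-bit division helpers. In portable C the same cycles-to-kHz conversion is simply:

    #include <stdint.h>

    /* Convert a cycle frequency in Hz to kHz, discarding the remainder,
     * just as (void)do_div(khz, 1000) does in the patch. */
    static uint32_t cycles_to_khz(uint64_t cycles_per_sec)
    {
        return (uint32_t)(cycles_per_sec / 1000);
    }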
@@ -188,25 +188,16 @@ void __init vmi_time_init(void)
 	set_intr_gate(LOCAL_TIMER_VECTOR, apic_vmi_timer_interrupt);
 #endif
 
-	no_sync_cmos_clock = 1;
-
-	vmi_get_wallclock_ts(&xtime);
-	set_normalized_timespec(&wall_to_monotonic,
-		-xtime.tv_sec, -xtime.tv_nsec);
-
 	real_cycles_accounted_system = read_real_cycles();
-	update_xtime_from_wallclock();
+
 	per_cpu(process_times_cycles_accounted_cpu, 0) = read_available_cycles();
 
 	cycles_per_sec = vmi_timer_ops.get_cycle_frequency();
-
 	cycles_per_jiffy = cycles_per_sec;
 	(void)do_div(cycles_per_jiffy, HZ);
 	cycles_per_alarm = cycles_per_sec;
 	(void)do_div(cycles_per_alarm, alarm_hz);
 	cycles_per_msec = cycles_per_sec;
 	(void)do_div(cycles_per_msec, 1000);
-	cpu_khz = cycles_per_msec;
 
 	printk(KERN_WARNING "VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ;"
 	       "cycles/alarm = %llu\n", cycles_per_sec, cycles_per_jiffy,
@@ -250,7 +241,7 @@ void __init vmi_timer_setup_boot_alarm(void)
 
 /* Initialize the time accounting variables for an AP on an SMP system.
  * Also, set the local alarm for the AP. */
-void __init vmi_timer_setup_secondary_alarm(void)
+void __devinit vmi_timer_setup_secondary_alarm(void)
 {
 	int cpu = smp_processor_id();
@@ -276,16 +267,13 @@ static void vmi_account_real_cycles(unsigned long long cur_real_cycles)
 
 	cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system;
 	while (cycles_not_accounted >= cycles_per_jiffy) {
-		/* systems wide jiffies and wallclock. */
+		/* system-wide jiffies. */
 		do_timer(1);
 		cycles_not_accounted -= cycles_per_jiffy;
 		real_cycles_accounted_system += cycles_per_jiffy;
 	}
 
-	if (vmi_timer_ops.wallclock_updated())
-		update_xtime_from_wallclock();
-
 	write_sequnlock(&xtime_lock);
 }
@@ -380,7 +368,6 @@ int vmi_stop_hz_timer(void)
 	unsigned long seq, next;
 	unsigned long long real_cycles_expiry;
 	int cpu = smp_processor_id();
-	int idle;
 
 	BUG_ON(!irqs_disabled());
 	if (sysctl_hz_timer != 0)
@@ -388,12 +375,12 @@ int vmi_stop_hz_timer(void)
 	cpu_set(cpu, nohz_cpu_mask);
 	smp_mb();
+
 	if (rcu_needs_cpu(cpu) || local_softirq_pending() ||
-	    (next = next_timer_interrupt(), time_before_eq(next, jiffies))) {
+	    (next = next_timer_interrupt(),
+	     time_before_eq(next, jiffies + HZ/CONFIG_VMI_ALARM_HZ))) {
 		cpu_clear(cpu, nohz_cpu_mask);
-		next = jiffies;
-		idle = 0;
-	} else
-		idle = 1;
+		return 0;
+	}
 
 	/* Convert jiffies to the real cycle counter. */
 	do {
@@ -404,17 +391,13 @@ int vmi_stop_hz_timer(void)
 	} while (read_seqretry(&xtime_lock, seq));
 
 	/* This cpu is going idle. Disable the periodic alarm. */
-	if (idle) {
-		vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
-		per_cpu(idle_start_jiffies, cpu) = jiffies;
-	}
-
+	vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
+	per_cpu(idle_start_jiffies, cpu) = jiffies;
 
 	/* Set the real time alarm to expire at the next event. */
 	vmi_timer_ops.set_alarm(
-		      VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL,
-		      real_cycles_expiry, 0);
-
-	return idle;
+		VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL,
+		real_cycles_expiry, 0);
+	return 1;
 }
 
 static void vmi_reenable_hz_timer(int cpu)
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index ca51610955df..6f38f818380b 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -26,7 +26,7 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
 OUTPUT_ARCH(i386)
 ENTRY(phys_startup_32)
 jiffies = jiffies_64;
-_proxy_pda = 0;
+_proxy_pda = 1;
 
 PHDRS {
 	text PT_LOAD FLAGS(5);	/* R_E */
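Finally, the vmi_stop_hz_timer() hunks above turn the function into an early-return form: if any work is pending the CPU stays on the periodic tick, otherwise the periodic alarm is cancelled and a one-shot wakeup is programmed for the next pending timer. The expiry arithmetic itself lies outside the context lines shown, but the conversion such code performs is, in rough outline (all names local to this sketch):

    #include <stdint.h>

    /* Scale the distance to the next timer event from jiffies onto the
     * real cycle counter so a one-shot alarm can be programmed. */
    static uint64_t expiry_cycles(uint64_t accounted_cycles,
                                  unsigned long next, unsigned long now,
                                  uint64_t cycles_per_jiffy)
    {
        return accounted_cycles + (uint64_t)(next - now) * cycles_per_jiffy;
    }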