summaryrefslogtreecommitdiffstats
path: root/arch/i386/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/i386/kernel')
-rw-r--r--arch/i386/kernel/acpi/boot.c28
-rw-r--r--arch/i386/kernel/acpi/earlyquirk.c10
-rw-r--r--arch/i386/kernel/alternative.c21
-rw-r--r--arch/i386/kernel/apic.c132
-rw-r--r--arch/i386/kernel/cpu/amd.c34
-rw-r--r--arch/i386/kernel/cpu/cpufreq/Kconfig1
-rw-r--r--arch/i386/kernel/cpu/cpufreq/longhaul.c6
-rw-r--r--arch/i386/kernel/cpu/cpufreq/longhaul.h26
-rw-r--r--arch/i386/kernel/cpu/cpufreq/powernow-k7.c4
-rw-r--r--arch/i386/kernel/cpu/mcheck/p4.c2
-rw-r--r--arch/i386/kernel/hpet.c150
-rw-r--r--arch/i386/kernel/i386_ksyms.c2
-rw-r--r--arch/i386/kernel/i8253.c12
-rw-r--r--arch/i386/kernel/io_apic.c10
-rw-r--r--arch/i386/kernel/irq.c3
-rw-r--r--arch/i386/kernel/microcode.c71
-rw-r--r--arch/i386/kernel/nmi.c146
-rw-r--r--arch/i386/kernel/paravirt.c8
-rw-r--r--arch/i386/kernel/pci-dma.c2
-rw-r--r--arch/i386/kernel/process.c53
-rw-r--r--arch/i386/kernel/setup.c1
-rw-r--r--arch/i386/kernel/smp.c2
-rw-r--r--arch/i386/kernel/smpboot.c21
-rw-r--r--arch/i386/kernel/time.c15
-rw-r--r--arch/i386/kernel/topology.c2
-rw-r--r--arch/i386/kernel/tsc.c99
-rw-r--r--arch/i386/kernel/vmi.c257
-rw-r--r--arch/i386/kernel/vmitime.c69
-rw-r--r--arch/i386/kernel/vmlinux.lds.S2
29 files changed, 710 insertions, 479 deletions
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index fb3e72328a5a..9ea5b8ecc7e1 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -850,10 +850,9 @@ static inline int acpi_parse_madt_ioapic_entries(void)
static void __init acpi_process_madt(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
- int count, error;
+ int error;
- count = acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt);
- if (count >= 1) {
+ if (!acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) {
/*
* Parse MADT LAPIC entries
@@ -1073,7 +1072,28 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
"ASUS A7V ACPI BIOS Revision 1007"),
},
},
-
+ {
+ /*
+ * Latest BIOS for IBM 600E (1.16) has bad pcinum
+ * for LPC bridge, which is needed for the PCI
+ * interrupt links to work. DSDT fix is in bug 5966.
+ * 2645, 2646 model numbers are shared with 600/600E/600X
+ */
+ .callback = disable_acpi_irq,
+ .ident = "IBM Thinkpad 600 Series 2645",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BOARD_NAME, "2645"),
+ },
+ },
+ {
+ .callback = disable_acpi_irq,
+ .ident = "IBM Thinkpad 600 Series 2646",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BOARD_NAME, "2646"),
+ },
+ },
/*
* Boxes that need ACPI PCI IRQ routing and PCI scan disabled
*/
diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c
index bf86f7662d8b..8f7efd38254d 100644
--- a/arch/i386/kernel/acpi/earlyquirk.c
+++ b/arch/i386/kernel/acpi/earlyquirk.c
@@ -14,11 +14,8 @@
#ifdef CONFIG_ACPI
-static int nvidia_hpet_detected __initdata;
-
static int __init nvidia_hpet_check(struct acpi_table_header *header)
{
- nvidia_hpet_detected = 1;
return 0;
}
#endif
@@ -26,12 +23,13 @@ static int __init nvidia_hpet_check(struct acpi_table_header *header)
static int __init check_bridge(int vendor, int device)
{
#ifdef CONFIG_ACPI
+ static int warned;
/* According to Nvidia all timer overrides are bogus unless HPET
is enabled. */
if (!acpi_use_timer_override && vendor == PCI_VENDOR_ID_NVIDIA) {
- nvidia_hpet_detected = 0;
- acpi_table_parse(ACPI_SIG_HPET, nvidia_hpet_check);
- if (nvidia_hpet_detected == 0) {
+ if (!warned && acpi_table_parse(ACPI_SIG_HPET,
+ nvidia_hpet_check)) {
+ warned = 1;
acpi_skip_timer_override = 1;
printk(KERN_INFO "Nvidia board "
"detected. Ignoring ACPI "
diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c
index 9eca21b49f6b..426f59b0106b 100644
--- a/arch/i386/kernel/alternative.c
+++ b/arch/i386/kernel/alternative.c
@@ -5,15 +5,9 @@
#include <asm/alternative.h>
#include <asm/sections.h>
-static int no_replacement = 0;
static int smp_alt_once = 0;
static int debug_alternative = 0;
-static int __init noreplacement_setup(char *s)
-{
- no_replacement = 1;
- return 1;
-}
static int __init bootonly(char *str)
{
smp_alt_once = 1;
@@ -25,7 +19,6 @@ static int __init debug_alt(char *str)
return 1;
}
-__setup("noreplacement", noreplacement_setup);
__setup("smp-alt-boot", bootonly);
__setup("debug-alternative", debug_alt);
@@ -252,9 +245,6 @@ void alternatives_smp_module_add(struct module *mod, char *name,
struct smp_alt_module *smp;
unsigned long flags;
- if (no_replacement)
- return;
-
if (smp_alt_once) {
if (boot_cpu_has(X86_FEATURE_UP))
alternatives_smp_unlock(locks, locks_end,
@@ -289,7 +279,7 @@ void alternatives_smp_module_del(struct module *mod)
struct smp_alt_module *item;
unsigned long flags;
- if (no_replacement || smp_alt_once)
+ if (smp_alt_once)
return;
spin_lock_irqsave(&smp_alt, flags);
@@ -320,7 +310,7 @@ void alternatives_smp_switch(int smp)
return;
#endif
- if (no_replacement || smp_alt_once)
+ if (smp_alt_once)
return;
BUG_ON(!smp && (num_online_cpus() > 1));
@@ -386,13 +376,6 @@ extern struct paravirt_patch __start_parainstructions[],
void __init alternative_instructions(void)
{
unsigned long flags;
- if (no_replacement) {
- printk(KERN_INFO "(SMP-)alternatives turned off\n");
- free_init_pages("SMP alternatives",
- (unsigned long)__smp_alt_begin,
- (unsigned long)__smp_alt_end);
- return;
- }
local_irq_save(flags);
apply_alternatives(__alt_instructions, __alt_instructions_end);
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 9655c233e6f1..93aa911646ad 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -28,6 +28,7 @@
#include <linux/clockchips.h>
#include <linux/acpi_pmtmr.h>
#include <linux/module.h>
+#include <linux/dmi.h>
#include <asm/atomic.h>
#include <asm/smp.h>
@@ -38,7 +39,6 @@
#include <asm/hpet.h>
#include <asm/i8253.h>
#include <asm/nmi.h>
-#include <asm/idle.h>
#include <mach_apic.h>
#include <mach_apicdef.h>
@@ -62,6 +62,11 @@ static int enable_local_apic __initdata = 0;
/* Local APIC timer verification ok */
static int local_apic_timer_verify_ok;
+/* Disable local APIC timer from the kernel commandline or via dmi quirk */
+static int local_apic_timer_disabled;
+/* Local APIC timer works in C2 */
+int local_apic_timer_c2_ok;
+EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
/*
* Debug level, exported for io_apic.c
@@ -339,6 +344,23 @@ void __init setup_boot_APIC_clock(void)
void (*real_handler)(struct clock_event_device *dev);
unsigned long deltaj;
long delta, deltapm;
+ int pm_referenced = 0;
+
+ if (boot_cpu_has(X86_FEATURE_LAPIC_TIMER_BROKEN))
+ local_apic_timer_disabled = 1;
+
+ /*
+ * The local apic timer can be disabled via the kernel
+ * commandline or from the test above. Register the lapic
+ * timer as a dummy clock event source on SMP systems, so the
+ * broadcast mechanism is used. On UP systems simply ignore it.
+ */
+ if (local_apic_timer_disabled) {
+ /* No broadcast on UP ! */
+ if (num_possible_cpus() > 1)
+ setup_APIC_timer();
+ return;
+ }
apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
"calibrating APIC timer ...\n");
@@ -358,7 +380,8 @@ void __init setup_boot_APIC_clock(void)
/* Let the interrupts run */
local_irq_enable();
- while(lapic_cal_loops <= LAPIC_CAL_LOOPS);
+ while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
+ cpu_relax();
local_irq_disable();
@@ -395,6 +418,7 @@ void __init setup_boot_APIC_clock(void)
"%lu (%ld)\n", (unsigned long) res, delta);
delta = (long) res;
}
+ pm_referenced = 1;
}
/* Calculate the scaled math multiplication factor */
@@ -424,69 +448,43 @@ void __init setup_boot_APIC_clock(void)
calibration_result / (1000000 / HZ),
calibration_result % (1000000 / HZ));
-
- apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
-
- /*
- * Setup the apic timer manually
- */
local_apic_timer_verify_ok = 1;
- levt->event_handler = lapic_cal_handler;
- lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt);
- lapic_cal_loops = -1;
- /* Let the interrupts run */
- local_irq_enable();
+ /* We trust the pm timer based calibration */
+ if (!pm_referenced) {
+ apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
- while(lapic_cal_loops <= LAPIC_CAL_LOOPS);
+ /*
+ * Setup the apic timer manually
+ */
+ levt->event_handler = lapic_cal_handler;
+ lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt);
+ lapic_cal_loops = -1;
- local_irq_disable();
+ /* Let the interrupts run */
+ local_irq_enable();
- /* Stop the lapic timer */
- lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt);
+ while(lapic_cal_loops <= LAPIC_CAL_LOOPS)
+ cpu_relax();
- local_irq_enable();
+ local_irq_disable();
- /* Jiffies delta */
- deltaj = lapic_cal_j2 - lapic_cal_j1;
- apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
+ /* Stop the lapic timer */
+ lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt);
- /* Check, if the PM timer is available */
- deltapm = lapic_cal_pm2 - lapic_cal_pm1;
- apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
+ local_irq_enable();
- local_apic_timer_verify_ok = 0;
+ /* Jiffies delta */
+ deltaj = lapic_cal_j2 - lapic_cal_j1;
+ apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
- if (deltapm) {
- if (deltapm > (pm_100ms - pm_thresh) &&
- deltapm < (pm_100ms + pm_thresh)) {
- apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
- /* Check, if the jiffies result is consistent */
- if (deltaj < LAPIC_CAL_LOOPS-2 ||
- deltaj > LAPIC_CAL_LOOPS+2) {
- /*
- * Not sure, what we can do about this one.
- * When high resultion timers are active
- * and the lapic timer does not stop in C3
- * we are fine. Otherwise more trouble might
- * be waiting. -- tglx
- */
- printk(KERN_WARNING "Global event device %s "
- "has wrong frequency "
- "(%lu ticks instead of %d)\n",
- global_clock_event->name, deltaj,
- LAPIC_CAL_LOOPS);
- }
- local_apic_timer_verify_ok = 1;
- }
- } else {
/* Check, if the jiffies result is consistent */
- if (deltaj >= LAPIC_CAL_LOOPS-2 &&
- deltaj <= LAPIC_CAL_LOOPS+2) {
+ if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
- local_apic_timer_verify_ok = 1;
- }
- }
+ else
+ local_apic_timer_verify_ok = 0;
+ } else
+ local_irq_enable();
if (!local_apic_timer_verify_ok) {
printk(KERN_WARNING
@@ -494,8 +492,15 @@ void __init setup_boot_APIC_clock(void)
/* No broadcast on UP ! */
if (num_possible_cpus() == 1)
return;
- } else
- lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
+ } else {
+ /*
+ * If nmi_watchdog is set to IO_APIC, we need the
+ * PIT/HPET going. Otherwise register lapic as a dummy
+ * device.
+ */
+ if (nmi_watchdog != NMI_IO_APIC)
+ lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
+ }
/* Setup the lapic or request the broadcast */
setup_APIC_timer();
@@ -561,7 +566,6 @@ void fastcall smp_apic_timer_interrupt(struct pt_regs *regs)
* Besides, if we don't timer interrupts ignore the global
* interrupt lock, which is the WrongThing (tm) to do.
*/
- exit_idle();
irq_enter();
local_apic_timer_interrupt();
irq_exit();
@@ -1198,6 +1202,20 @@ static int __init parse_nolapic(char *arg)
}
early_param("nolapic", parse_nolapic);
+static int __init parse_disable_lapic_timer(char *arg)
+{
+ local_apic_timer_disabled = 1;
+ return 0;
+}
+early_param("nolapic_timer", parse_disable_lapic_timer);
+
+static int __init parse_lapic_timer_c2_ok(char *arg)
+{
+ local_apic_timer_c2_ok = 1;
+ return 0;
+}
+early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
+
static int __init apic_set_verbosity(char *str)
{
if (strcmp("debug", str) == 0)
@@ -1221,7 +1239,6 @@ void smp_spurious_interrupt(struct pt_regs *regs)
{
unsigned long v;
- exit_idle();
irq_enter();
/*
* Check if this really is a spurious interrupt and ACK it
@@ -1245,7 +1262,6 @@ void smp_error_interrupt(struct pt_regs *regs)
{
unsigned long v, v1;
- exit_idle();
irq_enter();
/* First tickle the hardware, only then report what went on. -- REW */
v = apic_read(APIC_ESR);
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index 41cfea57232b..2d47db482972 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -22,6 +22,37 @@
extern void vide(void);
__asm__(".align 4\nvide: ret");
+#define ENABLE_C1E_MASK 0x18000000
+#define CPUID_PROCESSOR_SIGNATURE 1
+#define CPUID_XFAM 0x0ff00000
+#define CPUID_XFAM_K8 0x00000000
+#define CPUID_XFAM_10H 0x00100000
+#define CPUID_XFAM_11H 0x00200000
+#define CPUID_XMOD 0x000f0000
+#define CPUID_XMOD_REV_F 0x00040000
+
+/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */
+static __cpuinit int amd_apic_timer_broken(void)
+{
+ u32 lo, hi;
+ u32 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
+ switch (eax & CPUID_XFAM) {
+ case CPUID_XFAM_K8:
+ if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F)
+ break;
+ case CPUID_XFAM_10H:
+ case CPUID_XFAM_11H:
+ rdmsr(MSR_K8_ENABLE_C1E, lo, hi);
+ if (lo & ENABLE_C1E_MASK)
+ return 1;
+ break;
+ default:
+ /* err on the side of caution */
+ return 1;
+ }
+ return 0;
+}
+
static void __cpuinit init_amd(struct cpuinfo_x86 *c)
{
u32 l, h;
@@ -241,6 +272,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
if (cpuid_eax(0x80000000) >= 0x80000006)
num_cache_leaves = 3;
+
+ if (amd_apic_timer_broken())
+ set_bit(X86_FEATURE_LAPIC_TIMER_BROKEN, c->x86_capability);
}
static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
diff --git a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig
index 6c52182ca323..e912aae9473c 100644
--- a/arch/i386/kernel/cpu/cpufreq/Kconfig
+++ b/arch/i386/kernel/cpu/cpufreq/Kconfig
@@ -125,7 +125,6 @@ config X86_SPEEDSTEP_CENTRINO_ACPI
bool "Use ACPI tables to decode valid frequency/voltage (deprecated)"
depends on X86_SPEEDSTEP_CENTRINO && ACPI_PROCESSOR
depends on !(X86_SPEEDSTEP_CENTRINO = y && ACPI_PROCESSOR = m)
- default y
help
This is deprecated and this functionality is now merged into
acpi_cpufreq (X86_ACPI_CPUFREQ). Use that driver instead of
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c
index b59878a0d9b3..2b030d6ccbf7 100644
--- a/arch/i386/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c
@@ -61,8 +61,8 @@ static int cpu_model;
static unsigned int numscales=16;
static unsigned int fsb;
-static struct mV_pos *vrm_mV_table;
-static unsigned char *mV_vrm_table;
+static const struct mV_pos *vrm_mV_table;
+static const unsigned char *mV_vrm_table;
struct f_msr {
u8 vrm;
u8 pos;
@@ -758,7 +758,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
NULL, (void *)&pr);
/* Check ACPI support for C3 state */
- if (pr != NULL && longhaul_version != TYPE_LONGHAUL_V1) {
+ if (pr != NULL && longhaul_version == TYPE_POWERSAVER) {
cx = &pr->power.states[ACPI_STATE_C3];
if (cx->address > 0 && cx->latency <= 1000) {
longhaul_flags |= USE_ACPI_C3;
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.h b/arch/i386/kernel/cpu/cpufreq/longhaul.h
index bb0a04b1d1ab..102548f12842 100644
--- a/arch/i386/kernel/cpu/cpufreq/longhaul.h
+++ b/arch/i386/kernel/cpu/cpufreq/longhaul.h
@@ -56,7 +56,7 @@ union msr_longhaul {
/*
* VIA C3 Samuel 1 & Samuel 2 (stepping 0)
*/
-static int __initdata samuel1_clock_ratio[16] = {
+static const int __initdata samuel1_clock_ratio[16] = {
-1, /* 0000 -> RESERVED */
30, /* 0001 -> 3.0x */
40, /* 0010 -> 4.0x */
@@ -75,7 +75,7 @@ static int __initdata samuel1_clock_ratio[16] = {
-1, /* 1111 -> RESERVED */
};
-static int __initdata samuel1_eblcr[16] = {
+static const int __initdata samuel1_eblcr[16] = {
50, /* 0000 -> RESERVED */
30, /* 0001 -> 3.0x */
40, /* 0010 -> 4.0x */
@@ -97,7 +97,7 @@ static int __initdata samuel1_eblcr[16] = {
/*
* VIA C3 Samuel2 Stepping 1->15
*/
-static int __initdata samuel2_eblcr[16] = {
+static const int __initdata samuel2_eblcr[16] = {
50, /* 0000 -> 5.0x */
30, /* 0001 -> 3.0x */
40, /* 0010 -> 4.0x */
@@ -119,7 +119,7 @@ static int __initdata samuel2_eblcr[16] = {
/*
* VIA C3 Ezra
*/
-static int __initdata ezra_clock_ratio[16] = {
+static const int __initdata ezra_clock_ratio[16] = {
100, /* 0000 -> 10.0x */
30, /* 0001 -> 3.0x */
40, /* 0010 -> 4.0x */
@@ -138,7 +138,7 @@ static int __initdata ezra_clock_ratio[16] = {
120, /* 1111 -> 12.0x */
};
-static int __initdata ezra_eblcr[16] = {
+static const int __initdata ezra_eblcr[16] = {
50, /* 0000 -> 5.0x */
30, /* 0001 -> 3.0x */
40, /* 0010 -> 4.0x */
@@ -160,7 +160,7 @@ static int __initdata ezra_eblcr[16] = {
/*
* VIA C3 (Ezra-T) [C5M].
*/
-static int __initdata ezrat_clock_ratio[32] = {
+static const int __initdata ezrat_clock_ratio[32] = {
100, /* 0000 -> 10.0x */
30, /* 0001 -> 3.0x */
40, /* 0010 -> 4.0x */
@@ -196,7 +196,7 @@ static int __initdata ezrat_clock_ratio[32] = {
-1, /* 1111 -> RESERVED (12.0x) */
};
-static int __initdata ezrat_eblcr[32] = {
+static const int __initdata ezrat_eblcr[32] = {
50, /* 0000 -> 5.0x */
30, /* 0001 -> 3.0x */
40, /* 0010 -> 4.0x */
@@ -235,7 +235,7 @@ static int __initdata ezrat_eblcr[32] = {
/*
* VIA C3 Nehemiah */
-static int __initdata nehemiah_clock_ratio[32] = {
+static const int __initdata nehemiah_clock_ratio[32] = {
100, /* 0000 -> 10.0x */
160, /* 0001 -> 16.0x */
40, /* 0010 -> 4.0x */
@@ -270,7 +270,7 @@ static int __initdata nehemiah_clock_ratio[32] = {
120, /* 1111 -> 12.0x */
};
-static int __initdata nehemiah_eblcr[32] = {
+static const int __initdata nehemiah_eblcr[32] = {
50, /* 0000 -> 5.0x */
160, /* 0001 -> 16.0x */
40, /* 0010 -> 4.0x */
@@ -315,7 +315,7 @@ struct mV_pos {
unsigned short pos;
};
-static struct mV_pos __initdata vrm85_mV[32] = {
+static const struct mV_pos __initdata vrm85_mV[32] = {
{1250, 8}, {1200, 6}, {1150, 4}, {1100, 2},
{1050, 0}, {1800, 30}, {1750, 28}, {1700, 26},
{1650, 24}, {1600, 22}, {1550, 20}, {1500, 18},
@@ -326,14 +326,14 @@ static struct mV_pos __initdata vrm85_mV[32] = {
{1475, 17}, {1425, 15}, {1375, 13}, {1325, 11}
};
-static unsigned char __initdata mV_vrm85[32] = {
+static const unsigned char __initdata mV_vrm85[32] = {
0x04, 0x14, 0x03, 0x13, 0x02, 0x12, 0x01, 0x11,
0x00, 0x10, 0x0f, 0x1f, 0x0e, 0x1e, 0x0d, 0x1d,
0x0c, 0x1c, 0x0b, 0x1b, 0x0a, 0x1a, 0x09, 0x19,
0x08, 0x18, 0x07, 0x17, 0x06, 0x16, 0x05, 0x15
};
-static struct mV_pos __initdata mobilevrm_mV[32] = {
+static const struct mV_pos __initdata mobilevrm_mV[32] = {
{1750, 31}, {1700, 30}, {1650, 29}, {1600, 28},
{1550, 27}, {1500, 26}, {1450, 25}, {1400, 24},
{1350, 23}, {1300, 22}, {1250, 21}, {1200, 20},
@@ -344,7 +344,7 @@ static struct mV_pos __initdata mobilevrm_mV[32] = {
{675, 3}, {650, 2}, {625, 1}, {600, 0}
};
-static unsigned char __initdata mV_mobilevrm[32] = {
+static const unsigned char __initdata mV_mobilevrm[32] = {
0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18,
0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10,
0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08,
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
index 54382760983a..837b04166a47 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
@@ -68,7 +68,7 @@ union powernow_acpi_control_t {
#ifdef CONFIG_CPU_FREQ_DEBUG
/* divide by 1000 to get VCore voltage in V. */
-static int mobile_vid_table[32] = {
+static const int mobile_vid_table[32] = {
2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650,
1600, 1550, 1500, 1450, 1400, 1350, 1300, 0,
1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100,
@@ -77,7 +77,7 @@ static int mobile_vid_table[32] = {
#endif
/* divide by 10 to get FID. */
-static int fid_codes[32] = {
+static const int fid_codes[32] = {
110, 115, 120, 125, 50, 55, 60, 65,
70, 75, 80, 85, 90, 95, 100, 105,
30, 190, 40, 200, 130, 135, 140, 210,
diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c
index 8359c19d3a23..504434a46011 100644
--- a/arch/i386/kernel/cpu/mcheck/p4.c
+++ b/arch/i386/kernel/cpu/mcheck/p4.c
@@ -12,7 +12,6 @@
#include <asm/system.h>
#include <asm/msr.h>
#include <asm/apic.h>
-#include <asm/idle.h>
#include <asm/therm_throt.h>
@@ -60,7 +59,6 @@ static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_therm
fastcall void smp_thermal_interrupt(struct pt_regs *regs)
{
- exit_idle();
irq_enter();
vendor_thermal_interrupt(regs);
irq_exit();
diff --git a/arch/i386/kernel/hpet.c b/arch/i386/kernel/hpet.c
index e1006b7acc9e..17d73459fc5f 100644
--- a/arch/i386/kernel/hpet.c
+++ b/arch/i386/kernel/hpet.c
@@ -3,6 +3,8 @@
#include <linux/errno.h>
#include <linux/hpet.h>
#include <linux/init.h>
+#include <linux/sysdev.h>
+#include <linux/pm.h>
#include <asm/hpet.h>
#include <asm/io.h>
@@ -197,16 +199,34 @@ static int hpet_next_event(unsigned long delta,
cnt += delta;
hpet_writel(cnt, HPET_T0_CMP);
- return ((long)(hpet_readl(HPET_COUNTER) - cnt ) > 0);
+ return ((long)(hpet_readl(HPET_COUNTER) - cnt ) > 0) ? -ETIME : 0;
}
/*
+ * Clock source related code
+ */
+static cycle_t read_hpet(void)
+{
+ return (cycle_t)hpet_readl(HPET_COUNTER);
+}
+
+static struct clocksource clocksource_hpet = {
+ .name = "hpet",
+ .rating = 250,
+ .read = read_hpet,
+ .mask = HPET_MASK,
+ .shift = HPET_SHIFT,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+/*
* Try to setup the HPET timer
*/
int __init hpet_enable(void)
{
unsigned long id;
uint64_t hpet_freq;
+ u64 tmp;
if (!is_hpet_capable())
return 0;
@@ -253,6 +273,25 @@ int __init hpet_enable(void)
/* Start the counter */
hpet_start_counter();
+ /* Initialize and register HPET clocksource
+ *
+ * hpet period is in femto seconds per cycle
+ * so we need to convert this to ns/cyc units
+ * aproximated by mult/2^shift
+ *
+ * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift
+ * fsec/cyc * 1ns/1000000fsec * 2^shift = mult
+ * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult
+ * (fsec/cyc << shift)/1000000 = mult
+ * (hpet_period << shift)/FSEC_PER_NSEC = mult
+ */
+ tmp = (u64)hpet_period << HPET_SHIFT;
+ do_div(tmp, FSEC_PER_NSEC);
+ clocksource_hpet.mult = (u32)tmp;
+
+ clocksource_register(&clocksource_hpet);
+
+
if (id & HPET_ID_LEGSUP) {
hpet_enable_int();
hpet_reserve_platform_timers(id);
@@ -270,52 +309,10 @@ int __init hpet_enable(void)
out_nohpet:
iounmap(hpet_virt_address);
hpet_virt_address = NULL;
+ boot_hpet_disable = 1;
return 0;
}
-/*
- * Clock source related code
- */
-static cycle_t read_hpet(void)
-{
- return (cycle_t)hpet_readl(HPET_COUNTER);
-}
-
-static struct clocksource clocksource_hpet = {
- .name = "hpet",
- .rating = 250,
- .read = read_hpet,
- .mask = HPET_MASK,
- .shift = HPET_SHIFT,
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
-};
-
-static int __init init_hpet_clocksource(void)
-{
- u64 tmp;
-
- if (!hpet_virt_address)
- return -ENODEV;
-
- /*
- * hpet period is in femto seconds per cycle
- * so we need to convert this to ns/cyc units
- * aproximated by mult/2^shift
- *
- * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift
- * fsec/cyc * 1ns/1000000fsec * 2^shift = mult
- * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult
- * (fsec/cyc << shift)/1000000 = mult
- * (hpet_period << shift)/FSEC_PER_NSEC = mult
- */
- tmp = (u64)hpet_period << HPET_SHIFT;
- do_div(tmp, FSEC_PER_NSEC);
- clocksource_hpet.mult = (u32)tmp;
-
- return clocksource_register(&clocksource_hpet);
-}
-
-module_init(init_hpet_clocksource);
#ifdef CONFIG_HPET_EMULATE_RTC
@@ -527,3 +524,68 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
#endif
+
+
+/*
+ * Suspend/resume part
+ */
+
+#ifdef CONFIG_PM
+
+static int hpet_suspend(struct sys_device *sys_device, pm_message_t state)
+{
+ unsigned long cfg = hpet_readl(HPET_CFG);
+
+ cfg &= ~(HPET_CFG_ENABLE|HPET_CFG_LEGACY);
+ hpet_writel(cfg, HPET_CFG);
+
+ return 0;
+}
+
+static int hpet_resume(struct sys_device *sys_device)
+{
+ unsigned int id;
+
+ hpet_start_counter();
+
+ id = hpet_readl(HPET_ID);
+
+ if (id & HPET_ID_LEGSUP)
+ hpet_enable_int();
+
+ return 0;
+}
+
+static struct sysdev_class hpet_class = {
+ set_kset_name("hpet"),
+ .suspend = hpet_suspend,
+ .resume = hpet_resume,
+};
+
+static struct sys_device hpet_device = {
+ .id = 0,
+ .cls = &hpet_class,
+};
+
+
+static __init int hpet_register_sysfs(void)
+{
+ int err;
+
+ if (!is_hpet_capable())
+ return 0;
+
+ err = sysdev_class_register(&hpet_class);
+
+ if (!err) {
+ err = sysdev_register(&hpet_device);
+ if (err)
+ sysdev_class_unregister(&hpet_class);
+ }
+
+ return err;
+}
+
+device_initcall(hpet_register_sysfs);
+
+#endif
diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
index e3d4b73bfdb0..4afe26e86260 100644
--- a/arch/i386/kernel/i386_ksyms.c
+++ b/arch/i386/kernel/i386_ksyms.c
@@ -28,3 +28,5 @@ EXPORT_SYMBOL(__read_lock_failed);
#endif
EXPORT_SYMBOL(csum_partial);
+
+EXPORT_SYMBOL(_proxy_pda);
diff --git a/arch/i386/kernel/i8253.c b/arch/i386/kernel/i8253.c
index a6bc7bb38834..10cef5ca8a5b 100644
--- a/arch/i386/kernel/i8253.c
+++ b/arch/i386/kernel/i8253.c
@@ -47,9 +47,17 @@ static void init_pit_timer(enum clock_event_mode mode,
outb(LATCH >> 8 , PIT_CH0); /* MSB */
break;
- case CLOCK_EVT_MODE_ONESHOT:
+ /*
+ * Avoid unnecessary state transitions, as it confuses
+ * Geode / Cyrix based boxen.
+ */
case CLOCK_EVT_MODE_SHUTDOWN:
+ if (evt->mode == CLOCK_EVT_MODE_UNUSED)
+ break;
case CLOCK_EVT_MODE_UNUSED:
+ if (evt->mode == CLOCK_EVT_MODE_SHUTDOWN)
+ break;
+ case CLOCK_EVT_MODE_ONESHOT:
/* One shot setup */
outb_p(0x38, PIT_MODE);
udelay(10);
@@ -195,4 +203,4 @@ static int __init init_pit_clocksource(void)
clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20);
return clocksource_register(&clocksource_pit);
}
-module_init(init_pit_clocksource);
+arch_initcall(init_pit_clocksource);
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index 4ccebd454e25..b3ab8ffebd27 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -343,7 +343,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
break;
entry = irq_2_pin + entry->next;
}
- set_native_irq_info(irq, cpumask);
+ irq_desc[irq].affinity = cpumask;
spin_unlock_irqrestore(&ioapic_lock, flags);
}
@@ -736,7 +736,7 @@ failed:
return 0;
}
-int __init irqbalance_disable(char *str)
+int __devinit irqbalance_disable(char *str)
{
irqbalance_disabled = 1;
return 1;
@@ -1354,7 +1354,6 @@ static void __init setup_IO_APIC_irqs(void)
}
spin_lock_irqsave(&ioapic_lock, flags);
__ioapic_write_entry(apic, pin, entry);
- set_native_irq_info(irq, TARGET_CPUS);
spin_unlock_irqrestore(&ioapic_lock, flags);
}
}
@@ -2585,7 +2584,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
write_msi_msg(irq, &msg);
- set_native_irq_info(irq, mask);
+ irq_desc[irq].affinity = mask;
}
#endif /* CONFIG_SMP */
@@ -2669,7 +2668,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
dest = cpu_mask_to_apicid(mask);
target_ht_irq(irq, dest);
- set_native_irq_info(irq, mask);
+ irq_desc[irq].affinity = mask;
}
#endif
@@ -2875,7 +2874,6 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
spin_lock_irqsave(&ioapic_lock, flags);
__ioapic_write_entry(ioapic, pin, entry);
- set_native_irq_info(irq, TARGET_CPUS);
spin_unlock_irqrestore(&ioapic_lock, flags);
return 0;
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 0f2ca590bf23..8db8d514c9c0 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -18,8 +18,6 @@
#include <linux/cpu.h>
#include <linux/delay.h>
-#include <asm/idle.h>
-
#include <asm/apic.h>
#include <asm/uaccess.h>
@@ -77,7 +75,6 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs)
union irq_ctx *curctx, *irqctx;
u32 *isp;
#endif
- exit_idle();
if (unlikely((unsigned)irq >= NR_IRQS)) {
printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c
index b8f16633a6ec..cbe7ec8dbb9f 100644
--- a/arch/i386/kernel/microcode.c
+++ b/arch/i386/kernel/microcode.c
@@ -567,6 +567,53 @@ static int cpu_request_microcode(int cpu)
return error;
}
+static int apply_microcode_on_cpu(int cpu)
+{
+ struct cpuinfo_x86 *c = cpu_data + cpu;
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ cpumask_t old;
+ unsigned int val[2];
+ int err = 0;
+
+ if (!uci->mc)
+ return -EINVAL;
+
+ old = current->cpus_allowed;
+ set_cpus_allowed(current, cpumask_of_cpu(cpu));
+
+ /* Check if the microcode we have in memory matches the CPU */
+ if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
+ cpu_has(c, X86_FEATURE_IA64) || uci->sig != cpuid_eax(0x00000001))
+ err = -EINVAL;
+
+ if (!err && ((c->x86_model >= 5) || (c->x86 > 6))) {
+ /* get processor flags from MSR 0x17 */
+ rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
+ if (uci->pf != (1 << ((val[1] >> 18) & 7)))
+ err = -EINVAL;
+ }
+
+ if (!err) {
+ wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+ /* see notes above for revision 1.07. Apparent chip bug */
+ sync_core();
+ /* get the current revision from MSR 0x8B */
+ rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
+ if (uci->rev != val[1])
+ err = -EINVAL;
+ }
+
+ if (!err)
+ apply_microcode(cpu);
+ else
+ printk(KERN_ERR "microcode: Could not apply microcode to CPU%d:"
+ " sig=0x%x, pf=0x%x, rev=0x%x\n",
+ cpu, uci->sig, uci->pf, uci->rev);
+
+ set_cpus_allowed(current, old);
+ return err;
+}
+
static void microcode_init_cpu(int cpu)
{
cpumask_t old;
@@ -577,7 +624,8 @@ static void microcode_init_cpu(int cpu)
set_cpus_allowed(current, cpumask_of_cpu(cpu));
mutex_lock(&microcode_mutex);
collect_cpu_info(cpu);
- if (uci->valid && system_state == SYSTEM_RUNNING)
+ if (uci->valid && system_state == SYSTEM_RUNNING &&
+ !suspend_cpu_hotplug)
cpu_request_microcode(cpu);
mutex_unlock(&microcode_mutex);
set_cpus_allowed(current, old);
@@ -663,13 +711,24 @@ static int mc_sysdev_add(struct sys_device *sys_dev)
return 0;
pr_debug("Microcode:CPU %d added\n", cpu);
- memset(uci, 0, sizeof(*uci));
+ /* If suspend_cpu_hotplug is set, the system is resuming and we should
+ * use the data from before the suspend.
+ */
+ if (suspend_cpu_hotplug) {
+ err = apply_microcode_on_cpu(cpu);
+ if (err)
+ microcode_fini_cpu(cpu);
+ }
+ if (!uci->valid)
+ memset(uci, 0, sizeof(*uci));
err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group);
if (err)
return err;
- microcode_init_cpu(cpu);
+ if (!uci->valid)
+ microcode_init_cpu(cpu);
+
return 0;
}
@@ -680,7 +739,11 @@ static int mc_sysdev_remove(struct sys_device *sys_dev)
if (!cpu_online(cpu))
return 0;
pr_debug("Microcode:CPU %d removed\n", cpu);
- microcode_fini_cpu(cpu);
+ /* If suspend_cpu_hotplug is set, the system is suspending and we should
+ * keep the microcode in memory for the resume.
+ */
+ if (!suspend_cpu_hotplug)
+ microcode_fini_cpu(cpu);
sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
return 0;
}
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index 821df34d2b3a..84c3497efb60 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -41,16 +41,17 @@ int nmi_watchdog_enabled;
* different subsystems this reservation system just tries to coordinate
* things a little
*/
-static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner);
-static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]);
-
-static cpumask_t backtrace_mask = CPU_MASK_NONE;
/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
* offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
*/
#define NMI_MAX_COUNTER_BITS 66
+#define NMI_MAX_COUNTER_LONGS BITS_TO_LONGS(NMI_MAX_COUNTER_BITS)
+
+static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner[NMI_MAX_COUNTER_LONGS]);
+static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[NMI_MAX_COUNTER_LONGS]);
+static cpumask_t backtrace_mask = CPU_MASK_NONE;
/* nmi_active:
* >0: the lapic NMI watchdog is active, but can be disabled
* <0: the lapic NMI watchdog has not been set up, and cannot
@@ -122,64 +123,129 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
/* checks for a bit availability (hack for oprofile) */
int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
{
+ int cpu;
BUG_ON(counter > NMI_MAX_COUNTER_BITS);
-
- return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
+ for_each_possible_cpu (cpu) {
+ if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]))
+ return 0;
+ }
+ return 1;
}
/* checks the an msr for availability */
int avail_to_resrv_perfctr_nmi(unsigned int msr)
{
unsigned int counter;
+ int cpu;
counter = nmi_perfctr_msr_to_bit(msr);
BUG_ON(counter > NMI_MAX_COUNTER_BITS);
- return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
+ for_each_possible_cpu (cpu) {
+ if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]))
+ return 0;
+ }
+ return 1;
}
-int reserve_perfctr_nmi(unsigned int msr)
+static int __reserve_perfctr_nmi(int cpu, unsigned int msr)
{
unsigned int counter;
+ if (cpu < 0)
+ cpu = smp_processor_id();
counter = nmi_perfctr_msr_to_bit(msr);
BUG_ON(counter > NMI_MAX_COUNTER_BITS);
- if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
+ if (!test_and_set_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]))
return 1;
return 0;
}
-void release_perfctr_nmi(unsigned int msr)
+static void __release_perfctr_nmi(int cpu, unsigned int msr)
{
unsigned int counter;
+ if (cpu < 0)
+ cpu = smp_processor_id();
counter = nmi_perfctr_msr_to_bit(msr);
BUG_ON(counter > NMI_MAX_COUNTER_BITS);
- clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
+ clear_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]);
}
-int reserve_evntsel_nmi(unsigned int msr)
+int reserve_perfctr_nmi(unsigned int msr)
+{
+ int cpu, i;
+ for_each_possible_cpu (cpu) {
+ if (!__reserve_perfctr_nmi(cpu, msr)) {
+ for_each_possible_cpu (i) {
+ if (i >= cpu)
+ break;
+ __release_perfctr_nmi(i, msr);
+ }
+ return 0;
+ }
+ }
+ return 1;
+}
+
+void release_perfctr_nmi(unsigned int msr)
+{
+ int cpu;
+ for_each_possible_cpu (cpu) {
+ __release_perfctr_nmi(cpu, msr);
+ }
+}
+
+int __reserve_evntsel_nmi(int cpu, unsigned int msr)
{
unsigned int counter;
+ if (cpu < 0)
+ cpu = smp_processor_id();
counter = nmi_evntsel_msr_to_bit(msr);
BUG_ON(counter > NMI_MAX_COUNTER_BITS);
- if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]))
+ if (!test_and_set_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]))
return 1;
return 0;
}
-void release_evntsel_nmi(unsigned int msr)
+static void __release_evntsel_nmi(int cpu, unsigned int msr)
{
unsigned int counter;
+ if (cpu < 0)
+ cpu = smp_processor_id();
counter = nmi_evntsel_msr_to_bit(msr);
BUG_ON(counter > NMI_MAX_COUNTER_BITS);
- clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]);
+ clear_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]);
+}
+
+int reserve_evntsel_nmi(unsigned int msr)
+{
+ int cpu, i;
+ for_each_possible_cpu (cpu) {
+ if (!__reserve_evntsel_nmi(cpu, msr)) {
+ for_each_possible_cpu (i) {
+ if (i >= cpu)
+ break;
+ __release_evntsel_nmi(i, msr);
+ }
+ return 0;
+ }
+ }
+ return 1;
+}
+
+void release_evntsel_nmi(unsigned int msr)
+{
+ int cpu;
+ for_each_possible_cpu (cpu) {
+ __release_evntsel_nmi(cpu, msr);
+ }
}
static __cpuinit inline int nmi_known_cpu(void)
@@ -245,14 +311,6 @@ static int __init check_nmi_watchdog(void)
unsigned int *prev_nmi_count;
int cpu;
- /* Enable NMI watchdog for newer systems.
- Probably safe on most older systems too, but let's be careful.
- IBM ThinkPads use INT10 inside SMM and that allows early NMI inside SMM
- which hangs the system. Disable watchdog for all thinkpads */
- if (nmi_watchdog == NMI_DEFAULT && dmi_get_year(DMI_BIOS_DATE) >= 2004 &&
- !dmi_name_in_vendors("ThinkPad"))
- nmi_watchdog = NMI_LOCAL_APIC;
-
if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
return 0;
@@ -271,7 +329,7 @@ static int __init check_nmi_watchdog(void)
for_each_possible_cpu(cpu)
prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
local_irq_enable();
- mdelay((10*1000)/nmi_hz); // wait 10 ticks
+ mdelay((20*1000)/nmi_hz); // wait 20 ticks
for_each_possible_cpu(cpu) {
#ifdef CONFIG_SMP
@@ -515,10 +573,10 @@ static int setup_k7_watchdog(void)
perfctr_msr = MSR_K7_PERFCTR0;
evntsel_msr = MSR_K7_EVNTSEL0;
- if (!reserve_perfctr_nmi(perfctr_msr))
+ if (!__reserve_perfctr_nmi(-1, perfctr_msr))
goto fail;
- if (!reserve_evntsel_nmi(evntsel_msr))
+ if (!__reserve_evntsel_nmi(-1, evntsel_msr))
goto fail1;
wrmsrl(perfctr_msr, 0UL);
@@ -541,7 +599,7 @@ static int setup_k7_watchdog(void)
wd->check_bit = 1ULL<<63;
return 1;
fail1:
- release_perfctr_nmi(perfctr_msr);
+ __release_perfctr_nmi(-1, perfctr_msr);
fail:
return 0;
}
@@ -552,8 +610,8 @@ static void stop_k7_watchdog(void)
wrmsr(wd->evntsel_msr, 0, 0);
- release_evntsel_nmi(wd->evntsel_msr);
- release_perfctr_nmi(wd->perfctr_msr);
+ __release_evntsel_nmi(-1, wd->evntsel_msr);
+ __release_perfctr_nmi(-1, wd->perfctr_msr);
}
#define P6_EVNTSEL0_ENABLE (1 << 22)
@@ -571,10 +629,10 @@ static int setup_p6_watchdog(void)
perfctr_msr = MSR_P6_PERFCTR0;
evntsel_msr = MSR_P6_EVNTSEL0;
- if (!reserve_perfctr_nmi(perfctr_msr))
+ if (!__reserve_perfctr_nmi(-1, perfctr_msr))
goto fail;
- if (!reserve_evntsel_nmi(evntsel_msr))
+ if (!__reserve_evntsel_nmi(-1, evntsel_msr))
goto fail1;
wrmsrl(perfctr_msr, 0UL);
@@ -598,7 +656,7 @@ static int setup_p6_watchdog(void)
wd->check_bit = 1ULL<<39;
return 1;
fail1:
- release_perfctr_nmi(perfctr_msr);
+ __release_perfctr_nmi(-1, perfctr_msr);
fail:
return 0;
}
@@ -609,8 +667,8 @@ static void stop_p6_watchdog(void)
wrmsr(wd->evntsel_msr, 0, 0);
- release_evntsel_nmi(wd->evntsel_msr);
- release_perfctr_nmi(wd->perfctr_msr);
+ __release_evntsel_nmi(-1, wd->evntsel_msr);
+ __release_perfctr_nmi(-1, wd->perfctr_msr);
}
/* Note that these events don't tick when the CPU idles. This means
@@ -676,10 +734,10 @@ static int setup_p4_watchdog(void)
cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
}
- if (!reserve_perfctr_nmi(perfctr_msr))
+ if (!__reserve_perfctr_nmi(-1, perfctr_msr))
goto fail;
- if (!reserve_evntsel_nmi(evntsel_msr))
+ if (!__reserve_evntsel_nmi(-1, evntsel_msr))
goto fail1;
evntsel = P4_ESCR_EVENT_SELECT(0x3F)
@@ -703,7 +761,7 @@ static int setup_p4_watchdog(void)
wd->check_bit = 1ULL<<39;
return 1;
fail1:
- release_perfctr_nmi(perfctr_msr);
+ __release_perfctr_nmi(-1, perfctr_msr);
fail:
return 0;
}
@@ -715,8 +773,8 @@ static void stop_p4_watchdog(void)
wrmsr(wd->cccr_msr, 0, 0);
wrmsr(wd->evntsel_msr, 0, 0);
- release_evntsel_nmi(wd->evntsel_msr);
- release_perfctr_nmi(wd->perfctr_msr);
+ __release_evntsel_nmi(-1, wd->evntsel_msr);
+ __release_perfctr_nmi(-1, wd->perfctr_msr);
}
#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
@@ -744,10 +802,10 @@ static int setup_intel_arch_watchdog(void)
perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;
- if (!reserve_perfctr_nmi(perfctr_msr))
+ if (!__reserve_perfctr_nmi(-1, perfctr_msr))
goto fail;
- if (!reserve_evntsel_nmi(evntsel_msr))
+ if (!__reserve_evntsel_nmi(-1, evntsel_msr))
goto fail1;
wrmsrl(perfctr_msr, 0UL);
@@ -772,7 +830,7 @@ static int setup_intel_arch_watchdog(void)
wd->check_bit = 1ULL << (eax.split.bit_width - 1);
return 1;
fail1:
- release_perfctr_nmi(perfctr_msr);
+ __release_perfctr_nmi(-1, perfctr_msr);
fail:
return 0;
}
@@ -795,8 +853,8 @@ static void stop_intel_arch_watchdog(void)
return;
wrmsr(wd->evntsel_msr, 0, 0);
- release_evntsel_nmi(wd->evntsel_msr);
- release_perfctr_nmi(wd->perfctr_msr);
+ __release_evntsel_nmi(-1, wd->evntsel_msr);
+ __release_perfctr_nmi(-1, wd->perfctr_msr);
}
void setup_apic_nmi_watchdog (void *unused)
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c
index c156ecfa3872..2ec331e03fa9 100644
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -32,6 +32,7 @@
#include <asm/fixmap.h>
#include <asm/apic.h>
#include <asm/tlbflush.h>
+#include <asm/timer.h>
/* nop stub */
static void native_nop(void)
@@ -493,7 +494,7 @@ struct paravirt_ops paravirt_ops = {
.memory_setup = machine_specific_memory_setup,
.get_wallclock = native_get_wallclock,
.set_wallclock = native_set_wallclock,
- .time_init = time_init_hook,
+ .time_init = hpet_time_init,
.init_IRQ = native_init_IRQ,
.cpuid = native_cpuid,
@@ -520,6 +521,8 @@ struct paravirt_ops paravirt_ops = {
.write_msr = native_write_msr,
.read_tsc = native_read_tsc,
.read_pmc = native_read_pmc,
+ .get_scheduled_cycles = native_read_tsc,
+ .get_cpu_khz = native_calculate_cpu_khz,
.load_tr_desc = native_load_tr_desc,
.set_ldt = native_set_ldt,
.load_gdt = native_load_gdt,
@@ -535,7 +538,6 @@ struct paravirt_ops paravirt_ops = {
.set_iopl_mask = native_set_iopl_mask,
.io_delay = native_io_delay,
- .const_udelay = __const_udelay,
#ifdef CONFIG_X86_LOCAL_APIC
.apic_write = native_apic_write,
@@ -550,6 +552,8 @@ struct paravirt_ops paravirt_ops = {
.flush_tlb_kernel = native_flush_tlb_global,
.flush_tlb_single = native_flush_tlb_single,
+ .map_pt_hook = (void *)native_nop,
+
.alloc_pt = (void *)native_nop,
.alloc_pd = (void *)native_nop,
.alloc_pd_clone = (void *)native_nop,
diff --git a/arch/i386/kernel/pci-dma.c b/arch/i386/kernel/pci-dma.c
index 41af692c1584..3ebcea033623 100644
--- a/arch/i386/kernel/pci-dma.c
+++ b/arch/i386/kernel/pci-dma.c
@@ -110,7 +110,7 @@ int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
return DMA_MEMORY_IO;
free1_out:
- kfree(dev->dma_mem->bitmap);
+ kfree(dev->dma_mem);
out:
if (mem_base)
iounmap(mem_base);
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index bea304d48cdb..393a67d5d943 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -49,7 +49,6 @@
#include <asm/i387.h>
#include <asm/desc.h>
#include <asm/vm86.h>
-#include <asm/idle.h>
#ifdef CONFIG_MATH_EMULATION
#include <asm/math_emu.h>
#endif
@@ -82,42 +81,6 @@ void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);
static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
-static ATOMIC_NOTIFIER_HEAD(idle_notifier);
-
-void idle_notifier_register(struct notifier_block *n)
-{
- atomic_notifier_chain_register(&idle_notifier, n);
-}
-
-void idle_notifier_unregister(struct notifier_block *n)
-{
- atomic_notifier_chain_unregister(&idle_notifier, n);
-}
-
-static DEFINE_PER_CPU(volatile unsigned long, idle_state);
-
-void enter_idle(void)
-{
- /* needs to be atomic w.r.t. interrupts, not against other CPUs */
- __set_bit(0, &__get_cpu_var(idle_state));
- atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
-}
-
-static void __exit_idle(void)
-{
- /* needs to be atomic w.r.t. interrupts, not against other CPUs */
- if (__test_and_clear_bit(0, &__get_cpu_var(idle_state)) == 0)
- return;
- atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
-}
-
-void exit_idle(void)
-{
- if (current->pid)
- return;
- __exit_idle();
-}
-
void disable_hlt(void)
{
hlt_counter++;
@@ -168,7 +131,6 @@ EXPORT_SYMBOL(default_idle);
*/
static void poll_idle (void)
{
- local_irq_enable();
cpu_relax();
}
@@ -229,16 +191,7 @@ void cpu_idle(void)
play_dead();
__get_cpu_var(irq_stat).idle_timestamp = jiffies;
-
- /*
- * Idle routines should keep interrupts disabled
- * from here on, until they go to idle.
- * Otherwise, idle callbacks can misfire.
- */
- local_irq_disable();
- enter_idle();
idle();
- __exit_idle();
}
tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
@@ -293,11 +246,7 @@ void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
__monitor((void *)&current_thread_info()->flags, 0, 0);
smp_mb();
if (!need_resched())
- __sti_mwait(eax, ecx);
- else
- local_irq_enable();
- } else {
- local_irq_enable();
+ __mwait(eax, ecx);
}
}
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 122623dcc6e1..698c24fe482e 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -657,5 +657,4 @@ void __init setup_arch(char **cmdline_p)
conswitchp = &dummy_con;
#endif
#endif
- tsc_init();
}
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index 9bd9637ae692..0e8977871b1f 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -23,7 +23,6 @@
#include <asm/mtrr.h>
#include <asm/tlbflush.h>
-#include <asm/idle.h>
#include <mach_apic.h>
/*
@@ -624,7 +623,6 @@ fastcall void smp_call_function_interrupt(struct pt_regs *regs)
/*
* At this point the info structure may be out of scope unless wait==1
*/
- exit_idle();
irq_enter();
(*func)(info);
irq_exit();
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 48bfcaa13ecc..4ff55e675576 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -33,11 +33,6 @@
* Dave Jones : Report invalid combinations of Athlon CPUs.
* Rusty Russell : Hacked into shape for new "hotplug" boot process. */
-
-/* SMP boot always wants to use real time delay to allow sufficient time for
- * the APs to come online */
-#define USE_REAL_TIME_DELAY
-
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
@@ -50,6 +45,7 @@
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/percpu.h>
+#include <linux/nmi.h>
#include <linux/delay.h>
#include <linux/mc146818rtc.h>
@@ -1283,8 +1279,9 @@ void __cpu_die(unsigned int cpu)
int __cpuinit __cpu_up(unsigned int cpu)
{
+ unsigned long flags;
#ifdef CONFIG_HOTPLUG_CPU
- int ret=0;
+ int ret = 0;
/*
* We do warm boot only on cpus that had booted earlier
@@ -1302,23 +1299,25 @@ int __cpuinit __cpu_up(unsigned int cpu)
/* In case one didn't come up */
if (!cpu_isset(cpu, cpu_callin_map)) {
printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
- local_irq_enable();
return -EIO;
}
- local_irq_enable();
-
per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
/* Unleash the CPU! */
cpu_set(cpu, smp_commenced_mask);
/*
- * Check TSC synchronization with the AP:
+ * Check TSC synchronization with the AP (keep irqs disabled
+ * while doing so):
*/
+ local_irq_save(flags);
check_tsc_sync_source(cpu);
+ local_irq_restore(flags);
- while (!cpu_isset(cpu, cpu_online_map))
+ while (!cpu_isset(cpu, cpu_online_map)) {
cpu_relax();
+ touch_nmi_watchdog();
+ }
#ifdef CONFIG_X86_GENERICARCH
if (num_online_cpus() > 8 && genapic == &apic_default)
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index a5350059557a..94e5cb091104 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -262,14 +262,23 @@ void notify_arch_cmos_timer(void)
extern void (*late_time_init)(void);
/* Duplicate of time_init() below, with hpet_enable part added */
-static void __init hpet_time_init(void)
+void __init hpet_time_init(void)
{
if (!hpet_enable())
setup_pit_timer();
- do_time_init();
+ time_init_hook();
}
+/*
+ * This is called directly from init code; we must delay timer setup in the
+ * HPET case as we can't make the decision to turn on HPET this early in the
+ * boot process.
+ *
+ * The chosen time_init function will usually be hpet_time_init, above, but
+ * in the case of virtual hardware, an alternative function may be substituted.
+ */
void __init time_init(void)
{
- late_time_init = hpet_time_init;
+ tsc_init();
+ late_time_init = choose_time_init();
}
diff --git a/arch/i386/kernel/topology.c b/arch/i386/kernel/topology.c
index 79cf608e14ca..45782356a618 100644
--- a/arch/i386/kernel/topology.c
+++ b/arch/i386/kernel/topology.c
@@ -1,5 +1,5 @@
/*
- * arch/i386/kernel/topology.c - Populate driverfs with topology information
+ * arch/i386/kernel/topology.c - Populate sysfs with topology information
*
* Written by: Matthew Dobson, IBM Corporation
* Original Code: Paul Dorwin, IBM Corporation, Patrick Mochel, OSDL
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index 3082a418635c..6cb8f5336732 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -14,16 +14,18 @@
#include <asm/delay.h>
#include <asm/tsc.h>
#include <asm/io.h>
+#include <asm/timer.h>
#include "mach_timer.h"
+static int tsc_enabled;
+
/*
* On some systems the TSC frequency does not
* change with the cpu frequency. So we need
* an extra value to store the TSC freq
*/
unsigned int tsc_khz;
-unsigned long long (*custom_sched_clock)(void);
int tsc_disable;
@@ -102,24 +104,21 @@ unsigned long long sched_clock(void)
{
unsigned long long this_offset;
- if (unlikely(custom_sched_clock))
- return (*custom_sched_clock)();
-
/*
* Fall back to jiffies if there's no TSC available:
*/
- if (unlikely(tsc_disable))
+ if (unlikely(!tsc_enabled))
/* No locking but a rare wrong value is not a big deal: */
return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
/* read the Time Stamp Counter: */
- rdtscll(this_offset);
+ get_scheduled_cycles(this_offset);
/* return the value in ns */
return cycles_2_ns(this_offset);
}
-static unsigned long calculate_cpu_khz(void)
+unsigned long native_calculate_cpu_khz(void)
{
unsigned long long start, end;
unsigned long count;
@@ -186,34 +185,6 @@ int recalibrate_cpu_khz(void)
EXPORT_SYMBOL(recalibrate_cpu_khz);
-void __init tsc_init(void)
-{
- if (!cpu_has_tsc || tsc_disable)
- goto out_no_tsc;
-
- cpu_khz = calculate_cpu_khz();
- tsc_khz = cpu_khz;
-
- if (!cpu_khz)
- goto out_no_tsc;
-
- printk("Detected %lu.%03lu MHz processor.\n",
- (unsigned long)cpu_khz / 1000,
- (unsigned long)cpu_khz % 1000);
-
- set_cyc2ns_scale(cpu_khz);
- use_tsc_delay();
- return;
-
-out_no_tsc:
- /*
- * Set the tsc_disable flag if there's no TSC support, this
- * makes it a fast flag for the kernel to see whether it
- * should be using the TSC.
- */
- tsc_disable = 1;
-}
-
#ifdef CONFIG_CPU_FREQ
/*
@@ -314,6 +285,7 @@ void mark_tsc_unstable(void)
{
if (!tsc_unstable) {
tsc_unstable = 1;
+ tsc_enabled = 0;
/* Can be called before registration */
if (clocksource_tsc.mult)
clocksource_change_rating(&clocksource_tsc, 0);
@@ -383,28 +355,49 @@ static void __init check_geode_tsc_reliable(void)
static inline void check_geode_tsc_reliable(void) { }
#endif
-static int __init init_tsc_clocksource(void)
+
+void __init tsc_init(void)
{
+ if (!cpu_has_tsc || tsc_disable)
+ goto out_no_tsc;
+
+ cpu_khz = calculate_cpu_khz();
+ tsc_khz = cpu_khz;
+
+ if (!cpu_khz)
+ goto out_no_tsc;
+
+ printk("Detected %lu.%03lu MHz processor.\n",
+ (unsigned long)cpu_khz / 1000,
+ (unsigned long)cpu_khz % 1000);
+
+ set_cyc2ns_scale(cpu_khz);
+ use_tsc_delay();
- if (cpu_has_tsc && tsc_khz && !tsc_disable) {
- /* check blacklist */
- dmi_check_system(bad_tsc_dmi_table);
+ /* Check and install the TSC clocksource */
+ dmi_check_system(bad_tsc_dmi_table);
- unsynchronized_tsc();
- check_geode_tsc_reliable();
- current_tsc_khz = tsc_khz;
- clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
+ unsynchronized_tsc();
+ check_geode_tsc_reliable();
+ current_tsc_khz = tsc_khz;
+ clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
clocksource_tsc.shift);
- /* lower the rating if we already know its unstable: */
- if (check_tsc_unstable()) {
- clocksource_tsc.rating = 0;
- clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
- }
+ /* lower the rating if we already know its unstable: */
+ if (check_tsc_unstable()) {
+ clocksource_tsc.rating = 0;
+ clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
+ } else
+ tsc_enabled = 1;
- return clocksource_register(&clocksource_tsc);
- }
+ clocksource_register(&clocksource_tsc);
- return 0;
-}
+ return;
-module_init(init_tsc_clocksource);
+out_no_tsc:
+ /*
+ * Set the tsc_disable flag if there's no TSC support, this
+ * makes it a fast flag for the kernel to see whether it
+ * should be using the TSC.
+ */
+ tsc_disable = 1;
+}
diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c
index bb5a7abf949c..697a70e8c0c9 100644
--- a/arch/i386/kernel/vmi.c
+++ b/arch/i386/kernel/vmi.c
@@ -23,7 +23,6 @@
*/
#include <linux/module.h>
-#include <linux/license.h>
#include <linux/cpu.h>
#include <linux/bootmem.h>
#include <linux/mm.h>
@@ -35,6 +34,7 @@
#include <asm/processor.h>
#include <asm/timer.h>
#include <asm/vmi_time.h>
+#include <asm/kmap_types.h>
/* Convenient for calling VMI functions indirectly in the ROM */
typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void);
@@ -47,13 +47,13 @@ typedef u64 __attribute__((regparm(2))) (VROMLONGFUNC)(int);
(((VROMLONGFUNC *)(rom->func)) (arg))
static struct vrom_header *vmi_rom;
-static int license_gplok;
-static int disable_nodelay;
static int disable_pge;
static int disable_pse;
static int disable_sep;
static int disable_tsc;
static int disable_mtrr;
+static int disable_noidle;
+static int disable_vmi_timer;
/* Cached VMI operations */
struct {
@@ -69,6 +69,7 @@ struct {
void (*flush_tlb)(int);
void (*set_initial_ap_state)(int, int);
void (*halt)(void);
+ void (*set_lazy_mode)(int mode);
} vmi_ops;
/* XXX move this to alternative.h */
@@ -255,7 +256,6 @@ static void vmi_nop(void)
}
/* For NO_IDLE_HZ, we stop the clock when halting the kernel */
-#ifdef CONFIG_NO_IDLE_HZ
static fastcall void vmi_safe_halt(void)
{
int idle = vmi_stop_hz_timer();
@@ -266,7 +266,6 @@ static fastcall void vmi_safe_halt(void)
local_irq_enable();
}
}
-#endif
#ifdef CONFIG_DEBUG_PAGE_TYPE
@@ -371,6 +370,24 @@ static void vmi_check_page_type(u32 pfn, int type)
#define vmi_check_page_type(p,t) do { } while (0)
#endif
+static void vmi_map_pt_hook(int type, pte_t *va, u32 pfn)
+{
+ /*
+ * Internally, the VMI ROM must map virtual addresses to physical
+ * addresses for processing MMU updates. By the time MMU updates
+ * are issued, this information is typically already lost.
+ * Fortunately, the VMI provides a cache of mapping slots for active
+ * page tables.
+ *
+ * We use slot zero for the linear mapping of physical memory, and
+ * in HIGHPTE kernels, slot 1 and 2 for KM_PTE0 and KM_PTE1.
+ *
+ * args: SLOT VA COUNT PFN
+ */
+ BUG_ON(type != KM_PTE0 && type != KM_PTE1);
+ vmi_ops.set_linear_mapping((type - KM_PTE0)+1, (u32)va, 1, pfn);
+}
+
static void vmi_allocate_pt(u32 pfn)
{
vmi_set_page_type(pfn, VMI_PAGE_L1);
@@ -508,13 +525,14 @@ void vmi_pmd_clear(pmd_t *pmd)
#endif
#ifdef CONFIG_SMP
-struct vmi_ap_state ap;
extern void setup_pda(void);
-static void __init /* XXX cpu hotplug */
+static void __devinit
vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
unsigned long start_esp)
{
+ struct vmi_ap_state ap;
+
/* Default everything to zero. This is fine for most GPRs. */
memset(&ap, 0, sizeof(struct vmi_ap_state));
@@ -553,10 +571,30 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
/* Protected mode, paging, AM, WP, NE, MP. */
ap.cr0 = 0x80050023;
ap.cr4 = mmu_cr4_features;
- vmi_ops.set_initial_ap_state(__pa(&ap), phys_apicid);
+ vmi_ops.set_initial_ap_state((u32)&ap, phys_apicid);
}
#endif
+static void vmi_set_lazy_mode(int mode)
+{
+ static DEFINE_PER_CPU(int, lazy_mode);
+
+ if (!vmi_ops.set_lazy_mode)
+ return;
+
+ /* Modes should never nest or overlap */
+ BUG_ON(__get_cpu_var(lazy_mode) && !(mode == PARAVIRT_LAZY_NONE ||
+ mode == PARAVIRT_LAZY_FLUSH));
+
+ if (mode == PARAVIRT_LAZY_FLUSH) {
+ vmi_ops.set_lazy_mode(0);
+ vmi_ops.set_lazy_mode(__get_cpu_var(lazy_mode));
+ } else {
+ vmi_ops.set_lazy_mode(mode);
+ __get_cpu_var(lazy_mode) = mode;
+ }
+}
+
static inline int __init check_vmi_rom(struct vrom_header *rom)
{
struct pci_header *pci;
@@ -610,13 +648,14 @@ static inline int __init check_vmi_rom(struct vrom_header *rom)
rom->api_version_maj, rom->api_version_min,
pci->rom_version_maj, pci->rom_version_min);
- license_gplok = license_is_gpl_compatible(license);
- if (!license_gplok) {
- printk(KERN_WARNING "VMI: ROM license '%s' taints kernel... "
- "inlining disabled\n",
- license);
- add_taint(TAINT_PROPRIETARY_MODULE);
- }
+ /* Don't allow BSD/MIT here for now because we don't want to end up
+ with any binary only shim layers */
+ if (strcmp(license, "GPL") && strcmp(license, "GPL v2")) {
+ printk(KERN_WARNING "VMI: Non GPL license `%s' found for ROM. Not used.\n",
+ license);
+ return 0;
+ }
+
return 1;
}
@@ -645,12 +684,12 @@ static inline int __init probe_vmi_rom(void)
void vmi_bringup(void)
{
/* We must establish the lowmem mapping for MMU ops to work */
- if (vmi_rom)
+ if (vmi_ops.set_linear_mapping)
vmi_ops.set_linear_mapping(0, __PAGE_OFFSET, max_low_pfn, 0);
}
/*
- * Return a pointer to the VMI function or a NOP stub
+ * Return a pointer to a VMI function or NULL if unimplemented
*/
static void *vmi_get_function(int vmicall)
{
@@ -661,23 +700,47 @@ static void *vmi_get_function(int vmicall)
if (rel->type == VMI_RELOCATION_CALL_REL)
return (void *)rel->eip;
else
- return (void *)vmi_nop;
+ return NULL;
}
/*
* Helper macro for making the VMI paravirt-ops fill code readable.
- * For unimplemented operations, fall back to default.
+ * For unimplemented operations, fall back to default, unless nop
+ * is returned by the ROM.
*/
#define para_fill(opname, vmicall) \
do { \
reloc = call_vrom_long_func(vmi_rom, get_reloc, \
VMI_CALL_##vmicall); \
- if (rel->type != VMI_RELOCATION_NONE) { \
- BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); \
+ if (rel->type == VMI_RELOCATION_CALL_REL) \
paravirt_ops.opname = (void *)rel->eip; \
+ else if (rel->type == VMI_RELOCATION_NOP) \
+ paravirt_ops.opname = (void *)vmi_nop; \
+ else if (rel->type != VMI_RELOCATION_NONE) \
+ printk(KERN_WARNING "VMI: Unknown relocation " \
+ "type %d for " #vmicall"\n",\
+ rel->type); \
+} while (0)
+
+/*
+ * Helper macro for making the VMI paravirt-ops fill code readable.
+ * For cached operations which do not match the VMI ROM ABI and must
+ * go through a tranlation stub. Ignore NOPs, since it is not clear
+ * a NOP * VMI function corresponds to a NOP paravirt-op when the
+ * functions are not in 1-1 correspondence.
+ */
+#define para_wrap(opname, wrapper, cache, vmicall) \
+do { \
+ reloc = call_vrom_long_func(vmi_rom, get_reloc, \
+ VMI_CALL_##vmicall); \
+ BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL); \
+ if (rel->type == VMI_RELOCATION_CALL_REL) { \
+ paravirt_ops.opname = wrapper; \
+ vmi_ops.cache = (void *)rel->eip; \
} \
} while (0)
+
/*
* Activate the VMI interface and switch into paravirtualized mode
*/
@@ -714,13 +777,8 @@ static inline int __init activate_vmi(void)
* rdpmc is not yet used in Linux
*/
- /* CPUID is special, so very special */
- reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_CPUID);
- if (rel->type != VMI_RELOCATION_NONE) {
- BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);
- vmi_ops.cpuid = (void *)rel->eip;
- paravirt_ops.cpuid = vmi_cpuid;
- }
+ /* CPUID is special, so very special it gets wrapped like a present */
+ para_wrap(cpuid, vmi_cpuid, cpuid, CPUID);
para_fill(clts, CLTS);
para_fill(get_debugreg, GetDR);
@@ -737,38 +795,26 @@ static inline int __init activate_vmi(void)
para_fill(restore_fl, SetInterruptMask);
para_fill(irq_disable, DisableInterrupts);
para_fill(irq_enable, EnableInterrupts);
+
/* irq_save_disable !!! sheer pain */
patch_offset(&irq_save_disable_callout[IRQ_PATCH_INT_MASK],
(char *)paravirt_ops.save_fl);
patch_offset(&irq_save_disable_callout[IRQ_PATCH_DISABLE],
(char *)paravirt_ops.irq_disable);
-#ifndef CONFIG_NO_IDLE_HZ
- para_fill(safe_halt, Halt);
-#else
- vmi_ops.halt = vmi_get_function(VMI_CALL_Halt);
- paravirt_ops.safe_halt = vmi_safe_halt;
-#endif
+
para_fill(wbinvd, WBINVD);
+ para_fill(read_tsc, RDTSC);
+
+ /* The following we emulate with trap and emulate for now */
/* paravirt_ops.read_msr = vmi_rdmsr */
/* paravirt_ops.write_msr = vmi_wrmsr */
- para_fill(read_tsc, RDTSC);
/* paravirt_ops.rdpmc = vmi_rdpmc */
- /* TR interface doesn't pass TR value */
- reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_SetTR);
- if (rel->type != VMI_RELOCATION_NONE) {
- BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);
- vmi_ops.set_tr = (void *)rel->eip;
- paravirt_ops.load_tr_desc = vmi_set_tr;
- }
+ /* TR interface doesn't pass TR value, wrap */
+ para_wrap(load_tr_desc, vmi_set_tr, set_tr, SetTR);
/* LDT is special, too */
- reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_SetLDT);
- if (rel->type != VMI_RELOCATION_NONE) {
- BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);
- vmi_ops._set_ldt = (void *)rel->eip;
- paravirt_ops.set_ldt = vmi_set_ldt;
- }
+ para_wrap(set_ldt, vmi_set_ldt, _set_ldt, SetLDT);
para_fill(load_gdt, SetGDT);
para_fill(load_idt, SetIDT);
@@ -779,28 +825,14 @@ static inline int __init activate_vmi(void)
para_fill(write_ldt_entry, WriteLDTEntry);
para_fill(write_gdt_entry, WriteGDTEntry);
para_fill(write_idt_entry, WriteIDTEntry);
- reloc = call_vrom_long_func(vmi_rom, get_reloc,
- VMI_CALL_UpdateKernelStack);
- if (rel->type != VMI_RELOCATION_NONE) {
- BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);
- vmi_ops.set_kernel_stack = (void *)rel->eip;
- paravirt_ops.load_esp0 = vmi_load_esp0;
- }
-
+ para_wrap(load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack);
para_fill(set_iopl_mask, SetIOPLMask);
- paravirt_ops.io_delay = (void *)vmi_nop;
- if (!disable_nodelay) {
- paravirt_ops.const_udelay = (void *)vmi_nop;
- }
-
- para_fill(set_lazy_mode, SetLazyMode);
+ para_fill(io_delay, IODelay);
+ para_wrap(set_lazy_mode, vmi_set_lazy_mode, set_lazy_mode, SetLazyMode);
- reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_FlushTLB);
- if (rel->type != VMI_RELOCATION_NONE) {
- vmi_ops.flush_tlb = (void *)rel->eip;
- paravirt_ops.flush_tlb_user = vmi_flush_tlb_user;
- paravirt_ops.flush_tlb_kernel = vmi_flush_tlb_kernel;
- }
+ /* user and kernel flush are just handled with different flags to FlushTLB */
+ para_wrap(flush_tlb_user, vmi_flush_tlb_user, flush_tlb, FlushTLB);
+ para_wrap(flush_tlb_kernel, vmi_flush_tlb_kernel, flush_tlb, FlushTLB);
para_fill(flush_tlb_single, InvalPage);
/*
@@ -815,27 +847,40 @@ static inline int __init activate_vmi(void)
vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxE);
vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxE);
#endif
- vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping);
- vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage);
- vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage);
- paravirt_ops.alloc_pt = vmi_allocate_pt;
- paravirt_ops.alloc_pd = vmi_allocate_pd;
- paravirt_ops.alloc_pd_clone = vmi_allocate_pd_clone;
- paravirt_ops.release_pt = vmi_release_pt;
- paravirt_ops.release_pd = vmi_release_pd;
- paravirt_ops.set_pte = vmi_set_pte;
- paravirt_ops.set_pte_at = vmi_set_pte_at;
- paravirt_ops.set_pmd = vmi_set_pmd;
- paravirt_ops.pte_update = vmi_update_pte;
- paravirt_ops.pte_update_defer = vmi_update_pte_defer;
+ if (vmi_ops.set_pte) {
+ paravirt_ops.set_pte = vmi_set_pte;
+ paravirt_ops.set_pte_at = vmi_set_pte_at;
+ paravirt_ops.set_pmd = vmi_set_pmd;
#ifdef CONFIG_X86_PAE
- paravirt_ops.set_pte_atomic = vmi_set_pte_atomic;
- paravirt_ops.set_pte_present = vmi_set_pte_present;
- paravirt_ops.set_pud = vmi_set_pud;
- paravirt_ops.pte_clear = vmi_pte_clear;
- paravirt_ops.pmd_clear = vmi_pmd_clear;
+ paravirt_ops.set_pte_atomic = vmi_set_pte_atomic;
+ paravirt_ops.set_pte_present = vmi_set_pte_present;
+ paravirt_ops.set_pud = vmi_set_pud;
+ paravirt_ops.pte_clear = vmi_pte_clear;
+ paravirt_ops.pmd_clear = vmi_pmd_clear;
#endif
+ }
+
+ if (vmi_ops.update_pte) {
+ paravirt_ops.pte_update = vmi_update_pte;
+ paravirt_ops.pte_update_defer = vmi_update_pte_defer;
+ }
+
+ vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage);
+ if (vmi_ops.allocate_page) {
+ paravirt_ops.alloc_pt = vmi_allocate_pt;
+ paravirt_ops.alloc_pd = vmi_allocate_pd;
+ paravirt_ops.alloc_pd_clone = vmi_allocate_pd_clone;
+ }
+
+ vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage);
+ if (vmi_ops.release_page) {
+ paravirt_ops.release_pt = vmi_release_pt;
+ paravirt_ops.release_pd = vmi_release_pd;
+ }
+ para_wrap(map_pt_hook, vmi_map_pt_hook, set_linear_mapping,
+ SetLinearMapping);
+
/*
* These MUST always be patched. Don't support indirect jumps
* through these operations, as the VMI interface may use either
@@ -847,21 +892,20 @@ static inline int __init activate_vmi(void)
paravirt_ops.iret = (void *)0xbadbab0;
#ifdef CONFIG_SMP
- paravirt_ops.startup_ipi_hook = vmi_startup_ipi_hook;
- vmi_ops.set_initial_ap_state = vmi_get_function(VMI_CALL_SetInitialAPState);
+ para_wrap(startup_ipi_hook, vmi_startup_ipi_hook, set_initial_ap_state, SetInitialAPState);
#endif
#ifdef CONFIG_X86_LOCAL_APIC
- paravirt_ops.apic_read = vmi_get_function(VMI_CALL_APICRead);
- paravirt_ops.apic_write = vmi_get_function(VMI_CALL_APICWrite);
- paravirt_ops.apic_write_atomic = vmi_get_function(VMI_CALL_APICWrite);
+ para_fill(apic_read, APICRead);
+ para_fill(apic_write, APICWrite);
+ para_fill(apic_write_atomic, APICWrite);
#endif
/*
* Check for VMI timer functionality by probing for a cycle frequency method
*/
reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_GetCycleFrequency);
- if (rel->type != VMI_RELOCATION_NONE) {
+ if (!disable_vmi_timer && rel->type != VMI_RELOCATION_NONE) {
vmi_timer_ops.get_cycle_frequency = (void *)rel->eip;
vmi_timer_ops.get_cycle_counter =
vmi_get_function(VMI_CALL_GetCycleCounter);
@@ -879,9 +923,22 @@ static inline int __init activate_vmi(void)
paravirt_ops.setup_boot_clock = vmi_timer_setup_boot_alarm;
paravirt_ops.setup_secondary_clock = vmi_timer_setup_secondary_alarm;
#endif
- custom_sched_clock = vmi_sched_clock;
+ paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles;
+ paravirt_ops.get_cpu_khz = vmi_cpu_khz;
+
+ /* We have true wallclock functions; disable CMOS clock sync */
+ no_sync_cmos_clock = 1;
+ } else {
+ disable_noidle = 1;
+ disable_vmi_timer = 1;
}
+ /* No idle HZ mode only works if VMI timer and no idle is enabled */
+ if (disable_noidle || disable_vmi_timer)
+ para_fill(safe_halt, Halt);
+ else
+ para_wrap(safe_halt, vmi_safe_halt, halt, Halt);
+
/*
* Alternative instruction rewriting doesn't happen soon enough
* to convert VMI_IRET to a call instead of a jump; so we have
@@ -914,7 +971,9 @@ void __init vmi_init(void)
local_irq_save(flags);
activate_vmi();
-#ifdef CONFIG_SMP
+
+#ifdef CONFIG_X86_IO_APIC
+ /* This is virtual hardware; timer routing is wired correctly */
no_timer_check = 1;
#endif
local_irq_restore(flags & X86_EFLAGS_IF);
@@ -925,9 +984,7 @@ static int __init parse_vmi(char *arg)
if (!arg)
return -EINVAL;
- if (!strcmp(arg, "disable_nodelay"))
- disable_nodelay = 1;
- else if (!strcmp(arg, "disable_pge")) {
+ if (!strcmp(arg, "disable_pge")) {
clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
disable_pge = 1;
} else if (!strcmp(arg, "disable_pse")) {
@@ -942,7 +999,11 @@ static int __init parse_vmi(char *arg)
} else if (!strcmp(arg, "disable_mtrr")) {
clear_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability);
disable_mtrr = 1;
- }
+ } else if (!strcmp(arg, "disable_timer")) {
+ disable_vmi_timer = 1;
+ disable_noidle = 1;
+ } else if (!strcmp(arg, "disable_noidle"))
+ disable_noidle = 1;
return 0;
}
diff --git a/arch/i386/kernel/vmitime.c b/arch/i386/kernel/vmitime.c
index 76d2adcae5a3..9dfb17739b67 100644
--- a/arch/i386/kernel/vmitime.c
+++ b/arch/i386/kernel/vmitime.c
@@ -123,12 +123,10 @@ static struct clocksource clocksource_vmi = {
static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id);
static struct irqaction vmi_timer_irq = {
- vmi_timer_interrupt,
- SA_INTERRUPT,
- CPU_MASK_NONE,
- "VMI-alarm",
- NULL,
- NULL
+ .handler = vmi_timer_interrupt,
+ .flags = IRQF_DISABLED,
+ .mask = CPU_MASK_NONE,
+ .name = "VMI-alarm",
};
/* Alarm rate */
@@ -153,13 +151,6 @@ static void vmi_get_wallclock_ts(struct timespec *ts)
ts->tv_sec = wallclock;
}
-static void update_xtime_from_wallclock(void)
-{
- struct timespec ts;
- vmi_get_wallclock_ts(&ts);
- do_settimeofday(&ts);
-}
-
unsigned long vmi_get_wallclock(void)
{
struct timespec ts;
@@ -172,11 +163,20 @@ int vmi_set_wallclock(unsigned long now)
return -1;
}
-unsigned long long vmi_sched_clock(void)
+unsigned long long vmi_get_sched_cycles(void)
{
return read_available_cycles();
}
+unsigned long vmi_cpu_khz(void)
+{
+ unsigned long long khz;
+
+ khz = vmi_timer_ops.get_cycle_frequency();
+ (void)do_div(khz, 1000);
+ return khz;
+}
+
void __init vmi_time_init(void)
{
unsigned long long cycles_per_sec, cycles_per_msec;
@@ -188,25 +188,16 @@ void __init vmi_time_init(void)
set_intr_gate(LOCAL_TIMER_VECTOR, apic_vmi_timer_interrupt);
#endif
- no_sync_cmos_clock = 1;
-
- vmi_get_wallclock_ts(&xtime);
- set_normalized_timespec(&wall_to_monotonic,
- -xtime.tv_sec, -xtime.tv_nsec);
-
real_cycles_accounted_system = read_real_cycles();
- update_xtime_from_wallclock();
per_cpu(process_times_cycles_accounted_cpu, 0) = read_available_cycles();
cycles_per_sec = vmi_timer_ops.get_cycle_frequency();
-
cycles_per_jiffy = cycles_per_sec;
(void)do_div(cycles_per_jiffy, HZ);
cycles_per_alarm = cycles_per_sec;
(void)do_div(cycles_per_alarm, alarm_hz);
cycles_per_msec = cycles_per_sec;
(void)do_div(cycles_per_msec, 1000);
- cpu_khz = cycles_per_msec;
printk(KERN_WARNING "VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ;"
"cycles/alarm = %llu\n", cycles_per_sec, cycles_per_jiffy,
@@ -250,7 +241,7 @@ void __init vmi_timer_setup_boot_alarm(void)
/* Initialize the time accounting variables for an AP on an SMP system.
* Also, set the local alarm for the AP. */
-void __init vmi_timer_setup_secondary_alarm(void)
+void __devinit vmi_timer_setup_secondary_alarm(void)
{
int cpu = smp_processor_id();
@@ -276,16 +267,13 @@ static void vmi_account_real_cycles(unsigned long long cur_real_cycles)
cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system;
while (cycles_not_accounted >= cycles_per_jiffy) {
- /* systems wide jiffies and wallclock. */
+ /* systems wide jiffies. */
do_timer(1);
cycles_not_accounted -= cycles_per_jiffy;
real_cycles_accounted_system += cycles_per_jiffy;
}
- if (vmi_timer_ops.wallclock_updated())
- update_xtime_from_wallclock();
-
write_sequnlock(&xtime_lock);
}
@@ -380,7 +368,6 @@ int vmi_stop_hz_timer(void)
unsigned long seq, next;
unsigned long long real_cycles_expiry;
int cpu = smp_processor_id();
- int idle;
BUG_ON(!irqs_disabled());
if (sysctl_hz_timer != 0)
@@ -388,13 +375,13 @@ int vmi_stop_hz_timer(void)
cpu_set(cpu, nohz_cpu_mask);
smp_mb();
+
if (rcu_needs_cpu(cpu) || local_softirq_pending() ||
- (next = next_timer_interrupt(), time_before_eq(next, jiffies))) {
+ (next = next_timer_interrupt(),
+ time_before_eq(next, jiffies + HZ/CONFIG_VMI_ALARM_HZ))) {
cpu_clear(cpu, nohz_cpu_mask);
- next = jiffies;
- idle = 0;
- } else
- idle = 1;
+ return 0;
+ }
/* Convert jiffies to the real cycle counter. */
do {
@@ -404,17 +391,13 @@ int vmi_stop_hz_timer(void)
} while (read_seqretry(&xtime_lock, seq));
/* This cpu is going idle. Disable the periodic alarm. */
- if (idle) {
- vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
- per_cpu(idle_start_jiffies, cpu) = jiffies;
- }
-
+ vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
+ per_cpu(idle_start_jiffies, cpu) = jiffies;
/* Set the real time alarm to expire at the next event. */
vmi_timer_ops.set_alarm(
- VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL,
- real_cycles_expiry, 0);
-
- return idle;
+ VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL,
+ real_cycles_expiry, 0);
+ return 1;
}
static void vmi_reenable_hz_timer(int cpu)
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index ca51610955df..6f38f818380b 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -26,7 +26,7 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
OUTPUT_ARCH(i386)
ENTRY(phys_startup_32)
jiffies = jiffies_64;
-_proxy_pda = 0;
+_proxy_pda = 1;
PHDRS {
text PT_LOAD FLAGS(5); /* R_E */