summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c20
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c23
-rw-r--r--arch/x86/kernel/cpu/perf_event.c69
-rw-r--r--arch/x86/kernel/cpu/perf_event.h12
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c13
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c91
6 files changed, 138 insertions, 90 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 0ed633c5048b..6199232161cf 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -78,27 +78,20 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs)
static cpumask_var_t mce_inject_cpumask;
-static int mce_raise_notify(struct notifier_block *self,
- unsigned long val, void *data)
+static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
{
- struct die_args *args = (struct die_args *)data;
int cpu = smp_processor_id();
struct mce *m = &__get_cpu_var(injectm);
- if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
- return NOTIFY_DONE;
+ if (!cpumask_test_cpu(cpu, mce_inject_cpumask))
+ return NMI_DONE;
cpumask_clear_cpu(cpu, mce_inject_cpumask);
if (m->inject_flags & MCJ_EXCEPTION)
- raise_exception(m, args->regs);
+ raise_exception(m, regs);
else if (m->status)
raise_poll(m);
- return NOTIFY_STOP;
+ return NMI_HANDLED;
}
-static struct notifier_block mce_raise_nb = {
- .notifier_call = mce_raise_notify,
- .priority = NMI_LOCAL_NORMAL_PRIOR,
-};
-
/* Inject mce on current CPU */
static int raise_local(void)
{
@@ -216,7 +209,8 @@ static int inject_init(void)
return -ENOMEM;
printk(KERN_INFO "Machine check injector initialized\n");
mce_chrdev_ops.write = mce_write;
- register_die_notifier(&mce_raise_nb);
+ register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0,
+ "mce_notify");
return 0;
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 08363b042122..fce51ad1f362 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -908,9 +908,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
percpu_inc(mce_exception_count);
- if (notify_die(DIE_NMI, "machine check", regs, error_code,
- 18, SIGKILL) == NOTIFY_STOP)
- goto out;
if (!banks)
goto out;
@@ -1140,6 +1137,15 @@ static void mce_start_timer(unsigned long data)
add_timer_on(t, smp_processor_id());
}
+/* Must not be called in IRQ context where del_timer_sync() can deadlock */
+static void mce_timer_delete_all(void)
+{
+ int cpu;
+
+ for_each_online_cpu(cpu)
+ del_timer_sync(&per_cpu(mce_timer, cpu));
+}
+
static void mce_do_trigger(struct work_struct *work)
{
call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT);
@@ -1750,7 +1756,6 @@ static struct syscore_ops mce_syscore_ops = {
static void mce_cpu_restart(void *data)
{
- del_timer_sync(&__get_cpu_var(mce_timer));
if (!mce_available(__this_cpu_ptr(&cpu_info)))
return;
__mcheck_cpu_init_generic();
@@ -1760,16 +1765,15 @@ static void mce_cpu_restart(void *data)
/* Reinit MCEs after user configuration changes */
static void mce_restart(void)
{
+ mce_timer_delete_all();
on_each_cpu(mce_cpu_restart, NULL, 1);
}
/* Toggle features for corrected errors */
-static void mce_disable_ce(void *all)
+static void mce_disable_cmci(void *data)
{
if (!mce_available(__this_cpu_ptr(&cpu_info)))
return;
- if (all)
- del_timer_sync(&__get_cpu_var(mce_timer));
cmci_clear();
}
@@ -1852,7 +1856,8 @@ static ssize_t set_ignore_ce(struct sys_device *s,
if (mce_ignore_ce ^ !!new) {
if (new) {
/* disable ce features */
- on_each_cpu(mce_disable_ce, (void *)1, 1);
+ mce_timer_delete_all();
+ on_each_cpu(mce_disable_cmci, NULL, 1);
mce_ignore_ce = 1;
} else {
/* enable ce features */
@@ -1875,7 +1880,7 @@ static ssize_t set_cmci_disabled(struct sys_device *s,
if (mce_cmci_disabled ^ !!new) {
if (new) {
/* disable cmci */
- on_each_cpu(mce_disable_ce, NULL, 1);
+ on_each_cpu(mce_disable_cmci, NULL, 1);
mce_cmci_disabled = 1;
} else {
/* enable cmci */
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 8ab89112f93c..640891014b2a 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1058,76 +1058,15 @@ void perf_events_lapic_init(void)
apic_write(APIC_LVTPC, APIC_DM_NMI);
}
-struct pmu_nmi_state {
- unsigned int marked;
- int handled;
-};
-
-static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi);
-
static int __kprobes
-perf_event_nmi_handler(struct notifier_block *self,
- unsigned long cmd, void *__args)
+perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
- struct die_args *args = __args;
- unsigned int this_nmi;
- int handled;
-
if (!atomic_read(&active_events))
- return NOTIFY_DONE;
-
- switch (cmd) {
- case DIE_NMI:
- break;
- case DIE_NMIUNKNOWN:
- this_nmi = percpu_read(irq_stat.__nmi_count);
- if (this_nmi != __this_cpu_read(pmu_nmi.marked))
- /* let the kernel handle the unknown nmi */
- return NOTIFY_DONE;
- /*
- * This one is a PMU back-to-back nmi. Two events
- * trigger 'simultaneously' raising two back-to-back
- * NMIs. If the first NMI handles both, the latter
- * will be empty and daze the CPU. So, we drop it to
- * avoid false-positive 'unknown nmi' messages.
- */
- return NOTIFY_STOP;
- default:
- return NOTIFY_DONE;
- }
-
- handled = x86_pmu.handle_irq(args->regs);
- if (!handled)
- return NOTIFY_DONE;
+ return NMI_DONE;
- this_nmi = percpu_read(irq_stat.__nmi_count);
- if ((handled > 1) ||
- /* the next nmi could be a back-to-back nmi */
- ((__this_cpu_read(pmu_nmi.marked) == this_nmi) &&
- (__this_cpu_read(pmu_nmi.handled) > 1))) {
- /*
- * We could have two subsequent back-to-back nmis: The
- * first handles more than one counter, the 2nd
- * handles only one counter and the 3rd handles no
- * counter.
- *
- * This is the 2nd nmi because the previous was
- * handling more than one counter. We will mark the
- * next (3rd) and then drop it if unhandled.
- */
- __this_cpu_write(pmu_nmi.marked, this_nmi + 1);
- __this_cpu_write(pmu_nmi.handled, handled);
- }
-
- return NOTIFY_STOP;
+ return x86_pmu.handle_irq(regs);
}
-static __read_mostly struct notifier_block perf_event_nmi_notifier = {
- .notifier_call = perf_event_nmi_handler,
- .next = NULL,
- .priority = NMI_LOCAL_LOW_PRIOR,
-};
-
struct event_constraint emptyconstraint;
struct event_constraint unconstrained;
@@ -1232,7 +1171,7 @@ static int __init init_hw_perf_events(void)
((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
perf_events_lapic_init();
- register_die_notifier(&perf_event_nmi_notifier);
+ register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index fb330b0a816e..b9698d40ac4b 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -131,6 +131,13 @@ struct cpu_hw_events {
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
/*
+ * Intel host/guest exclude bits
+ */
+ u64 intel_ctrl_guest_mask;
+ u64 intel_ctrl_host_mask;
+ struct perf_guest_switch_msr guest_switch_msrs[X86_PMC_IDX_MAX];
+
+ /*
* manage shared (per-core, per-cpu) registers
* used on Intel NHM/WSM/SNB
*/
@@ -295,6 +302,11 @@ struct x86_pmu {
*/
struct extra_reg *extra_regs;
unsigned int er_flags;
+
+ /*
+ * Intel host/guest support (KVM)
+ */
+ struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
};
#define ERF_NO_HT_SHARING 1
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 384450d67128..db8e603ff0c6 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -138,6 +138,19 @@ static int amd_pmu_hw_config(struct perf_event *event)
if (ret)
return ret;
+ if (event->attr.exclude_host && event->attr.exclude_guest)
+ /*
+ * When HO == GO == 1 the hardware treats that as GO == HO == 0
+ * and will count in both modes. We don't want to count in that
+ * case so we emulate no-counting by setting US = OS = 0.
+ */
+ event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
+ ARCH_PERFMON_EVENTSEL_OS);
+ else if (event->attr.exclude_host)
+ event->hw.config |= AMD_PERFMON_EVENTSEL_GUESTONLY;
+ else if (event->attr.exclude_guest)
+ event->hw.config |= AMD_PERFMON_EVENTSEL_HOSTONLY;
+
if (event->attr.type != PERF_TYPE_RAW)
return 0;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 61fa35750b98..e09ca20e86ee 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -749,7 +749,8 @@ static void intel_pmu_enable_all(int added)
intel_pmu_pebs_enable_all();
intel_pmu_lbr_enable_all();
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
+ x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
struct perf_event *event =
@@ -872,6 +873,7 @@ static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
static void intel_pmu_disable_event(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
intel_pmu_disable_bts();
@@ -879,6 +881,9 @@ static void intel_pmu_disable_event(struct perf_event *event)
return;
}
+ cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
+ cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
+
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
intel_pmu_disable_fixed(hwc);
return;
@@ -924,6 +929,7 @@ static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
static void intel_pmu_enable_event(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
if (!__this_cpu_read(cpu_hw_events.enabled))
@@ -933,6 +939,11 @@ static void intel_pmu_enable_event(struct perf_event *event)
return;
}
+ if (event->attr.exclude_host)
+ cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
+ if (event->attr.exclude_guest)
+ cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
+
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
intel_pmu_enable_fixed(hwc);
return;
@@ -1302,12 +1313,84 @@ static int intel_pmu_hw_config(struct perf_event *event)
return 0;
}
+struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
+{
+ if (x86_pmu.guest_get_msrs)
+ return x86_pmu.guest_get_msrs(nr);
+ *nr = 0;
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
+
+static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
+{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
+
+ arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
+ arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
+ arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
+
+ *nr = 1;
+ return arr;
+}
+
+static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr)
+{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
+ int idx;
+
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ struct perf_event *event = cpuc->events[idx];
+
+ arr[idx].msr = x86_pmu_config_addr(idx);
+ arr[idx].host = arr[idx].guest = 0;
+
+ if (!test_bit(idx, cpuc->active_mask))
+ continue;
+
+ arr[idx].host = arr[idx].guest =
+ event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE;
+
+ if (event->attr.exclude_host)
+ arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+ else if (event->attr.exclude_guest)
+ arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+ }
+
+ *nr = x86_pmu.num_counters;
+ return arr;
+}
+
+static void core_pmu_enable_event(struct perf_event *event)
+{
+ if (!event->attr.exclude_host)
+ x86_pmu_enable_event(event);
+}
+
+static void core_pmu_enable_all(int added)
+{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ int idx;
+
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
+
+ if (!test_bit(idx, cpuc->active_mask) ||
+ cpuc->events[idx]->attr.exclude_host)
+ continue;
+
+ __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+ }
+}
+
static __initconst const struct x86_pmu core_pmu = {
.name = "core",
.handle_irq = x86_pmu_handle_irq,
.disable_all = x86_pmu_disable_all,
- .enable_all = x86_pmu_enable_all,
- .enable = x86_pmu_enable_event,
+ .enable_all = core_pmu_enable_all,
+ .enable = core_pmu_enable_event,
.disable = x86_pmu_disable_event,
.hw_config = x86_pmu_hw_config,
.schedule_events = x86_schedule_events,
@@ -1325,6 +1408,7 @@ static __initconst const struct x86_pmu core_pmu = {
.get_event_constraints = intel_get_event_constraints,
.put_event_constraints = intel_put_event_constraints,
.event_constraints = intel_core_event_constraints,
+ .guest_get_msrs = core_guest_get_msrs,
};
struct intel_shared_regs *allocate_shared_regs(int cpu)
@@ -1431,6 +1515,7 @@ static __initconst const struct x86_pmu intel_pmu = {
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
+ .guest_get_msrs = intel_guest_get_msrs,
};
static void intel_clovertown_quirks(void)