diff options
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/Kbuild | 3 | ||||
-rw-r--r-- | arch/x86/events/Makefile | 13 | ||||
-rw-r--r-- | arch/x86/events/amd/core.c (renamed from arch/x86/kernel/cpu/perf_event_amd.c) | 2 | ||||
-rw-r--r-- | arch/x86/events/amd/ibs.c (renamed from arch/x86/kernel/cpu/perf_event_amd_ibs.c) | 12 | ||||
-rw-r--r-- | arch/x86/events/amd/iommu.c (renamed from arch/x86/kernel/cpu/perf_event_amd_iommu.c) | 4 | ||||
-rw-r--r-- | arch/x86/events/amd/iommu.h (renamed from arch/x86/kernel/cpu/perf_event_amd_iommu.h) | 0 | ||||
-rw-r--r-- | arch/x86/events/amd/uncore.c (renamed from arch/x86/kernel/cpu/perf_event_amd_uncore.c) | 4 | ||||
-rw-r--r-- | arch/x86/events/core.c (renamed from arch/x86/kernel/cpu/perf_event.c) | 22 | ||||
-rw-r--r-- | arch/x86/events/intel/bts.c (renamed from arch/x86/kernel/cpu/perf_event_intel_bts.c) | 2 | ||||
-rw-r--r-- | arch/x86/events/intel/core.c (renamed from arch/x86/kernel/cpu/perf_event_intel.c) | 31 | ||||
-rw-r--r-- | arch/x86/events/intel/cqm.c (renamed from arch/x86/kernel/cpu/perf_event_intel_cqm.c) | 34 | ||||
-rw-r--r-- | arch/x86/events/intel/cstate.c (renamed from arch/x86/kernel/cpu/perf_event_intel_cstate.c) | 2 | ||||
-rw-r--r-- | arch/x86/events/intel/ds.c (renamed from arch/x86/kernel/cpu/perf_event_intel_ds.c) | 56 | ||||
-rw-r--r-- | arch/x86/events/intel/knc.c (renamed from arch/x86/kernel/cpu/perf_event_knc.c) | 6 | ||||
-rw-r--r-- | arch/x86/events/intel/lbr.c (renamed from arch/x86/kernel/cpu/perf_event_intel_lbr.c) | 2 | ||||
-rw-r--r-- | arch/x86/events/intel/p4.c (renamed from arch/x86/kernel/cpu/perf_event_p4.c) | 2 | ||||
-rw-r--r-- | arch/x86/events/intel/p6.c (renamed from arch/x86/kernel/cpu/perf_event_p6.c) | 2 | ||||
-rw-r--r-- | arch/x86/events/intel/pt.c (renamed from arch/x86/kernel/cpu/perf_event_intel_pt.c) | 4 | ||||
-rw-r--r-- | arch/x86/events/intel/pt.h (renamed from arch/x86/kernel/cpu/intel_pt.h) | 0 | ||||
-rw-r--r-- | arch/x86/events/intel/rapl.c (renamed from arch/x86/kernel/cpu/perf_event_intel_rapl.c) | 412 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore.c (renamed from arch/x86/kernel/cpu/perf_event_intel_uncore.c) | 677 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore.h (renamed from arch/x86/kernel/cpu/perf_event_intel_uncore.h) | 55 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore_nhmex.c (renamed from arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c) | 8 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore_snb.c (renamed from arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c) | 16 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore_snbep.c (renamed from arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c) | 21 | ||||
-rw-r--r-- | arch/x86/events/msr.c (renamed from arch/x86/kernel/cpu/perf_event_msr.c) | 0 | ||||
-rw-r--r-- | arch/x86/events/perf_event.h (renamed from arch/x86/kernel/cpu/perf_event.h) | 5 | ||||
-rw-r--r-- | arch/x86/include/asm/elf.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/perf_event.h | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/processor.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/topology.h | 11 | ||||
-rw-r--r-- | arch/x86/kernel/apic/apic.c | 14 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/Makefile | 24 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/amd.c | 23 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/bugs_64.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/centaur.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 44 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/cyrix.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/hypervisor.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/intel.c | 23 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/intel_cacheinfo.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-inject.c | 15 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/p5.c | 18 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/therm_throt.c | 15 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/threshold.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/winchip.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/microcode/amd.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mshyperv.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mtrr/centaur.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mtrr/cleanup.c | 44 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mtrr/generic.c | 23 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mtrr/main.c | 20 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/rdrand.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/topology.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/transmeta.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/vmware.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/mpparse.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/nmi.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/smpboot.c | 100 | ||||
-rw-r--r-- | arch/x86/lguest/boot.c | 2 | ||||
-rw-r--r-- | arch/x86/xen/enlighten.c | 2 | ||||
-rw-r--r-- | arch/x86/xen/pmu.c | 2 |
62 files changed, 1047 insertions, 804 deletions
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild index 1538562cc720..eb3abf8ac44e 100644 --- a/arch/x86/Kbuild +++ b/arch/x86/Kbuild @@ -1,6 +1,7 @@ - obj-y += entry/ +obj-$(CONFIG_PERF_EVENTS) += events/ + obj-$(CONFIG_KVM) += kvm/ # Xen paravirtualization support diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile new file mode 100644 index 000000000000..fdfea1511cc0 --- /dev/null +++ b/arch/x86/events/Makefile @@ -0,0 +1,13 @@ +obj-y += core.o + +obj-$(CONFIG_CPU_SUP_AMD) += amd/core.o amd/uncore.o +obj-$(CONFIG_X86_LOCAL_APIC) += amd/ibs.o msr.o +ifdef CONFIG_AMD_IOMMU +obj-$(CONFIG_CPU_SUP_AMD) += amd/iommu.o +endif +obj-$(CONFIG_CPU_SUP_INTEL) += intel/core.o intel/bts.o intel/cqm.o +obj-$(CONFIG_CPU_SUP_INTEL) += intel/cstate.o intel/ds.o intel/knc.o +obj-$(CONFIG_CPU_SUP_INTEL) += intel/lbr.o intel/p4.o intel/p6.o intel/pt.o +obj-$(CONFIG_CPU_SUP_INTEL) += intel/rapl.o msr.o +obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore.o intel/uncore_nhmex.o +obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore_snb.o intel/uncore_snbep.o diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/events/amd/core.c index 58610539b048..049ada8d4e9c 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/events/amd/core.c @@ -5,7 +5,7 @@ #include <linux/slab.h> #include <asm/apicdef.h> -#include "perf_event.h" +#include "../perf_event.h" static __initconst const u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/events/amd/ibs.c index 989d3c215d2b..51087c29b2c2 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -14,7 +14,7 @@ #include <asm/apic.h> -#include "perf_event.h" +#include "../perf_event.h" static u32 ibs_caps; @@ -670,7 +670,7 @@ static __init int perf_event_ibs_init(void) perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); - printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps); + pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps); return 0; } @@ -774,14 +774,14 @@ static int setup_ibs_ctl(int ibs_eilvt_off) pci_read_config_dword(cpu_cfg, IBSCTL, &value); if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) { pci_dev_put(cpu_cfg); - printk(KERN_DEBUG "Failed to setup IBS LVT offset, " - "IBSCTL = 0x%08x\n", value); + pr_debug("Failed to setup IBS LVT offset, IBSCTL = 0x%08x\n", + value); return -EINVAL; } } while (1); if (!nodes) { - printk(KERN_DEBUG "No CPU node configured for IBS\n"); + pr_debug("No CPU node configured for IBS\n"); return -ENODEV; } @@ -810,7 +810,7 @@ static void force_ibs_eilvt_setup(void) preempt_enable(); if (offset == APIC_EILVT_NR_MAX) { - printk(KERN_DEBUG "No EILVT entry available\n"); + pr_debug("No EILVT entry available\n"); return; } diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.c b/arch/x86/events/amd/iommu.c index 97242a9242bd..635e5eba0caf 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -16,8 +16,8 @@ #include <linux/cpumask.h> #include <linux/slab.h> -#include "perf_event.h" -#include "perf_event_amd_iommu.h" +#include "../perf_event.h" +#include "iommu.h" #define COUNTER_SHIFT 16 diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.h b/arch/x86/events/amd/iommu.h index 845d173278e3..845d173278e3 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_iommu.h +++ b/arch/x86/events/amd/iommu.h diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/events/amd/uncore.c index 8836fc9fa84b..3db9569e658c 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -538,7 +538,7 @@ static int __init amd_uncore_init(void) if (ret) goto fail_nb; - printk(KERN_INFO "perf: AMD NB counters detected\n"); + pr_info("perf: AMD NB counters detected\n"); ret = 0; } @@ -552,7 +552,7 @@ static int __init amd_uncore_init(void) if (ret) goto fail_l2; - printk(KERN_INFO "perf: AMD L2I counters detected\n"); + pr_info("perf: AMD L2I counters detected\n"); ret = 0; } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/events/core.c index 1b443db2db50..5e830d0c95c9 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/events/core.c @@ -254,15 +254,16 @@ static bool check_hw_exists(void) * We still allow the PMU driver to operate: */ if (bios_fail) { - printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n"); - printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg_fail, val_fail); + pr_cont("Broken BIOS detected, complain to your hardware vendor.\n"); + pr_err(FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", + reg_fail, val_fail); } return true; msr_fail: - printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n"); - printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n", + pr_cont("Broken PMU hardware detected, using software events only.\n"); + pr_info("%sFailed to access perfctr msr (MSR %x is %Lx)\n", boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR, reg, val_new); @@ -596,6 +597,19 @@ void x86_pmu_disable_all(void) } } +/* + * There may be PMI landing after enabled=0. The PMI hitting could be before or + * after disable_all. + * + * If PMI hits before disable_all, the PMU will be disabled in the NMI handler. + * It will not be re-enabled in the NMI handler again, because enabled=0. After + * handling the NMI, disable_all will be called, which will not change the + * state either. If PMI hits after disable_all, the PMU is already disabled + * before entering NMI handler. The NMI handler will not change the state + * either. + * + * So either situation is harmless. + */ static void x86_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/events/intel/bts.c index 2cad71d1b14c..b99dc9258c0f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_bts.c +++ b/arch/x86/events/intel/bts.c @@ -26,7 +26,7 @@ #include <asm-generic/sizes.h> #include <asm/perf_event.h> -#include "perf_event.h" +#include "../perf_event.h" struct bts_ctx { struct perf_output_handle handle; diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/events/intel/core.c index fed2ab1f1065..68fa55b4d42e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/events/intel/core.c @@ -18,7 +18,7 @@ #include <asm/hardirq.h> #include <asm/apic.h> -#include "perf_event.h" +#include "../perf_event.h" /* * Intel PerfMon, used on Core and later. @@ -1502,7 +1502,15 @@ static __initconst const u64 knl_hw_cache_extra_regs }; /* - * Use from PMIs where the LBRs are already disabled. + * Used from PMIs where the LBRs are already disabled. + * + * This function could be called consecutively. It is required to remain in + * disabled state if called consecutively. + * + * During consecutive calls, the same disable value will be written to related + * registers, so the PMU state remains unchanged. hw.state in + * intel_bts_disable_local will remain PERF_HES_STOPPED too in consecutive + * calls. */ static void __intel_pmu_disable_all(void) { @@ -1884,6 +1892,16 @@ again: if (__test_and_clear_bit(62, (unsigned long *)&status)) { handled++; x86_pmu.drain_pebs(regs); + /* + * There are cases where, even though, the PEBS ovfl bit is set + * in GLOBAL_OVF_STATUS, the PEBS events may also have their + * overflow bits set for their counters. We must clear them + * here because they have been processed as exact samples in + * the drain_pebs() routine. They must not be processed again + * in the for_each_bit_set() loop for regular samples below. + */ + status &= ~cpuc->pebs_enabled; + status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI; } /* @@ -1929,7 +1947,10 @@ again: goto again; done: - __intel_pmu_enable_all(0, true); + /* Only restore PMU state when it's active. See x86_pmu_disable(). */ + if (cpuc->enabled) + __intel_pmu_enable_all(0, true); + /* * Only unmask the NMI after the overflow counters * have been reset. This avoids spurious NMIs on @@ -3396,6 +3417,7 @@ __init int intel_pmu_init(void) intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); + intel_pmu_pebs_data_source_nhm(); x86_add_quirk(intel_nehalem_quirk); pr_cont("Nehalem events, "); @@ -3459,6 +3481,7 @@ __init int intel_pmu_init(void) intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); + intel_pmu_pebs_data_source_nhm(); pr_cont("Westmere events, "); break; @@ -3581,7 +3604,7 @@ __init int intel_pmu_init(void) intel_pmu_lbr_init_hsw(); x86_pmu.event_constraints = intel_bdw_event_constraints; - x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints; + x86_pmu.pebs_constraints = intel_bdw_pebs_event_constraints; x86_pmu.extra_regs = intel_snbep_extra_regs; x86_pmu.pebs_aliases = intel_pebs_aliases_ivb; x86_pmu.pebs_prec_dist = true; diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/events/intel/cqm.c index a316ca96f1b6..93cb412a5579 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c +++ b/arch/x86/events/intel/cqm.c @@ -7,7 +7,7 @@ #include <linux/perf_event.h> #include <linux/slab.h> #include <asm/cpu_device_id.h> -#include "perf_event.h" +#include "../perf_event.h" #define MSR_IA32_PQR_ASSOC 0x0c8f #define MSR_IA32_QM_CTR 0x0c8e @@ -1244,15 +1244,12 @@ static struct pmu intel_cqm_pmu = { static inline void cqm_pick_event_reader(int cpu) { - int phys_id = topology_physical_package_id(cpu); - int i; + int reader; - for_each_cpu(i, &cqm_cpumask) { - if (phys_id == topology_physical_package_id(i)) - return; /* already got reader for this socket */ - } - - cpumask_set_cpu(cpu, &cqm_cpumask); + /* First online cpu in package becomes the reader */ + reader = cpumask_any_and(&cqm_cpumask, topology_core_cpumask(cpu)); + if (reader >= nr_cpu_ids) + cpumask_set_cpu(cpu, &cqm_cpumask); } static void intel_cqm_cpu_starting(unsigned int cpu) @@ -1270,24 +1267,17 @@ static void intel_cqm_cpu_starting(unsigned int cpu) static void intel_cqm_cpu_exit(unsigned int cpu) { - int phys_id = topology_physical_package_id(cpu); - int i; + int target; - /* - * Is @cpu a designated cqm reader? - */ + /* Is @cpu the current cqm reader for this package ? */ if (!cpumask_test_and_clear_cpu(cpu, &cqm_cpumask)) return; - for_each_online_cpu(i) { - if (i == cpu) - continue; + /* Find another online reader in this package */ + target = cpumask_any_but(topology_core_cpumask(cpu), cpu); - if (phys_id == topology_physical_package_id(i)) { - cpumask_set_cpu(i, &cqm_cpumask); - break; - } - } + if (target < nr_cpu_ids) + cpumask_set_cpu(target, &cqm_cpumask); } static int intel_cqm_cpu_notifier(struct notifier_block *nb, diff --git a/arch/x86/kernel/cpu/perf_event_intel_cstate.c b/arch/x86/events/intel/cstate.c index 75a38b5a2e26..7946c4231169 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -89,7 +89,7 @@ #include <linux/slab.h> #include <linux/perf_event.h> #include <asm/cpu_device_id.h> -#include "perf_event.h" +#include "../perf_event.h" #define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \ static ssize_t __cstate_##_var##_show(struct kobject *kobj, \ diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/events/intel/ds.c index 10602f0a438f..ce7211a07c0b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/events/intel/ds.c @@ -5,7 +5,7 @@ #include <asm/perf_event.h> #include <asm/insn.h> -#include "perf_event.h" +#include "../perf_event.h" /* The size of a BTS record in bytes: */ #define BTS_RECORD_SIZE 24 @@ -51,7 +51,8 @@ union intel_x86_pebs_dse { #define OP_LH (P(OP, LOAD) | P(LVL, HIT)) #define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS)) -static const u64 pebs_data_source[] = { +/* Version for Sandy Bridge and later */ +static u64 pebs_data_source[] = { P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */ OP_LH | P(LVL, L1) | P(SNOOP, NONE), /* 0x01: L1 local */ OP_LH | P(LVL, LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */ @@ -70,6 +71,14 @@ static const u64 pebs_data_source[] = { OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */ }; +/* Patch up minor differences in the bits */ +void __init intel_pmu_pebs_data_source_nhm(void) +{ + pebs_data_source[0x05] = OP_LH | P(LVL, L3) | P(SNOOP, HIT); + pebs_data_source[0x06] = OP_LH | P(LVL, L3) | P(SNOOP, HITM); + pebs_data_source[0x07] = OP_LH | P(LVL, L3) | P(SNOOP, HITM); +} + static u64 precise_store_data(u64 status) { union intel_x86_pebs_dse dse; @@ -269,7 +278,7 @@ static int alloc_pebs_buffer(int cpu) if (!x86_pmu.pebs) return 0; - buffer = kzalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL, node); + buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node); if (unlikely(!buffer)) return -ENOMEM; @@ -286,7 +295,7 @@ static int alloc_pebs_buffer(int cpu) per_cpu(insn_buffer, cpu) = ibuffer; } - max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; + max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size; ds->pebs_buffer_base = (u64)(unsigned long)buffer; ds->pebs_index = ds->pebs_buffer_base; @@ -722,6 +731,30 @@ struct event_constraint intel_hsw_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; +struct event_constraint intel_bdw_pebs_event_constraints[] = { + INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ + INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */ + /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ + INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf), + /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */ + INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2), + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */ + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf), /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */ + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf), /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */ + /* Allow all events as PEBS with no flags */ + INTEL_ALL_EVENT_CONSTRAINT(0, 0xf), + EVENT_CONSTRAINT_END +}; + + struct event_constraint intel_skl_pebs_event_constraints[] = { INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */ /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */ @@ -1319,19 +1352,28 @@ void __init intel_ds_init(void) x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); + x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; if (x86_pmu.pebs) { char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; int format = x86_pmu.intel_cap.pebs_format; switch (format) { case 0: - printk(KERN_CONT "PEBS fmt0%c, ", pebs_type); + pr_cont("PEBS fmt0%c, ", pebs_type); x86_pmu.pebs_record_size = sizeof(struct pebs_record_core); + /* + * Using >PAGE_SIZE buffers makes the WRMSR to + * PERF_GLOBAL_CTRL in intel_pmu_enable_all() + * mysteriously hang on Core2. + * + * As a workaround, we don't do this. + */ + x86_pmu.pebs_buffer_size = PAGE_SIZE; x86_pmu.drain_pebs = intel_pmu_drain_pebs_core; break; case 1: - printk(KERN_CONT "PEBS fmt1%c, ", pebs_type); + pr_cont("PEBS fmt1%c, ", pebs_type); x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm); x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; break; @@ -1351,7 +1393,7 @@ void __init intel_ds_init(void) break; default: - printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type); + pr_cont("no PEBS fmt%d%c, ", format, pebs_type); x86_pmu.pebs = 0; } } diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/events/intel/knc.c index 5b0c232d1ee6..548d5f774b07 100644 --- a/arch/x86/kernel/cpu/perf_event_knc.c +++ b/arch/x86/events/intel/knc.c @@ -5,7 +5,7 @@ #include <asm/hardirq.h> -#include "perf_event.h" +#include "../perf_event.h" static const u64 knc_perfmon_event_map[] = { @@ -263,7 +263,9 @@ again: goto again; done: - knc_pmu_enable_all(0); + /* Only restore PMU state when it's active. See x86_pmu_disable(). */ + if (cpuc->enabled) + knc_pmu_enable_all(0); return handled; } diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/events/intel/lbr.c index 653f88d25987..69dd11887dd1 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -5,7 +5,7 @@ #include <asm/msr.h> #include <asm/insn.h> -#include "perf_event.h" +#include "../perf_event.h" enum { LBR_FORMAT_32 = 0x00, diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/events/intel/p4.c index f2e56783af3d..0a5ede187d9c 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/events/intel/p4.c @@ -13,7 +13,7 @@ #include <asm/hardirq.h> #include <asm/apic.h> -#include "perf_event.h" +#include "../perf_event.h" #define P4_CNTR_LIMIT 3 /* diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/events/intel/p6.c index 7c1a0c07b607..1f5c47ab4c65 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/events/intel/p6.c @@ -1,7 +1,7 @@ #include <linux/perf_event.h> #include <linux/types.h> -#include "perf_event.h" +#include "../perf_event.h" /* * Not sure about some of these diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/events/intel/pt.c index c0bbd1033b7c..6af7cf71d6b2 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_pt.c +++ b/arch/x86/events/intel/pt.c @@ -29,8 +29,8 @@ #include <asm/io.h> #include <asm/intel_pt.h> -#include "perf_event.h" -#include "intel_pt.h" +#include "../perf_event.h" +#include "pt.h" static DEFINE_PER_CPU(struct pt, pt_ctx); diff --git a/arch/x86/kernel/cpu/intel_pt.h b/arch/x86/events/intel/pt.h index 336878a5d205..336878a5d205 100644 --- a/arch/x86/kernel/cpu/intel_pt.h +++ b/arch/x86/events/intel/pt.h diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/events/intel/rapl.c index 24a351ad628d..b834a3f55a01 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -44,11 +44,14 @@ * the duration of the measurement. Tools may use a function such as * ldexp(raw_count, -32); */ + +#define pr_fmt(fmt) "RAPL PMU: " fmt + #include <linux/module.h> #include <linux/slab.h> #include <linux/perf_event.h> #include <asm/cpu_device_id.h> -#include "perf_event.h" +#include "../perf_event.h" /* * RAPL energy status counters @@ -107,7 +110,7 @@ static ssize_t __rapl_##_var##_show(struct kobject *kobj, \ static struct kobj_attribute format_attr_##_var = \ __ATTR(_name, 0444, __rapl_##_var##_show, NULL) -#define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */ +#define RAPL_CNTR_WIDTH 32 #define RAPL_EVENT_ATTR_STR(_name, v, str) \ static struct perf_pmu_events_attr event_attr_##v = { \ @@ -117,23 +120,33 @@ static struct perf_pmu_events_attr event_attr_##v = { \ }; struct rapl_pmu { - spinlock_t lock; - int n_active; /* number of active events */ - struct list_head active_list; - struct pmu *pmu; /* pointer to rapl_pmu_class */ - ktime_t timer_interval; /* in ktime_t unit */ - struct hrtimer hrtimer; + raw_spinlock_t lock; + int n_active; + int cpu; + struct list_head active_list; + struct pmu *pmu; + ktime_t timer_interval; + struct hrtimer hrtimer; }; -static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; /* 1/2^hw_unit Joule */ -static struct pmu rapl_pmu_class; +struct rapl_pmus { + struct pmu pmu; + unsigned int maxpkg; + struct rapl_pmu *pmus[]; +}; + + /* 1/2^hw_unit Joule */ +static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; +static struct rapl_pmus *rapl_pmus; static cpumask_t rapl_cpu_mask; -static int rapl_cntr_mask; +static unsigned int rapl_cntr_mask; +static u64 rapl_timer_ms; -static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu); -static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free); +static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu) +{ + return rapl_pmus->pmus[topology_logical_package_id(cpu)]; +} -static struct x86_pmu_quirk *rapl_quirks; static inline u64 rapl_read_counter(struct perf_event *event) { u64 raw; @@ -141,19 +154,10 @@ static inline u64 rapl_read_counter(struct perf_event *event) return raw; } -#define rapl_add_quirk(func_) \ -do { \ - static struct x86_pmu_quirk __quirk __initdata = { \ - .func = func_, \ - }; \ - __quirk.next = rapl_quirks; \ - rapl_quirks = &__quirk; \ -} while (0) - static inline u64 rapl_scale(u64 v, int cfg) { if (cfg > NR_RAPL_DOMAINS) { - pr_warn("invalid domain %d, failed to scale data\n", cfg); + pr_warn("Invalid domain %d, failed to scale data\n", cfg); return v; } /* @@ -206,27 +210,21 @@ static void rapl_start_hrtimer(struct rapl_pmu *pmu) HRTIMER_MODE_REL_PINNED); } -static void rapl_stop_hrtimer(struct rapl_pmu *pmu) -{ - hrtimer_cancel(&pmu->hrtimer); -} - static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer) { - struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu); + struct rapl_pmu *pmu = container_of(hrtimer, struct rapl_pmu, hrtimer); struct perf_event *event; unsigned long flags; if (!pmu->n_active) return HRTIMER_NORESTART; - spin_lock_irqsave(&pmu->lock, flags); + raw_spin_lock_irqsave(&pmu->lock, flags); - list_for_each_entry(event, &pmu->active_list, active_entry) { + list_for_each_entry(event, &pmu->active_list, active_entry) rapl_event_update(event); - } - spin_unlock_irqrestore(&pmu->lock, flags); + raw_spin_unlock_irqrestore(&pmu->lock, flags); hrtimer_forward_now(hrtimer, pmu->timer_interval); @@ -260,28 +258,28 @@ static void __rapl_pmu_event_start(struct rapl_pmu *pmu, static void rapl_pmu_event_start(struct perf_event *event, int mode) { - struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu); + struct rapl_pmu *pmu = event->pmu_private; unsigned long flags; - spin_lock_irqsave(&pmu->lock, flags); + raw_spin_lock_irqsave(&pmu->lock, flags); __rapl_pmu_event_start(pmu, event); - spin_unlock_irqrestore(&pmu->lock, flags); + raw_spin_unlock_irqrestore(&pmu->lock, flags); } static void rapl_pmu_event_stop(struct perf_event *event, int mode) { - struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu); + struct rapl_pmu *pmu = event->pmu_private; struct hw_perf_event *hwc = &event->hw; unsigned long flags; - spin_lock_irqsave(&pmu->lock, flags); + raw_spin_lock_irqsave(&pmu->lock, flags); /* mark event as deactivated and stopped */ if (!(hwc->state & PERF_HES_STOPPED)) { WARN_ON_ONCE(pmu->n_active <= 0); pmu->n_active--; if (pmu->n_active == 0) - rapl_stop_hrtimer(pmu); + hrtimer_cancel(&pmu->hrtimer); list_del(&event->active_entry); @@ -299,23 +297,23 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode) hwc->state |= PERF_HES_UPTODATE; } - spin_unlock_irqrestore(&pmu->lock, flags); + raw_spin_unlock_irqrestore(&pmu->lock, flags); } static int rapl_pmu_event_add(struct perf_event *event, int mode) { - struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu); + struct rapl_pmu *pmu = event->pmu_private; struct hw_perf_event *hwc = &event->hw; unsigned long flags; - spin_lock_irqsave(&pmu->lock, flags); + raw_spin_lock_irqsave(&pmu->lock, flags); hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; if (mode & PERF_EF_START) __rapl_pmu_event_start(pmu, event); - spin_unlock_irqrestore(&pmu->lock, flags); + raw_spin_unlock_irqrestore(&pmu->lock, flags); return 0; } @@ -329,15 +327,19 @@ static int rapl_pmu_event_init(struct perf_event *event) { u64 cfg = event->attr.config & RAPL_EVENT_MASK; int bit, msr, ret = 0; + struct rapl_pmu *pmu; /* only look at RAPL events */ - if (event->attr.type != rapl_pmu_class.type) + if (event->attr.type != rapl_pmus->pmu.type) return -ENOENT; /* check only supported bits are set */ if (event->attr.config & ~RAPL_EVENT_MASK) return -EINVAL; + if (event->cpu < 0) + return -EINVAL; + /* * check event is known (determines counter) */ @@ -376,6 +378,9 @@ static int rapl_pmu_event_init(struct perf_event *event) return -EINVAL; /* must be done before validate_group */ + pmu = cpu_to_rapl_pmu(event->cpu); + event->cpu = pmu->cpu; + event->pmu_private = pmu; event->hw.event_base = msr; event->hw.config = cfg; event->hw.idx = bit; @@ -506,139 +511,62 @@ const struct attribute_group *rapl_attr_groups[] = { NULL, }; -static struct pmu rapl_pmu_class = { - .attr_groups = rapl_attr_groups, - .task_ctx_nr = perf_invalid_context, /* system-wide only */ - .event_init = rapl_pmu_event_init, - .add = rapl_pmu_event_add, /* must have */ - .del = rapl_pmu_event_del, /* must have */ - .start = rapl_pmu_event_start, - .stop = rapl_pmu_event_stop, - .read = rapl_pmu_event_read, -}; - static void rapl_cpu_exit(int cpu) { - struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); - int i, phys_id = topology_physical_package_id(cpu); - int target = -1; + struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); + int target; - /* find a new cpu on same package */ - for_each_online_cpu(i) { - if (i == cpu) - continue; - if (phys_id == topology_physical_package_id(i)) { - target = i; - break; - } - } - /* - * clear cpu from cpumask - * if was set in cpumask and still some cpu on package, - * then move to new cpu - */ - if (cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask) && target >= 0) - cpumask_set_cpu(target, &rapl_cpu_mask); + /* Check if exiting cpu is used for collecting rapl events */ + if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask)) + return; - WARN_ON(cpumask_empty(&rapl_cpu_mask)); - /* - * migrate events and context to new cpu - */ - if (target >= 0) - perf_pmu_migrate_context(pmu->pmu, cpu, target); + pmu->cpu = -1; + /* Find a new cpu to collect rapl events */ + target = cpumask_any_but(topology_core_cpumask(cpu), cpu); - /* cancel overflow polling timer for CPU */ - rapl_stop_hrtimer(pmu); + /* Migrate rapl events to the new target */ + if (target < nr_cpu_ids) { + cpumask_set_cpu(target, &rapl_cpu_mask); + pmu->cpu = target; + perf_pmu_migrate_context(pmu->pmu, cpu, target); + } } static void rapl_cpu_init(int cpu) { - int i, phys_id = topology_physical_package_id(cpu); + struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); + int target; - /* check if phys_is is already covered */ - for_each_cpu(i, &rapl_cpu_mask) { - if (phys_id == topology_physical_package_id(i)) - return; - } - /* was not found, so add it */ - cpumask_set_cpu(cpu, &rapl_cpu_mask); -} - -static __init void rapl_hsw_server_quirk(void) -{ /* - * DRAM domain on HSW server has fixed energy unit which can be - * different than the unit from power unit MSR. - * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2 - * of 2. Datasheet, September 2014, Reference Number: 330784-001 " + * Check if there is an online cpu in the package which collects rapl + * events already. */ - rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16; + target = cpumask_any_and(&rapl_cpu_mask, topology_core_cpumask(cpu)); + if (target < nr_cpu_ids) + return; + + cpumask_set_cpu(cpu, &rapl_cpu_mask); + pmu->cpu = cpu; } static int rapl_cpu_prepare(int cpu) { - struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); - int phys_id = topology_physical_package_id(cpu); - u64 ms; + struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); if (pmu) return 0; - if (phys_id < 0) - return -1; - pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); if (!pmu) - return -1; - spin_lock_init(&pmu->lock); + return -ENOMEM; + raw_spin_lock_init(&pmu->lock); INIT_LIST_HEAD(&pmu->active_list); - - pmu->pmu = &rapl_pmu_class; - - /* - * use reference of 200W for scaling the timeout - * to avoid missing counter overflows. - * 200W = 200 Joules/sec - * divide interval by 2 to avoid lockstep (2 * 100) - * if hw unit is 32, then we use 2 ms 1/200/2 - */ - if (rapl_hw_unit[0] < 32) - ms = (1000 / (2 * 100)) * (1ULL << (32 - rapl_hw_unit[0] - 1)); - else - ms = 2; - - pmu->timer_interval = ms_to_ktime(ms); - + pmu->pmu = &rapl_pmus->pmu; + pmu->timer_interval = ms_to_ktime(rapl_timer_ms); + pmu->cpu = -1; rapl_hrtimer_init(pmu); - - /* set RAPL pmu for this cpu for now */ - per_cpu(rapl_pmu, cpu) = pmu; - per_cpu(rapl_pmu_to_free, cpu) = NULL; - - return 0; -} - -static void rapl_cpu_kfree(int cpu) -{ - struct rapl_pmu *pmu = per_cpu(rapl_pmu_to_free, cpu); - - kfree(pmu); - - per_cpu(rapl_pmu_to_free, cpu) = NULL; -} - -static int rapl_cpu_dying(int cpu) -{ - struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); - - if (!pmu) - return 0; - - per_cpu(rapl_pmu, cpu) = NULL; - - per_cpu(rapl_pmu_to_free, cpu) = pmu; - + rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu; return 0; } @@ -651,28 +579,20 @@ static int rapl_cpu_notifier(struct notifier_block *self, case CPU_UP_PREPARE: rapl_cpu_prepare(cpu); break; - case CPU_STARTING: - rapl_cpu_init(cpu); - break; - case CPU_UP_CANCELED: - case CPU_DYING: - rapl_cpu_dying(cpu); - break; + + case CPU_DOWN_FAILED: case CPU_ONLINE: - case CPU_DEAD: - rapl_cpu_kfree(cpu); + rapl_cpu_init(cpu); break; + case CPU_DOWN_PREPARE: rapl_cpu_exit(cpu); break; - default: - break; } - return NOTIFY_OK; } -static int rapl_check_hw_unit(void) +static int rapl_check_hw_unit(bool apply_quirk) { u64 msr_rapl_power_unit_bits; int i; @@ -683,28 +603,107 @@ static int rapl_check_hw_unit(void) for (i = 0; i < NR_RAPL_DOMAINS; i++) rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; + /* + * DRAM domain on HSW server and KNL has fixed energy unit which can be + * different than the unit from power unit MSR. See + * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2 + * of 2. Datasheet, September 2014, Reference Number: 330784-001 " + */ + if (apply_quirk) + rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16; + + /* + * Calculate the timer rate: + * Use reference of 200W for scaling the timeout to avoid counter + * overflows. 200W = 200 Joules/sec + * Divide interval by 2 to avoid lockstep (2 * 100) + * if hw unit is 32, then we use 2 ms 1/200/2 + */ + rapl_timer_ms = 2; + if (rapl_hw_unit[0] < 32) { + rapl_timer_ms = (1000 / (2 * 100)); + rapl_timer_ms *= (1ULL << (32 - rapl_hw_unit[0] - 1)); + } + return 0; +} + +static void __init rapl_advertise(void) +{ + int i; + + pr_info("API unit is 2^-32 Joules, %d fixed counters, %llu ms ovfl timer\n", + hweight32(rapl_cntr_mask), rapl_timer_ms); + + for (i = 0; i < NR_RAPL_DOMAINS; i++) { + if (rapl_cntr_mask & (1 << i)) { + pr_info("hw unit of domain %s 2^-%d Joules\n", + rapl_domain_names[i], rapl_hw_unit[i]); + } + } +} + +static int __init rapl_prepare_cpus(void) +{ + unsigned int cpu, pkg; + int ret; + + for_each_online_cpu(cpu) { + pkg = topology_logical_package_id(cpu); + if (rapl_pmus->pmus[pkg]) + continue; + + ret = rapl_cpu_prepare(cpu); + if (ret) + return ret; + rapl_cpu_init(cpu); + } + return 0; +} + +static void __init cleanup_rapl_pmus(void) +{ + int i; + + for (i = 0; i < rapl_pmus->maxpkg; i++) + kfree(rapl_pmus->pmus + i); + kfree(rapl_pmus); +} + +static int __init init_rapl_pmus(void) +{ + int maxpkg = topology_max_packages(); + size_t size; + + size = sizeof(*rapl_pmus) + maxpkg * sizeof(struct rapl_pmu *); + rapl_pmus = kzalloc(size, GFP_KERNEL); + if (!rapl_pmus) + return -ENOMEM; + + rapl_pmus->maxpkg = maxpkg; + rapl_pmus->pmu.attr_groups = rapl_attr_groups; + rapl_pmus->pmu.task_ctx_nr = perf_invalid_context; + rapl_pmus->pmu.event_init = rapl_pmu_event_init; + rapl_pmus->pmu.add = rapl_pmu_event_add; + rapl_pmus->pmu.del = rapl_pmu_event_del; + rapl_pmus->pmu.start = rapl_pmu_event_start; + rapl_pmus->pmu.stop = rapl_pmu_event_stop; + rapl_pmus->pmu.read = rapl_pmu_event_read; return 0; } -static const struct x86_cpu_id rapl_cpu_match[] = { +static const struct x86_cpu_id rapl_cpu_match[] __initconst = { [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 }, [1] = {}, }; static int __init rapl_pmu_init(void) { - struct rapl_pmu *pmu; - int cpu, ret; - struct x86_pmu_quirk *quirk; - int i; + bool apply_quirk = false; + int ret; - /* - * check for Intel processor family 6 - */ if (!x86_match_cpu(rapl_cpu_match)) - return 0; + return -ENODEV; - /* check supported CPU */ switch (boot_cpu_data.x86_model) { case 42: /* Sandy Bridge */ case 58: /* Ivy Bridge */ @@ -712,7 +711,7 @@ static int __init rapl_pmu_init(void) rapl_pmu_events_group.attrs = rapl_events_cln_attr; break; case 63: /* Haswell-Server */ - rapl_add_quirk(rapl_hsw_server_quirk); + apply_quirk = true; rapl_cntr_mask = RAPL_IDX_SRV; rapl_pmu_events_group.attrs = rapl_events_srv_attr; break; @@ -728,56 +727,41 @@ static int __init rapl_pmu_init(void) rapl_pmu_events_group.attrs = rapl_events_srv_attr; break; case 87: /* Knights Landing */ - rapl_add_quirk(rapl_hsw_server_quirk); + apply_quirk = true; rapl_cntr_mask = RAPL_IDX_KNL; rapl_pmu_events_group.attrs = rapl_events_knl_attr; - + break; default: - /* unsupported */ - return 0; + return -ENODEV; } - ret = rapl_check_hw_unit(); + + ret = rapl_check_hw_unit(apply_quirk); if (ret) return ret; - /* run cpu model quirks */ - for (quirk = rapl_quirks; quirk; quirk = quirk->next) - quirk->func(); - cpu_notifier_register_begin(); + ret = init_rapl_pmus(); + if (ret) + return ret; - for_each_online_cpu(cpu) { - ret = rapl_cpu_prepare(cpu); - if (ret) - goto out; - rapl_cpu_init(cpu); - } + cpu_notifier_register_begin(); - __perf_cpu_notifier(rapl_cpu_notifier); + ret = rapl_prepare_cpus(); + if (ret) + goto out; - ret = perf_pmu_register(&rapl_pmu_class, "power", -1); - if (WARN_ON(ret)) { - pr_info("RAPL PMU detected, registration failed (%d), RAPL PMU disabled\n", ret); - cpu_notifier_register_done(); - return -1; - } + ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1); + if (ret) + goto out; - pmu = __this_cpu_read(rapl_pmu); + __perf_cpu_notifier(rapl_cpu_notifier); + cpu_notifier_register_done(); + rapl_advertise(); + return 0; - pr_info("RAPL PMU detected," - " API unit is 2^-32 Joules," - " %d fixed counters" - " %llu ms ovfl timer\n", - hweight32(rapl_cntr_mask), - ktime_to_ms(pmu->timer_interval)); - for (i = 0; i < NR_RAPL_DOMAINS; i++) { - if (rapl_cntr_mask & (1 << i)) { - pr_info("hw unit of domain %s 2^-%d Joules\n", - rapl_domain_names[i], rapl_hw_unit[i]); - } - } out: + pr_warn("Initialization failed (%d), disabled\n", ret); + cleanup_rapl_pmus(); cpu_notifier_register_done(); - - return 0; + return ret; } device_initcall(rapl_pmu_init); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/events/intel/uncore.c index 3bf41d413775..7012d18bb293 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1,4 +1,4 @@ -#include "perf_event_intel_uncore.h" +#include "uncore.h" static struct intel_uncore_type *empty_uncore[] = { NULL, }; struct intel_uncore_type **uncore_msr_uncores = empty_uncore; @@ -9,9 +9,9 @@ struct pci_driver *uncore_pci_driver; /* pci bus to socket mapping */ DEFINE_RAW_SPINLOCK(pci2phy_map_lock); struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head); -struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX]; +struct pci_extra_dev *uncore_extra_pci_dev; +static int max_packages; -static DEFINE_RAW_SPINLOCK(uncore_box_lock); /* mask of cpus that collect uncore events */ static cpumask_t uncore_cpu_mask; @@ -21,7 +21,7 @@ static struct event_constraint uncore_constraint_fixed = struct event_constraint uncore_constraint_empty = EVENT_CONSTRAINT(0, 0, 0); -int uncore_pcibus_to_physid(struct pci_bus *bus) +static int uncore_pcibus_to_physid(struct pci_bus *bus) { struct pci2phy_map *map; int phys_id = -1; @@ -38,6 +38,16 @@ int uncore_pcibus_to_physid(struct pci_bus *bus) return phys_id; } +static void uncore_free_pcibus_map(void) +{ + struct pci2phy_map *map, *tmp; + + list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) { + list_del(&map->list); + kfree(map); + } +} + struct pci2phy_map *__find_pci2phy_map(int segment) { struct pci2phy_map *map, *alloc = NULL; @@ -82,43 +92,9 @@ ssize_t uncore_event_show(struct kobject *kobj, return sprintf(buf, "%s", event->config); } -struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event) -{ - return container_of(event->pmu, struct intel_uncore_pmu, pmu); -} - struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) { - struct intel_uncore_box *box; - - box = *per_cpu_ptr(pmu->box, cpu); - if (box) - return box; - - raw_spin_lock(&uncore_box_lock); - /* Recheck in lock to handle races. */ - if (*per_cpu_ptr(pmu->box, cpu)) - goto out; - list_for_each_entry(box, &pmu->box_list, list) { - if (box->phys_id == topology_physical_package_id(cpu)) { - atomic_inc(&box->refcnt); - *per_cpu_ptr(pmu->box, cpu) = box; - break; - } - } -out: - raw_spin_unlock(&uncore_box_lock); - - return *per_cpu_ptr(pmu->box, cpu); -} - -struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) -{ - /* - * perf core schedules event on the basis of cpu, uncore events are - * collected by one of the cpus inside a physical package. - */ - return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id()); + return pmu->boxes[topology_logical_package_id(cpu)]; } u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) @@ -207,7 +183,8 @@ u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx) return config; } -static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx) +static void uncore_assign_hw_event(struct intel_uncore_box *box, + struct perf_event *event, int idx) { struct hw_perf_event *hwc = &event->hw; @@ -302,24 +279,25 @@ static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box) box->hrtimer.function = uncore_pmu_hrtimer; } -static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node) +static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, + int node) { + int i, size, numshared = type->num_shared_regs ; struct intel_uncore_box *box; - int i, size; - size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg); + size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg); box = kzalloc_node(size, GFP_KERNEL, node); if (!box) return NULL; - for (i = 0; i < type->num_shared_regs; i++) + for (i = 0; i < numshared; i++) raw_spin_lock_init(&box->shared_regs[i].lock); uncore_pmu_init_hrtimer(box); - atomic_set(&box->refcnt, 1); box->cpu = -1; - box->phys_id = -1; + box->pci_phys_id = -1; + box->pkgid = -1; /* set default hrtimer timeout */ box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL; @@ -341,7 +319,8 @@ static bool is_uncore_event(struct perf_event *event) } static int -uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp) +uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, + bool dogrp) { struct perf_event *event; int n, max_count; @@ -402,7 +381,8 @@ uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *eve return &type->unconstrainted; } -static void uncore_put_event_constraint(struct intel_uncore_box *box, struct perf_event *event) +static void uncore_put_event_constraint(struct intel_uncore_box *box, + struct perf_event *event) { if (box->pmu->type->ops->put_constraint) box->pmu->type->ops->put_constraint(box, event); @@ -582,7 +562,7 @@ static void uncore_pmu_event_del(struct perf_event *event, int flags) if (event == box->event_list[i]) { uncore_put_event_constraint(box, event); - while (++i < box->n_events) + for (++i; i < box->n_events; i++) box->event_list[i - 1] = box->event_list[i]; --box->n_events; @@ -676,6 +656,7 @@ static int uncore_pmu_event_init(struct perf_event *event) if (!box || box->cpu < 0) return -EINVAL; event->cpu = box->cpu; + event->pmu_private = box; event->hw.idx = -1; event->hw.last_tag = ~0ULL; @@ -760,64 +741,110 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu) } ret = perf_pmu_register(&pmu->pmu, pmu->name, -1); + if (!ret) + pmu->registered = true; return ret; } +static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu) +{ + if (!pmu->registered) + return; + perf_pmu_unregister(&pmu->pmu); + pmu->registered = false; +} + +static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu) +{ + struct intel_uncore_pmu *pmu = type->pmus; + struct intel_uncore_box *box; + int i, pkg; + + if (pmu) { + pkg = topology_physical_package_id(cpu); + for (i = 0; i < type->num_boxes; i++, pmu++) { + box = pmu->boxes[pkg]; + if (box) + uncore_box_exit(box); + } + } +} + +static void __init uncore_exit_boxes(void *dummy) +{ + struct intel_uncore_type **types; + + for (types = uncore_msr_uncores; *types; types++) + __uncore_exit_boxes(*types++, smp_processor_id()); +} + +static void uncore_free_boxes(struct intel_uncore_pmu *pmu) +{ + int pkg; + + for (pkg = 0; pkg < max_packages; pkg++) + kfree(pmu->boxes[pkg]); + kfree(pmu->boxes); +} + static void __init uncore_type_exit(struct intel_uncore_type *type) { + struct intel_uncore_pmu *pmu = type->pmus; int i; - for (i = 0; i < type->num_boxes; i++) - free_percpu(type->pmus[i].box); - kfree(type->pmus); - type->pmus = NULL; + if (pmu) { + for (i = 0; i < type->num_boxes; i++, pmu++) { + uncore_pmu_unregister(pmu); + uncore_free_boxes(pmu); + } + kfree(type->pmus); + type->pmus = NULL; + } kfree(type->events_group); type->events_group = NULL; } static void __init uncore_types_exit(struct intel_uncore_type **types) { - int i; - for (i = 0; types[i]; i++) - uncore_type_exit(types[i]); + for (; *types; types++) + uncore_type_exit(*types); } -static int __init uncore_type_init(struct intel_uncore_type *type) +static int __init uncore_type_init(struct intel_uncore_type *type, bool setid) { struct intel_uncore_pmu *pmus; struct attribute_group *attr_group; struct attribute **attrs; + size_t size; int i, j; pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL); if (!pmus) return -ENOMEM; - type->pmus = pmus; + size = max_packages * sizeof(struct intel_uncore_box *); + for (i = 0; i < type->num_boxes; i++) { + pmus[i].func_id = setid ? i : -1; + pmus[i].pmu_idx = i; + pmus[i].type = type; + pmus[i].boxes = kzalloc(size, GFP_KERNEL); + if (!pmus[i].boxes) + return -ENOMEM; + } + + type->pmus = pmus; type->unconstrainted = (struct event_constraint) __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1, 0, type->num_counters, 0, 0); - for (i = 0; i < type->num_boxes; i++) { - pmus[i].func_id = -1; - pmus[i].pmu_idx = i; - pmus[i].type = type; - INIT_LIST_HEAD(&pmus[i].box_list); - pmus[i].box = alloc_percpu(struct intel_uncore_box *); - if (!pmus[i].box) - goto fail; - } - if (type->event_descs) { - i = 0; - while (type->event_descs[i].attr.attr.name) - i++; + for (i = 0; type->event_descs[i].attr.attr.name; i++); attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) + sizeof(*attr_group), GFP_KERNEL); if (!attr_group) - goto fail; + return -ENOMEM; attrs = (struct attribute **)(attr_group + 1); attr_group->name = "events"; @@ -831,25 +858,19 @@ static int __init uncore_type_init(struct intel_uncore_type *type) type->pmu_group = &uncore_pmu_attr_group; return 0; -fail: - uncore_type_exit(type); - return -ENOMEM; } -static int __init uncore_types_init(struct intel_uncore_type **types) +static int __init +uncore_types_init(struct intel_uncore_type **types, bool setid) { - int i, ret; + int ret; - for (i = 0; types[i]; i++) { - ret = uncore_type_init(types[i]); + for (; *types; types++) { + ret = uncore_type_init(*types, setid); if (ret) - goto fail; + return ret; } return 0; -fail: - while (--i >= 0) - uncore_type_exit(types[i]); - return ret; } /* @@ -857,28 +878,28 @@ fail: */ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { + struct intel_uncore_type *type; struct intel_uncore_pmu *pmu; struct intel_uncore_box *box; - struct intel_uncore_type *type; - int phys_id; - bool first_box = false; + int phys_id, pkg, ret; phys_id = uncore_pcibus_to_physid(pdev->bus); if (phys_id < 0) return -ENODEV; + pkg = topology_phys_to_logical_pkg(phys_id); + if (WARN_ON_ONCE(pkg < 0)) + return -EINVAL; + if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) { int idx = UNCORE_PCI_DEV_IDX(id->driver_data); - uncore_extra_pci_dev[phys_id][idx] = pdev; + + uncore_extra_pci_dev[pkg].dev[idx] = pdev; pci_set_drvdata(pdev, NULL); return 0; } type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; - box = uncore_alloc_box(type, NUMA_NO_NODE); - if (!box) - return -ENOMEM; - /* * for performance monitoring unit with multiple boxes, * each box has a different function id. @@ -890,44 +911,60 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id * some device types. Hence PCI device idx would be 0 for all devices. * So increment pmu pointer to point to an unused array element. */ - if (boot_cpu_data.x86_model == 87) + if (boot_cpu_data.x86_model == 87) { while (pmu->func_id >= 0) pmu++; + } + + if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL)) + return -EINVAL; + + box = uncore_alloc_box(type, NUMA_NO_NODE); + if (!box) + return -ENOMEM; + if (pmu->func_id < 0) pmu->func_id = pdev->devfn; else WARN_ON_ONCE(pmu->func_id != pdev->devfn); - box->phys_id = phys_id; + atomic_inc(&box->refcnt); + box->pci_phys_id = phys_id; + box->pkgid = pkg; box->pci_dev = pdev; box->pmu = pmu; uncore_box_init(box); pci_set_drvdata(pdev, box); - raw_spin_lock(&uncore_box_lock); - if (list_empty(&pmu->box_list)) - first_box = true; - list_add_tail(&box->list, &pmu->box_list); - raw_spin_unlock(&uncore_box_lock); + pmu->boxes[pkg] = box; + if (atomic_inc_return(&pmu->activeboxes) > 1) + return 0; - if (first_box) - uncore_pmu_register(pmu); - return 0; + /* First active box registers the pmu */ + ret = uncore_pmu_register(pmu); + if (ret) { + pci_set_drvdata(pdev, NULL); + pmu->boxes[pkg] = NULL; + uncore_box_exit(box); + kfree(box); + } + return ret; } static void uncore_pci_remove(struct pci_dev *pdev) { struct intel_uncore_box *box = pci_get_drvdata(pdev); struct intel_uncore_pmu *pmu; - int i, cpu, phys_id; - bool last_box = false; + int i, phys_id, pkg; phys_id = uncore_pcibus_to_physid(pdev->bus); + pkg = topology_phys_to_logical_pkg(phys_id); + box = pci_get_drvdata(pdev); if (!box) { for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) { - if (uncore_extra_pci_dev[phys_id][i] == pdev) { - uncore_extra_pci_dev[phys_id][i] = NULL; + if (uncore_extra_pci_dev[pkg].dev[i] == pdev) { + uncore_extra_pci_dev[pkg].dev[i] = NULL; break; } } @@ -936,33 +973,20 @@ static void uncore_pci_remove(struct pci_dev *pdev) } pmu = box->pmu; - if (WARN_ON_ONCE(phys_id != box->phys_id)) + if (WARN_ON_ONCE(phys_id != box->pci_phys_id)) return; pci_set_drvdata(pdev, NULL); - - raw_spin_lock(&uncore_box_lock); - list_del(&box->list); - if (list_empty(&pmu->box_list)) - last_box = true; - raw_spin_unlock(&uncore_box_lock); - - for_each_possible_cpu(cpu) { - if (*per_cpu_ptr(pmu->box, cpu) == box) { - *per_cpu_ptr(pmu->box, cpu) = NULL; - atomic_dec(&box->refcnt); - } - } - - WARN_ON_ONCE(atomic_read(&box->refcnt) != 1); + pmu->boxes[pkg] = NULL; + if (atomic_dec_return(&pmu->activeboxes) == 0) + uncore_pmu_unregister(pmu); + uncore_box_exit(box); kfree(box); - - if (last_box) - perf_pmu_unregister(&pmu->pmu); } static int __init uncore_pci_init(void) { + size_t size; int ret; switch (boot_cpu_data.x86_model) { @@ -999,25 +1023,40 @@ static int __init uncore_pci_init(void) ret = skl_uncore_pci_init(); break; default: - return 0; + return -ENODEV; } if (ret) return ret; - ret = uncore_types_init(uncore_pci_uncores); + size = max_packages * sizeof(struct pci_extra_dev); + uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL); + if (!uncore_extra_pci_dev) { + ret = -ENOMEM; + goto err; + } + + ret = uncore_types_init(uncore_pci_uncores, false); if (ret) - return ret; + goto errtype; uncore_pci_driver->probe = uncore_pci_probe; uncore_pci_driver->remove = uncore_pci_remove; ret = pci_register_driver(uncore_pci_driver); - if (ret == 0) - pcidrv_registered = true; - else - uncore_types_exit(uncore_pci_uncores); + if (ret) + goto errtype; + + pcidrv_registered = true; + return 0; +errtype: + uncore_types_exit(uncore_pci_uncores); + kfree(uncore_extra_pci_dev); + uncore_extra_pci_dev = NULL; + uncore_free_pcibus_map(); +err: + uncore_pci_uncores = empty_uncore; return ret; } @@ -1027,173 +1066,139 @@ static void __init uncore_pci_exit(void) pcidrv_registered = false; pci_unregister_driver(uncore_pci_driver); uncore_types_exit(uncore_pci_uncores); - } -} - -/* CPU hot plug/unplug are serialized by cpu_add_remove_lock mutex */ -static LIST_HEAD(boxes_to_free); - -static void uncore_kfree_boxes(void) -{ - struct intel_uncore_box *box; - - while (!list_empty(&boxes_to_free)) { - box = list_entry(boxes_to_free.next, - struct intel_uncore_box, list); - list_del(&box->list); - kfree(box); + kfree(uncore_extra_pci_dev); + uncore_free_pcibus_map(); } } static void uncore_cpu_dying(int cpu) { - struct intel_uncore_type *type; + struct intel_uncore_type *type, **types = uncore_msr_uncores; struct intel_uncore_pmu *pmu; struct intel_uncore_box *box; - int i, j; - - for (i = 0; uncore_msr_uncores[i]; i++) { - type = uncore_msr_uncores[i]; - for (j = 0; j < type->num_boxes; j++) { - pmu = &type->pmus[j]; - box = *per_cpu_ptr(pmu->box, cpu); - *per_cpu_ptr(pmu->box, cpu) = NULL; - if (box && atomic_dec_and_test(&box->refcnt)) - list_add(&box->list, &boxes_to_free); + int i, pkg; + + pkg = topology_logical_package_id(cpu); + for (; *types; types++) { + type = *types; + pmu = type->pmus; + for (i = 0; i < type->num_boxes; i++, pmu++) { + box = pmu->boxes[pkg]; + if (box && atomic_dec_return(&box->refcnt) == 0) + uncore_box_exit(box); } } } -static int uncore_cpu_starting(int cpu) +static void uncore_cpu_starting(int cpu, bool init) { - struct intel_uncore_type *type; + struct intel_uncore_type *type, **types = uncore_msr_uncores; struct intel_uncore_pmu *pmu; - struct intel_uncore_box *box, *exist; - int i, j, k, phys_id; - - phys_id = topology_physical_package_id(cpu); - - for (i = 0; uncore_msr_uncores[i]; i++) { - type = uncore_msr_uncores[i]; - for (j = 0; j < type->num_boxes; j++) { - pmu = &type->pmus[j]; - box = *per_cpu_ptr(pmu->box, cpu); - /* called by uncore_cpu_init? */ - if (box && box->phys_id >= 0) { - uncore_box_init(box); - continue; - } + struct intel_uncore_box *box; + int i, pkg, ncpus = 1; - for_each_online_cpu(k) { - exist = *per_cpu_ptr(pmu->box, k); - if (exist && exist->phys_id == phys_id) { - atomic_inc(&exist->refcnt); - *per_cpu_ptr(pmu->box, cpu) = exist; - if (box) { - list_add(&box->list, - &boxes_to_free); - box = NULL; - } - break; - } - } + if (init) { + /* + * On init we get the number of online cpus in the package + * and set refcount for all of them. + */ + ncpus = cpumask_weight(topology_core_cpumask(cpu)); + } - if (box) { - box->phys_id = phys_id; + pkg = topology_logical_package_id(cpu); + for (; *types; types++) { + type = *types; + pmu = type->pmus; + for (i = 0; i < type->num_boxes; i++, pmu++) { + box = pmu->boxes[pkg]; + if (!box) + continue; + /* The first cpu on a package activates the box */ + if (atomic_add_return(ncpus, &box->refcnt) == ncpus) uncore_box_init(box); - } } } - return 0; } -static int uncore_cpu_prepare(int cpu, int phys_id) +static int uncore_cpu_prepare(int cpu) { - struct intel_uncore_type *type; + struct intel_uncore_type *type, **types = uncore_msr_uncores; struct intel_uncore_pmu *pmu; struct intel_uncore_box *box; - int i, j; - - for (i = 0; uncore_msr_uncores[i]; i++) { - type = uncore_msr_uncores[i]; - for (j = 0; j < type->num_boxes; j++) { - pmu = &type->pmus[j]; - if (pmu->func_id < 0) - pmu->func_id = j; - + int i, pkg; + + pkg = topology_logical_package_id(cpu); + for (; *types; types++) { + type = *types; + pmu = type->pmus; + for (i = 0; i < type->num_boxes; i++, pmu++) { + if (pmu->boxes[pkg]) + continue; + /* First cpu of a package allocates the box */ box = uncore_alloc_box(type, cpu_to_node(cpu)); if (!box) return -ENOMEM; - box->pmu = pmu; - box->phys_id = phys_id; - *per_cpu_ptr(pmu->box, cpu) = box; + box->pkgid = pkg; + pmu->boxes[pkg] = box; } } return 0; } -static void -uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_cpu) +static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu, + int new_cpu) { - struct intel_uncore_type *type; - struct intel_uncore_pmu *pmu; + struct intel_uncore_pmu *pmu = type->pmus; struct intel_uncore_box *box; - int i, j; + int i, pkg; - for (i = 0; uncores[i]; i++) { - type = uncores[i]; - for (j = 0; j < type->num_boxes; j++) { - pmu = &type->pmus[j]; - if (old_cpu < 0) - box = uncore_pmu_to_box(pmu, new_cpu); - else - box = uncore_pmu_to_box(pmu, old_cpu); - if (!box) - continue; - - if (old_cpu < 0) { - WARN_ON_ONCE(box->cpu != -1); - box->cpu = new_cpu; - continue; - } + pkg = topology_logical_package_id(old_cpu < 0 ? new_cpu : old_cpu); + for (i = 0; i < type->num_boxes; i++, pmu++) { + box = pmu->boxes[pkg]; + if (!box) + continue; - WARN_ON_ONCE(box->cpu != old_cpu); - if (new_cpu >= 0) { - uncore_pmu_cancel_hrtimer(box); - perf_pmu_migrate_context(&pmu->pmu, - old_cpu, new_cpu); - box->cpu = new_cpu; - } else { - box->cpu = -1; - } + if (old_cpu < 0) { + WARN_ON_ONCE(box->cpu != -1); + box->cpu = new_cpu; + continue; } + + WARN_ON_ONCE(box->cpu != old_cpu); + box->cpu = -1; + if (new_cpu < 0) + continue; + + uncore_pmu_cancel_hrtimer(box); + perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu); + box->cpu = new_cpu; } } +static void uncore_change_context(struct intel_uncore_type **uncores, + int old_cpu, int new_cpu) +{ + for (; *uncores; uncores++) + uncore_change_type_ctx(*uncores, old_cpu, new_cpu); +} + static void uncore_event_exit_cpu(int cpu) { - int i, phys_id, target; + int target; - /* if exiting cpu is used for collecting uncore events */ + /* Check if exiting cpu is used for collecting uncore events */ if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask)) return; - /* find a new cpu to collect uncore events */ - phys_id = topology_physical_package_id(cpu); - target = -1; - for_each_online_cpu(i) { - if (i == cpu) - continue; - if (phys_id == topology_physical_package_id(i)) { - target = i; - break; - } - } + /* Find a new cpu to collect uncore events */ + target = cpumask_any_but(topology_core_cpumask(cpu), cpu); - /* migrate uncore events to the new cpu */ - if (target >= 0) + /* Migrate uncore events to the new target */ + if (target < nr_cpu_ids) cpumask_set_cpu(target, &uncore_cpu_mask); + else + target = -1; uncore_change_context(uncore_msr_uncores, cpu, target); uncore_change_context(uncore_pci_uncores, cpu, target); @@ -1201,13 +1206,15 @@ static void uncore_event_exit_cpu(int cpu) static void uncore_event_init_cpu(int cpu) { - int i, phys_id; + int target; - phys_id = topology_physical_package_id(cpu); - for_each_cpu(i, &uncore_cpu_mask) { - if (phys_id == topology_physical_package_id(i)) - return; - } + /* + * Check if there is an online cpu in the package + * which collects uncore events already. + */ + target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu)); + if (target < nr_cpu_ids) + return; cpumask_set_cpu(cpu, &uncore_cpu_mask); @@ -1220,39 +1227,25 @@ static int uncore_cpu_notifier(struct notifier_block *self, { unsigned int cpu = (long)hcpu; - /* allocate/free data structure for uncore box */ switch (action & ~CPU_TASKS_FROZEN) { case CPU_UP_PREPARE: - uncore_cpu_prepare(cpu, -1); - break; + return notifier_from_errno(uncore_cpu_prepare(cpu)); + case CPU_STARTING: - uncore_cpu_starting(cpu); + uncore_cpu_starting(cpu, false); + case CPU_DOWN_FAILED: + uncore_event_init_cpu(cpu); break; + case CPU_UP_CANCELED: case CPU_DYING: uncore_cpu_dying(cpu); break; - case CPU_ONLINE: - case CPU_DEAD: - uncore_kfree_boxes(); - break; - default: - break; - } - /* select the cpu that collects uncore events */ - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_DOWN_FAILED: - case CPU_STARTING: - uncore_event_init_cpu(cpu); - break; case CPU_DOWN_PREPARE: uncore_event_exit_cpu(cpu); break; - default: - break; } - return NOTIFY_OK; } @@ -1265,9 +1258,29 @@ static struct notifier_block uncore_cpu_nb = { .priority = CPU_PRI_PERF + 1, }; -static void __init uncore_cpu_setup(void *dummy) +static int __init type_pmu_register(struct intel_uncore_type *type) { - uncore_cpu_starting(smp_processor_id()); + int i, ret; + + for (i = 0; i < type->num_boxes; i++) { + ret = uncore_pmu_register(&type->pmus[i]); + if (ret) + return ret; + } + return 0; +} + +static int __init uncore_msr_pmus_register(void) +{ + struct intel_uncore_type **types = uncore_msr_uncores; + int ret; + + for (; *types; types++) { + ret = type_pmu_register(*types); + if (ret) + return ret; + } + return 0; } static int __init uncore_cpu_init(void) @@ -1311,71 +1324,61 @@ static int __init uncore_cpu_init(void) knl_uncore_cpu_init(); break; default: - return 0; + return -ENODEV; } - ret = uncore_types_init(uncore_msr_uncores); + ret = uncore_types_init(uncore_msr_uncores, true); if (ret) - return ret; + goto err; + ret = uncore_msr_pmus_register(); + if (ret) + goto err; return 0; +err: + uncore_types_exit(uncore_msr_uncores); + uncore_msr_uncores = empty_uncore; + return ret; } -static int __init uncore_pmus_register(void) +static void __init uncore_cpu_setup(void *dummy) { - struct intel_uncore_pmu *pmu; - struct intel_uncore_type *type; - int i, j; - - for (i = 0; uncore_msr_uncores[i]; i++) { - type = uncore_msr_uncores[i]; - for (j = 0; j < type->num_boxes; j++) { - pmu = &type->pmus[j]; - uncore_pmu_register(pmu); - } - } - - return 0; + uncore_cpu_starting(smp_processor_id(), true); } -static void __init uncore_cpumask_init(void) -{ - int cpu; - - /* - * ony invoke once from msr or pci init code - */ - if (!cpumask_empty(&uncore_cpu_mask)) - return; +/* Lazy to avoid allocation of a few bytes for the normal case */ +static __initdata DECLARE_BITMAP(packages, MAX_LOCAL_APIC); - cpu_notifier_register_begin(); +static int __init uncore_cpumask_init(bool msr) +{ + unsigned int cpu; for_each_online_cpu(cpu) { - int i, phys_id = topology_physical_package_id(cpu); + unsigned int pkg = topology_logical_package_id(cpu); + int ret; - for_each_cpu(i, &uncore_cpu_mask) { - if (phys_id == topology_physical_package_id(i)) { - phys_id = -1; - break; - } - } - if (phys_id < 0) + if (test_and_set_bit(pkg, packages)) continue; - - uncore_cpu_prepare(cpu, phys_id); + /* + * The first online cpu of each package allocates and takes + * the refcounts for all other online cpus in that package. + * If msrs are not enabled no allocation is required. + */ + if (msr) { + ret = uncore_cpu_prepare(cpu); + if (ret) + return ret; + } uncore_event_init_cpu(cpu); + smp_call_function_single(cpu, uncore_cpu_setup, NULL, 1); } - on_each_cpu(uncore_cpu_setup, NULL, 1); - __register_cpu_notifier(&uncore_cpu_nb); - - cpu_notifier_register_done(); + return 0; } - static int __init intel_uncore_init(void) { - int ret; + int pret, cret, ret; if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return -ENODEV; @@ -1383,19 +1386,27 @@ static int __init intel_uncore_init(void) if (cpu_has_hypervisor) return -ENODEV; - ret = uncore_pci_init(); - if (ret) - goto fail; - ret = uncore_cpu_init(); - if (ret) { - uncore_pci_exit(); - goto fail; - } - uncore_cpumask_init(); + max_packages = topology_max_packages(); + + pret = uncore_pci_init(); + cret = uncore_cpu_init(); - uncore_pmus_register(); + if (cret && pret) + return -ENODEV; + + cpu_notifier_register_begin(); + ret = uncore_cpumask_init(!cret); + if (ret) + goto err; + cpu_notifier_register_done(); return 0; -fail: + +err: + /* Undo box->init_box() */ + on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1); + uncore_types_exit(uncore_msr_uncores); + uncore_pci_exit(); + cpu_notifier_register_done(); return ret; } device_initcall(intel_uncore_init); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/events/intel/uncore.h index a7086b862156..79766b9a3580 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -1,8 +1,10 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/pci.h> +#include <asm/apicdef.h> + #include <linux/perf_event.h> -#include "perf_event.h" +#include "../perf_event.h" #define UNCORE_PMU_NAME_LEN 32 #define UNCORE_PMU_HRTIMER_INTERVAL (60LL * NSEC_PER_SEC) @@ -19,11 +21,12 @@ #define UNCORE_EXTRA_PCI_DEV 0xff #define UNCORE_EXTRA_PCI_DEV_MAX 3 -/* support up to 8 sockets */ -#define UNCORE_SOCKET_MAX 8 - #define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff) +struct pci_extra_dev { + struct pci_dev *dev[UNCORE_EXTRA_PCI_DEV_MAX]; +}; + struct intel_uncore_ops; struct intel_uncore_pmu; struct intel_uncore_box; @@ -61,6 +64,7 @@ struct intel_uncore_type { struct intel_uncore_ops { void (*init_box)(struct intel_uncore_box *); + void (*exit_box)(struct intel_uncore_box *); void (*disable_box)(struct intel_uncore_box *); void (*enable_box)(struct intel_uncore_box *); void (*disable_event)(struct intel_uncore_box *, struct perf_event *); @@ -73,13 +77,14 @@ struct intel_uncore_ops { }; struct intel_uncore_pmu { - struct pmu pmu; - char name[UNCORE_PMU_NAME_LEN]; - int pmu_idx; - int func_id; - struct intel_uncore_type *type; - struct intel_uncore_box ** __percpu box; - struct list_head box_list; + struct pmu pmu; + char name[UNCORE_PMU_NAME_LEN]; + int pmu_idx; + int func_id; + bool registered; + atomic_t activeboxes; + struct intel_uncore_type *type; + struct intel_uncore_box **boxes; }; struct intel_uncore_extra_reg { @@ -89,7 +94,8 @@ struct intel_uncore_extra_reg { }; struct intel_uncore_box { - int phys_id; + int pci_phys_id; + int pkgid; int n_active; /* number of active events */ int n_events; int cpu; /* cpu to collect events */ @@ -123,7 +129,6 @@ struct pci2phy_map { int pbus_to_physid[256]; }; -int uncore_pcibus_to_physid(struct pci_bus *bus); struct pci2phy_map *__find_pci2phy_map(int segment); ssize_t uncore_event_show(struct kobject *kobj, @@ -305,14 +310,30 @@ static inline void uncore_box_init(struct intel_uncore_box *box) } } +static inline void uncore_box_exit(struct intel_uncore_box *box) +{ + if (test_and_clear_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) { + if (box->pmu->type->ops->exit_box) + box->pmu->type->ops->exit_box(box); + } +} + static inline bool uncore_box_is_fake(struct intel_uncore_box *box) { - return (box->phys_id < 0); + return (box->pkgid < 0); +} + +static inline struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event) +{ + return container_of(event->pmu, struct intel_uncore_pmu, pmu); +} + +static inline struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) +{ + return event->pmu_private; } -struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event); struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu); -struct intel_uncore_box *uncore_event_to_box(struct perf_event *event); u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event); void uncore_pmu_start_hrtimer(struct intel_uncore_box *box); void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box); @@ -328,7 +349,7 @@ extern struct intel_uncore_type **uncore_pci_uncores; extern struct pci_driver *uncore_pci_driver; extern raw_spinlock_t pci2phy_map_lock; extern struct list_head pci2phy_map_head; -extern struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX]; +extern struct pci_extra_dev *uncore_extra_pci_dev; extern struct event_constraint uncore_constraint_empty; /* perf_event_intel_uncore_snb.c */ diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c index 2749965afed0..cda569332005 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c +++ b/arch/x86/events/intel/uncore_nhmex.c @@ -1,5 +1,5 @@ /* Nehalem-EX/Westmere-EX uncore support */ -#include "perf_event_intel_uncore.h" +#include "uncore.h" /* NHM-EX event control */ #define NHMEX_PMON_CTL_EV_SEL_MASK 0x000000ff @@ -201,6 +201,11 @@ static void nhmex_uncore_msr_init_box(struct intel_uncore_box *box) wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL); } +static void nhmex_uncore_msr_exit_box(struct intel_uncore_box *box) +{ + wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, 0); +} + static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box) { unsigned msr = uncore_msr_box_ctl(box); @@ -250,6 +255,7 @@ static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct p #define NHMEX_UNCORE_OPS_COMMON_INIT() \ .init_box = nhmex_uncore_msr_init_box, \ + .exit_box = nhmex_uncore_msr_exit_box, \ .disable_box = nhmex_uncore_msr_disable_box, \ .enable_box = nhmex_uncore_msr_enable_box, \ .disable_event = nhmex_uncore_msr_disable_event, \ diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 2bd030ddd0db..96531d2b843f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -1,5 +1,5 @@ /* Nehalem/SandBridge/Haswell uncore support */ -#include "perf_event_intel_uncore.h" +#include "uncore.h" /* Uncore IMC PCI IDs */ #define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100 @@ -95,6 +95,12 @@ static void snb_uncore_msr_init_box(struct intel_uncore_box *box) } } +static void snb_uncore_msr_exit_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) + wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, 0); +} + static struct uncore_event_desc snb_uncore_events[] = { INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), { /* end: all zeroes */ }, @@ -116,6 +122,7 @@ static struct attribute_group snb_uncore_format_group = { static struct intel_uncore_ops snb_uncore_msr_ops = { .init_box = snb_uncore_msr_init_box, + .exit_box = snb_uncore_msr_exit_box, .disable_event = snb_uncore_msr_disable_event, .enable_event = snb_uncore_msr_enable_event, .read_counter = uncore_msr_read_counter, @@ -231,6 +238,11 @@ static void snb_uncore_imc_init_box(struct intel_uncore_box *box) box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL; } +static void snb_uncore_imc_exit_box(struct intel_uncore_box *box) +{ + iounmap(box->io_addr); +} + static void snb_uncore_imc_enable_box(struct intel_uncore_box *box) {} @@ -301,6 +313,7 @@ static int snb_uncore_imc_event_init(struct perf_event *event) return -EINVAL; event->cpu = box->cpu; + event->pmu_private = box; event->hw.idx = -1; event->hw.last_tag = ~0ULL; @@ -458,6 +471,7 @@ static struct pmu snb_uncore_imc_pmu = { static struct intel_uncore_ops snb_uncore_imc_ops = { .init_box = snb_uncore_imc_init_box, + .exit_box = snb_uncore_imc_exit_box, .enable_box = snb_uncore_imc_enable_box, .disable_box = snb_uncore_imc_disable_box, .disable_event = snb_uncore_imc_disable_event, diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 33acb884ccf1..93f6bd9bf761 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1,6 +1,5 @@ /* SandyBridge-EP/IvyTown uncore support */ -#include "perf_event_intel_uncore.h" - +#include "uncore.h" /* SNB-EP Box level control */ #define SNBEP_PMON_BOX_CTL_RST_CTRL (1 << 0) @@ -987,7 +986,9 @@ static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_eve if (reg1->idx != EXTRA_REG_NONE) { int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER; - struct pci_dev *filter_pdev = uncore_extra_pci_dev[box->phys_id][idx]; + int pkg = topology_phys_to_logical_pkg(box->pci_phys_id); + struct pci_dev *filter_pdev = uncore_extra_pci_dev[pkg].dev[idx]; + if (filter_pdev) { pci_write_config_dword(filter_pdev, reg1->reg, (u32)reg1->config); @@ -2521,14 +2522,16 @@ static struct intel_uncore_type *hswep_msr_uncores[] = { void hswep_uncore_cpu_init(void) { + int pkg = topology_phys_to_logical_pkg(0); + if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; /* Detect 6-8 core systems with only two SBOXes */ - if (uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3]) { + if (uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]) { u32 capid4; - pci_read_config_dword(uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3], + pci_read_config_dword(uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3], 0x94, &capid4); if (((capid4 >> 6) & 0x3) == 0) hswep_uncore_sbox.num_boxes = 2; @@ -2875,11 +2878,13 @@ static struct intel_uncore_type bdx_uncore_sbox = { .format_group = &hswep_uncore_sbox_format_group, }; +#define BDX_MSR_UNCORE_SBOX 3 + static struct intel_uncore_type *bdx_msr_uncores[] = { &bdx_uncore_ubox, &bdx_uncore_cbox, - &bdx_uncore_sbox, &hswep_uncore_pcu, + &bdx_uncore_sbox, NULL, }; @@ -2888,6 +2893,10 @@ void bdx_uncore_cpu_init(void) if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; uncore_msr_uncores = bdx_msr_uncores; + + /* BDX-DE doesn't have SBOX */ + if (boot_cpu_data.x86_model == 86) + uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL; } static struct intel_uncore_type bdx_uncore_ha = { diff --git a/arch/x86/kernel/cpu/perf_event_msr.c b/arch/x86/events/msr.c index ec863b9a9f78..ec863b9a9f78 100644 --- a/arch/x86/kernel/cpu/perf_event_msr.c +++ b/arch/x86/events/msr.c diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/events/perf_event.h index 7bb61e32fb29..68155cafa8a1 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -586,6 +586,7 @@ struct x86_pmu { pebs_broken :1, pebs_prec_dist :1; int pebs_record_size; + int pebs_buffer_size; void (*drain_pebs)(struct pt_regs *regs); struct event_constraint *pebs_constraints; void (*pebs_aliases)(struct perf_event *event); @@ -860,6 +861,8 @@ extern struct event_constraint intel_ivb_pebs_event_constraints[]; extern struct event_constraint intel_hsw_pebs_event_constraints[]; +extern struct event_constraint intel_bdw_pebs_event_constraints[]; + extern struct event_constraint intel_skl_pebs_event_constraints[]; struct event_constraint *intel_pebs_constraints(struct perf_event *event); @@ -904,6 +907,8 @@ void intel_pmu_lbr_init_skl(void); void intel_pmu_lbr_init_knl(void); +void intel_pmu_pebs_data_source_nhm(void); + int intel_pmu_setup_lbr_filter(struct perf_event *event); void intel_pt_interrupt(void); diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 1514753fd435..15340e36ddcb 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -256,7 +256,7 @@ extern int force_personality32; instruction set this CPU supports. This could be done in user space, but it's not easy, and we've already done it here. */ -#define ELF_HWCAP (boot_cpu_data.x86_capability[0]) +#define ELF_HWCAP (boot_cpu_data.x86_capability[CPUID_1_EDX]) /* This yields a string that ld.so will use to load implementation specific libraries for optimization. This is more specific in diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 7bcb861a04e5..5a2ed3ed2f26 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -165,6 +165,7 @@ struct x86_pmu_capability { #define GLOBAL_STATUS_ASIF BIT_ULL(60) #define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59) #define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(58) +#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(55) /* * IBS cpuid feature detection diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 20c11d1aa4cc..813384ef811a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -129,6 +129,8 @@ struct cpuinfo_x86 { u16 booted_cores; /* Physical processor id: */ u16 phys_proc_id; + /* Logical processor id: */ + u16 logical_proc_id; /* Core id: */ u16 cpu_core_id; /* Compute unit id */ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 0fb46482dfde..7f991bd5031b 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -119,12 +119,23 @@ static inline void setup_node_to_cpumask_map(void) { } extern const struct cpumask *cpu_coregroup_mask(int cpu); +#define topology_logical_package_id(cpu) (cpu_data(cpu).logical_proc_id) #define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) #ifdef ENABLE_TOPO_DEFINES #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) + +extern unsigned int __max_logical_packages; +#define topology_max_packages() (__max_logical_packages) +int topology_update_package_map(unsigned int apicid, unsigned int cpu); +extern int topology_phys_to_logical_pkg(unsigned int pkg); +#else +#define topology_max_packages() (1) +static inline int +topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } +static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } #endif static inline void arch_fix_phys_package_id(int num, u32 slot) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 8a5cddac7d44..531b9611c51d 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2078,6 +2078,20 @@ int generic_processor_info(int apicid, int version) cpu = cpumask_next_zero(-1, cpu_present_mask); /* + * This can happen on physical hotplug. The sanity check at boot time + * is done from native_smp_prepare_cpus() after num_possible_cpus() is + * established. + */ + if (topology_update_package_map(apicid, cpu) < 0) { + int thiscpu = max + disabled_cpus; + + pr_warning("ACPI: Package limit reached. Processor %d/0x%x ignored.\n", + thiscpu, apicid); + disabled_cpus++; + return -ENOSPC; + } + + /* * Validate version */ if (version == 0x0) { diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 58031303e304..7a60424d63fa 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -30,33 +30,11 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o -obj-$(CONFIG_PERF_EVENTS) += perf_event.o - -ifdef CONFIG_PERF_EVENTS -obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o perf_event_amd_uncore.o -ifdef CONFIG_AMD_IOMMU -obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o -endif -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o perf_event_intel_cqm.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_pt.o perf_event_intel_bts.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_cstate.o - -obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \ - perf_event_intel_uncore_snb.o \ - perf_event_intel_uncore_snbep.o \ - perf_event_intel_uncore_nhmex.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_msr.o -obj-$(CONFIG_CPU_SUP_AMD) += perf_event_msr.o -endif - - obj-$(CONFIG_X86_MCE) += mcheck/ obj-$(CONFIG_MTRR) += mtrr/ obj-$(CONFIG_MICROCODE) += microcode/ -obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o +obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a07956a08936..97c59fd60702 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -117,7 +117,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) void (*f_vide)(void); u64 d, d2; - printk(KERN_INFO "AMD K6 stepping B detected - "); + pr_info("AMD K6 stepping B detected - "); /* * It looks like AMD fixed the 2.6.2 bug and improved indirect @@ -133,10 +133,9 @@ static void init_amd_k6(struct cpuinfo_x86 *c) d = d2-d; if (d > 20*K6_BUG_LOOP) - printk(KERN_CONT - "system stability may be impaired when more than 32 MB are used.\n"); + pr_cont("system stability may be impaired when more than 32 MB are used.\n"); else - printk(KERN_CONT "probably OK (after B9730xxxx).\n"); + pr_cont("probably OK (after B9730xxxx).\n"); } /* K6 with old style WHCR */ @@ -154,7 +153,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) wbinvd(); wrmsr(MSR_K6_WHCR, l, h); local_irq_restore(flags); - printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", + pr_info("Enabling old style K6 write allocation for %d Mb\n", mbytes); } return; @@ -175,7 +174,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) wbinvd(); wrmsr(MSR_K6_WHCR, l, h); local_irq_restore(flags); - printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", + pr_info("Enabling new style K6 write allocation for %d Mb\n", mbytes); } @@ -202,7 +201,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c) */ if (c->x86_model >= 6 && c->x86_model <= 10) { if (!cpu_has(c, X86_FEATURE_XMM)) { - printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); + pr_info("Enabling disabled K7/SSE Support.\n"); msr_clear_bit(MSR_K7_HWCR, 15); set_cpu_cap(c, X86_FEATURE_XMM); } @@ -216,9 +215,8 @@ static void init_amd_k7(struct cpuinfo_x86 *c) if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { rdmsr(MSR_K7_CLK_CTL, l, h); if ((l & 0xfff00000) != 0x20000000) { - printk(KERN_INFO - "CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", - l, ((l & 0x000fffff)|0x20000000)); + pr_info("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", + l, ((l & 0x000fffff)|0x20000000)); wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); } } @@ -485,7 +483,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { unsigned long pfn = tseg >> PAGE_SHIFT; - printk(KERN_DEBUG "tseg: %010llx\n", tseg); + pr_debug("tseg: %010llx\n", tseg); if (pfn_range_is_mapped(pfn, pfn + 1)) set_memory_4k((unsigned long)__va(tseg), 1); } @@ -500,8 +498,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) rdmsrl(MSR_K7_HWCR, val); if (!(val & BIT(24))) - printk(KERN_WARNING FW_BUG "TSC doesn't count " - "with P0 frequency!\n"); + pr_warn(FW_BUG "TSC doesn't count with P0 frequency!\n"); } } diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c index 04f0fe5af83e..a972ac4c7e7d 100644 --- a/arch/x86/kernel/cpu/bugs_64.c +++ b/arch/x86/kernel/cpu/bugs_64.c @@ -15,7 +15,7 @@ void __init check_bugs(void) { identify_boot_cpu(); #if !defined(CONFIG_SMP) - printk(KERN_INFO "CPU: "); + pr_info("CPU: "); print_cpu_info(&boot_cpu_data); #endif alternative_instructions(); diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index ae20be6e483c..ce197bb7c129 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -29,7 +29,7 @@ static void init_c3(struct cpuinfo_x86 *c) rdmsr(MSR_VIA_FCR, lo, hi); lo |= ACE_FCR; /* enable ACE unit */ wrmsr(MSR_VIA_FCR, lo, hi); - printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n"); + pr_info("CPU: Enabled ACE h/w crypto\n"); } /* enable RNG unit, if present and disabled */ @@ -37,7 +37,7 @@ static void init_c3(struct cpuinfo_x86 *c) rdmsr(MSR_VIA_RNG, lo, hi); lo |= RNG_ENABLE; /* enable RNG unit */ wrmsr(MSR_VIA_RNG, lo, hi); - printk(KERN_INFO "CPU: Enabled h/w RNG\n"); + pr_info("CPU: Enabled h/w RNG\n"); } /* store Centaur Extended Feature Flags as @@ -130,7 +130,7 @@ static void init_centaur(struct cpuinfo_x86 *c) name = "C6"; fcr_set = ECX8|DSMC|EDCTLB|EMMX|ERETSTK; fcr_clr = DPDC; - printk(KERN_NOTICE "Disabling bugged TSC.\n"); + pr_notice("Disabling bugged TSC.\n"); clear_cpu_cap(c, X86_FEATURE_TSC); break; case 8: @@ -163,11 +163,11 @@ static void init_centaur(struct cpuinfo_x86 *c) newlo = (lo|fcr_set) & (~fcr_clr); if (newlo != lo) { - printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n", + pr_info("Centaur FCR was 0x%X now 0x%X\n", lo, newlo); wrmsr(MSR_IDT_FCR1, newlo, hi); } else { - printk(KERN_INFO "Centaur FCR is 0x%X\n", lo); + pr_info("Centaur FCR is 0x%X\n", lo); } /* Emulate MTRRs using Centaur's MCR. */ set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 37830de8f60a..81cf716f6f97 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -228,7 +228,7 @@ static void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) lo |= 0x200000; wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - printk(KERN_NOTICE "CPU serial number disabled.\n"); + pr_notice("CPU serial number disabled.\n"); clear_cpu_cap(c, X86_FEATURE_PN); /* Disabling the serial number may affect the cpuid level */ @@ -329,9 +329,8 @@ static void filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) if (!warn) continue; - printk(KERN_WARNING - "CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n", - x86_cap_flag(df->feature), df->level); + pr_warn("CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n", + x86_cap_flag(df->feature), df->level); } } @@ -510,7 +509,7 @@ void detect_ht(struct cpuinfo_x86 *c) smp_num_siblings = (ebx & 0xff0000) >> 16; if (smp_num_siblings == 1) { - printk_once(KERN_INFO "CPU0: Hyper-Threading is disabled\n"); + pr_info_once("CPU0: Hyper-Threading is disabled\n"); goto out; } @@ -531,10 +530,10 @@ void detect_ht(struct cpuinfo_x86 *c) out: if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) { - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - c->phys_proc_id); - printk(KERN_INFO "CPU: Processor Core ID: %d\n", - c->cpu_core_id); + pr_info("CPU: Physical Processor ID: %d\n", + c->phys_proc_id); + pr_info("CPU: Processor Core ID: %d\n", + c->cpu_core_id); printed = 1; } #endif @@ -559,9 +558,8 @@ static void get_cpu_vendor(struct cpuinfo_x86 *c) } } - printk_once(KERN_ERR - "CPU: vendor_id '%s' unknown, using generic init.\n" \ - "CPU: Your system may be unstable.\n", v); + pr_err_once("CPU: vendor_id '%s' unknown, using generic init.\n" \ + "CPU: Your system may be unstable.\n", v); c->x86_vendor = X86_VENDOR_UNKNOWN; this_cpu = &default_cpu; @@ -760,7 +758,7 @@ void __init early_cpu_init(void) int count = 0; #ifdef CONFIG_PROCESSOR_SELECT - printk(KERN_INFO "KERNEL supported cpus:\n"); + pr_info("KERNEL supported cpus:\n"); #endif for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { @@ -778,7 +776,7 @@ void __init early_cpu_init(void) for (j = 0; j < 2; j++) { if (!cpudev->c_ident[j]) continue; - printk(KERN_INFO " %s %s\n", cpudev->c_vendor, + pr_info(" %s %s\n", cpudev->c_vendor, cpudev->c_ident[j]); } } @@ -977,6 +975,8 @@ static void identify_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_NUMA numa_add_cpu(smp_processor_id()); #endif + /* The boot/hotplug time assigment got cleared, restore it */ + c->logical_proc_id = topology_phys_to_logical_pkg(c->phys_proc_id); } /* @@ -1061,7 +1061,7 @@ static void __print_cpu_msr(void) for (index = index_min; index < index_max; index++) { if (rdmsrl_safe(index, &val)) continue; - printk(KERN_INFO " MSR%08x: %016llx\n", index, val); + pr_info(" MSR%08x: %016llx\n", index, val); } } } @@ -1100,19 +1100,19 @@ void print_cpu_info(struct cpuinfo_x86 *c) } if (vendor && !strstr(c->x86_model_id, vendor)) - printk(KERN_CONT "%s ", vendor); + pr_cont("%s ", vendor); if (c->x86_model_id[0]) - printk(KERN_CONT "%s", c->x86_model_id); + pr_cont("%s", c->x86_model_id); else - printk(KERN_CONT "%d86", c->x86); + pr_cont("%d86", c->x86); - printk(KERN_CONT " (family: 0x%x, model: 0x%x", c->x86, c->x86_model); + pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model); if (c->x86_mask || c->cpuid_level >= 0) - printk(KERN_CONT ", stepping: 0x%x)\n", c->x86_mask); + pr_cont(", stepping: 0x%x)\n", c->x86_mask); else - printk(KERN_CONT ")\n"); + pr_cont(")\n"); print_cpu_msr(c); } @@ -1438,7 +1438,7 @@ void cpu_init(void) show_ucode_info_early(); - printk(KERN_INFO "Initializing CPU#%d\n", cpu); + pr_info("Initializing CPU#%d\n", cpu); if (cpu_feature_enabled(X86_FEATURE_VME) || cpu_has_tsc || diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index aaf152e79637..187bb583d0df 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -103,7 +103,7 @@ static void check_cx686_slop(struct cpuinfo_x86 *c) local_irq_restore(flags); if (ccr5 & 2) { /* possible wrong calibration done */ - printk(KERN_INFO "Recalibrating delay loop with SLOP bit reset\n"); + pr_info("Recalibrating delay loop with SLOP bit reset\n"); calibrate_delay(); c->loops_per_jiffy = loops_per_jiffy; } @@ -115,7 +115,7 @@ static void set_cx86_reorder(void) { u8 ccr3; - printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n"); + pr_info("Enable Memory access reorder on Cyrix/NSC processor.\n"); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ @@ -128,7 +128,7 @@ static void set_cx86_reorder(void) static void set_cx86_memwb(void) { - printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); + pr_info("Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); /* CCR2 bit 2: unlock NW bit */ setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04); @@ -268,7 +268,7 @@ static void init_cyrix(struct cpuinfo_x86 *c) * VSA1 we work around however. */ - printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n"); + pr_info("Working around Cyrix MediaGX virtual DMA bugs.\n"); isa_dma_bridge_buggy = 2; /* We do this before the PCI layer is running. However we @@ -426,7 +426,7 @@ static void cyrix_identify(struct cpuinfo_x86 *c) if (dir0 == 5 || dir0 == 3) { unsigned char ccr3; unsigned long flags; - printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); + pr_info("Enabling CPUID on Cyrix processor.\n"); local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); /* enable MAPEN */ diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index d820d8eae96b..73d391ae452f 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -56,7 +56,7 @@ detect_hypervisor_vendor(void) } if (max_pri) - printk(KERN_INFO "Hypervisor detected: %s\n", x86_hyper->name); + pr_info("Hypervisor detected: %s\n", x86_hyper->name); } void init_hypervisor(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 565648bc1a0a..38766c2b5b00 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -61,7 +61,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) */ if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 && c->microcode < 0x20e) { - printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n"); + pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n"); clear_cpu_cap(c, X86_FEATURE_PSE); } @@ -140,7 +140,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) { - printk(KERN_INFO "Disabled fast string operations\n"); + pr_info("Disabled fast string operations\n"); setup_clear_cpu_cap(X86_FEATURE_REP_GOOD); setup_clear_cpu_cap(X86_FEATURE_ERMS); } @@ -160,6 +160,19 @@ static void early_init_intel(struct cpuinfo_x86 *c) pr_info("Disabling PGE capability bit\n"); setup_clear_cpu_cap(X86_FEATURE_PGE); } + + if (c->cpuid_level >= 0x00000001) { + u32 eax, ebx, ecx, edx; + + cpuid(0x00000001, &eax, &ebx, &ecx, &edx); + /* + * If HTT (EDX[28]) is set EBX[16:23] contain the number of + * apicids which are reserved per package. Store the resulting + * shift value for the package management code. + */ + if (edx & (1U << 28)) + c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff); + } } #ifdef CONFIG_X86_32 @@ -176,7 +189,7 @@ int ppro_with_ram_bug(void) boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 1 && boot_cpu_data.x86_mask < 8) { - printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n"); + pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n"); return 1; } return 0; @@ -225,7 +238,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) set_cpu_bug(c, X86_BUG_F00F); if (!f00f_workaround_enabled) { - printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); + pr_notice("Intel Pentium with F0 0F bug - workaround enabled.\n"); f00f_workaround_enabled = 1; } } @@ -244,7 +257,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * Forcefully enable PAE if kernel parameter "forcepae" is present. */ if (forcepae) { - printk(KERN_WARNING "PAE forced!\n"); + pr_warn("PAE forced!\n"); set_cpu_cap(c, X86_FEATURE_PAE); add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE); } diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 0b6c52388cf4..6ed779efff26 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -444,7 +444,7 @@ static ssize_t store_cache_disable(struct cacheinfo *this_leaf, err = amd_set_l3_disable_slot(nb, cpu, slot, val); if (err) { if (err == -EEXIST) - pr_warning("L3 slot %d in use/index already disabled!\n", + pr_warn("L3 slot %d in use/index already disabled!\n", slot); return err; } diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 4cfba4371a71..517619ea6498 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -115,7 +115,7 @@ static int raise_local(void) int cpu = m->extcpu; if (m->inject_flags & MCJ_EXCEPTION) { - printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu); + pr_info("Triggering MCE exception on CPU %d\n", cpu); switch (context) { case MCJ_CTX_IRQ: /* @@ -128,15 +128,15 @@ static int raise_local(void) raise_exception(m, NULL); break; default: - printk(KERN_INFO "Invalid MCE context\n"); + pr_info("Invalid MCE context\n"); ret = -EINVAL; } - printk(KERN_INFO "MCE exception done on CPU %d\n", cpu); + pr_info("MCE exception done on CPU %d\n", cpu); } else if (m->status) { - printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu); + pr_info("Starting machine check poll CPU %d\n", cpu); raise_poll(m); mce_notify_irq(); - printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu); + pr_info("Machine check poll done on CPU %d\n", cpu); } else m->finished = 0; @@ -183,8 +183,7 @@ static void raise_mce(struct mce *m) start = jiffies; while (!cpumask_empty(mce_inject_cpumask)) { if (!time_before(jiffies, start + 2*HZ)) { - printk(KERN_ERR - "Timeout waiting for mce inject %lx\n", + pr_err("Timeout waiting for mce inject %lx\n", *cpumask_bits(mce_inject_cpumask)); break; } @@ -241,7 +240,7 @@ static int inject_init(void) { if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL)) return -ENOMEM; - printk(KERN_INFO "Machine check injector initialized\n"); + pr_info("Machine check injector initialized\n"); register_mce_write_callback(mce_write); register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify"); diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index 12402e10aeff..2a0717bf8033 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c @@ -26,14 +26,12 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code) rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); - printk(KERN_EMERG - "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", - smp_processor_id(), loaddr, lotype); + pr_emerg("CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", + smp_processor_id(), loaddr, lotype); if (lotype & (1<<5)) { - printk(KERN_EMERG - "CPU#%d: Possible thermal failure (CPU on fire ?).\n", - smp_processor_id()); + pr_emerg("CPU#%d: Possible thermal failure (CPU on fire ?).\n", + smp_processor_id()); } add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); @@ -61,12 +59,10 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c) /* Read registers before enabling: */ rdmsr(MSR_IA32_P5_MC_ADDR, l, h); rdmsr(MSR_IA32_P5_MC_TYPE, l, h); - printk(KERN_INFO - "Intel old style machine check architecture supported.\n"); + pr_info("Intel old style machine check architecture supported.\n"); /* Enable MCE: */ cr4_set_bits(X86_CR4_MCE); - printk(KERN_INFO - "Intel old style machine check reporting enabled on CPU#%d.\n", - smp_processor_id()); + pr_info("Intel old style machine check reporting enabled on CPU#%d.\n", + smp_processor_id()); } diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 2c5aaf8c2e2f..0b445c2ff735 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -190,7 +190,7 @@ static int therm_throt_process(bool new_event, int event, int level) /* if we just entered the thermal event */ if (new_event) { if (event == THERMAL_THROTTLING_EVENT) - printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n", + pr_crit("CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n", this_cpu, level == CORE_LEVEL ? "Core" : "Package", state->count); @@ -198,8 +198,7 @@ static int therm_throt_process(bool new_event, int event, int level) } if (old_event) { if (event == THERMAL_THROTTLING_EVENT) - printk(KERN_INFO "CPU%d: %s temperature/speed normal\n", - this_cpu, + pr_info("CPU%d: %s temperature/speed normal\n", this_cpu, level == CORE_LEVEL ? "Core" : "Package"); return 1; } @@ -417,8 +416,8 @@ static void intel_thermal_interrupt(void) static void unexpected_thermal_interrupt(void) { - printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n", - smp_processor_id()); + pr_err("CPU%d: Unexpected LVT thermal interrupt!\n", + smp_processor_id()); } static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; @@ -499,7 +498,7 @@ void intel_init_thermal(struct cpuinfo_x86 *c) if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { if (system_state == SYSTEM_BOOTING) - printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu); + pr_debug("CPU%d: Thermal monitoring handled by SMI\n", cpu); return; } @@ -557,8 +556,8 @@ void intel_init_thermal(struct cpuinfo_x86 *c) l = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); - printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n", - tm2 ? "TM2" : "TM1"); + pr_info_once("CPU0: Thermal monitoring enabled (%s)\n", + tm2 ? "TM2" : "TM1"); /* enable thermal throttle processing */ atomic_set(&therm_throt_en, 1); diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c index 7245980186ee..fcf9ae9384f4 100644 --- a/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/arch/x86/kernel/cpu/mcheck/threshold.c @@ -12,8 +12,8 @@ static void default_threshold_interrupt(void) { - printk(KERN_ERR "Unexpected threshold interrupt at vector %x\n", - THRESHOLD_APIC_VECTOR); + pr_err("Unexpected threshold interrupt at vector %x\n", + THRESHOLD_APIC_VECTOR); } void (*mce_threshold_vector)(void) = default_threshold_interrupt; diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 01dd8702880b..c6a722e1d011 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c @@ -17,7 +17,7 @@ static void winchip_machine_check(struct pt_regs *regs, long error_code) { ist_enter(regs); - printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); + pr_emerg("CPU0: Machine Check Exception.\n"); add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); ist_exit(regs); @@ -39,6 +39,5 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c) cr4_set_bits(X86_CR4_MCE); - printk(KERN_INFO - "Winchip machine check reporting enabled on CPU#0.\n"); + pr_info("Winchip machine check reporting enabled on CPU#0.\n"); } diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 2233f8a76615..75d3aab5f7b2 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -953,7 +953,7 @@ struct microcode_ops * __init init_amd_microcode(void) struct cpuinfo_x86 *c = &boot_cpu_data; if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { - pr_warning("AMD CPU family 0x%x not supported\n", c->x86); + pr_warn("AMD CPU family 0x%x not supported\n", c->x86); return NULL; } diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 20e242ea1bc4..4e7c6933691c 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -161,8 +161,8 @@ static void __init ms_hyperv_init_platform(void) ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES); ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); - printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n", - ms_hyperv.features, ms_hyperv.hints); + pr_info("HyperV: features 0x%x, hints 0x%x\n", + ms_hyperv.features, ms_hyperv.hints); #ifdef CONFIG_X86_LOCAL_APIC if (ms_hyperv.features & HV_X64_MSR_APIC_FREQUENCY_AVAILABLE) { @@ -174,8 +174,8 @@ static void __init ms_hyperv_init_platform(void) rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency); hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ); lapic_timer_frequency = hv_lapic_frequency; - printk(KERN_INFO "HyperV: LAPIC Timer Frequency: %#x\n", - lapic_timer_frequency); + pr_info("HyperV: LAPIC Timer Frequency: %#x\n", + lapic_timer_frequency); } #endif diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c index 316fe3e60a97..3d689937fc1b 100644 --- a/arch/x86/kernel/cpu/mtrr/centaur.c +++ b/arch/x86/kernel/cpu/mtrr/centaur.c @@ -103,7 +103,7 @@ centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int t */ if (type != MTRR_TYPE_WRCOMB && (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) { - pr_warning("mtrr: only write-combining%s supported\n", + pr_warn("mtrr: only write-combining%s supported\n", centaur_mcr_type ? " and uncacheable are" : " is"); return -EINVAL; } diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 0d98503c2245..31e951ce6dff 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -57,9 +57,9 @@ static int __initdata nr_range; static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; static int __initdata debug_print; -#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0) +#define Dprintk(x...) do { if (debug_print) pr_debug(x); } while (0) -#define BIOS_BUG_MSG KERN_WARNING \ +#define BIOS_BUG_MSG \ "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" static int __init @@ -81,9 +81,9 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range, base, base + size); } if (debug_print) { - printk(KERN_DEBUG "After WB checking\n"); + pr_debug("After WB checking\n"); for (i = 0; i < nr_range; i++) - printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", + pr_debug("MTRR MAP PFN: %016llx - %016llx\n", range[i].start, range[i].end); } @@ -101,7 +101,7 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range, (mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) && (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) { /* Var MTRR contains UC entry below 1M? Skip it: */ - printk(BIOS_BUG_MSG, i); + pr_warn(BIOS_BUG_MSG, i); if (base + size <= (1<<(20-PAGE_SHIFT))) continue; size -= (1<<(20-PAGE_SHIFT)) - base; @@ -114,11 +114,11 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range, extra_remove_base + extra_remove_size); if (debug_print) { - printk(KERN_DEBUG "After UC checking\n"); + pr_debug("After UC checking\n"); for (i = 0; i < RANGE_NUM; i++) { if (!range[i].end) continue; - printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", + pr_debug("MTRR MAP PFN: %016llx - %016llx\n", range[i].start, range[i].end); } } @@ -126,9 +126,9 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range, /* sort the ranges */ nr_range = clean_sort_range(range, RANGE_NUM); if (debug_print) { - printk(KERN_DEBUG "After sorting\n"); + pr_debug("After sorting\n"); for (i = 0; i < nr_range; i++) - printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", + pr_debug("MTRR MAP PFN: %016llx - %016llx\n", range[i].start, range[i].end); } @@ -544,7 +544,7 @@ static void __init print_out_mtrr_range_state(void) start_base = to_size_factor(start_base, &start_factor), type = range_state[i].type; - printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", + pr_debug("reg %d, base: %ld%cB, range: %ld%cB, type %s\n", i, start_base, start_factor, size_base, size_factor, (type == MTRR_TYPE_UNCACHABLE) ? "UC" : @@ -713,7 +713,7 @@ int __init mtrr_cleanup(unsigned address_bits) return 0; /* Print original var MTRRs at first, for debugging: */ - printk(KERN_DEBUG "original variable MTRRs\n"); + pr_debug("original variable MTRRs\n"); print_out_mtrr_range_state(); memset(range, 0, sizeof(range)); @@ -733,7 +733,7 @@ int __init mtrr_cleanup(unsigned address_bits) x_remove_base, x_remove_size); range_sums = sum_ranges(range, nr_range); - printk(KERN_INFO "total RAM covered: %ldM\n", + pr_info("total RAM covered: %ldM\n", range_sums >> (20 - PAGE_SHIFT)); if (mtrr_chunk_size && mtrr_gran_size) { @@ -745,12 +745,11 @@ int __init mtrr_cleanup(unsigned address_bits) if (!result[i].bad) { set_var_mtrr_all(address_bits); - printk(KERN_DEBUG "New variable MTRRs\n"); + pr_debug("New variable MTRRs\n"); print_out_mtrr_range_state(); return 1; } - printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " - "will find optimal one\n"); + pr_info("invalid mtrr_gran_size or mtrr_chunk_size, will find optimal one\n"); } i = 0; @@ -768,7 +767,7 @@ int __init mtrr_cleanup(unsigned address_bits) x_remove_base, x_remove_size, i); if (debug_print) { mtrr_print_out_one_result(i); - printk(KERN_INFO "\n"); + pr_info("\n"); } i++; @@ -779,7 +778,7 @@ int __init mtrr_cleanup(unsigned address_bits) index_good = mtrr_search_optimal_index(); if (index_good != -1) { - printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); + pr_info("Found optimal setting for mtrr clean up\n"); i = index_good; mtrr_print_out_one_result(i); @@ -790,7 +789,7 @@ int __init mtrr_cleanup(unsigned address_bits) gran_size <<= 10; x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); set_var_mtrr_all(address_bits); - printk(KERN_DEBUG "New variable MTRRs\n"); + pr_debug("New variable MTRRs\n"); print_out_mtrr_range_state(); return 1; } else { @@ -799,8 +798,8 @@ int __init mtrr_cleanup(unsigned address_bits) mtrr_print_out_one_result(i); } - printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n"); - printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n"); + pr_info("mtrr_cleanup: can not find optimal value\n"); + pr_info("please specify mtrr_gran_size/mtrr_chunk_size\n"); return 0; } @@ -918,7 +917,7 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) /* kvm/qemu doesn't have mtrr set right, don't trim them all: */ if (!highest_pfn) { - printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n"); + pr_info("CPU MTRRs all blank - virtualized system.\n"); return 0; } @@ -973,7 +972,8 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) end_pfn); if (total_trim_size) { - pr_warning("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n", total_trim_size >> 20); + pr_warn("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n", + total_trim_size >> 20); if (!changed_by_mtrr_cleanup) WARN_ON(1); diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index c870af161008..fcbcb2f678ca 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -55,7 +55,7 @@ static inline void k8_check_syscfg_dram_mod_en(void) rdmsr(MSR_K8_SYSCFG, lo, hi); if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) { - printk(KERN_ERR FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]" + pr_err(FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]" " not cleared by BIOS, clearing this bit\n", smp_processor_id()); lo &= ~K8_MTRRFIXRANGE_DRAM_MODIFY; @@ -501,14 +501,14 @@ void __init mtrr_state_warn(void) if (!mask) return; if (mask & MTRR_CHANGE_MASK_FIXED) - pr_warning("mtrr: your CPUs had inconsistent fixed MTRR settings\n"); + pr_warn("mtrr: your CPUs had inconsistent fixed MTRR settings\n"); if (mask & MTRR_CHANGE_MASK_VARIABLE) - pr_warning("mtrr: your CPUs had inconsistent variable MTRR settings\n"); + pr_warn("mtrr: your CPUs had inconsistent variable MTRR settings\n"); if (mask & MTRR_CHANGE_MASK_DEFTYPE) - pr_warning("mtrr: your CPUs had inconsistent MTRRdefType settings\n"); + pr_warn("mtrr: your CPUs had inconsistent MTRRdefType settings\n"); - printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n"); - printk(KERN_INFO "mtrr: corrected configuration.\n"); + pr_info("mtrr: probably your BIOS does not setup all CPUs.\n"); + pr_info("mtrr: corrected configuration.\n"); } /* @@ -519,8 +519,7 @@ void __init mtrr_state_warn(void) void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b) { if (wrmsr_safe(msr, a, b) < 0) { - printk(KERN_ERR - "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n", + pr_err("MTRR: CPU %u: Writing MSR %x to %x:%x failed\n", smp_processor_id(), msr, a, b); } } @@ -607,7 +606,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, tmp |= ~((1ULL<<(hi - 1)) - 1); if (tmp != mask) { - printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n"); + pr_warn("mtrr: your BIOS has configured an incorrect mask, fixing it.\n"); add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); mask = tmp; } @@ -858,13 +857,13 @@ int generic_validate_add_page(unsigned long base, unsigned long size, boot_cpu_data.x86_model == 1 && boot_cpu_data.x86_mask <= 7) { if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { - pr_warning("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); + pr_warn("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); return -EINVAL; } if (!(base + size < 0x70000 || base > 0x7003F) && (type == MTRR_TYPE_WRCOMB || type == MTRR_TYPE_WRBACK)) { - pr_warning("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n"); + pr_warn("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n"); return -EINVAL; } } @@ -878,7 +877,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size, lbase = lbase >> 1, last = last >> 1) ; if (lbase != last) { - pr_warning("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size); + pr_warn("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size); return -EINVAL; } return 0; diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 5c3d149ee91c..ba80d68f683e 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -300,24 +300,24 @@ int mtrr_add_page(unsigned long base, unsigned long size, return error; if (type >= MTRR_NUM_TYPES) { - pr_warning("mtrr: type: %u invalid\n", type); + pr_warn("mtrr: type: %u invalid\n", type); return -EINVAL; } /* If the type is WC, check that this processor supports it */ if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) { - pr_warning("mtrr: your processor doesn't support write-combining\n"); + pr_warn("mtrr: your processor doesn't support write-combining\n"); return -ENOSYS; } if (!size) { - pr_warning("mtrr: zero sized request\n"); + pr_warn("mtrr: zero sized request\n"); return -EINVAL; } if ((base | (base + size - 1)) >> (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) { - pr_warning("mtrr: base or size exceeds the MTRR width\n"); + pr_warn("mtrr: base or size exceeds the MTRR width\n"); return -EINVAL; } @@ -348,7 +348,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, } else if (types_compatible(type, ltype)) continue; } - pr_warning("mtrr: 0x%lx000,0x%lx000 overlaps existing" + pr_warn("mtrr: 0x%lx000,0x%lx000 overlaps existing" " 0x%lx000,0x%lx000\n", base, size, lbase, lsize); goto out; @@ -357,7 +357,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, if (ltype != type) { if (types_compatible(type, ltype)) continue; - pr_warning("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n", + pr_warn("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n", base, size, mtrr_attrib_to_str(ltype), mtrr_attrib_to_str(type)); goto out; @@ -395,7 +395,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, static int mtrr_check(unsigned long base, unsigned long size) { if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { - pr_warning("mtrr: size and base must be multiples of 4 kiB\n"); + pr_warn("mtrr: size and base must be multiples of 4 kiB\n"); pr_debug("mtrr: size: 0x%lx base: 0x%lx\n", size, base); dump_stack(); return -1; @@ -493,16 +493,16 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) } } if (reg >= max) { - pr_warning("mtrr: register: %d too big\n", reg); + pr_warn("mtrr: register: %d too big\n", reg); goto out; } mtrr_if->get(reg, &lbase, &lsize, <ype); if (lsize < 1) { - pr_warning("mtrr: MTRR %d not used\n", reg); + pr_warn("mtrr: MTRR %d not used\n", reg); goto out; } if (mtrr_usage_table[reg] < 1) { - pr_warning("mtrr: reg: %d has count=0\n", reg); + pr_warn("mtrr: reg: %d has count=0\n", reg); goto out; } if (--mtrr_usage_table[reg] < 1) diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c index 819d94982e07..f6f50c4ceaec 100644 --- a/arch/x86/kernel/cpu/rdrand.c +++ b/arch/x86/kernel/cpu/rdrand.c @@ -51,7 +51,7 @@ void x86_init_rdrand(struct cpuinfo_x86 *c) for (i = 0; i < SANITY_CHECK_LOOPS; i++) { if (!rdrand_long(&tmp)) { clear_cpu_cap(c, X86_FEATURE_RDRAND); - printk_once(KERN_WARNING "rdrand: disabled\n"); + pr_warn_once("rdrand: disabled\n"); return; } } diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 4c60eaf0571c..cd531355e838 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -87,10 +87,10 @@ void detect_extended_topology(struct cpuinfo_x86 *c) c->x86_max_cores = (core_level_siblings / smp_num_siblings); if (!printed) { - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + pr_info("CPU: Physical Processor ID: %d\n", c->phys_proc_id); if (c->x86_max_cores > 1) - printk(KERN_INFO "CPU: Processor Core ID: %d\n", + pr_info("CPU: Processor Core ID: %d\n", c->cpu_core_id); printed = 1; } diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index 252da7aceca6..e3b4d1841175 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -33,7 +33,7 @@ static void init_transmeta(struct cpuinfo_x86 *c) if (max >= 0x80860001) { cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags); if (cpu_rev != 0x02000000) { - printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n", + pr_info("CPU: Processor revision %u.%u.%u.%u, %u MHz\n", (cpu_rev >> 24) & 0xff, (cpu_rev >> 16) & 0xff, (cpu_rev >> 8) & 0xff, @@ -44,10 +44,10 @@ static void init_transmeta(struct cpuinfo_x86 *c) if (max >= 0x80860002) { cpuid(0x80860002, &new_cpu_rev, &cms_rev1, &cms_rev2, &dummy); if (cpu_rev == 0x02000000) { - printk(KERN_INFO "CPU: Processor revision %08X, %u MHz\n", + pr_info("CPU: Processor revision %08X, %u MHz\n", new_cpu_rev, cpu_freq); } - printk(KERN_INFO "CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n", + pr_info("CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n", (cms_rev1 >> 24) & 0xff, (cms_rev1 >> 16) & 0xff, (cms_rev1 >> 8) & 0xff, @@ -76,7 +76,7 @@ static void init_transmeta(struct cpuinfo_x86 *c) (void *)&cpu_info[56], (void *)&cpu_info[60]); cpu_info[64] = '\0'; - printk(KERN_INFO "CPU: %s\n", cpu_info); + pr_info("CPU: %s\n", cpu_info); } /* Unhide possibly hidden capability flags */ diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 628a059a9a06..364e58346897 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -62,7 +62,7 @@ static unsigned long vmware_get_tsc_khz(void) tsc_hz = eax | (((uint64_t)ebx) << 32); do_div(tsc_hz, 1000); BUG_ON(tsc_hz >> 32); - printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n", + pr_info("TSC freq read from hypervisor : %lu.%03lu MHz\n", (unsigned long) tsc_hz / 1000, (unsigned long) tsc_hz % 1000); @@ -84,8 +84,7 @@ static void __init vmware_platform_setup(void) if (ebx != UINT_MAX) x86_platform.calibrate_tsc = vmware_get_tsc_khz; else - printk(KERN_WARNING - "Failed to get TSC freq from the hypervisor\n"); + pr_warn("Failed to get TSC freq from the hypervisor\n"); } /* diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 30ca7607cbbb..97340f2c437c 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -408,7 +408,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) processor.cpuflag = CPU_ENABLED; processor.cpufeature = (boot_cpu_data.x86 << 8) | (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; - processor.featureflag = boot_cpu_data.x86_capability[0]; + processor.featureflag = boot_cpu_data.x86_capability[CPUID_1_EDX]; processor.reserved[0] = 0; processor.reserved[1] = 0; for (i = 0; i < 2; i++) { diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 8a2cdd736fa4..04b132a767f1 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -30,6 +30,7 @@ #include <asm/nmi.h> #include <asm/x86_init.h> #include <asm/reboot.h> +#include <asm/cache.h> #define CREATE_TRACE_POINTS #include <trace/events/nmi.h> @@ -69,7 +70,7 @@ struct nmi_stats { static DEFINE_PER_CPU(struct nmi_stats, nmi_stats); -static int ignore_nmis; +static int ignore_nmis __read_mostly; int unknown_nmi_panic; /* diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 24d57f77b3c1..3bf1e0b5f827 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -97,6 +97,14 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); +/* Logical package management. We might want to allocate that dynamically */ +static int *physical_to_logical_pkg __read_mostly; +static unsigned long *physical_package_map __read_mostly;; +static unsigned long *logical_package_map __read_mostly; +static unsigned int max_physical_pkg_id __read_mostly; +unsigned int __max_logical_packages __read_mostly; +EXPORT_SYMBOL(__max_logical_packages); + static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) { unsigned long flags; @@ -251,6 +259,97 @@ static void notrace start_secondary(void *unused) cpu_startup_entry(CPUHP_ONLINE); } +int topology_update_package_map(unsigned int apicid, unsigned int cpu) +{ + unsigned int new, pkg = apicid >> boot_cpu_data.x86_coreid_bits; + + /* Called from early boot ? */ + if (!physical_package_map) + return 0; + + if (pkg >= max_physical_pkg_id) + return -EINVAL; + + /* Set the logical package id */ + if (test_and_set_bit(pkg, physical_package_map)) + goto found; + + if (pkg < __max_logical_packages) { + set_bit(pkg, logical_package_map); + physical_to_logical_pkg[pkg] = pkg; + goto found; + } + new = find_first_zero_bit(logical_package_map, __max_logical_packages); + if (new >= __max_logical_packages) { + physical_to_logical_pkg[pkg] = -1; + pr_warn("APIC(%x) Package %u exceeds logical package map\n", + apicid, pkg); + return -ENOSPC; + } + set_bit(new, logical_package_map); + pr_info("APIC(%x) Converting physical %u to logical package %u\n", + apicid, pkg, new); + physical_to_logical_pkg[pkg] = new; + +found: + cpu_data(cpu).logical_proc_id = physical_to_logical_pkg[pkg]; + return 0; +} + +/** + * topology_phys_to_logical_pkg - Map a physical package id to a logical + * + * Returns logical package id or -1 if not found + */ +int topology_phys_to_logical_pkg(unsigned int phys_pkg) +{ + if (phys_pkg >= max_physical_pkg_id) + return -1; + return physical_to_logical_pkg[phys_pkg]; +} +EXPORT_SYMBOL(topology_phys_to_logical_pkg); + +static void __init smp_init_package_map(void) +{ + unsigned int ncpus, cpu; + size_t size; + + /* + * Today neither Intel nor AMD support heterogenous systems. That + * might change in the future.... + */ + ncpus = boot_cpu_data.x86_max_cores * smp_num_siblings; + __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus); + + /* + * Possibly larger than what we need as the number of apic ids per + * package can be smaller than the actual used apic ids. + */ + max_physical_pkg_id = DIV_ROUND_UP(MAX_LOCAL_APIC, ncpus); + size = max_physical_pkg_id * sizeof(unsigned int); + physical_to_logical_pkg = kmalloc(size, GFP_KERNEL); + memset(physical_to_logical_pkg, 0xff, size); + size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long); + physical_package_map = kzalloc(size, GFP_KERNEL); + size = BITS_TO_LONGS(__max_logical_packages) * sizeof(unsigned long); + logical_package_map = kzalloc(size, GFP_KERNEL); + + pr_info("Max logical packages: %u\n", __max_logical_packages); + + for_each_present_cpu(cpu) { + unsigned int apicid = apic->cpu_present_to_apicid(cpu); + + if (apicid == BAD_APICID || !apic->apic_id_valid(apicid)) + continue; + if (!topology_update_package_map(apicid, cpu)) + continue; + pr_warn("CPU %u APICId %x disabled\n", cpu, apicid); + per_cpu(x86_bios_cpu_apicid, cpu) = BAD_APICID; + set_cpu_possible(cpu, false); + set_cpu_present(cpu, false); + } +} + void __init smp_store_boot_cpu_info(void) { int id = 0; /* CPU 0 */ @@ -258,6 +357,7 @@ void __init smp_store_boot_cpu_info(void) *c = boot_cpu_data; c->cpu_index = id; + smp_init_package_map(); } /* diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index f56cc418c87d..fd57d3ae7e16 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1529,7 +1529,7 @@ __init void lguest_init(void) */ cpu_detect(&new_cpu_data); /* head.S usually sets up the first capability word, so do it here. */ - new_cpu_data.x86_capability[0] = cpuid_edx(1); + new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1); /* Math is always hard! */ set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index d09e4c9d7cc5..2c261082eadf 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1654,7 +1654,7 @@ asmlinkage __visible void __init xen_start_kernel(void) cpu_detect(&new_cpu_data); set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU); new_cpu_data.wp_works_ok = 1; - new_cpu_data.x86_capability[0] = cpuid_edx(1); + new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1); #endif if (xen_start_info->mod_start) { diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c index 724a08740a04..9466354d3e49 100644 --- a/arch/x86/xen/pmu.c +++ b/arch/x86/xen/pmu.c @@ -11,7 +11,7 @@ #include "pmu.h" /* x86_pmu.handle_irq definition */ -#include "../kernel/cpu/perf_event.h" +#include "../events/perf_event.h" #define XENPMU_IRQ_PROCESSING 1 struct xenpmu { |