diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/events/amd/core.c | 21 | ||||
-rw-r--r-- | arch/x86/events/amd/ibs.c | 52 | ||||
-rw-r--r-- | arch/x86/events/perf_event.h | 11 | ||||
-rw-r--r-- | arch/x86/include/asm/msr-index.h | 8 | ||||
-rw-r--r-- | arch/x86/include/asm/pmem.h | 9 | ||||
-rw-r--r-- | arch/x86/include/asm/processor.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/smp.h | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/thread_info.h | 6 | ||||
-rw-r--r-- | arch/x86/include/asm/tlbflush.h | 6 | ||||
-rw-r--r-- | arch/x86/kernel/amd_nb.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/amd.c | 12 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/therm_throt.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/powerflags.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/smpboot.c | 2 | ||||
-rw-r--r-- | arch/x86/mm/tlb.c | 14 | ||||
-rw-r--r-- | arch/x86/ras/mce_amd_inj.c | 3 |
16 files changed, 108 insertions, 50 deletions
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 049ada8d4e9c..86a9bec18dab 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -369,7 +369,7 @@ static int amd_pmu_cpu_prepare(int cpu) WARN_ON_ONCE(cpuc->amd_nb); - if (boot_cpu_data.x86_max_cores < 2) + if (!x86_pmu.amd_nb_constraints) return NOTIFY_OK; cpuc->amd_nb = amd_alloc_nb(cpu); @@ -388,7 +388,7 @@ static void amd_pmu_cpu_starting(int cpu) cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY; - if (boot_cpu_data.x86_max_cores < 2) + if (!x86_pmu.amd_nb_constraints) return; nb_id = amd_get_nb_id(cpu); @@ -414,7 +414,7 @@ static void amd_pmu_cpu_dead(int cpu) { struct cpu_hw_events *cpuhw; - if (boot_cpu_data.x86_max_cores < 2) + if (!x86_pmu.amd_nb_constraints) return; cpuhw = &per_cpu(cpu_hw_events, cpu); @@ -648,6 +648,8 @@ static __initconst const struct x86_pmu amd_pmu = { .cpu_prepare = amd_pmu_cpu_prepare, .cpu_starting = amd_pmu_cpu_starting, .cpu_dead = amd_pmu_cpu_dead, + + .amd_nb_constraints = 1, }; static int __init amd_core_pmu_init(void) @@ -674,6 +676,11 @@ static int __init amd_core_pmu_init(void) x86_pmu.eventsel = MSR_F15H_PERF_CTL; x86_pmu.perfctr = MSR_F15H_PERF_CTR; x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE; + /* + * AMD Core perfctr has separate MSRs for the NB events, see + * the amd/uncore.c driver. + */ + x86_pmu.amd_nb_constraints = 0; pr_cont("core perfctr, "); return 0; @@ -693,6 +700,14 @@ __init int amd_pmu_init(void) if (ret) return ret; + if (num_possible_cpus() == 1) { + /* + * No point in allocating data structures to serialize + * against other CPUs, when there is only the one CPU. + */ + x86_pmu.amd_nb_constraints = 0; + } + /* Events are common for all AMDs */ memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, sizeof(hw_cache_event_ids)); diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 3ea25c3917c0..feb90f6730e8 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -28,10 +28,46 @@ static u32 ibs_caps; #define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT) #define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT + +/* + * IBS states: + * + * ENABLED; tracks the pmu::add(), pmu::del() state, when set the counter is taken + * and any further add()s must fail. + * + * STARTED/STOPPING/STOPPED; deal with pmu::start(), pmu::stop() state but are + * complicated by the fact that the IBS hardware can send late NMIs (ie. after + * we've cleared the EN bit). + * + * In order to consume these late NMIs we have the STOPPED state, any NMI that + * happens after we've cleared the EN state will clear this bit and report the + * NMI handled (this is fundamentally racy in the face or multiple NMI sources, + * someone else can consume our BIT and our NMI will go unhandled). + * + * And since we cannot set/clear this separate bit together with the EN bit, + * there are races; if we cleared STARTED early, an NMI could land in + * between clearing STARTED and clearing the EN bit (in fact multiple NMIs + * could happen if the period is small enough), and consume our STOPPED bit + * and trigger streams of unhandled NMIs. + * + * If, however, we clear STARTED late, an NMI can hit between clearing the + * EN bit and clearing STARTED, still see STARTED set and process the event. + * If this event will have the VALID bit clear, we bail properly, but this + * is not a given. With VALID set we can end up calling pmu::stop() again + * (the throttle logic) and trigger the WARNs in there. + * + * So what we do is set STOPPING before clearing EN to avoid the pmu::stop() + * nesting, and clear STARTED late, so that we have a well defined state over + * the clearing of the EN bit. + * + * XXX: we could probably be using !atomic bitops for all this. + */ + enum ibs_states { IBS_ENABLED = 0, IBS_STARTED = 1, IBS_STOPPING = 2, + IBS_STOPPED = 3, IBS_MAX_STATES, }; @@ -377,11 +413,10 @@ static void perf_ibs_start(struct perf_event *event, int flags) perf_ibs_set_period(perf_ibs, hwc, &period); /* - * Set STARTED before enabling the hardware, such that - * a subsequent NMI must observe it. Then clear STOPPING - * such that we don't consume NMIs by accident. + * Set STARTED before enabling the hardware, such that a subsequent NMI + * must observe it. */ - set_bit(IBS_STARTED, pcpu->state); + set_bit(IBS_STARTED, pcpu->state); clear_bit(IBS_STOPPING, pcpu->state); perf_ibs_enable_event(perf_ibs, hwc, period >> 4); @@ -396,6 +431,9 @@ static void perf_ibs_stop(struct perf_event *event, int flags) u64 config; int stopping; + if (test_and_set_bit(IBS_STOPPING, pcpu->state)) + return; + stopping = test_bit(IBS_STARTED, pcpu->state); if (!stopping && (hwc->state & PERF_HES_UPTODATE)) @@ -405,12 +443,12 @@ static void perf_ibs_stop(struct perf_event *event, int flags) if (stopping) { /* - * Set STOPPING before disabling the hardware, such that it + * Set STOPPED before disabling the hardware, such that it * must be visible to NMIs the moment we clear the EN bit, * at which point we can generate an !VALID sample which * we need to consume. */ - set_bit(IBS_STOPPING, pcpu->state); + set_bit(IBS_STOPPED, pcpu->state); perf_ibs_disable_event(perf_ibs, hwc, config); /* * Clear STARTED after disabling the hardware; if it were @@ -556,7 +594,7 @@ fail: * with samples that even have the valid bit cleared. * Mark all this NMIs as handled. */ - if (test_and_clear_bit(IBS_STOPPING, pcpu->state)) + if (test_and_clear_bit(IBS_STOPPED, pcpu->state)) return 1; return 0; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index ba6ef18528c9..ad4dc7ffffb5 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -608,6 +608,11 @@ struct x86_pmu { atomic_t lbr_exclusive[x86_lbr_exclusive_max]; /* + * AMD bits + */ + unsigned int amd_nb_constraints : 1; + + /* * Extra registers for events */ struct extra_reg *extra_regs; @@ -795,6 +800,9 @@ ssize_t intel_event_sysfs_show(char *page, u64 config); struct attribute **merge_attr(struct attribute **a, struct attribute **b); +ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, + char *page); + #ifdef CONFIG_CPU_SUP_AMD int amd_pmu_init(void); @@ -925,9 +933,6 @@ int p6_pmu_init(void); int knc_pmu_init(void); -ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, - char *page); - static inline int is_ht_workaround_enabled(void) { return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 2da46ac16e37..426e946ed0c0 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -190,6 +190,7 @@ #define MSR_PP1_ENERGY_STATUS 0x00000641 #define MSR_PP1_POLICY 0x00000642 +/* Config TDP MSRs */ #define MSR_CONFIG_TDP_NOMINAL 0x00000648 #define MSR_CONFIG_TDP_LEVEL_1 0x00000649 #define MSR_CONFIG_TDP_LEVEL_2 0x0000064A @@ -210,13 +211,6 @@ #define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0 #define MSR_RING_PERF_LIMIT_REASONS 0x000006B1 -/* Config TDP MSRs */ -#define MSR_CONFIG_TDP_NOMINAL 0x00000648 -#define MSR_CONFIG_TDP_LEVEL1 0x00000649 -#define MSR_CONFIG_TDP_LEVEL2 0x0000064A -#define MSR_CONFIG_TDP_CONTROL 0x0000064B -#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C - /* Hardware P state interface */ #define MSR_PPERF 0x0000064e #define MSR_PERF_LIMIT_REASONS 0x0000064f diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h index bf8b35d2035a..fbc5e92e1ecc 100644 --- a/arch/x86/include/asm/pmem.h +++ b/arch/x86/include/asm/pmem.h @@ -47,6 +47,15 @@ static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, BUG(); } +static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src, + size_t n) +{ + if (static_cpu_has(X86_FEATURE_MCE_RECOVERY)) + return memcpy_mcsafe(dst, (void __force *) src, n); + memcpy(dst, (void __force *) src, n); + return 0; +} + /** * arch_wmb_pmem - synchronize writes to persistent memory * diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 983738ac014c..9264476f3d57 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -132,8 +132,6 @@ struct cpuinfo_x86 { u16 logical_proc_id; /* Core id: */ u16 cpu_core_id; - /* Compute unit id */ - u8 compute_unit_id; /* Index into per_cpu list: */ u16 cpu_index; u32 microcode; diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 20a3de5cb3b0..66b057306f40 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -155,6 +155,7 @@ static inline int wbinvd_on_all_cpus(void) wbinvd(); return 0; } +#define smp_num_siblings 1 #endif /* CONFIG_SMP */ extern unsigned disabled_cpus; diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 82866697fcf1..ffae84df8a93 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -276,11 +276,9 @@ static inline bool is_ia32_task(void) */ #define force_iret() set_thread_flag(TIF_NOTIFY_RESUME) -#endif /* !__ASSEMBLY__ */ - -#ifndef __ASSEMBLY__ extern void arch_task_cache_init(void); extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); extern void arch_release_task_struct(struct task_struct *tsk); -#endif +#endif /* !__ASSEMBLY__ */ + #endif /* _ASM_X86_THREAD_INFO_H */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index c24b4224d439..1fde8d580a5b 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -319,12 +319,6 @@ static inline void reset_lazy_tlbstate(void) #endif /* SMP */ -/* Not inlined due to inc_irq_stat not being defined yet */ -#define flush_tlb_local() { \ - inc_irq_stat(irq_tlb_count); \ - local_flush_tlb(); \ -} - #ifndef CONFIG_PARAVIRT #define flush_tlb_others(mask, mm, start, end) \ native_flush_tlb_others(mask, mm, start, end) diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 29fa475ec518..a147e676fc7b 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -170,15 +170,13 @@ int amd_get_subcaches(int cpu) { struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link; unsigned int mask; - int cuid; if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) return 0; pci_read_config_dword(link, 0x1d4, &mask); - cuid = cpu_data(cpu).compute_unit_id; - return (mask >> (4 * cuid)) & 0xf; + return (mask >> (4 * cpu_data(cpu).cpu_core_id)) & 0xf; } int amd_set_subcaches(int cpu, unsigned long mask) @@ -204,7 +202,7 @@ int amd_set_subcaches(int cpu, unsigned long mask) pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000); } - cuid = cpu_data(cpu).compute_unit_id; + cuid = cpu_data(cpu).cpu_core_id; mask <<= 4 * cuid; mask |= (0xf ^ (1 << cuid)) << 26; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 6e47e3a916f1..7b76eb67a9b3 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -300,7 +300,6 @@ static int nearby_node(int apicid) #ifdef CONFIG_SMP static void amd_get_topology(struct cpuinfo_x86 *c) { - u32 cores_per_cu = 1; u8 node_id; int cpu = smp_processor_id(); @@ -313,8 +312,8 @@ static void amd_get_topology(struct cpuinfo_x86 *c) /* get compute unit information */ smp_num_siblings = ((ebx >> 8) & 3) + 1; - c->compute_unit_id = ebx & 0xff; - cores_per_cu += ((ebx >> 8) & 3); + c->x86_max_cores /= smp_num_siblings; + c->cpu_core_id = ebx & 0xff; } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { u64 value; @@ -325,19 +324,16 @@ static void amd_get_topology(struct cpuinfo_x86 *c) /* fixup multi-node processor information */ if (nodes_per_socket > 1) { - u32 cores_per_node; u32 cus_per_node; set_cpu_cap(c, X86_FEATURE_AMD_DCM); - cores_per_node = c->x86_max_cores / nodes_per_socket; - cus_per_node = cores_per_node / cores_per_cu; + cus_per_node = c->x86_max_cores / nodes_per_socket; /* store NodeID, use llc_shared_map to store sibling info */ per_cpu(cpu_llc_id, cpu) = node_id; /* core id has to be in the [0 .. cores_per_node - 1] range */ - c->cpu_core_id %= cores_per_node; - c->compute_unit_id %= cus_per_node; + c->cpu_core_id %= cus_per_node; } } #endif diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 0b445c2ff735..ac780cad3b86 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -384,6 +384,9 @@ static void intel_thermal_interrupt(void) { __u64 msr_val; + if (static_cpu_has(X86_FEATURE_HWP)) + wrmsrl_safe(MSR_HWP_STATUS, 0); + rdmsrl(MSR_IA32_THERM_STATUS, msr_val); /* Check for violation of core thermal thresholds*/ diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c index 31f0f335ed22..1dd8294fd730 100644 --- a/arch/x86/kernel/cpu/powerflags.c +++ b/arch/x86/kernel/cpu/powerflags.c @@ -18,4 +18,6 @@ const char *const x86_power_flags[32] = { "", /* tsc invariant mapped to constant_tsc */ "cpb", /* core performance boost */ "eff_freq_ro", /* Readonly aperf/mperf */ + "proc_feedback", /* processor feedback interface */ + "acc_power", /* accumulated power mechanism */ }; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index b2c99f811c3f..a2065d3b3b39 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -422,7 +422,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) if (c->phys_proc_id == o->phys_proc_id && per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2) && - c->compute_unit_id == o->compute_unit_id) + c->cpu_core_id == o->cpu_core_id) return topology_sane(c, o, "smt"); } else if (c->phys_proc_id == o->phys_proc_id && diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 8f4cc3dfac32..fe9b9f776361 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -104,10 +104,8 @@ static void flush_tlb_func(void *info) inc_irq_stat(irq_tlb_count); - if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) + if (f->flush_mm && f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) return; - if (!f->flush_end) - f->flush_end = f->flush_start + PAGE_SIZE; count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { @@ -135,12 +133,20 @@ void native_flush_tlb_others(const struct cpumask *cpumask, unsigned long end) { struct flush_tlb_info info; + + if (end == 0) + end = start + PAGE_SIZE; info.flush_mm = mm; info.flush_start = start; info.flush_end = end; count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); - trace_tlb_flush(TLB_REMOTE_SEND_IPI, end - start); + if (end == TLB_FLUSH_ALL) + trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL); + else + trace_tlb_flush(TLB_REMOTE_SEND_IPI, + (end - start) >> PAGE_SHIFT); + if (is_uv_system()) { unsigned int cpu; diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c index 55d38cfa46c2..9e02dcaef683 100644 --- a/arch/x86/ras/mce_amd_inj.c +++ b/arch/x86/ras/mce_amd_inj.c @@ -20,6 +20,7 @@ #include <linux/pci.h> #include <asm/mce.h> +#include <asm/smp.h> #include <asm/amd_nb.h> #include <asm/irq_vectors.h> @@ -206,7 +207,7 @@ static u32 get_nbc_for_node(int node_id) struct cpuinfo_x86 *c = &boot_cpu_data; u32 cores_per_node; - cores_per_node = c->x86_max_cores / amd_get_nodes_per_socket(); + cores_per_node = (c->x86_max_cores * smp_num_siblings) / amd_get_nodes_per_socket(); return cores_per_node * node_id; } |