-rw-r--r--  arch/x86/include/asm/xen/interface.h  | 123
-rw-r--r--  arch/x86/xen/Makefile                 |   2
-rw-r--r--  arch/x86/xen/apic.c                   |   3
-rw-r--r--  arch/x86/xen/enlighten.c              |  12
-rw-r--r--  arch/x86/xen/pmu.c                    | 170
-rw-r--r--  arch/x86/xen/pmu.h                    |  11
-rw-r--r--  arch/x86/xen/smp.c                    |  29
-rw-r--r--  arch/x86/xen/suspend.c                |  23
-rw-r--r--  include/xen/interface/xen.h           |   1
-rw-r--r--  include/xen/interface/xenpmu.h        |  33
10 files changed, 398 insertions(+), 9 deletions(-)
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index 3b88eeacdbda..62ca03ef5c65 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -250,6 +250,129 @@ struct vcpu_guest_context {
 #endif
 };
 DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context);
+
+/* AMD PMU registers and structures */
+struct xen_pmu_amd_ctxt {
+	/*
+	 * Offsets to counter and control MSRs (relative to xen_pmu_arch.c.amd).
+	 * For PV(H) guests these fields are RO.
+	 */
+	uint32_t counters;
+	uint32_t ctrls;
+
+	/* Counter MSRs */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+	uint64_t regs[];
+#elif defined(__GNUC__)
+	uint64_t regs[0];
+#endif
+};
+
+/* Intel PMU registers and structures */
+struct xen_pmu_cntr_pair {
+	uint64_t counter;
+	uint64_t control;
+};
+
+struct xen_pmu_intel_ctxt {
+	/*
+	 * Offsets to fixed and architectural counter MSRs (relative to
+	 * xen_pmu_arch.c.intel).
+	 * For PV(H) guests these fields are RO.
+	 */
+	uint32_t fixed_counters;
+	uint32_t arch_counters;
+
+	/* PMU registers */
+	uint64_t global_ctrl;
+	uint64_t global_ovf_ctrl;
+	uint64_t global_status;
+	uint64_t fixed_ctrl;
+	uint64_t ds_area;
+	uint64_t pebs_enable;
+	uint64_t debugctl;
+
+	/* Fixed and architectural counter MSRs */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+	uint64_t regs[];
+#elif defined(__GNUC__)
+	uint64_t regs[0];
+#endif
+};
+
+/* Sampled domain's registers */
+struct xen_pmu_regs {
+	uint64_t ip;
+	uint64_t sp;
+	uint64_t flags;
+	uint16_t cs;
+	uint16_t ss;
+	uint8_t cpl;
+	uint8_t pad[3];
+};
+
+/* PMU flags */
+#define PMU_CACHED	   (1<<0) /* PMU MSRs are cached in the context */
+#define PMU_SAMPLE_USER	   (1<<1) /* Sample is from user or kernel mode */
+#define PMU_SAMPLE_REAL	   (1<<2) /* Sample is from realmode */
+#define PMU_SAMPLE_PV	   (1<<3) /* Sample from a PV guest */
+
+/*
+ * Architecture-specific information describing state of the processor at
+ * the time of PMU interrupt.
+ * Fields of this structure marked as RW for guest should only be written by
+ * the guest when PMU_CACHED bit in pmu_flags is set (which is done by the
+ * hypervisor during PMU interrupt). Hypervisor will read updated data in
+ * XENPMU_flush hypercall and clear PMU_CACHED bit.
+ */
+struct xen_pmu_arch {
+	union {
+		/*
+		 * Processor's registers at the time of interrupt.
+		 * WO for hypervisor, RO for guests.
+		 */
+		struct xen_pmu_regs regs;
+		/*
+		 * Padding for adding new registers to xen_pmu_regs in
+		 * the future
+		 */
+#define XENPMU_REGS_PAD_SZ  64
+		uint8_t pad[XENPMU_REGS_PAD_SZ];
+	} r;
+
+	/* WO for hypervisor, RO for guest */
+	uint64_t pmu_flags;
+
+	/*
+	 * APIC LVTPC register.
+	 * RW for both hypervisor and guest.
+	 * Only APIC_LVT_MASKED bit is loaded by the hypervisor into hardware
+	 * during XENPMU_flush or XENPMU_lvtpc_set.
+	 */
+	union {
+		uint32_t lapic_lvtpc;
+		uint64_t pad;
+	} l;
+
+	/*
+	 * Vendor-specific PMU registers.
+	 * RW for both hypervisor and guest (see exceptions above).
+	 * Guest's updates to this field are verified and then loaded by the
+	 * hypervisor into hardware during XENPMU_flush
+	 */
+	union {
+		struct xen_pmu_amd_ctxt amd;
+		struct xen_pmu_intel_ctxt intel;
+
+		/*
+		 * Padding for contexts (fixed parts only, does not include
+		 * MSR banks that are specified by offsets)
+		 */
+#define XENPMU_CTXT_PAD_SZ  128
+		uint8_t pad[XENPMU_CTXT_PAD_SZ];
+	} c;
+};
+
 #endif	/* !__ASSEMBLY__ */
 
 /*
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 4b6e29ac0968..e47e52787d32 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -13,7 +13,7 @@ CFLAGS_mmu.o			:= $(nostackp)
 obj-y		:= enlighten.o setup.o multicalls.o mmu.o irq.o \
 			time.o xen-asm.o xen-asm_$(BITS).o \
 			grant-table.o suspend.o platform-pci-unplug.o \
-			p2m.o apic.o
+			p2m.o apic.o pmu.o
 
 obj-$(CONFIG_EVENT_TRACING) += trace.o
 
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index 70e060ad879a..d03ebfa89b9f 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -72,6 +72,9 @@ static u32 xen_apic_read(u32 reg)
 
 static void xen_apic_write(u32 reg, u32 val)
 {
+	if (reg == APIC_LVTPC)
+		return;
+
 	/* Warn to see if there's any stray references */
 	WARN(1,"register: %x, value: %x\n", reg, val);
 }
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 373dbc9810d1..19072f91a8e2 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -84,6 +84,7 @@
 #include "mmu.h"
 #include "smp.h"
 #include "multicalls.h"
+#include "pmu.h"
 
 EXPORT_SYMBOL_GPL(hypercall_page);
 
@@ -1082,6 +1083,11 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
 	return ret;
 }
 
+unsigned long long xen_read_pmc(int counter)
+{
+	return 0;
+}
+
 void xen_setup_shared_info(void)
 {
 	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
@@ -1216,7 +1222,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 	.write_msr = xen_write_msr_safe,
 
 	.read_tsc = native_read_tsc,
-	.read_pmc = native_read_pmc,
+	.read_pmc = xen_read_pmc,
 
 	.read_tscp = native_read_tscp,
 
@@ -1267,6 +1273,10 @@ static const struct pv_apic_ops xen_apic_ops __initconst = {
 static void xen_reboot(int reason)
 {
 	struct sched_shutdown r = { .reason = reason };
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		xen_pmu_finish(cpu);
 
 	if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
 		BUG();
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
new file mode 100644
index 000000000000..1d1ae1b874ea
--- /dev/null
+++ b/arch/x86/xen/pmu.c
@@ -0,0 +1,170 @@
+#include <linux/types.h>
+#include <linux/interrupt.h>
+
+#include <asm/xen/hypercall.h>
+#include <xen/page.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/vcpu.h>
+#include <xen/interface/xenpmu.h>
+
+#include "xen-ops.h"
+#include "pmu.h"
+
+/* x86_pmu.handle_irq definition */
+#include "../kernel/cpu/perf_event.h"
+
+
+/* Shared page between hypervisor and domain */
+static DEFINE_PER_CPU(struct xen_pmu_data *, xenpmu_shared);
+#define get_xenpmu_data()    per_cpu(xenpmu_shared, smp_processor_id())
+
+/* perf callbacks */
+static int xen_is_in_guest(void)
+{
+	const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+
+	if (!xenpmu_data) {
+		pr_warn_once("%s: pmudata not initialized\n", __func__);
+		return 0;
+	}
+
+	if (!xen_initial_domain() || (xenpmu_data->domain_id >= DOMID_SELF))
+		return 0;
+
+	return 1;
+}
+
+static int xen_is_user_mode(void)
+{
+	const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+
+	if (!xenpmu_data) {
+		pr_warn_once("%s: pmudata not initialized\n", __func__);
+		return 0;
+	}
+
+	if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV)
+		return (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER);
+	else
+		return !!(xenpmu_data->pmu.r.regs.cpl & 3);
+}
+
+static unsigned long xen_get_guest_ip(void)
+{
+	const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+
+	if (!xenpmu_data) {
+		pr_warn_once("%s: pmudata not initialized\n", __func__);
+		return 0;
+	}
+
+	return xenpmu_data->pmu.r.regs.ip;
+}
+
+static struct perf_guest_info_callbacks xen_guest_cbs = {
+	.is_in_guest            = xen_is_in_guest,
+	.is_user_mode           = xen_is_user_mode,
+	.get_guest_ip           = xen_get_guest_ip,
+};
+
+/* Convert registers from Xen's format to Linux' */
+static void xen_convert_regs(const struct xen_pmu_regs *xen_regs,
+			     struct pt_regs *regs, uint64_t pmu_flags)
+{
+	regs->ip = xen_regs->ip;
+	regs->cs = xen_regs->cs;
+	regs->sp = xen_regs->sp;
+
+	if (pmu_flags & PMU_SAMPLE_PV) {
+		if (pmu_flags & PMU_SAMPLE_USER)
+			regs->cs |= 3;
+		else
+			regs->cs &= ~3;
+	} else {
+		if (xen_regs->cpl)
+			regs->cs |= 3;
+		else
+			regs->cs &= ~3;
+	}
+}
+
+irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
+{
+	int ret = IRQ_NONE;
+	struct pt_regs regs;
+	const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
+
+	if (!xenpmu_data) {
+		pr_warn_once("%s: pmudata not initialized\n", __func__);
+		return ret;
+	}
+
+	xen_convert_regs(&xenpmu_data->pmu.r.regs, &regs,
+			 xenpmu_data->pmu.pmu_flags);
+	if (x86_pmu.handle_irq(&regs))
+		ret = IRQ_HANDLED;
+
+	return ret;
+}
+
+bool is_xen_pmu(int cpu)
+{
+	return (per_cpu(xenpmu_shared, cpu) != NULL);
+}
+
+void xen_pmu_init(int cpu)
+{
+	int err;
+	struct xen_pmu_params xp;
+	unsigned long pfn;
+	struct xen_pmu_data *xenpmu_data;
+
+	BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE);
+
+	if (xen_hvm_domain())
+		return;
+
+	xenpmu_data = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL);
+	if (!xenpmu_data) {
+		pr_err("VPMU init: No memory\n");
+		return;
+	}
+	pfn = virt_to_pfn(xenpmu_data);
+
+	xp.val = pfn_to_mfn(pfn);
+	xp.vcpu = cpu;
+	xp.version.maj = XENPMU_VER_MAJ;
+	xp.version.min = XENPMU_VER_MIN;
+	err = HYPERVISOR_xenpmu_op(XENPMU_init, &xp);
+	if (err)
+		goto fail;
+
+	per_cpu(xenpmu_shared, cpu) = xenpmu_data;
+
+	if (cpu == 0)
+		perf_register_guest_info_callbacks(&xen_guest_cbs);
+
+	return;
+
+fail:
+	pr_warn_once("Could not initialize VPMU for cpu %d, error %d\n",
+		cpu, err);
+	free_pages((unsigned long)xenpmu_data, 0);
+}
+
+void xen_pmu_finish(int cpu)
+{
+	struct xen_pmu_params xp;
+
+	if (xen_hvm_domain())
+		return;
+
+	xp.vcpu = cpu;
+	xp.version.maj = XENPMU_VER_MAJ;
+	xp.version.min = XENPMU_VER_MIN;
+
+	(void)HYPERVISOR_xenpmu_op(XENPMU_finish, &xp);
+
+	free_pages((unsigned long)per_cpu(xenpmu_shared, cpu), 0);
+	per_cpu(xenpmu_shared, cpu) = NULL;
+}
diff --git a/arch/x86/xen/pmu.h b/arch/x86/xen/pmu.h
new file mode 100644
index 000000000000..a76d2cf83581
--- /dev/null
+++ b/arch/x86/xen/pmu.h
@@ -0,0 +1,11 @@
+#ifndef __XEN_PMU_H
+#define __XEN_PMU_H
+
+#include <xen/interface/xenpmu.h>
+
+irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id);
+void xen_pmu_init(int cpu);
+void xen_pmu_finish(int cpu);
+bool is_xen_pmu(int cpu);
+
+#endif /* __XEN_PMU_H */
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 86484384492e..2a9ff7342791 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -26,6 +26,7 @@
 
 #include <xen/interface/xen.h>
 #include <xen/interface/vcpu.h>
+#include <xen/interface/xenpmu.h>
 
 #include <asm/xen/interface.h>
 #include <asm/xen/hypercall.h>
@@ -38,6 +39,7 @@
 #include "xen-ops.h"
 #include "mmu.h"
 #include "smp.h"
+#include "pmu.h"
 
 cpumask_var_t xen_cpu_initialized_map;
 
@@ -50,6 +52,7 @@ static DEFINE_PER_CPU(struct xen_common_irq, xen_callfunc_irq) = { .irq = -1 };
 static DEFINE_PER_CPU(struct xen_common_irq, xen_callfuncsingle_irq) = { .irq = -1 };
 static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 };
 static DEFINE_PER_CPU(struct xen_common_irq, xen_debug_irq) = { .irq = -1 };
+static DEFINE_PER_CPU(struct xen_common_irq, xen_pmu_irq) = { .irq = -1 };
 
 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
 static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
@@ -148,11 +151,18 @@ static void xen_smp_intr_free(unsigned int cpu)
 		kfree(per_cpu(xen_irq_work, cpu).name);
 		per_cpu(xen_irq_work, cpu).name = NULL;
 	}
+
+	if (per_cpu(xen_pmu_irq, cpu).irq >= 0) {
+		unbind_from_irqhandler(per_cpu(xen_pmu_irq, cpu).irq, NULL);
+		per_cpu(xen_pmu_irq, cpu).irq = -1;
+		kfree(per_cpu(xen_pmu_irq, cpu).name);
+		per_cpu(xen_pmu_irq, cpu).name = NULL;
+	}
 };
 static int xen_smp_intr_init(unsigned int cpu)
 {
 	int rc;
-	char *resched_name, *callfunc_name, *debug_name;
+	char *resched_name, *callfunc_name, *debug_name, *pmu_name;
 
 	resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
 	rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
@@ -218,6 +228,18 @@ static int xen_smp_intr_init(unsigned int cpu)
 	per_cpu(xen_irq_work, cpu).irq = rc;
 	per_cpu(xen_irq_work, cpu).name = callfunc_name;
 
+	if (is_xen_pmu(cpu)) {
+		pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu);
+		rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu,
+					     xen_pmu_irq_handler,
+					     IRQF_PERCPU|IRQF_NOBALANCING,
+					     pmu_name, NULL);
+		if (rc < 0)
+			goto fail;
+		per_cpu(xen_pmu_irq, cpu).irq = rc;
+		per_cpu(xen_pmu_irq, cpu).name = pmu_name;
+	}
+
 	return 0;
 
  fail:
@@ -335,6 +357,8 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
 	}
 	set_cpu_sibling_map(0);
 
+	xen_pmu_init(0);
+
 	if (xen_smp_intr_init(0))
 		BUG();
 
@@ -462,6 +486,8 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle)
 	if (rc)
 		return rc;
 
+	xen_pmu_init(cpu);
+
 	rc = xen_smp_intr_init(cpu);
 	if (rc)
 		return rc;
@@ -503,6 +529,7 @@ static void xen_cpu_die(unsigned int cpu)
 		xen_smp_intr_free(cpu);
 		xen_uninit_lock_cpu(cpu);
 		xen_teardown_timer(cpu);
+		xen_pmu_finish(cpu);
 	}
 }
 
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 53b4c0811f4f..feddabdab448 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -11,6 +11,7 @@
 
 #include "xen-ops.h"
 #include "mmu.h"
+#include "pmu.h"
 
 static void xen_pv_pre_suspend(void)
 {
@@ -67,16 +68,26 @@ static void xen_pv_post_suspend(int suspend_cancelled)
 
 void xen_arch_pre_suspend(void)
 {
-    if (xen_pv_domain())
-        xen_pv_pre_suspend();
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		xen_pmu_finish(cpu);
+
+	if (xen_pv_domain())
+		xen_pv_pre_suspend();
 }
 
 void xen_arch_post_suspend(int cancelled)
 {
-    if (xen_pv_domain())
-        xen_pv_post_suspend(cancelled);
-    else
-        xen_hvm_post_suspend(cancelled);
+	int cpu;
+
+	if (xen_pv_domain())
+		xen_pv_post_suspend(cancelled);
+	else
+		xen_hvm_post_suspend(cancelled);
+
+	for_each_online_cpu(cpu)
+		xen_pmu_init(cpu);
 }
 
 static void xen_vcpu_notify_restore(void *data)
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index e9d4501d1f5e..167071c290b3 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -113,6 +113,7 @@
 #define VIRQ_MEM_EVENT  10 /* G. (DOM0) A memory event has occured           */
 #define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient                     */
 #define VIRQ_ENOMEM     12 /* G. (DOM0) Low on heap memory       */
+#define VIRQ_XENPMU     13  /* PMC interrupt                                 */
 
 /* Architecture-specific VIRQ definitions. */
 #define VIRQ_ARCH_0    16
diff --git a/include/xen/interface/xenpmu.h b/include/xen/interface/xenpmu.h
index eac1b498b89f..ca42301949b5 100644
--- a/include/xen/interface/xenpmu.h
+++ b/include/xen/interface/xenpmu.h
@@ -56,4 +56,37 @@ struct xen_pmu_params {
  */
 #define XENPMU_FEATURE_INTEL_BTS  1
 
+/*
+ * Shared PMU data between hypervisor and PV(H) domains.
+ *
+ * The hypervisor fills out this structure during PMU interrupt and sends an
+ * interrupt to appropriate VCPU.
+ * Architecture-independent fields of xen_pmu_data are WO for the hypervisor
+ * and RO for the guest but some fields in xen_pmu_arch can be writable
+ * by both the hypervisor and the guest (see arch-$arch/pmu.h).
+ */
+struct xen_pmu_data {
+	/* Interrupted VCPU */
+	uint32_t vcpu_id;
+
+	/*
+	 * Physical processor on which the interrupt occurred. On non-privileged
+	 * guests set to vcpu_id;
+	 */
+	uint32_t pcpu_id;
+
+	/*
+	 * Domain that was interrupted. On non-privileged guests set to
+	 * DOMID_SELF.
+	 * On privileged guests can be DOMID_SELF, DOMID_XEN, or, when in
+	 * XENPMU_MODE_ALL mode, domain ID of another domain.
+	 */
+	domid_t  domain_id;
+
+	uint8_t pad[6];
+
+	/* Architecture-specific information */
+	struct xen_pmu_arch pmu;
+};
+
 #endif /* __XEN_PUBLIC_XENPMU_H__ */