140 files changed, 4650 insertions, 2661 deletions
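The first hunk below extends the kprobe_events synopsis so a probe location can be qualified with the module that provides the symbol ([MOD:]SYM[+offs]). A minimal userspace sketch of driving that interface is shown here, assuming the usual tracefs location under /sys/kernel/debug/tracing; the group, event, module, and symbol names ("my_probe", "mymod", "my_func") are illustrative placeholders, not part of the patch.

	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		/* Append a probe definition using the [MOD:]SYM form from the
		 * kprobetrace.txt hunk below; names here are hypothetical. */
		FILE *f = fopen("/sys/kernel/debug/tracing/kprobe_events", "a");

		if (!f) {
			perror("kprobe_events");
			return EXIT_FAILURE;
		}
		/* Synopsis: p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] */
		fprintf(f, "p:kprobes/my_probe mymod:my_func\n");
		fclose(f);
		return EXIT_SUCCESS;
	}

Per the same synopsis, the probe is removed again by writing "-:kprobes/my_probe" to the same file.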
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index c83bd6b4e6e8..d0d0bb9e3e25 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -22,14 +22,15 @@ current_tracer. Instead of that, add probe points via  Synopsis of kprobe_events  ------------------------- -  p[:[GRP/]EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS]	: Set a probe -  r[:[GRP/]EVENT] SYMBOL[+0] [FETCHARGS]		: Set a return probe +  p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS]	: Set a probe +  r[:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS]		: Set a return probe    -:[GRP/]EVENT						: Clear a probe   GRP		: Group name. If omitted, use "kprobes" for it.   EVENT		: Event name. If omitted, the event name is generated -		  based on SYMBOL+offs or MEMADDR. - SYMBOL[+offs]	: Symbol+offset where the probe is inserted. +		  based on SYM+offs or MEMADDR. + MOD		: Module name which has given SYM. + SYM[+offs]	: Symbol+offset where the probe is inserted.   MEMADDR	: Address where the probe is inserted.   FETCHARGS	: Arguments. Each probe can have up to 128 args. @@ -1290,6 +1290,7 @@ help:  	@echo  '  make O=dir [targets] Locate all output files in "dir", including .config'  	@echo  '  make C=1   [targets] Check all c source with $$CHECK (sparse by default)'  	@echo  '  make C=2   [targets] Force check of all c source with $$CHECK' +	@echo  '  make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections'  	@echo  '  make W=n   [targets] Enable extra gcc checks, n=1,2,3 where'  	@echo  '		1: warnings which may be relevant and do not occur too often'  	@echo  '		2: warnings which occur quite often but may still be relevant' diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 90561c45e7d8..8e47709160f8 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -847,7 +847,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,  	data.period = event->hw.last_period;  	if (alpha_perf_event_set_period(event, hwc, idx)) { -		if (perf_event_overflow(event, 1, &data, regs)) { +		if (perf_event_overflow(event, &data, regs)) {  			/* Interrupts coming too quickly; "throttle" the  			 * counter, i.e., disable it for a little while.  			 
*/ diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c index 818e74ed45dc..f20d1b5396b8 100644 --- a/arch/alpha/kernel/time.c +++ b/arch/alpha/kernel/time.c @@ -91,7 +91,7 @@ DEFINE_PER_CPU(u8, irq_work_pending);  #define test_irq_work_pending()      __get_cpu_var(irq_work_pending)  #define clear_irq_work_pending()     __get_cpu_var(irq_work_pending) = 0 -void set_irq_work_pending(void) +void arch_irq_work_raise(void)  {  	set_irq_work_pending_flag();  } diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index f1e8dd94afe8..dd7f3b9f4cb3 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -173,6 +173,20 @@ static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]  			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,  		},  	}, +	[C(NODE)] = { +		[C(OP_READ)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +		[C(OP_WRITE)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +		[C(OP_PREFETCH)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +	},  };  enum armv6mpcore_perf_types { @@ -310,6 +324,20 @@ static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]  			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,  		},  	}, +	[C(NODE)] = { +		[C(OP_READ)] = { +			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED, +		}, +		[C(OP_WRITE)] = { +			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED, +		}, +		[C(OP_PREFETCH)] = { +			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED, +		}, +	},  };  static inline unsigned long @@ -479,7 +507,7 @@ armv6pmu_handle_irq(int irq_num,  		if (!armpmu_event_set_period(event, hwc, idx))  			continue; -		if (perf_event_overflow(event, 0, &data, regs)) +		if (perf_event_overflow(event, &data, regs))  			armpmu->disable(hwc, idx);  	} diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 4960686afb58..e20ca9cafef5 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -255,6 +255,20 @@ static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]  			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,  		},  	}, +	[C(NODE)] = { +		[C(OP_READ)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +		[C(OP_WRITE)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +		[C(OP_PREFETCH)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +	},  };  /* @@ -371,6 +385,20 @@ static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]  			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,  		},  	}, +	[C(NODE)] = { +		[C(OP_READ)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +		[C(OP_WRITE)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +		[C(OP_PREFETCH)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +	},  };  /* @@ -787,7 +815,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)  		if (!armpmu_event_set_period(event, hwc, idx))  			continue; -		if (perf_event_overflow(event, 0, &data, regs)) +		if 
(perf_event_overflow(event, &data, regs))  			armpmu->disable(hwc, idx);  	} diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 39affbe4fdb2..3c4397491d08 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -144,6 +144,20 @@ static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]  			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,  		},  	}, +	[C(NODE)] = { +		[C(OP_READ)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +		[C(OP_WRITE)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +		[C(OP_PREFETCH)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +	},  };  #define	XSCALE_PMU_ENABLE	0x001 @@ -251,7 +265,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev)  		if (!armpmu_event_set_period(event, hwc, idx))  			continue; -		if (perf_event_overflow(event, 0, &data, regs)) +		if (perf_event_overflow(event, &data, regs))  			armpmu->disable(hwc, idx);  	} @@ -583,7 +597,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev)  		if (!armpmu_event_set_period(event, hwc, idx))  			continue; -		if (perf_event_overflow(event, 0, &data, regs)) +		if (perf_event_overflow(event, &data, regs))  			armpmu->disable(hwc, idx);  	} diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 97260060bf26..5c199610719f 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -396,7 +396,7 @@ static long ptrace_hbp_idx_to_num(int idx)  /*   * Handle hitting a HW-breakpoint.   */ -static void ptrace_hbptriggered(struct perf_event *bp, int unused, +static void ptrace_hbptriggered(struct perf_event *bp,  				     struct perf_sample_data *data,  				     struct pt_regs *regs)  { @@ -479,7 +479,8 @@ static struct perf_event *ptrace_hbp_create(struct task_struct *tsk, int type)  	attr.bp_type	= type;  	attr.disabled	= 1; -	return register_user_hw_breakpoint(&attr, ptrace_hbptriggered, tsk); +	return register_user_hw_breakpoint(&attr, ptrace_hbptriggered, NULL, +					   tsk);  }  static int ptrace_gethbpregs(struct task_struct *tsk, long num, diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c index 40ee7e5045e4..5f452f8fde05 100644 --- a/arch/arm/kernel/swp_emulate.c +++ b/arch/arm/kernel/swp_emulate.c @@ -183,7 +183,7 @@ static int swp_handler(struct pt_regs *regs, unsigned int instr)  	unsigned int address, destreg, data, type;  	unsigned int res = 0; -	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, regs->ARM_pc); +	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->ARM_pc);  	if (current->pid != previous_pid) {  		pr_debug("\"%s\" (%ld) uses deprecated SWP{B} instruction\n", diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index bc0e1d88fd3b..9ea4f7ddd665 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -318,11 +318,11 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)  	fault = __do_page_fault(mm, addr, fsr, tsk);  	up_read(&mm->mmap_sem); -	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, addr); +	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);  	if (fault & VM_FAULT_MAJOR) -		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, addr); +		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, addr);  	else if (fault & VM_FAULT_MINOR) -		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, addr); +		
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, addr);  	/*  	 * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR diff --git a/arch/mips/include/asm/stacktrace.h b/arch/mips/include/asm/stacktrace.h index 0bf82818aa53..780ee2c2a2ac 100644 --- a/arch/mips/include/asm/stacktrace.h +++ b/arch/mips/include/asm/stacktrace.h @@ -7,6 +7,10 @@  extern int raw_show_trace;  extern unsigned long unwind_stack(struct task_struct *task, unsigned long *sp,  				  unsigned long pc, unsigned long *ra); +extern unsigned long unwind_stack_by_address(unsigned long stack_page, +					     unsigned long *sp, +					     unsigned long pc, +					     unsigned long *ra);  #else  #define raw_show_trace 1  static inline unsigned long unwind_stack(struct task_struct *task, diff --git a/arch/mips/kernel/perf_event.c b/arch/mips/kernel/perf_event.c index a8244854d3dc..d0deaab9ace2 100644 --- a/arch/mips/kernel/perf_event.c +++ b/arch/mips/kernel/perf_event.c @@ -527,7 +527,7 @@ handle_associated_event(struct cpu_hw_events *cpuc,  	if (!mipspmu_event_set_period(event, hwc, idx))  		return; -	if (perf_event_overflow(event, 0, data, regs)) +	if (perf_event_overflow(event, data, regs))  		mipspmu->disable_event(idx);  } diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index 75266ff4cc33..e5ad09a9baf7 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -377,6 +377,20 @@ static const struct mips_perf_event mipsxxcore_cache_map  		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID },  	},  }, +[C(NODE)] = { +	[C(OP_READ)] = { +		[C(RESULT_ACCESS)]	= { UNSUPPORTED_PERF_EVENT_ID }, +		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID }, +	}, +	[C(OP_WRITE)] = { +		[C(RESULT_ACCESS)]	= { UNSUPPORTED_PERF_EVENT_ID }, +		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID }, +	}, +	[C(OP_PREFETCH)] = { +		[C(RESULT_ACCESS)]	= { UNSUPPORTED_PERF_EVENT_ID }, +		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID }, +	}, +},  };  /* 74K core has completely different cache event map. 
*/ @@ -480,6 +494,20 @@ static const struct mips_perf_event mipsxx74Kcore_cache_map  		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID },  	},  }, +[C(NODE)] = { +	[C(OP_READ)] = { +		[C(RESULT_ACCESS)]	= { UNSUPPORTED_PERF_EVENT_ID }, +		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID }, +	}, +	[C(OP_WRITE)] = { +		[C(RESULT_ACCESS)]	= { UNSUPPORTED_PERF_EVENT_ID }, +		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID }, +	}, +	[C(OP_PREFETCH)] = { +		[C(RESULT_ACCESS)]	= { UNSUPPORTED_PERF_EVENT_ID }, +		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID }, +	}, +},  };  #ifdef CONFIG_MIPS_MT_SMP diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index d2112d3cf115..c28fbe6107bc 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -373,18 +373,18 @@ unsigned long thread_saved_pc(struct task_struct *tsk)  #ifdef CONFIG_KALLSYMS -/* used by show_backtrace() */ -unsigned long unwind_stack(struct task_struct *task, unsigned long *sp, -			   unsigned long pc, unsigned long *ra) +/* generic stack unwinding function */ +unsigned long notrace unwind_stack_by_address(unsigned long stack_page, +					      unsigned long *sp, +					      unsigned long pc, +					      unsigned long *ra)  { -	unsigned long stack_page;  	struct mips_frame_info info;  	unsigned long size, ofs;  	int leaf;  	extern void ret_from_irq(void);  	extern void ret_from_exception(void); -	stack_page = (unsigned long)task_stack_page(task);  	if (!stack_page)  		return 0; @@ -443,6 +443,15 @@ unsigned long unwind_stack(struct task_struct *task, unsigned long *sp,  	*ra = 0;  	return __kernel_text_address(pc) ? pc : 0;  } +EXPORT_SYMBOL(unwind_stack_by_address); + +/* used by show_backtrace() */ +unsigned long unwind_stack(struct task_struct *task, unsigned long *sp, +			   unsigned long pc, unsigned long *ra) +{ +	unsigned long stack_page = (unsigned long)task_stack_page(task); +	return unwind_stack_by_address(stack_page, sp, pc, ra); +}  #endif  /* diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index e9b3af27d844..b7517e3abc85 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -578,12 +578,12 @@ static int simulate_llsc(struct pt_regs *regs, unsigned int opcode)  {  	if ((opcode & OPCODE) == LL) {  		perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, -				1, 0, regs, 0); +				1, regs, 0);  		return simulate_ll(regs, opcode);  	}  	if ((opcode & OPCODE) == SC) {  		perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, -				1, 0, regs, 0); +				1, regs, 0);  		return simulate_sc(regs, opcode);  	} @@ -602,7 +602,7 @@ static int simulate_rdhwr(struct pt_regs *regs, unsigned int opcode)  		int rd = (opcode & RD) >> 11;  		int rt = (opcode & RT) >> 16;  		perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, -				1, 0, regs, 0); +				1, regs, 0);  		switch (rd) {  		case 0:		/* CPU number */  			regs->regs[rt] = smp_processor_id(); @@ -640,7 +640,7 @@ static int simulate_sync(struct pt_regs *regs, unsigned int opcode)  {  	if ((opcode & OPCODE) == SPEC0 && (opcode & FUNC) == SYNC) {  		perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, -				1, 0, regs, 0); +				1, regs, 0);  		return 0;  	} diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c index cfea1adfa153..eb319b580353 100644 --- a/arch/mips/kernel/unaligned.c +++ b/arch/mips/kernel/unaligned.c @@ -111,8 +111,7 @@ static void emulate_load_store_insn(struct pt_regs *regs,  	unsigned long value;  	unsigned int res; -	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, -		      1, 0, regs, 0); +	
perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);  	/*  	 * This load never faults. @@ -517,7 +516,7 @@ asmlinkage void do_ade(struct pt_regs *regs)  	mm_segment_t seg;  	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, -			1, 0, regs, regs->cp0_badvaddr); +			1, regs, regs->cp0_badvaddr);  	/*  	 * Did we catch a fault trying to load an instruction?  	 * Or are we running in MIPS16 mode? diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index d32cb0503110..dbf2f93a5091 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -272,8 +272,7 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx,  	}        emul: -	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, -			1, 0, xcp, 0); +	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, xcp, 0);  	MIPS_FPU_EMU_INC_STATS(emulated);  	switch (MIPSInst_OPCODE(ir)) {  	case ldc1_op:{ diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index 137ee76a0045..937cf3368164 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -145,7 +145,7 @@ good_area:  	 * the fault.  	 */  	fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0); -	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); +	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);  	if (unlikely(fault & VM_FAULT_ERROR)) {  		if (fault & VM_FAULT_OOM)  			goto out_of_memory; @@ -154,12 +154,10 @@ good_area:  		BUG();  	}  	if (fault & VM_FAULT_MAJOR) { -		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, -				1, 0, regs, address); +		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);  		tsk->maj_flt++;  	} else { -		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, -				1, 0, regs, address); +		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);  		tsk->min_flt++;  	} diff --git a/arch/mips/oprofile/Makefile b/arch/mips/oprofile/Makefile index 4b9d7044e26c..29f2f13eb31c 100644 --- a/arch/mips/oprofile/Makefile +++ b/arch/mips/oprofile/Makefile @@ -8,7 +8,7 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \  		oprofilefs.o oprofile_stats.o \  		timer_int.o ) -oprofile-y				:= $(DRIVER_OBJS) common.o +oprofile-y				:= $(DRIVER_OBJS) common.o backtrace.o  oprofile-$(CONFIG_CPU_MIPS32)		+= op_model_mipsxx.o  oprofile-$(CONFIG_CPU_MIPS64)		+= op_model_mipsxx.o diff --git a/arch/mips/oprofile/backtrace.c b/arch/mips/oprofile/backtrace.c new file mode 100644 index 000000000000..6854ed5097d2 --- /dev/null +++ b/arch/mips/oprofile/backtrace.c @@ -0,0 +1,175 @@ +#include <linux/oprofile.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/uaccess.h> +#include <asm/ptrace.h> +#include <asm/stacktrace.h> +#include <linux/stacktrace.h> +#include <linux/kernel.h> +#include <asm/sections.h> +#include <asm/inst.h> + +struct stackframe { +	unsigned long sp; +	unsigned long pc; +	unsigned long ra; +}; + +static inline int get_mem(unsigned long addr, unsigned long *result) +{ +	unsigned long *address = (unsigned long *) addr; +	if (!access_ok(VERIFY_READ, addr, sizeof(unsigned long))) +		return -1; +	if (__copy_from_user_inatomic(result, address, sizeof(unsigned long))) +		return -3; +	return 0; +} + +/* + * These two instruction helpers were taken from process.c + */ +static inline int is_ra_save_ins(union mips_instruction *ip) +{ +	/* sw / sd $ra, offset($sp) */ +	return (ip->i_format.opcode == sw_op || ip->i_format.opcode == sd_op) +		&& ip->i_format.rs == 29 && ip->i_format.rt == 31; +} + +static inline int is_sp_move_ins(union mips_instruction *ip) +{ +	/* 
addiu/daddiu sp,sp,-imm */ +	if (ip->i_format.rs != 29 || ip->i_format.rt != 29) +		return 0; +	if (ip->i_format.opcode == addiu_op || ip->i_format.opcode == daddiu_op) +		return 1; +	return 0; +} + +/* + * Looks for specific instructions that mark the end of a function. + * This usually means we ran into the code area of the previous function. + */ +static inline int is_end_of_function_marker(union mips_instruction *ip) +{ +	/* jr ra */ +	if (ip->r_format.func == jr_op && ip->r_format.rs == 31) +		return 1; +	/* lui gp */ +	if (ip->i_format.opcode == lui_op && ip->i_format.rt == 28) +		return 1; +	return 0; +} + +/* + * TODO for userspace stack unwinding: + * - handle cases where the stack is adjusted inside a function + *     (generally doesn't happen) + * - find optimal value for max_instr_check + * - try to find a way to handle leaf functions + */ + +static inline int unwind_user_frame(struct stackframe *old_frame, +				    const unsigned int max_instr_check) +{ +	struct stackframe new_frame = *old_frame; +	off_t ra_offset = 0; +	size_t stack_size = 0; +	unsigned long addr; + +	if (old_frame->pc == 0 || old_frame->sp == 0 || old_frame->ra == 0) +		return -9; + +	for (addr = new_frame.pc; (addr + max_instr_check > new_frame.pc) +		&& (!ra_offset || !stack_size); --addr) { +		union mips_instruction ip; + +		if (get_mem(addr, (unsigned long *) &ip)) +			return -11; + +		if (is_sp_move_ins(&ip)) { +			int stack_adjustment = ip.i_format.simmediate; +			if (stack_adjustment > 0) +				/* This marks the end of the previous function, +				   which means we overran. */ +				break; +			stack_size = (unsigned) stack_adjustment; +		} else if (is_ra_save_ins(&ip)) { +			int ra_slot = ip.i_format.simmediate; +			if (ra_slot < 0) +				/* This shouldn't happen. 
*/ +				break; +			ra_offset = ra_slot; +		} else if (is_end_of_function_marker(&ip)) +			break; +	} + +	if (!ra_offset || !stack_size) +		return -1; + +	if (ra_offset) { +		new_frame.ra = old_frame->sp + ra_offset; +		if (get_mem(new_frame.ra, &(new_frame.ra))) +			return -13; +	} + +	if (stack_size) { +		new_frame.sp = old_frame->sp + stack_size; +		if (get_mem(new_frame.sp, &(new_frame.sp))) +			return -14; +	} + +	if (new_frame.sp > old_frame->sp) +		return -2; + +	new_frame.pc = old_frame->ra; +	*old_frame = new_frame; + +	return 0; +} + +static inline void do_user_backtrace(unsigned long low_addr, +				     struct stackframe *frame, +				     unsigned int depth) +{ +	const unsigned int max_instr_check = 512; +	const unsigned long high_addr = low_addr + THREAD_SIZE; + +	while (depth-- && !unwind_user_frame(frame, max_instr_check)) { +		oprofile_add_trace(frame->ra); +		if (frame->sp < low_addr || frame->sp > high_addr) +			break; +	} +} + +#ifndef CONFIG_KALLSYMS +static inline void do_kernel_backtrace(unsigned long low_addr, +				       struct stackframe *frame, +				       unsigned int depth) { } +#else +static inline void do_kernel_backtrace(unsigned long low_addr, +				       struct stackframe *frame, +				       unsigned int depth) +{ +	while (depth-- && frame->pc) { +		frame->pc = unwind_stack_by_address(low_addr, +						    &(frame->sp), +						    frame->pc, +						    &(frame->ra)); +		oprofile_add_trace(frame->ra); +	} +} +#endif + +void notrace op_mips_backtrace(struct pt_regs *const regs, unsigned int depth) +{ +	struct stackframe frame = { .sp = regs->regs[29], +				    .pc = regs->cp0_epc, +				    .ra = regs->regs[31] }; +	const int userspace = user_mode(regs); +	const unsigned long low_addr = ALIGN(frame.sp, THREAD_SIZE); + +	if (userspace) +		do_user_backtrace(low_addr, &frame, depth); +	else +		do_kernel_backtrace(low_addr, &frame, depth); +} diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c index f9eb1aba6345..d1f2d4c52d42 100644 --- a/arch/mips/oprofile/common.c +++ b/arch/mips/oprofile/common.c @@ -115,6 +115,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)  	ops->start		= op_mips_start;  	ops->stop		= op_mips_stop;  	ops->cpu_type		= lmodel->cpu_type; +	ops->backtrace		= op_mips_backtrace;  	printk(KERN_INFO "oprofile: using %s performance monitoring.\n",  	       lmodel->cpu_type); diff --git a/arch/mips/oprofile/op_impl.h b/arch/mips/oprofile/op_impl.h index f04b54fb37d1..7c2da27ece04 100644 --- a/arch/mips/oprofile/op_impl.h +++ b/arch/mips/oprofile/op_impl.h @@ -36,4 +36,6 @@ struct op_mips_model {  	unsigned char num_counters;  }; +void op_mips_backtrace(struct pt_regs * const regs, unsigned int depth); +  #endif diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h index 45921672b97a..2cc41c715d2b 100644 --- a/arch/powerpc/include/asm/emulated_ops.h +++ b/arch/powerpc/include/asm/emulated_ops.h @@ -78,14 +78,14 @@ extern void ppc_warn_emulated_print(const char *type);  #define PPC_WARN_EMULATED(type, regs)					\  	do {								\  		perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,		\ -			1, 0, regs, 0);					\ +			1, regs, 0);					\  		__PPC_WARN_EMULATED(type);				\  	} while (0)  #define PPC_WARN_ALIGNMENT(type, regs)					\  	do {								\  		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS,		\ -			1, 0, regs, regs->dar);				\ +			1, regs, regs->dar);				\  		__PPC_WARN_EMULATED(type);				\  	} while (0) diff --git a/arch/powerpc/include/asm/hw_breakpoint.h 
b/arch/powerpc/include/asm/hw_breakpoint.h index 1c33ec17ca36..80fd4d2b4a62 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -57,7 +57,7 @@ void hw_breakpoint_pmu_read(struct perf_event *bp);  extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);  extern struct pmu perf_ops_bp; -extern void ptrace_triggered(struct perf_event *bp, int nmi, +extern void ptrace_triggered(struct perf_event *bp,  			struct perf_sample_data *data, struct pt_regs *regs);  static inline void hw_breakpoint_disable(void)  { diff --git a/arch/powerpc/kernel/e500-pmu.c b/arch/powerpc/kernel/e500-pmu.c index b150b510510f..cb2e2949c8d1 100644 --- a/arch/powerpc/kernel/e500-pmu.c +++ b/arch/powerpc/kernel/e500-pmu.c @@ -75,6 +75,11 @@ static int e500_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {  		[C(OP_WRITE)] = {	-1,		-1	},  		[C(OP_PREFETCH)] = {	-1,		-1	},  	}, +	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */ +		[C(OP_READ)] = {	-1,		-1 	}, +		[C(OP_WRITE)] = {	-1,		-1	}, +		[C(OP_PREFETCH)] = {	-1,		-1	}, +	},  };  static int num_events = 128; diff --git a/arch/powerpc/kernel/mpc7450-pmu.c b/arch/powerpc/kernel/mpc7450-pmu.c index 2cc5e0301d0b..845a58478890 100644 --- a/arch/powerpc/kernel/mpc7450-pmu.c +++ b/arch/powerpc/kernel/mpc7450-pmu.c @@ -388,6 +388,11 @@ static int mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {  		[C(OP_WRITE)] = {	-1,		-1	},  		[C(OP_PREFETCH)] = {	-1,		-1	},  	}, +	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */ +		[C(OP_READ)] = {	-1,		-1	}, +		[C(OP_WRITE)] = {	-1,		-1	}, +		[C(OP_PREFETCH)] = {	-1,		-1	}, +	},  };  struct power_pmu mpc7450_pmu = { diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 822f63008ae1..14967de98876 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -1207,7 +1207,7 @@ struct pmu power_pmu = {   * here so there is no possibility of being interrupted.   */  static void record_and_restart(struct perf_event *event, unsigned long val, -			       struct pt_regs *regs, int nmi) +			       struct pt_regs *regs)  {  	u64 period = event->hw.sample_period;  	s64 prev, delta, left; @@ -1258,7 +1258,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,  		if (event->attr.sample_type & PERF_SAMPLE_ADDR)  			perf_get_data_addr(regs, &data.addr); -		if (perf_event_overflow(event, nmi, &data, regs)) +		if (perf_event_overflow(event, &data, regs))  			power_pmu_stop(event, 0);  	}  } @@ -1346,7 +1346,7 @@ static void perf_event_interrupt(struct pt_regs *regs)  		if ((int)val < 0) {  			/* event has overflowed */  			found = 1; -			record_and_restart(event, val, regs, nmi); +			record_and_restart(event, val, regs);  		}  	} diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index b0dc8f7069cd..0a6d2a9d569c 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -568,7 +568,7 @@ static struct pmu fsl_emb_pmu = {   * here so there is no possibility of being interrupted.   
*/  static void record_and_restart(struct perf_event *event, unsigned long val, -			       struct pt_regs *regs, int nmi) +			       struct pt_regs *regs)  {  	u64 period = event->hw.sample_period;  	s64 prev, delta, left; @@ -616,7 +616,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,  		perf_sample_data_init(&data, 0);  		data.period = event->hw.last_period; -		if (perf_event_overflow(event, nmi, &data, regs)) +		if (perf_event_overflow(event, &data, regs))  			fsl_emb_pmu_stop(event, 0);  	}  } @@ -644,7 +644,7 @@ static void perf_event_interrupt(struct pt_regs *regs)  			if (event) {  				/* event has overflowed */  				found = 1; -				record_and_restart(event, val, regs, nmi); +				record_and_restart(event, val, regs);  			} else {  				/*  				 * Disabled counter is negative, diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c index ead8b3c2649e..e9dbc2d35c9c 100644 --- a/arch/powerpc/kernel/power4-pmu.c +++ b/arch/powerpc/kernel/power4-pmu.c @@ -587,6 +587,11 @@ static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {  		[C(OP_WRITE)] = {	-1,		-1	},  		[C(OP_PREFETCH)] = {	-1,		-1	},  	}, +	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */ +		[C(OP_READ)] = {	-1,		-1	}, +		[C(OP_WRITE)] = {	-1,		-1	}, +		[C(OP_PREFETCH)] = {	-1,		-1	}, +	},  };  static struct power_pmu power4_pmu = { diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index eca0ac595cb6..f58a2bd41b59 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -653,6 +653,11 @@ static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {  		[C(OP_WRITE)] = {	-1,		-1		},  		[C(OP_PREFETCH)] = {	-1,		-1		},  	}, +	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */ +		[C(OP_READ)] = {	-1,		-1		}, +		[C(OP_WRITE)] = {	-1,		-1		}, +		[C(OP_PREFETCH)] = {	-1,		-1		}, +	},  };  static struct power_pmu power5p_pmu = { diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index d5ff0f64a5e6..b1acab684142 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -595,6 +595,11 @@ static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {  		[C(OP_WRITE)] = {	-1,		-1		},  		[C(OP_PREFETCH)] = {	-1,		-1		},  	}, +	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */ +		[C(OP_READ)] = {	-1,		-1		}, +		[C(OP_WRITE)] = {	-1,		-1		}, +		[C(OP_PREFETCH)] = {	-1,		-1		}, +	},  };  static struct power_pmu power5_pmu = { diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index 31603927e376..b24a3a23d073 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -516,6 +516,11 @@ static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {  		[C(OP_WRITE)] = {	-1,		-1		},  		[C(OP_PREFETCH)] = {	-1,		-1		},  	}, +	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */ +		[C(OP_READ)] = {	-1,		-1		}, +		[C(OP_WRITE)] = {	-1,		-1		}, +		[C(OP_PREFETCH)] = {	-1,		-1		}, +	},  };  static struct power_pmu power6_pmu = { diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c index 593740fcb799..6d9dccb2ea59 100644 --- a/arch/powerpc/kernel/power7-pmu.c +++ b/arch/powerpc/kernel/power7-pmu.c @@ -342,6 +342,11 @@ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {  		[C(OP_WRITE)] = {	-1,		-1	},  		[C(OP_PREFETCH)] = {	-1,		-1	},  	}, +	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */ +		[C(OP_READ)] = {	-1,		-1	}, +		[C(OP_WRITE)] = {	-1,		-1	}, +		
[C(OP_PREFETCH)] = {	-1,		-1	}, +	},  };  static struct power_pmu power7_pmu = { diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index 9a6e093858fe..b121de9658eb 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -467,6 +467,11 @@ static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {  		[C(OP_WRITE)] = {	-1,		-1	},  		[C(OP_PREFETCH)] = {	-1,		-1	},  	}, +	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */ +		[C(OP_READ)] = {	-1,		-1	}, +		[C(OP_WRITE)] = {	-1,		-1	}, +		[C(OP_PREFETCH)] = {	-1,		-1	}, +	},  };  static struct power_pmu ppc970_pmu = { diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index cb22024f2b42..05b7dd217f60 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -882,7 +882,7 @@ void user_disable_single_step(struct task_struct *task)  }  #ifdef CONFIG_HAVE_HW_BREAKPOINT -void ptrace_triggered(struct perf_event *bp, int nmi, +void ptrace_triggered(struct perf_event *bp,  		      struct perf_sample_data *data, struct pt_regs *regs)  {  	struct perf_event_attr attr; @@ -973,7 +973,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,  								&attr.bp_type);  	thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr, -							ptrace_triggered, task); +					       ptrace_triggered, NULL, task);  	if (IS_ERR(bp)) {  		thread->ptrace_bps[0] = NULL;  		ptrace_put_breakpoints(task); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index f33acfd872ad..03b29a6759ab 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -544,7 +544,7 @@ DEFINE_PER_CPU(u8, irq_work_pending);  #endif /* 32 vs 64 bit */ -void set_irq_work_pending(void) +void arch_irq_work_raise(void)  {  	preempt_disable();  	set_irq_work_pending_flag(); diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index ad35f66c69e8..5efe8c96d37f 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -174,7 +174,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,  		die("Weird page fault", regs, SIGSEGV);  	} -	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); +	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);  	/* When running in the kernel we expect faults to occur only to  	 * addresses in user space.  
All other faults represent errors in the @@ -320,7 +320,7 @@ good_area:  	}  	if (ret & VM_FAULT_MAJOR) {  		current->maj_flt++; -		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, +		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,  				     regs, address);  #ifdef CONFIG_PPC_SMLPAR  		if (firmware_has_feature(FW_FEATURE_CMO)) { @@ -331,7 +331,7 @@ good_area:  #endif  	} else {  		current->min_flt++; -		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, +		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,  				     regs, address);  	}  	up_read(&mm->mmap_sem); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index fe103e891e7a..095f782a5512 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -299,7 +299,7 @@ static inline int do_exception(struct pt_regs *regs, int access,  		goto out;  	address = trans_exc_code & __FAIL_ADDR_MASK; -	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); +	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);  	flags = FAULT_FLAG_ALLOW_RETRY;  	if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)  		flags |= FAULT_FLAG_WRITE; @@ -345,11 +345,11 @@ retry:  	if (flags & FAULT_FLAG_ALLOW_RETRY) {  		if (fault & VM_FAULT_MAJOR) {  			tsk->maj_flt++; -			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, +			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,  				      regs, address);  		} else {  			tsk->min_flt++; -			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, +			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,  				      regs, address);  		}  		if (fault & VM_FAULT_RETRY) { diff --git a/arch/sh/kernel/cpu/sh4/perf_event.c b/arch/sh/kernel/cpu/sh4/perf_event.c index 748955df018d..fa4f724b295a 100644 --- a/arch/sh/kernel/cpu/sh4/perf_event.c +++ b/arch/sh/kernel/cpu/sh4/perf_event.c @@ -180,6 +180,21 @@ static const int sh7750_cache_events  			[ C(RESULT_MISS)   ] = -1,  		},  	}, + +	[ C(NODE) ] = { +		[ C(OP_READ) ] = { +			[ C(RESULT_ACCESS) ] = -1, +			[ C(RESULT_MISS)   ] = -1, +		}, +		[ C(OP_WRITE) ] = { +			[ C(RESULT_ACCESS) ] = -1, +			[ C(RESULT_MISS)   ] = -1, +		}, +		[ C(OP_PREFETCH) ] = { +			[ C(RESULT_ACCESS) ] = -1, +			[ C(RESULT_MISS)   ] = -1, +		}, +	},  };  static int sh7750_event_map(int event) diff --git a/arch/sh/kernel/cpu/sh4a/perf_event.c b/arch/sh/kernel/cpu/sh4a/perf_event.c index 17e6bebfede0..84a2c396ceee 100644 --- a/arch/sh/kernel/cpu/sh4a/perf_event.c +++ b/arch/sh/kernel/cpu/sh4a/perf_event.c @@ -205,6 +205,21 @@ static const int sh4a_cache_events  			[ C(RESULT_MISS)   ] = -1,  		},  	}, + +	[ C(NODE) ] = { +		[ C(OP_READ) ] = { +			[ C(RESULT_ACCESS) ] = -1, +			[ C(RESULT_MISS)   ] = -1, +		}, +		[ C(OP_WRITE) ] = { +			[ C(RESULT_ACCESS) ] = -1, +			[ C(RESULT_MISS)   ] = -1, +		}, +		[ C(OP_PREFETCH) ] = { +			[ C(RESULT_ACCESS) ] = -1, +			[ C(RESULT_MISS)   ] = -1, +		}, +	},  };  static int sh4a_event_map(int event) diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c index 3d7b209b2178..92b3c276339a 100644 --- a/arch/sh/kernel/ptrace_32.c +++ b/arch/sh/kernel/ptrace_32.c @@ -63,7 +63,7 @@ static inline int put_stack_long(struct task_struct *task, int offset,  	return 0;  } -void ptrace_triggered(struct perf_event *bp, int nmi, +void ptrace_triggered(struct perf_event *bp,  		      struct perf_sample_data *data, struct pt_regs *regs)  {  	struct perf_event_attr attr; @@ -91,7 +91,8 @@ static int set_single_step(struct task_struct *tsk, unsigned long addr)  		attr.bp_len = HW_BREAKPOINT_LEN_2;  		attr.bp_type = HW_BREAKPOINT_R; -		bp = 
register_user_hw_breakpoint(&attr, ptrace_triggered, tsk); +		bp = register_user_hw_breakpoint(&attr, ptrace_triggered, +						 NULL, tsk);  		if (IS_ERR(bp))  			return PTR_ERR(bp); diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c index b51a17104b5f..d9006f8ffc14 100644 --- a/arch/sh/kernel/traps_32.c +++ b/arch/sh/kernel/traps_32.c @@ -393,7 +393,7 @@ int handle_unaligned_access(insn_size_t instruction, struct pt_regs *regs,  	 */  	if (!expected) {  		unaligned_fixups_notify(current, instruction, regs); -		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, +		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1,  			      regs, address);  	} diff --git a/arch/sh/kernel/traps_64.c b/arch/sh/kernel/traps_64.c index 6713ca97e553..67110be83fd7 100644 --- a/arch/sh/kernel/traps_64.c +++ b/arch/sh/kernel/traps_64.c @@ -434,7 +434,7 @@ static int misaligned_load(struct pt_regs *regs,  		return error;  	} -	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, address); +	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, address);  	destreg = (opcode >> 4) & 0x3f;  	if (user_mode(regs)) { @@ -512,7 +512,7 @@ static int misaligned_store(struct pt_regs *regs,  		return error;  	} -	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, address); +	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, address);  	srcreg = (opcode >> 4) & 0x3f;  	if (user_mode(regs)) { @@ -588,7 +588,7 @@ static int misaligned_fpu_load(struct pt_regs *regs,  		return error;  	} -	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, address); +	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, address);  	destreg = (opcode >> 4) & 0x3f;  	if (user_mode(regs)) { @@ -665,7 +665,7 @@ static int misaligned_fpu_store(struct pt_regs *regs,  		return error;  	} -	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, address); +	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, address);  	srcreg = (opcode >> 4) & 0x3f;  	if (user_mode(regs)) { diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c index f76a5090d5d1..977195210653 100644 --- a/arch/sh/math-emu/math.c +++ b/arch/sh/math-emu/math.c @@ -620,7 +620,7 @@ int do_fpu_inst(unsigned short inst, struct pt_regs *regs)  	struct task_struct *tsk = current;  	struct sh_fpu_soft_struct *fpu = &(tsk->thread.xstate->softfpu); -	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0); +	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);  	if (!(task_thread_info(tsk)->status & TS_USEDFPU)) {  		/* initialize once. 
*/ diff --git a/arch/sh/mm/fault_32.c b/arch/sh/mm/fault_32.c index d4c34d757f0d..7bebd044f2a1 100644 --- a/arch/sh/mm/fault_32.c +++ b/arch/sh/mm/fault_32.c @@ -160,7 +160,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,  	if ((regs->sr & SR_IMASK) != SR_IMASK)  		local_irq_enable(); -	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); +	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);  	/*  	 * If we're in an interrupt, have no user context or are running @@ -210,11 +210,11 @@ good_area:  	}  	if (fault & VM_FAULT_MAJOR) {  		tsk->maj_flt++; -		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, +		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,  				     regs, address);  	} else {  		tsk->min_flt++; -		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, +		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,  				     regs, address);  	} diff --git a/arch/sh/mm/tlbflush_64.c b/arch/sh/mm/tlbflush_64.c index 7f5810f5dfdc..e3430e093d43 100644 --- a/arch/sh/mm/tlbflush_64.c +++ b/arch/sh/mm/tlbflush_64.c @@ -116,7 +116,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess,  	/* Not an IO address, so reenable interrupts */  	local_irq_enable(); -	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); +	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);  	/*  	 * If we're in an interrupt or have no user @@ -200,11 +200,11 @@ good_area:  	if (fault & VM_FAULT_MAJOR) {  		tsk->maj_flt++; -		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, +		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,  				     regs, address);  	} else {  		tsk->min_flt++; -		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, +		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,  				     regs, address);  	} diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 2cb0e1c001e2..62a034318b18 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -246,6 +246,20 @@ static const cache_map_t ultra3_cache_map = {  		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },  	},  }, +[C(NODE)] = { +	[C(OP_READ)] = { +		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, +		[C(RESULT_MISS)  ] = { CACHE_OP_UNSUPPORTED }, +	}, +	[ C(OP_WRITE) ] = { +		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, +		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED }, +	}, +	[ C(OP_PREFETCH) ] = { +		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, +		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED }, +	}, +},  };  static const struct sparc_pmu ultra3_pmu = { @@ -361,6 +375,20 @@ static const cache_map_t niagara1_cache_map = {  		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },  	},  }, +[C(NODE)] = { +	[C(OP_READ)] = { +		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, +		[C(RESULT_MISS)  ] = { CACHE_OP_UNSUPPORTED }, +	}, +	[ C(OP_WRITE) ] = { +		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, +		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED }, +	}, +	[ C(OP_PREFETCH) ] = { +		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, +		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED }, +	}, +},  };  static const struct sparc_pmu niagara1_pmu = { @@ -473,6 +501,20 @@ static const cache_map_t niagara2_cache_map = {  		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },  	},  }, +[C(NODE)] = { +	[C(OP_READ)] = { +		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, +		[C(RESULT_MISS)  ] = { CACHE_OP_UNSUPPORTED }, +	}, +	[ C(OP_WRITE) ] = { +		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, +		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED }, +	}, +	[ 
C(OP_PREFETCH) ] = { +		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, +		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED }, +	}, +},  };  static const struct sparc_pmu niagara2_pmu = { @@ -1277,7 +1319,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,  		if (!sparc_perf_event_set_period(event, hwc, idx))  			continue; -		if (perf_event_overflow(event, 1, &data, regs)) +		if (perf_event_overflow(event, &data, regs))  			sparc_pmu_stop(event, 0);  	} diff --git a/arch/sparc/kernel/unaligned_32.c b/arch/sparc/kernel/unaligned_32.c index 4491f4cb2695..7efbb2f9e77f 100644 --- a/arch/sparc/kernel/unaligned_32.c +++ b/arch/sparc/kernel/unaligned_32.c @@ -247,7 +247,7 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn)  		unsigned long addr = compute_effective_address(regs, insn);  		int err; -		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, addr); +		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);  		switch (dir) {  		case load:  			err = do_int_load(fetch_reg_addr(((insn>>25)&0x1f), @@ -338,7 +338,7 @@ asmlinkage void user_unaligned_trap(struct pt_regs *regs, unsigned int insn)  		}  		addr = compute_effective_address(regs, insn); -		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, addr); +		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);  		switch(dir) {  		case load:  			err = do_int_load(fetch_reg_addr(((insn>>25)&0x1f), diff --git a/arch/sparc/kernel/unaligned_64.c b/arch/sparc/kernel/unaligned_64.c index b2b019ea8caa..35cff1673aa4 100644 --- a/arch/sparc/kernel/unaligned_64.c +++ b/arch/sparc/kernel/unaligned_64.c @@ -317,7 +317,7 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn)  		addr = compute_effective_address(regs, insn,  						 ((insn >> 25) & 0x1f)); -		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, addr); +		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);  		switch (asi) {  		case ASI_NL:  		case ASI_AIUPL: @@ -384,7 +384,7 @@ int handle_popc(u32 insn, struct pt_regs *regs)  	int ret, i, rd = ((insn >> 25) & 0x1f);  	int from_kernel = (regs->tstate & TSTATE_PRIV) != 0; -	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0); +	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);  	if (insn & 0x2000) {  		maybe_flush_windows(0, 0, rd, from_kernel);  		value = sign_extend_imm13(insn); @@ -431,7 +431,7 @@ int handle_ldf_stq(u32 insn, struct pt_regs *regs)  	int asi = decode_asi(insn, regs);  	int flag = (freg < 32) ? 
FPRS_DL : FPRS_DU; -	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0); +	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);  	save_and_clear_fpu();  	current_thread_info()->xfsr[0] &= ~0x1c000; @@ -554,7 +554,7 @@ void handle_ld_nf(u32 insn, struct pt_regs *regs)  	int from_kernel = (regs->tstate & TSTATE_PRIV) != 0;  	unsigned long *reg; -	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0); +	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);  	maybe_flush_windows(0, 0, rd, from_kernel);  	reg = fetch_reg_addr(rd, regs); @@ -586,7 +586,7 @@ void handle_lddfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr  	if (tstate & TSTATE_PRIV)  		die_if_kernel("lddfmna from kernel", regs); -	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, sfar); +	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, sfar);  	if (test_thread_flag(TIF_32BIT))  		pc = (u32)pc;  	if (get_user(insn, (u32 __user *) pc) != -EFAULT) { @@ -647,7 +647,7 @@ void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr  	if (tstate & TSTATE_PRIV)  		die_if_kernel("stdfmna from kernel", regs); -	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, sfar); +	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, sfar);  	if (test_thread_flag(TIF_32BIT))  		pc = (u32)pc;  	if (get_user(insn, (u32 __user *) pc) != -EFAULT) { diff --git a/arch/sparc/kernel/visemul.c b/arch/sparc/kernel/visemul.c index 36357717d691..32b626c9d815 100644 --- a/arch/sparc/kernel/visemul.c +++ b/arch/sparc/kernel/visemul.c @@ -802,7 +802,7 @@ int vis_emul(struct pt_regs *regs, unsigned int insn)  	BUG_ON(regs->tstate & TSTATE_PRIV); -	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0); +	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);  	if (test_thread_flag(TIF_32BIT))  		pc = (u32)pc; diff --git a/arch/sparc/math-emu/math_32.c b/arch/sparc/math-emu/math_32.c index a3fccde894ec..aa4d55b0bdf0 100644 --- a/arch/sparc/math-emu/math_32.c +++ b/arch/sparc/math-emu/math_32.c @@ -164,7 +164,7 @@ int do_mathemu(struct pt_regs *regs, struct task_struct *fpt)  	int retcode = 0;                               /* assume all succeed */  	unsigned long insn; -	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0); +	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);  #ifdef DEBUG_MATHEMU  	printk("In do_mathemu()... 
pc is %08lx\n", regs->pc); diff --git a/arch/sparc/math-emu/math_64.c b/arch/sparc/math-emu/math_64.c index 56d2c44747b8..e575bd2fe381 100644 --- a/arch/sparc/math-emu/math_64.c +++ b/arch/sparc/math-emu/math_64.c @@ -184,7 +184,7 @@ int do_mathemu(struct pt_regs *regs, struct fpustate *f)  	if (tstate & TSTATE_PRIV)  		die_if_kernel("unfinished/unimplemented FPop from kernel", regs); -	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0); +	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);  	if (test_thread_flag(TIF_32BIT))  		pc = (u32)pc;  	if (get_user(insn, (u32 __user *) pc) != -EFAULT) { diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index 7543ddbdadb2..aa1c1b1ce5cc 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -251,7 +251,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,          if (in_atomic() || !mm)                  goto no_context; -	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); +	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);  	down_read(&mm->mmap_sem); @@ -301,12 +301,10 @@ good_area:  	}  	if (fault & VM_FAULT_MAJOR) {  		current->maj_flt++; -		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, -			      regs, address); +		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);  	} else {  		current->min_flt++; -		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, -			      regs, address); +		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);  	}  	up_read(&mm->mmap_sem);  	return; diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index f92ce56a8b22..504c0622f729 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -325,7 +325,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)  	if (in_atomic() || !mm)  		goto intr_or_no_mm; -	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); +	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);  	if (!down_read_trylock(&mm->mmap_sem)) {  		if ((regs->tstate & TSTATE_PRIV) && @@ -433,12 +433,10 @@ good_area:  	}  	if (fault & VM_FAULT_MAJOR) {  		current->maj_flt++; -		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, -			      regs, address); +		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);  	} else {  		current->min_flt++; -		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, -			      regs, address); +		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);  	}  	up_read(&mm->mmap_sem); diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 5745ce8bf108..bba3cf88e624 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -60,23 +60,24 @@ static inline void native_halt(void)  #include <asm/paravirt.h>  #else  #ifndef __ASSEMBLY__ +#include <linux/types.h> -static inline unsigned long arch_local_save_flags(void) +static inline notrace unsigned long arch_local_save_flags(void)  {  	return native_save_fl();  } -static inline void arch_local_irq_restore(unsigned long flags) +static inline notrace void arch_local_irq_restore(unsigned long flags)  {  	native_restore_fl(flags);  } -static inline void arch_local_irq_disable(void) +static inline notrace void arch_local_irq_disable(void)  {  	native_irq_disable();  } -static inline void arch_local_irq_enable(void) +static inline notrace void arch_local_irq_enable(void)  {  	native_irq_enable();  } @@ -102,7 +103,7 @@ static inline void halt(void)  /*   * For spinlocks, etc:   */ -static inline 
unsigned long arch_local_irq_save(void) +static inline notrace unsigned long arch_local_irq_save(void)  {  	unsigned long flags = arch_local_save_flags();  	arch_local_irq_disable(); diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index d9d4dae305f6..094fb30817ab 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -152,6 +152,11 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);  	(regs)->bp = caller_frame_pointer();			\  	(regs)->cs = __KERNEL_CS;				\  	regs->flags = 0;					\ +	asm volatile(						\ +		_ASM_MOV "%%"_ASM_SP ", %0\n"			\ +		: "=m" ((regs)->sp)				\ +		:: "memory"					\ +	);							\  }  #else diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 56fd9e3abbda..4f7e67e2345e 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -102,6 +102,14 @@  #define P4_CONFIG_HT			(1ULL << P4_CONFIG_HT_SHIFT)  /* + * If an event has alias it should be marked + * with a special bit. (Don't forget to check + * P4_PEBS_CONFIG_MASK and related bits on + * modification.) + */ +#define P4_CONFIG_ALIASABLE		(1 << 9) + +/*   * The bits we allow to pass for RAW events   */  #define P4_CONFIG_MASK_ESCR		\ @@ -123,6 +131,31 @@  	(p4_config_pack_escr(P4_CONFIG_MASK_ESCR))	| \  	(p4_config_pack_cccr(P4_CONFIG_MASK_CCCR)) +/* + * In case of event aliasing we need to preserve some + * caller bits, otherwise the mapping won't be complete. + */ +#define P4_CONFIG_EVENT_ALIAS_MASK			  \ +	(p4_config_pack_escr(P4_CONFIG_MASK_ESCR)	| \ +	 p4_config_pack_cccr(P4_CCCR_EDGE		| \ +			     P4_CCCR_THRESHOLD_MASK	| \ +			     P4_CCCR_COMPLEMENT		| \ +			     P4_CCCR_COMPARE)) + +#define  P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS		  \ +	((P4_CONFIG_HT)					| \ +	 p4_config_pack_escr(P4_ESCR_T0_OS		| \ +			     P4_ESCR_T0_USR		| \ +			     P4_ESCR_T1_OS		| \ +			     P4_ESCR_T1_USR)		| \ +	 p4_config_pack_cccr(P4_CCCR_OVF		| \ +			     P4_CCCR_CASCADE		| \ +			     P4_CCCR_FORCE_OVF		| \ +			     P4_CCCR_THREAD_ANY		| \ +			     P4_CCCR_OVF_PMI_T0		| \ +			     P4_CCCR_OVF_PMI_T1		| \ +			     P4_CONFIG_ALIASABLE)) +  static inline bool p4_is_event_cascaded(u64 config)  {  	u32 cccr = p4_config_unpack_cccr(config); diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 99ddd148a760..36361bf6fdd1 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -555,6 +555,9 @@ struct __large_struct { unsigned long buf[100]; };  #endif /* CONFIG_X86_WP_WORKS_OK */ +extern unsigned long +copy_from_user_nmi(void *to, const void __user *from, unsigned long n); +  /*   * movsl can be slow when source and dest are not both 8-byte aligned   */ diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 3a0338b4b179..4ee3abf20ed6 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -22,7 +22,6 @@  #include <linux/sched.h>  #include <linux/uaccess.h>  #include <linux/slab.h> -#include <linux/highmem.h>  #include <linux/cpu.h>  #include <linux/bitops.h> @@ -45,38 +44,27 @@ do {								\  #endif  /* - * best effort, GUP based copy_from_user() that assumes IRQ or NMI context + *          |   NHM/WSM    |      SNB     | + * register ------------------------------- + *          |  HT  | no HT |  HT  | no HT | + *----------------------------------------- + * offcore  | core | core  | cpu  | core  | + * lbr_sel  | core | core  | cpu  | core  | + * ld_lat   | cpu  | 
core  | cpu  | core  | + *----------------------------------------- + * + * Given that there is a small number of shared regs, + * we can pre-allocate their slot in the per-cpu + * per-core reg tables.   */ -static unsigned long -copy_from_user_nmi(void *to, const void __user *from, unsigned long n) -{ -	unsigned long offset, addr = (unsigned long)from; -	unsigned long size, len = 0; -	struct page *page; -	void *map; -	int ret; - -	do { -		ret = __get_user_pages_fast(addr, 1, 0, &page); -		if (!ret) -			break; - -		offset = addr & (PAGE_SIZE - 1); -		size = min(PAGE_SIZE - offset, n - len); - -		map = kmap_atomic(page); -		memcpy(to, map+offset, size); -		kunmap_atomic(map); -		put_page(page); +enum extra_reg_type { +	EXTRA_REG_NONE  = -1,	/* not used */ -		len  += size; -		to   += size; -		addr += size; +	EXTRA_REG_RSP_0 = 0,	/* offcore_response_0 */ +	EXTRA_REG_RSP_1 = 1,	/* offcore_response_1 */ -	} while (len < n); - -	return len; -} +	EXTRA_REG_MAX		/* number of entries needed */ +};  struct event_constraint {  	union { @@ -132,11 +120,10 @@ struct cpu_hw_events {  	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];  	/* -	 * Intel percore register state. -	 * Coordinate shared resources between HT threads. +	 * manage shared (per-core, per-cpu) registers +	 * used on Intel NHM/WSM/SNB  	 */ -	int				percore_used; /* Used by this CPU? */ -	struct intel_percore		*per_core; +	struct intel_shared_regs	*shared_regs;  	/*  	 * AMD specific bits @@ -187,26 +174,45 @@ struct cpu_hw_events {  	for ((e) = (c); (e)->weight; (e)++)  /* + * Per register state. + */ +struct er_account { +	raw_spinlock_t		lock;	/* per-core: protect structure */ +	u64			config;	/* extra MSR config */ +	u64			reg;	/* extra MSR number */ +	atomic_t		ref;	/* reference count */ +}; + +/*   * Extra registers for specific events. + *   * Some events need large masks and require external MSRs. - * Define a mapping to these extra registers. + * Those extra MSRs end up being shared for all events on + * a PMU and sometimes between PMU of sibling HT threads. + * In either case, the kernel needs to handle conflicting + * accesses to those extra, shared, regs. The data structure + * to manage those registers is stored in cpu_hw_event.   
*/  struct extra_reg {  	unsigned int		event;  	unsigned int		msr;  	u64			config_mask;  	u64			valid_mask; +	int			idx;  /* per_xxx->regs[] reg index */  }; -#define EVENT_EXTRA_REG(e, ms, m, vm) {	\ +#define EVENT_EXTRA_REG(e, ms, m, vm, i) {	\  	.event = (e),		\  	.msr = (ms),		\  	.config_mask = (m),	\  	.valid_mask = (vm),	\ +	.idx = EXTRA_REG_##i	\  	} -#define INTEL_EVENT_EXTRA_REG(event, msr, vm)	\ -	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm) -#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0) + +#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx)	\ +	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx) + +#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)  union perf_capabilities {  	struct { @@ -252,7 +258,6 @@ struct x86_pmu {  	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,  						 struct perf_event *event);  	struct event_constraint *event_constraints; -	struct event_constraint *percore_constraints;  	void		(*quirks)(void);  	int		perfctr_second_write; @@ -286,8 +291,12 @@ struct x86_pmu {  	 * Extra registers for events  	 */  	struct extra_reg *extra_regs; +	unsigned int er_flags;  }; +#define ERF_NO_HT_SHARING	1 +#define ERF_HAS_RSP_1		2 +  static struct x86_pmu x86_pmu __read_mostly;  static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { @@ -393,10 +402,10 @@ static inline unsigned int x86_pmu_event_addr(int index)   */  static int x86_pmu_extra_regs(u64 config, struct perf_event *event)  { +	struct hw_perf_event_extra *reg;  	struct extra_reg *er; -	event->hw.extra_reg = 0; -	event->hw.extra_config = 0; +	reg = &event->hw.extra_reg;  	if (!x86_pmu.extra_regs)  		return 0; @@ -406,8 +415,10 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event)  			continue;  		if (event->attr.config1 & ~er->valid_mask)  			return -EINVAL; -		event->hw.extra_reg = er->msr; -		event->hw.extra_config = event->attr.config1; + +		reg->idx = er->idx; +		reg->config = event->attr.config1; +		reg->reg = er->msr;  		break;  	}  	return 0; @@ -706,6 +717,9 @@ static int __x86_pmu_event_init(struct perf_event *event)  	event->hw.last_cpu = -1;  	event->hw.last_tag = ~0ULL; +	/* mark unused */ +	event->hw.extra_reg.idx = EXTRA_REG_NONE; +  	return x86_pmu.hw_config(event);  } @@ -747,8 +761,8 @@ static void x86_pmu_disable(struct pmu *pmu)  static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,  					  u64 enable_mask)  { -	if (hwc->extra_reg) -		wrmsrl(hwc->extra_reg, hwc->extra_config); +	if (hwc->extra_reg.reg) +		wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);  	wrmsrl(hwc->config_base, hwc->config | enable_mask);  } @@ -1332,7 +1346,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)  		if (!x86_perf_event_set_period(event))  			continue; -		if (perf_event_overflow(event, 1, &data, regs)) +		if (perf_event_overflow(event, &data, regs))  			x86_pmu_stop(event, 0);  	} @@ -1637,6 +1651,40 @@ static int x86_pmu_commit_txn(struct pmu *pmu)  	perf_pmu_enable(pmu);  	return 0;  } +/* + * a fake_cpuc is used to validate event groups. Due to + * the extra reg logic, we need to also allocate a fake + * per_core and per_cpu structure. Otherwise, group events + * using extra reg may conflict without the kernel being + * able to catch this when the last event gets added to + * the group. 
+ */ +static void free_fake_cpuc(struct cpu_hw_events *cpuc) +{ +	kfree(cpuc->shared_regs); +	kfree(cpuc); +} + +static struct cpu_hw_events *allocate_fake_cpuc(void) +{ +	struct cpu_hw_events *cpuc; +	int cpu = raw_smp_processor_id(); + +	cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL); +	if (!cpuc) +		return ERR_PTR(-ENOMEM); + +	/* only needed, if we have extra_regs */ +	if (x86_pmu.extra_regs) { +		cpuc->shared_regs = allocate_shared_regs(cpu); +		if (!cpuc->shared_regs) +			goto error; +	} +	return cpuc; +error: +	free_fake_cpuc(cpuc); +	return ERR_PTR(-ENOMEM); +}  /*   * validate that we can schedule this event @@ -1647,9 +1695,9 @@ static int validate_event(struct perf_event *event)  	struct event_constraint *c;  	int ret = 0; -	fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); -	if (!fake_cpuc) -		return -ENOMEM; +	fake_cpuc = allocate_fake_cpuc(); +	if (IS_ERR(fake_cpuc)) +		return PTR_ERR(fake_cpuc);  	c = x86_pmu.get_event_constraints(fake_cpuc, event); @@ -1659,7 +1707,7 @@ static int validate_event(struct perf_event *event)  	if (x86_pmu.put_event_constraints)  		x86_pmu.put_event_constraints(fake_cpuc, event); -	kfree(fake_cpuc); +	free_fake_cpuc(fake_cpuc);  	return ret;  } @@ -1679,36 +1727,32 @@ static int validate_group(struct perf_event *event)  {  	struct perf_event *leader = event->group_leader;  	struct cpu_hw_events *fake_cpuc; -	int ret, n; - -	ret = -ENOMEM; -	fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); -	if (!fake_cpuc) -		goto out; +	int ret = -ENOSPC, n; +	fake_cpuc = allocate_fake_cpuc(); +	if (IS_ERR(fake_cpuc)) +		return PTR_ERR(fake_cpuc);  	/*  	 * the event is not yet connected with its  	 * siblings therefore we must first collect  	 * existing siblings, then add the new event  	 * before we can simulate the scheduling  	 */ -	ret = -ENOSPC;  	n = collect_events(fake_cpuc, leader, true);  	if (n < 0) -		goto out_free; +		goto out;  	fake_cpuc->n_events = n;  	n = collect_events(fake_cpuc, event, false);  	if (n < 0) -		goto out_free; +		goto out;  	fake_cpuc->n_events = n;  	ret = x86_pmu.schedule_events(fake_cpuc, n, NULL); -out_free: -	kfree(fake_cpuc);  out: +	free_fake_cpuc(fake_cpuc);  	return ret;  } diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index fe29c1d2219e..941caa2e449b 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -89,6 +89,20 @@ static __initconst const u64 amd_hw_cache_event_ids  		[ C(RESULT_MISS)   ] = -1,  	},   }, + [ C(NODE) ] = { +	[ C(OP_READ) ] = { +		[ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */ +		[ C(RESULT_MISS)   ] = 0x98e9, /* CPU Request to Memory, r   */ +	}, +	[ C(OP_WRITE) ] = { +		[ C(RESULT_ACCESS) ] = -1, +		[ C(RESULT_MISS)   ] = -1, +	}, +	[ C(OP_PREFETCH) ] = { +		[ C(RESULT_ACCESS) ] = -1, +		[ C(RESULT_MISS)   ] = -1, +	}, + },  };  /* diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 41178c826c48..45fbb8f7f549 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1,25 +1,15 @@  #ifdef CONFIG_CPU_SUP_INTEL -#define MAX_EXTRA_REGS 2 - -/* - * Per register state. - */ -struct er_account { -	int			ref;		/* reference count */ -	unsigned int		extra_reg;	/* extra MSR number */ -	u64			extra_config;	/* extra MSR config */ -}; -  /* - * Per core state - * This used to coordinate shared registers for HT threads. 
+ * Per core/cpu state + * + * Used to coordinate shared registers between HT threads or + * among events on a single PMU.   */ -struct intel_percore { -	raw_spinlock_t		lock;		/* protect structure */ -	struct er_account	regs[MAX_EXTRA_REGS]; -	int			refcnt;		/* number of threads */ -	unsigned		core_id; +struct intel_shared_regs { +	struct er_account       regs[EXTRA_REG_MAX]; +	int                     refcnt;		/* per-core: #HT threads */ +	unsigned                core_id;	/* per-core: core id */  };  /* @@ -88,16 +78,10 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =  static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =  { -	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), +	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),  	EVENT_EXTRA_END  }; -static struct event_constraint intel_nehalem_percore_constraints[] __read_mostly = -{ -	INTEL_EVENT_CONSTRAINT(0xb7, 0), -	EVENT_CONSTRAINT_END -}; -  static struct event_constraint intel_westmere_event_constraints[] __read_mostly =  {  	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ @@ -116,8 +100,6 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =  	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */  	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */  	INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ -	INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */ -	INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */  	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */  	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */  	EVENT_CONSTRAINT_END @@ -125,15 +107,13 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =  static struct extra_reg intel_westmere_extra_regs[] __read_mostly =  { -	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), -	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff), +	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), +	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),  	EVENT_EXTRA_END  }; -static struct event_constraint intel_westmere_percore_constraints[] __read_mostly = +static struct event_constraint intel_v1_event_constraints[] __read_mostly =  { -	INTEL_EVENT_CONSTRAINT(0xb7, 0), -	INTEL_EVENT_CONSTRAINT(0xbb, 0),  	EVENT_CONSTRAINT_END  }; @@ -145,6 +125,12 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =  	EVENT_CONSTRAINT_END  }; +static struct extra_reg intel_snb_extra_regs[] __read_mostly = { +	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), +	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), +	EVENT_EXTRA_END +}; +  static u64 intel_pmu_event_map(int hw_event)  {  	return intel_perfmon_event_map[hw_event]; @@ -245,6 +231,21 @@ static __initconst const u64 snb_hw_cache_event_ids  		[ C(RESULT_MISS)   ] = -1,  	},   }, + [ C(NODE) ] = { +	[ C(OP_READ) ] = { +		[ C(RESULT_ACCESS) ] = -1, +		[ C(RESULT_MISS)   ] = -1, +	}, +	[ C(OP_WRITE) ] = { +		[ C(RESULT_ACCESS) ] = -1, +		[ C(RESULT_MISS)   ] = -1, +	}, +	[ C(OP_PREFETCH) ] = { +		[ C(RESULT_ACCESS) ] = -1, +		[ C(RESULT_MISS)   ] = -1, +	}, + }, +  };  static __initconst const u64 westmere_hw_cache_event_ids @@ -346,6 +347,20 @@ static __initconst const u64 westmere_hw_cache_event_ids  		[ C(RESULT_MISS)   ] = -1,  	},   }, + [ C(NODE) ] = { +	[ C(OP_READ) ] = { +		[ C(RESULT_ACCESS) ] = 0x01b7, +		[ C(RESULT_MISS)   ] = 0x01b7, +	
}, +	[ C(OP_WRITE) ] = { +		[ C(RESULT_ACCESS) ] = 0x01b7, +		[ C(RESULT_MISS)   ] = 0x01b7, +	}, +	[ C(OP_PREFETCH) ] = { +		[ C(RESULT_ACCESS) ] = 0x01b7, +		[ C(RESULT_MISS)   ] = 0x01b7, +	}, + },  };  /* @@ -398,7 +413,21 @@ static __initconst const u64 nehalem_hw_cache_extra_regs  		[ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,  		[ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_L3_MISS,  	}, - } + }, + [ C(NODE) ] = { +	[ C(OP_READ) ] = { +		[ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_ALL_DRAM, +		[ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_REMOTE_DRAM, +	}, +	[ C(OP_WRITE) ] = { +		[ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_ALL_DRAM, +		[ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_REMOTE_DRAM, +	}, +	[ C(OP_PREFETCH) ] = { +		[ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_ALL_DRAM, +		[ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_REMOTE_DRAM, +	}, + },  };  static __initconst const u64 nehalem_hw_cache_event_ids @@ -500,6 +529,20 @@ static __initconst const u64 nehalem_hw_cache_event_ids  		[ C(RESULT_MISS)   ] = -1,  	},   }, + [ C(NODE) ] = { +	[ C(OP_READ) ] = { +		[ C(RESULT_ACCESS) ] = 0x01b7, +		[ C(RESULT_MISS)   ] = 0x01b7, +	}, +	[ C(OP_WRITE) ] = { +		[ C(RESULT_ACCESS) ] = 0x01b7, +		[ C(RESULT_MISS)   ] = 0x01b7, +	}, +	[ C(OP_PREFETCH) ] = { +		[ C(RESULT_ACCESS) ] = 0x01b7, +		[ C(RESULT_MISS)   ] = 0x01b7, +	}, + },  };  static __initconst const u64 core2_hw_cache_event_ids @@ -1003,7 +1046,7 @@ again:  		data.period = event->hw.last_period; -		if (perf_event_overflow(event, 1, &data, regs)) +		if (perf_event_overflow(event, &data, regs))  			x86_pmu_stop(event, 0);  	} @@ -1037,65 +1080,121 @@ intel_bts_constraints(struct perf_event *event)  	return NULL;  } +static bool intel_try_alt_er(struct perf_event *event, int orig_idx) +{ +	if (!(x86_pmu.er_flags & ERF_HAS_RSP_1)) +		return false; + +	if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) { +		event->hw.config &= ~INTEL_ARCH_EVENT_MASK; +		event->hw.config |= 0x01bb; +		event->hw.extra_reg.idx = EXTRA_REG_RSP_1; +		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; +	} else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) { +		event->hw.config &= ~INTEL_ARCH_EVENT_MASK; +		event->hw.config |= 0x01b7; +		event->hw.extra_reg.idx = EXTRA_REG_RSP_0; +		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0; +	} + +	if (event->hw.extra_reg.idx == orig_idx) +		return false; + +	return true; +} + +/* + * manage allocation of shared extra msr for certain events + * + * sharing can be: + * per-cpu: to be shared between the various events on a single PMU + * per-core: per-cpu + shared by HT threads + */  static struct event_constraint * -intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) +__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, +				   struct perf_event *event)  { -	struct hw_perf_event *hwc = &event->hw; -	unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT; -	struct event_constraint *c; -	struct intel_percore *pc; +	struct event_constraint *c = &emptyconstraint; +	struct hw_perf_event_extra *reg = &event->hw.extra_reg;  	struct er_account *era; -	int i; -	int free_slot; -	int found; +	unsigned long flags; +	int orig_idx = reg->idx; -	if (!x86_pmu.percore_constraints || hwc->extra_alloc) -		return NULL; +	/* already allocated shared msr */ +	if (reg->alloc) +		return &unconstrained; -	for (c = x86_pmu.percore_constraints; c->cmask; c++) { -		if (e != c->code) -			continue; +again: +	era = &cpuc->shared_regs->regs[reg->idx]; +	/* +	 * we use spin_lock_irqsave() to avoid lockdep issues when +	
 * passing a fake cpuc +	 */ +	raw_spin_lock_irqsave(&era->lock, flags); + +	if (!atomic_read(&era->ref) || era->config == reg->config) { + +		/* lock in msr value */ +		era->config = reg->config; +		era->reg = reg->reg; + +		/* one more user */ +		atomic_inc(&era->ref); + +		/* no need to reallocate during incremental event scheduling */ +		reg->alloc = 1;  		/* -		 * Allocate resource per core. +		 * All events using extra_reg are unconstrained. +		 * Avoids calling x86_get_event_constraints() +		 * +		 * Must revisit if extra_reg controlling events +		 * ever have constraints. Worst case we go through +		 * the regular event constraint table.  		 */ -		pc = cpuc->per_core; -		if (!pc) -			break; -		c = &emptyconstraint; -		raw_spin_lock(&pc->lock); -		free_slot = -1; -		found = 0; -		for (i = 0; i < MAX_EXTRA_REGS; i++) { -			era = &pc->regs[i]; -			if (era->ref > 0 && hwc->extra_reg == era->extra_reg) { -				/* Allow sharing same config */ -				if (hwc->extra_config == era->extra_config) { -					era->ref++; -					cpuc->percore_used = 1; -					hwc->extra_alloc = 1; -					c = NULL; -				} -				/* else conflict */ -				found = 1; -				break; -			} else if (era->ref == 0 && free_slot == -1) -				free_slot = i; -		} -		if (!found && free_slot != -1) { -			era = &pc->regs[free_slot]; -			era->ref = 1; -			era->extra_reg = hwc->extra_reg; -			era->extra_config = hwc->extra_config; -			cpuc->percore_used = 1; -			hwc->extra_alloc = 1; -			c = NULL; -		} -		raw_spin_unlock(&pc->lock); -		return c; +		c = &unconstrained; +	} else if (intel_try_alt_er(event, orig_idx)) { +		raw_spin_unlock(&era->lock); +		goto again;  	} +	raw_spin_unlock_irqrestore(&era->lock, flags); -	return NULL; +	return c; +} + +static void +__intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc, +				   struct hw_perf_event_extra *reg) +{ +	struct er_account *era; + +	/* +	 * only put constraint if extra reg was actually +	 * allocated. 
Also takes care of event which do +	 * not use an extra shared reg +	 */ +	if (!reg->alloc) +		return; + +	era = &cpuc->shared_regs->regs[reg->idx]; + +	/* one fewer user */ +	atomic_dec(&era->ref); + +	/* allocate again next time */ +	reg->alloc = 0; +} + +static struct event_constraint * +intel_shared_regs_constraints(struct cpu_hw_events *cpuc, +			      struct perf_event *event) +{ +	struct event_constraint *c = NULL; + +	if (event->hw.extra_reg.idx != EXTRA_REG_NONE) +		c = __intel_shared_reg_get_constraints(cpuc, event); + +	return c;  }  static struct event_constraint * @@ -1111,49 +1210,28 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event  	if (c)  		return c; -	c = intel_percore_constraints(cpuc, event); +	c = intel_shared_regs_constraints(cpuc, event);  	if (c)  		return c;  	return x86_get_event_constraints(cpuc, event);  } -static void intel_put_event_constraints(struct cpu_hw_events *cpuc, +static void +intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,  					struct perf_event *event)  { -	struct extra_reg *er; -	struct intel_percore *pc; -	struct er_account *era; -	struct hw_perf_event *hwc = &event->hw; -	int i, allref; +	struct hw_perf_event_extra *reg; -	if (!cpuc->percore_used) -		return; - -	for (er = x86_pmu.extra_regs; er->msr; er++) { -		if (er->event != (hwc->config & er->config_mask)) -			continue; +	reg = &event->hw.extra_reg; +	if (reg->idx != EXTRA_REG_NONE) +		__intel_shared_reg_put_constraints(cpuc, reg); +} -		pc = cpuc->per_core; -		raw_spin_lock(&pc->lock); -		for (i = 0; i < MAX_EXTRA_REGS; i++) { -			era = &pc->regs[i]; -			if (era->ref > 0 && -			    era->extra_config == hwc->extra_config && -			    era->extra_reg == er->msr) { -				era->ref--; -				hwc->extra_alloc = 0; -				break; -			} -		} -		allref = 0; -		for (i = 0; i < MAX_EXTRA_REGS; i++) -			allref += pc->regs[i].ref; -		if (allref == 0) -			cpuc->percore_used = 0; -		raw_spin_unlock(&pc->lock); -		break; -	} +static void intel_put_event_constraints(struct cpu_hw_events *cpuc, +					struct perf_event *event) +{ +	intel_put_shared_regs_event_constraints(cpuc, event);  }  static int intel_pmu_hw_config(struct perf_event *event) @@ -1231,20 +1309,36 @@ static __initconst const struct x86_pmu core_pmu = {  	.event_constraints	= intel_core_event_constraints,  }; +static struct intel_shared_regs *allocate_shared_regs(int cpu) +{ +	struct intel_shared_regs *regs; +	int i; + +	regs = kzalloc_node(sizeof(struct intel_shared_regs), +			    GFP_KERNEL, cpu_to_node(cpu)); +	if (regs) { +		/* +		 * initialize the locks to keep lockdep happy +		 */ +		for (i = 0; i < EXTRA_REG_MAX; i++) +			raw_spin_lock_init(®s->regs[i].lock); + +		regs->core_id = -1; +	} +	return regs; +} +  static int intel_pmu_cpu_prepare(int cpu)  {  	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); -	if (!cpu_has_ht_siblings()) +	if (!x86_pmu.extra_regs)  		return NOTIFY_OK; -	cpuc->per_core = kzalloc_node(sizeof(struct intel_percore), -				      GFP_KERNEL, cpu_to_node(cpu)); -	if (!cpuc->per_core) +	cpuc->shared_regs = allocate_shared_regs(cpu); +	if (!cpuc->shared_regs)  		return NOTIFY_BAD; -	raw_spin_lock_init(&cpuc->per_core->lock); -	cpuc->per_core->core_id = -1;  	return NOTIFY_OK;  } @@ -1260,32 +1354,34 @@ static void intel_pmu_cpu_starting(int cpu)  	 */  	intel_pmu_lbr_reset(); -	if (!cpu_has_ht_siblings()) +	if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING))  		return;  	for_each_cpu(i, topology_thread_cpumask(cpu)) { -		struct intel_percore *pc = 
per_cpu(cpu_hw_events, i).per_core; +		struct intel_shared_regs *pc; +		pc = per_cpu(cpu_hw_events, i).shared_regs;  		if (pc && pc->core_id == core_id) { -			kfree(cpuc->per_core); -			cpuc->per_core = pc; +			kfree(cpuc->shared_regs); +			cpuc->shared_regs = pc;  			break;  		}  	} -	cpuc->per_core->core_id = core_id; -	cpuc->per_core->refcnt++; +	cpuc->shared_regs->core_id = core_id; +	cpuc->shared_regs->refcnt++;  }  static void intel_pmu_cpu_dying(int cpu)  {  	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); -	struct intel_percore *pc = cpuc->per_core; +	struct intel_shared_regs *pc; +	pc = cpuc->shared_regs;  	if (pc) {  		if (pc->core_id == -1 || --pc->refcnt == 0)  			kfree(pc); -		cpuc->per_core = NULL; +		cpuc->shared_regs = NULL;  	}  	fini_debug_store_on_cpu(cpu); @@ -1436,7 +1532,6 @@ static __init int intel_pmu_init(void)  		x86_pmu.event_constraints = intel_nehalem_event_constraints;  		x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints; -		x86_pmu.percore_constraints = intel_nehalem_percore_constraints;  		x86_pmu.enable_all = intel_pmu_nhm_enable_all;  		x86_pmu.extra_regs = intel_nehalem_extra_regs; @@ -1481,10 +1576,10 @@ static __init int intel_pmu_init(void)  		intel_pmu_lbr_init_nhm();  		x86_pmu.event_constraints = intel_westmere_event_constraints; -		x86_pmu.percore_constraints = intel_westmere_percore_constraints;  		x86_pmu.enable_all = intel_pmu_nhm_enable_all;  		x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;  		x86_pmu.extra_regs = intel_westmere_extra_regs; +		x86_pmu.er_flags |= ERF_HAS_RSP_1;  		/* UOPS_ISSUED.STALLED_CYCLES */  		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; @@ -1502,6 +1597,10 @@ static __init int intel_pmu_init(void)  		x86_pmu.event_constraints = intel_snb_event_constraints;  		x86_pmu.pebs_constraints = intel_snb_pebs_events; +		x86_pmu.extra_regs = intel_snb_extra_regs; +		/* all extra regs are per-cpu when HT is on */ +		x86_pmu.er_flags |= ERF_HAS_RSP_1; +		x86_pmu.er_flags |= ERF_NO_HT_SHARING;  		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */  		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; @@ -1512,11 +1611,19 @@ static __init int intel_pmu_init(void)  		break;  	default: -		/* -		 * default constraints for v2 and up -		 */ -		x86_pmu.event_constraints = intel_gen_event_constraints; -		pr_cont("generic architected perfmon, "); +		switch (x86_pmu.version) { +		case 1: +			x86_pmu.event_constraints = intel_v1_event_constraints; +			pr_cont("generic architected perfmon v1, "); +			break; +		default: +			/* +			 * default constraints for v2 and up +			 */ +			x86_pmu.event_constraints = intel_gen_event_constraints; +			pr_cont("generic architected perfmon, "); +			break; +		}  	}  	return 0;  } @@ -1528,4 +1635,8 @@ static int intel_pmu_init(void)  	return 0;  } +static struct intel_shared_regs *allocate_shared_regs(int cpu) +{ +	return NULL; +}  #endif /* CONFIG_CPU_SUP_INTEL */ diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index bab491b8ee25..1b1ef3addcfd 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -340,7 +340,7 @@ static int intel_pmu_drain_bts_buffer(void)  	 */  	perf_prepare_sample(&header, &data, event, ®s); -	if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) +	if (perf_output_begin(&handle, event, header.size * (top - at)))  		return 1;  	for (; at < top; at++) { @@ -616,7 +616,7 @@ 
static void __intel_pmu_pebs_event(struct perf_event *event,  	else  		regs.flags &= ~PERF_EFLAGS_EXACT; -	if (perf_event_overflow(event, 1, &data, ®s)) +	if (perf_event_overflow(event, &data, ®s))  		x86_pmu_stop(event, 0);  } diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index ead584fb6a7d..7809d2bcb209 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -554,13 +554,102 @@ static __initconst const u64 p4_hw_cache_event_ids  		[ C(RESULT_MISS)   ] = -1,  	},   }, + [ C(NODE) ] = { +	[ C(OP_READ) ] = { +		[ C(RESULT_ACCESS) ] = -1, +		[ C(RESULT_MISS)   ] = -1, +	}, +	[ C(OP_WRITE) ] = { +		[ C(RESULT_ACCESS) ] = -1, +		[ C(RESULT_MISS)   ] = -1, +	}, +	[ C(OP_PREFETCH) ] = { +		[ C(RESULT_ACCESS) ] = -1, +		[ C(RESULT_MISS)   ] = -1, +	}, + },  }; +/* + * Because of Netburst being quite restricted in how many + * identical events may run simultaneously, we introduce event aliases, + * ie the different events which have the same functionality but + * utilize non-intersected resources (ESCR/CCCR/counter registers). + * + * This allow us to relax restrictions a bit and run two or more + * identical events together. + * + * Never set any custom internal bits such as P4_CONFIG_HT, + * P4_CONFIG_ALIASABLE or bits for P4_PEBS_METRIC, they are + * either up to date automatically or not applicable at all. + */ +struct p4_event_alias { +	u64 original; +	u64 alternative; +} p4_event_aliases[] = { +	{ +		/* +		 * Non-halted cycles can be substituted with non-sleeping cycles (see +		 * Intel SDM Vol3b for details). We need this alias to be able +		 * to run nmi-watchdog and 'perf top' (or any other user space tool +		 * which is interested in running PERF_COUNT_HW_CPU_CYCLES) +		 * simultaneously. +		 */ +	.original	= +		p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS)		| +				    P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)), +	.alternative	= +		p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT)		| +				    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)| +				    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)| +				    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)| +				    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)| +				    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0)	| +				    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1)	| +				    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2)	| +				    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3))| +		p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT		| +				    P4_CCCR_COMPARE), +	}, +}; + +static u64 p4_get_alias_event(u64 config) +{ +	u64 config_match; +	int i; + +	/* +	 * Only event with special mark is allowed, +	 * we're to be sure it didn't come as malformed +	 * RAW event. 
+	 */ +	if (!(config & P4_CONFIG_ALIASABLE)) +		return 0; + +	config_match = config & P4_CONFIG_EVENT_ALIAS_MASK; + +	for (i = 0; i < ARRAY_SIZE(p4_event_aliases); i++) { +		if (config_match == p4_event_aliases[i].original) { +			config_match = p4_event_aliases[i].alternative; +			break; +		} else if (config_match == p4_event_aliases[i].alternative) { +			config_match = p4_event_aliases[i].original; +			break; +		} +	} + +	if (i >= ARRAY_SIZE(p4_event_aliases)) +		return 0; + +	return config_match | (config & P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS); +} +  static u64 p4_general_events[PERF_COUNT_HW_MAX] = {    /* non-halted CPU clocks */    [PERF_COUNT_HW_CPU_CYCLES] =  	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS)		| -		P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)), +		P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING))	| +		P4_CONFIG_ALIASABLE,    /*     * retired instructions @@ -945,7 +1034,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)  		if (!x86_perf_event_set_period(event))  			continue; -		if (perf_event_overflow(event, 1, &data, regs)) +		if (perf_event_overflow(event, &data, regs))  			x86_pmu_stop(event, 0);  	} @@ -1120,6 +1209,8 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign  	struct p4_event_bind *bind;  	unsigned int i, thread, num;  	int cntr_idx, escr_idx; +	u64 config_alias; +	int pass;  	bitmap_zero(used_mask, X86_PMC_IDX_MAX);  	bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE); @@ -1128,6 +1219,17 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign  		hwc = &cpuc->event_list[i]->hw;  		thread = p4_ht_thread(cpu); +		pass = 0; + +again: +		/* +		 * It's possible to hit a circular lock +		 * between original and alternative events +		 * if both are scheduled already. +		 */ +		if (pass > 2) +			goto done; +  		bind = p4_config_get_bind(hwc->config);  		escr_idx = p4_get_escr_idx(bind->escr_msr[thread]);  		if (unlikely(escr_idx == -1)) @@ -1141,8 +1243,17 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign  		}  		cntr_idx = p4_next_cntr(thread, used_mask, bind); -		if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) -			goto done; +		if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) { +			/* +			 * Check whether an event alias is still available. +			 */ +			config_alias = p4_get_alias_event(hwc->config); +			if (!config_alias) +				goto done; +			hwc->config = config_alias; +			pass++; +			goto again; +		}  		p4_pmu_swap_config_ts(hwc, cpu);  		if (assign) diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index e71c98d3c0d2..19853ad8afc5 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -105,34 +105,6 @@ in_irq_stack(unsigned long *stack, unsigned long *irq_stack,  }  /* - * We are returning from the irq stack and go to the previous one. - * If the previous stack is also in the irq stack, then bp in the first - * frame of the irq stack points to the previous, interrupted one. - * Otherwise we have another level of indirection: We first save - * the bp of the previous stack, then we switch the stack to the irq one - * and save a new bp that links to the previous one. 
- * (See save_args()) - */ -static inline unsigned long -fixup_bp_irq_link(unsigned long bp, unsigned long *stack, -		  unsigned long *irq_stack, unsigned long *irq_stack_end) -{ -#ifdef CONFIG_FRAME_POINTER -	struct stack_frame *frame = (struct stack_frame *)bp; -	unsigned long next; - -	if (!in_irq_stack(stack, irq_stack, irq_stack_end)) { -		if (!probe_kernel_address(&frame->next_frame, next)) -			return next; -		else -			WARN_ONCE(1, "Perf: bad frame pointer = %p in " -				  "callchain\n", &frame->next_frame); -	} -#endif -	return bp; -} - -/*   * x86-64 can have up to three kernel stacks:   * process stack   * interrupt stack @@ -155,9 +127,12 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,  		task = current;  	if (!stack) { -		stack = &dummy; -		if (task && task != current) +		if (regs) +			stack = (unsigned long *)regs->sp; +		else if (task && task != current)  			stack = (unsigned long *)task->thread.sp; +		else +			stack = &dummy;  	}  	if (!bp) @@ -205,8 +180,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,  				 * pointer (index -1 to end) in the IRQ stack:  				 */  				stack = (unsigned long *) (irq_stack_end[-1]); -				bp = fixup_bp_irq_link(bp, stack, irq_stack, -						       irq_stack_end);  				irq_stack_end = NULL;  				ops->stack(data, "EOI");  				continue; diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 8a445a0c989e..d656f68371a4 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -297,27 +297,26 @@ ENDPROC(native_usergs_sysret64)  	.endm  /* save partial stack frame */ -	.pushsection .kprobes.text, "ax" -ENTRY(save_args) -	XCPT_FRAME +	.macro SAVE_ARGS_IRQ  	cld -	/* -	 * start from rbp in pt_regs and jump over -	 * return address. -	 */ -	movq_cfi rdi, RDI+8-RBP -	movq_cfi rsi, RSI+8-RBP -	movq_cfi rdx, RDX+8-RBP -	movq_cfi rcx, RCX+8-RBP -	movq_cfi rax, RAX+8-RBP -	movq_cfi  r8,  R8+8-RBP -	movq_cfi  r9,  R9+8-RBP -	movq_cfi r10, R10+8-RBP -	movq_cfi r11, R11+8-RBP - -	leaq -RBP+8(%rsp),%rdi	/* arg1 for handler */ -	movq_cfi rbp, 8		/* push %rbp */ -	leaq 8(%rsp), %rbp		/* mov %rsp, %ebp */ +	/* start from rbp in pt_regs and jump over */ +	movq_cfi rdi, RDI-RBP +	movq_cfi rsi, RSI-RBP +	movq_cfi rdx, RDX-RBP +	movq_cfi rcx, RCX-RBP +	movq_cfi rax, RAX-RBP +	movq_cfi  r8,  R8-RBP +	movq_cfi  r9,  R9-RBP +	movq_cfi r10, R10-RBP +	movq_cfi r11, R11-RBP + +	/* Save rbp so that we can unwind from get_irq_regs() */ +	movq_cfi rbp, 0 + +	/* Save previous stack value */ +	movq %rsp, %rsi + +	leaq -RBP(%rsp),%rdi	/* arg1 for handler */  	testl $3, CS(%rdi)  	je 1f  	SWAPGS @@ -329,19 +328,14 @@ ENTRY(save_args)  	 */  1:	incl PER_CPU_VAR(irq_count)  	jne 2f -	popq_cfi %rax			/* move return address... */  	mov PER_CPU_VAR(irq_stack_ptr),%rsp  	EMPTY_FRAME 0 -	pushq_cfi %rbp			/* backlink for unwinder */ -	pushq_cfi %rax			/* ... 
to the new stack */ -	/* -	 * We entered an interrupt context - irqs are off: -	 */ -2:	TRACE_IRQS_OFF -	ret -	CFI_ENDPROC -END(save_args) -	.popsection + +2:	/* Store previous stack value */ +	pushq %rsi +	/* We entered an interrupt context - irqs are off: */ +	TRACE_IRQS_OFF +	.endm  ENTRY(save_rest)  	PARTIAL_FRAME 1 REST_SKIP+8 @@ -791,7 +785,7 @@ END(interrupt)  	/* reserve pt_regs for scratch regs and rbp */  	subq $ORIG_RAX-RBP, %rsp  	CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP -	call save_args +	SAVE_ARGS_IRQ  	PARTIAL_FRAME 0  	call \func  	.endm @@ -814,15 +808,14 @@ ret_from_intr:  	DISABLE_INTERRUPTS(CLBR_NONE)  	TRACE_IRQS_OFF  	decl PER_CPU_VAR(irq_count) -	leaveq -	CFI_RESTORE		rbp +	/* Restore saved previous stack */ +	popq %rsi +	leaq 16(%rsi), %rsp +  	CFI_DEF_CFA_REGISTER	rsp -	CFI_ADJUST_CFA_OFFSET	-8 +	CFI_ADJUST_CFA_OFFSET	-16 -	/* we did not save rbx, restore only from ARGOFFSET */ -	addq $8, %rsp -	CFI_ADJUST_CFA_OFFSET	-8  exit_intr:  	GET_THREAD_INFO(%rcx)  	testl $3,CS-ARGOFFSET(%rsp) diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 5f9ecff328b5..00354d4919a9 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -608,7 +608,7 @@ int kgdb_arch_init(void)  	return register_die_notifier(&kgdb_notifier);  } -static void kgdb_hw_overflow_handler(struct perf_event *event, int nmi, +static void kgdb_hw_overflow_handler(struct perf_event *event,  		struct perf_sample_data *data, struct pt_regs *regs)  {  	struct task_struct *tsk = current; @@ -638,7 +638,7 @@ void kgdb_arch_late(void)  	for (i = 0; i < HBP_NUM; i++) {  		if (breakinfo[i].pev)  			continue; -		breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL); +		breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL, NULL);  		if (IS_ERR((void * __force)breakinfo[i].pev)) {  			printk(KERN_ERR "kgdb: Could not allocate hw"  			       "breakpoints\nDisabling the kernel debugger\n"); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 807c2a2b80f1..82528799c5de 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -528,7 +528,7 @@ static int genregs_set(struct task_struct *target,  	return ret;  } -static void ptrace_triggered(struct perf_event *bp, int nmi, +static void ptrace_triggered(struct perf_event *bp,  			     struct perf_sample_data *data,  			     struct pt_regs *regs)  { @@ -715,7 +715,8 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,  		attr.bp_type = HW_BREAKPOINT_W;  		attr.disabled = 1; -		bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk); +		bp = register_user_hw_breakpoint(&attr, ptrace_triggered, +						 NULL, tsk);  		/*  		 * CHECKME: the previous code returned -EIO if the addr wasn't diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 55d9bc03f696..fdd0c6430e5a 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -66,7 +66,7 @@ void save_stack_trace(struct stack_trace *trace)  }  EXPORT_SYMBOL_GPL(save_stack_trace); -void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs) +void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)  {  	dump_trace(current, regs, NULL, 0, &save_stack_ops, trace);  	if (trace->nr_entries < trace->max_entries) diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index f2479f19ddde..6ba477342b8e 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -18,7 +18,7 @@ obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o  lib-y := delay.o  lib-y += thunk_$(BITS).o 
-lib-y += usercopy_$(BITS).o getuser.o putuser.o +lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o  lib-y += memcpy_$(BITS).o  lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c new file mode 100644 index 000000000000..97be9cb54483 --- /dev/null +++ b/arch/x86/lib/usercopy.c @@ -0,0 +1,43 @@ +/* + * User address space access functions. + * + *  For licencing details see kernel-base/COPYING + */ + +#include <linux/highmem.h> +#include <linux/module.h> + +/* + * best effort, GUP based copy_from_user() that is NMI-safe + */ +unsigned long +copy_from_user_nmi(void *to, const void __user *from, unsigned long n) +{ +	unsigned long offset, addr = (unsigned long)from; +	unsigned long size, len = 0; +	struct page *page; +	void *map; +	int ret; + +	do { +		ret = __get_user_pages_fast(addr, 1, 0, &page); +		if (!ret) +			break; + +		offset = addr & (PAGE_SIZE - 1); +		size = min(PAGE_SIZE - offset, n - len); + +		map = kmap_atomic(page); +		memcpy(to, map+offset, size); +		kunmap_atomic(map); +		put_page(page); + +		len  += size; +		to   += size; +		addr += size; + +	} while (len < n); + +	return len; +} +EXPORT_SYMBOL_GPL(copy_from_user_nmi); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 2dbf6bf4c7e5..4d09df054e39 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1059,7 +1059,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)  	if (unlikely(error_code & PF_RSVD))  		pgtable_bad(regs, error_code, address); -	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); +	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);  	/*  	 * If we're in an interrupt, have no user context or are running @@ -1161,11 +1161,11 @@ good_area:  	if (flags & FAULT_FLAG_ALLOW_RETRY) {  		if (fault & VM_FAULT_MAJOR) {  			tsk->maj_flt++; -			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, +			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,  				      regs, address);  		} else {  			tsk->min_flt++; -			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, +			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,  				      regs, address);  		}  		if (fault & VM_FAULT_RETRY) { diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c index 704a37cedddb..dab41876cdd5 100644 --- a/arch/x86/mm/kmemcheck/error.c +++ b/arch/x86/mm/kmemcheck/error.c @@ -185,7 +185,7 @@ void kmemcheck_error_save(enum kmemcheck_shadow state,  	e->trace.entries = e->trace_entries;  	e->trace.max_entries = ARRAY_SIZE(e->trace_entries);  	e->trace.skip = 0; -	save_stack_trace_regs(&e->trace, regs); +	save_stack_trace_regs(regs, &e->trace);  	/* Round address down to nearest 16 bytes */  	shadow_copy = kmemcheck_shadow_lookup(address diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index a5b64ab4cd6e..bff89dfe3619 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c @@ -11,10 +11,11 @@  #include <linux/oprofile.h>  #include <linux/sched.h>  #include <linux/mm.h> +#include <linux/compat.h> +#include <linux/uaccess.h> +  #include <asm/ptrace.h> -#include <asm/uaccess.h>  #include <asm/stacktrace.h> -#include <linux/compat.h>  static int backtrace_stack(void *data, char *name)  { @@ -40,13 +41,13 @@ static struct stacktrace_ops backtrace_ops = {  static struct stack_frame_ia32 *  dump_user_backtrace_32(struct stack_frame_ia32 *head)  { +	/* Also check accessibility of one struct frame_head beyond: */  	struct stack_frame_ia32 bufhead[2];  	struct stack_frame_ia32 *fp; 
+	unsigned long bytes; -	/* Also check accessibility of one struct frame_head beyond */ -	if (!access_ok(VERIFY_READ, head, sizeof(bufhead))) -		return NULL; -	if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead))) +	bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead)); +	if (bytes != sizeof(bufhead))  		return NULL;  	fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame); @@ -87,12 +88,12 @@ x86_backtrace_32(struct pt_regs * const regs, unsigned int depth)  static struct stack_frame *dump_user_backtrace(struct stack_frame *head)  { +	/* Also check accessibility of one struct frame_head beyond: */  	struct stack_frame bufhead[2]; +	unsigned long bytes; -	/* Also check accessibility of one struct stack_frame beyond */ -	if (!access_ok(VERIFY_READ, head, sizeof(bufhead))) -		return NULL; -	if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead))) +	bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead)); +	if (bytes != sizeof(bufhead))  		return NULL;  	oprofile_add_trace(bufhead[0].return_address); diff --git a/drivers/oprofile/oprofile_perf.c b/drivers/oprofile/oprofile_perf.c index 9046f7b2ed79..94796f39bc47 100644 --- a/drivers/oprofile/oprofile_perf.c +++ b/drivers/oprofile/oprofile_perf.c @@ -31,7 +31,7 @@ static int num_counters;  /*   * Overflow callback for oprofile.   */ -static void op_overflow_handler(struct perf_event *event, int unused, +static void op_overflow_handler(struct perf_event *event,  			struct perf_sample_data *data, struct pt_regs *regs)  {  	int id; @@ -79,7 +79,7 @@ static int op_create_counter(int cpu, int event)  	pevent = perf_event_create_kernel_counter(&counter_config[event].attr,  						  cpu, NULL, -						  op_overflow_handler); +						  op_overflow_handler, NULL);  	if (IS_ERR(pevent))  		return PTR_ERR(pevent); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 9d88e1cb5dbb..f0c0e8a47ae6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -19,6 +19,8 @@  #include <asm/ftrace.h> +struct ftrace_hash; +  #ifdef CONFIG_FUNCTION_TRACER  extern int ftrace_enabled; @@ -29,8 +31,6 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,  typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); -struct ftrace_hash; -  enum {  	FTRACE_OPS_FL_ENABLED		= 1 << 0,  	FTRACE_OPS_FL_GLOBAL		= 1 << 1, @@ -123,7 +123,8 @@ stack_trace_sysctl(struct ctl_table *table, int write,  struct ftrace_func_command {  	struct list_head	list;  	char			*name; -	int			(*func)(char *func, char *cmd, +	int			(*func)(struct ftrace_hash *hash, +					char *func, char *cmd,  					char *params, int enable);  }; diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 59d3ef100eb9..96efa6794ea5 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -76,6 +76,7 @@ struct trace_iterator {  	struct trace_entry	*ent;  	unsigned long		lost_events;  	int			leftover; +	int			ent_size;  	int			cpu;  	u64			ts; @@ -129,6 +130,10 @@ void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,  void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer,  				       struct ring_buffer_event *event,  					unsigned long flags, int pc); +void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer, +					    struct ring_buffer_event *event, +					    unsigned long flags, int pc, +					    struct pt_regs *regs);  void trace_current_buffer_discard_commit(struct ring_buffer *buffer,  					 struct ring_buffer_event *event); diff --git 
a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index d1e55fed2c7d..6ae9c631a1be 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -73,6 +73,7 @@ static inline unsigned long hw_breakpoint_len(struct perf_event *bp)  extern struct perf_event *  register_user_hw_breakpoint(struct perf_event_attr *attr,  			    perf_overflow_handler_t triggered, +			    void *context,  			    struct task_struct *tsk);  /* FIXME: only change from the attr, and don't unregister */ @@ -85,11 +86,13 @@ modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr);  extern struct perf_event *  register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,  				perf_overflow_handler_t	triggered, +				void *context,  				int cpu);  extern struct perf_event * __percpu *  register_wide_hw_breakpoint(struct perf_event_attr *attr, -			    perf_overflow_handler_t triggered); +			    perf_overflow_handler_t triggered, +			    void *context);  extern int register_perf_hw_breakpoint(struct perf_event *bp);  extern int __register_perf_hw_breakpoint(struct perf_event *bp); @@ -115,6 +118,7 @@ static inline int __init init_hw_breakpoint(void) { return 0; }  static inline struct perf_event *  register_user_hw_breakpoint(struct perf_event_attr *attr,  			    perf_overflow_handler_t triggered, +			    void *context,  			    struct task_struct *tsk)	{ return NULL; }  static inline int  modify_user_hw_breakpoint(struct perf_event *bp, @@ -122,10 +126,12 @@ modify_user_hw_breakpoint(struct perf_event *bp,  static inline struct perf_event *  register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,  				perf_overflow_handler_t	 triggered, +				void *context,  				int cpu)		{ return NULL; }  static inline struct perf_event * __percpu *  register_wide_hw_breakpoint(struct perf_event_attr *attr, -			    perf_overflow_handler_t triggered)	{ return NULL; } +			    perf_overflow_handler_t triggered, +			    void *context)		{ return NULL; }  static inline int  register_perf_hw_breakpoint(struct perf_event *bp)	{ return -ENOSYS; }  static inline int diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index e0786e35f247..3f2711ccf910 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -61,7 +61,7 @@ enum perf_hw_id {  /*   * Generalized hardware cache events:   * - *       { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x + *       { L1-D, L1-I, LLC, ITLB, DTLB, BPU, NODE } x   *       { read, write, prefetch } x   *       { accesses, misses }   */ @@ -72,6 +72,7 @@ enum perf_hw_cache_id {  	PERF_COUNT_HW_CACHE_DTLB		= 3,  	PERF_COUNT_HW_CACHE_ITLB		= 4,  	PERF_COUNT_HW_CACHE_BPU			= 5, +	PERF_COUNT_HW_CACHE_NODE		= 6,  	PERF_COUNT_HW_CACHE_MAX,		/* non-ABI */  }; @@ -536,6 +537,16 @@ struct perf_branch_stack {  struct task_struct; +/* + * extra PMU register associated with an event + */ +struct hw_perf_event_extra { +	u64		config;	/* register value */ +	unsigned int	reg;	/* register address or index */ +	int		alloc;	/* extra register already allocated */ +	int		idx;	/* index in shared_regs->regs[] */ +}; +  /**   * struct hw_perf_event - performance event hardware details:   */ @@ -549,9 +560,7 @@ struct hw_perf_event {  			unsigned long	event_base;  			int		idx;  			int		last_cpu; -			unsigned int	extra_reg; -			u64		extra_config; -			int		extra_alloc; +			struct hw_perf_event_extra extra_reg;  		};  		struct { /* software */  			struct hrtimer	hrtimer; @@ -680,36 +689,9 @@ enum perf_event_active_state {  };  struct file; - -#define 
PERF_BUFFER_WRITABLE		0x01 - -struct perf_buffer { -	atomic_t			refcount; -	struct rcu_head			rcu_head; -#ifdef CONFIG_PERF_USE_VMALLOC -	struct work_struct		work; -	int				page_order;	/* allocation order  */ -#endif -	int				nr_pages;	/* nr of data pages  */ -	int				writable;	/* are we writable   */ - -	atomic_t			poll;		/* POLL_ for wakeups */ - -	local_t				head;		/* write position    */ -	local_t				nest;		/* nested writers    */ -	local_t				events;		/* event limit       */ -	local_t				wakeup;		/* wakeup stamp      */ -	local_t				lost;		/* nr records lost   */ - -	long				watermark;	/* wakeup watermark  */ - -	struct perf_event_mmap_page	*user_page; -	void				*data_pages[0]; -}; -  struct perf_sample_data; -typedef void (*perf_overflow_handler_t)(struct perf_event *, int, +typedef void (*perf_overflow_handler_t)(struct perf_event *,  					struct perf_sample_data *,  					struct pt_regs *regs); @@ -745,6 +727,8 @@ struct perf_cgroup {  };  #endif +struct ring_buffer; +  /**   * struct perf_event - performance event kernel representation:   */ @@ -834,7 +818,7 @@ struct perf_event {  	atomic_t			mmap_count;  	int				mmap_locked;  	struct user_struct		*mmap_user; -	struct perf_buffer		*buffer; +	struct ring_buffer		*rb;  	/* poll related */  	wait_queue_head_t		waitq; @@ -855,6 +839,7 @@ struct perf_event {  	u64				id;  	perf_overflow_handler_t		overflow_handler; +	void				*overflow_handler_context;  #ifdef CONFIG_EVENT_TRACING  	struct ftrace_event_call	*tp_event; @@ -919,8 +904,8 @@ struct perf_event_context {  	u64				parent_gen;  	u64				generation;  	int				pin_count; -	struct rcu_head			rcu_head;  	int				nr_cgroups; /* cgroup events present */ +	struct rcu_head			rcu_head;  };  /* @@ -945,13 +930,11 @@ struct perf_cpu_context {  struct perf_output_handle {  	struct perf_event		*event; -	struct perf_buffer		*buffer; +	struct ring_buffer		*rb;  	unsigned long			wakeup;  	unsigned long			size;  	void				*addr;  	int				page; -	int				nmi; -	int				sample;  };  #ifdef CONFIG_PERF_EVENTS @@ -972,13 +955,15 @@ extern void perf_pmu_disable(struct pmu *pmu);  extern void perf_pmu_enable(struct pmu *pmu);  extern int perf_event_task_disable(void);  extern int perf_event_task_enable(void); +extern int perf_event_refresh(struct perf_event *event, int refresh);  extern void perf_event_update_userpage(struct perf_event *event);  extern int perf_event_release_kernel(struct perf_event *event);  extern struct perf_event *  perf_event_create_kernel_counter(struct perf_event_attr *attr,  				int cpu,  				struct task_struct *task, -				perf_overflow_handler_t callback); +				perf_overflow_handler_t callback, +				void *context);  extern u64 perf_event_read_value(struct perf_event *event,  				 u64 *enabled, u64 *running); @@ -1018,7 +1003,7 @@ extern void perf_prepare_sample(struct perf_event_header *header,  				struct perf_event *event,  				struct pt_regs *regs); -extern int perf_event_overflow(struct perf_event *event, int nmi, +extern int perf_event_overflow(struct perf_event *event,  				 struct perf_sample_data *data,  				 struct pt_regs *regs); @@ -1037,7 +1022,7 @@ static inline int is_software_event(struct perf_event *event)  extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; -extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64); +extern void __perf_sw_event(u32, u64, struct pt_regs *, u64);  #ifndef perf_arch_fetch_caller_regs  static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { } @@ -1059,7 +1044,7 @@ static 
inline void perf_fetch_caller_regs(struct pt_regs *regs)  }  static __always_inline void -perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) +perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)  {  	struct pt_regs hot_regs; @@ -1068,7 +1053,7 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)  			perf_fetch_caller_regs(&hot_regs);  			regs = &hot_regs;  		} -		__perf_sw_event(event_id, nr, nmi, regs, addr); +		__perf_sw_event(event_id, nr, regs, addr);  	}  } @@ -1082,7 +1067,7 @@ static inline void perf_event_task_sched_in(struct task_struct *task)  static inline void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next)  { -	perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); +	perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0);  	__perf_event_task_sched_out(task, next);  } @@ -1143,8 +1128,7 @@ extern void perf_bp_event(struct perf_event *event, void *data);  #endif  extern int perf_output_begin(struct perf_output_handle *handle, -			     struct perf_event *event, unsigned int size, -			     int nmi, int sample); +			     struct perf_event *event, unsigned int size);  extern void perf_output_end(struct perf_output_handle *handle);  extern void perf_output_copy(struct perf_output_handle *handle,  			     const void *buf, unsigned int len); @@ -1166,10 +1150,13 @@ static inline void perf_event_delayed_put(struct task_struct *task)	{ }  static inline void perf_event_print_debug(void)				{ }  static inline int perf_event_task_disable(void)				{ return -EINVAL; }  static inline int perf_event_task_enable(void)				{ return -EINVAL; } +static inline int perf_event_refresh(struct perf_event *event, int refresh) +{ +	return -EINVAL; +}  static inline void -perf_sw_event(u32 event_id, u64 nr, int nmi, -		     struct pt_regs *regs, u64 addr)			{ } +perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)	{ }  static inline void  perf_bp_event(struct perf_event *event, void *data)			{ } diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index ab38ac80b0f9..b891de96000f 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -169,7 +169,7 @@ void ring_buffer_set_clock(struct ring_buffer *buffer,  size_t ring_buffer_page_len(void *page); -void *ring_buffer_alloc_read_page(struct ring_buffer *buffer); +void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu);  void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data);  int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page,  			  size_t len, int cpu, int full); diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 25310f1d7f37..115b570e3bff 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -14,8 +14,8 @@ struct stack_trace {  };  extern void save_stack_trace(struct stack_trace *trace); -extern void save_stack_trace_regs(struct stack_trace *trace, -				  struct pt_regs *regs); +extern void save_stack_trace_regs(struct pt_regs *regs, +				  struct stack_trace *trace);  extern void save_stack_trace_tsk(struct task_struct *tsk,  				struct stack_trace *trace); diff --git a/kernel/async.c b/kernel/async.c index cd9dbb913c77..d5fe7af0de2e 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -49,12 +49,13 @@ asynchronous and synchronous parts of the kernel.  
*/  #include <linux/async.h> +#include <linux/atomic.h> +#include <linux/ktime.h>  #include <linux/module.h>  #include <linux/wait.h>  #include <linux/sched.h>  #include <linux/slab.h>  #include <linux/workqueue.h> -#include <asm/atomic.h>  static async_cookie_t next_cookie = 1; @@ -128,7 +129,8 @@ static void async_run_entry_fn(struct work_struct *work)  	/* 2) run (and print duration) */  	if (initcall_debug && system_state == SYSTEM_BOOTING) { -		printk("calling  %lli_%pF @ %i\n", (long long)entry->cookie, +		printk(KERN_DEBUG "calling  %lli_%pF @ %i\n", +			(long long)entry->cookie,  			entry->func, task_pid_nr(current));  		calltime = ktime_get();  	} @@ -136,7 +138,7 @@ static void async_run_entry_fn(struct work_struct *work)  	if (initcall_debug && system_state == SYSTEM_BOOTING) {  		rettime = ktime_get();  		delta = ktime_sub(rettime, calltime); -		printk("initcall %lli_%pF returned 0 after %lld usecs\n", +		printk(KERN_DEBUG "initcall %lli_%pF returned 0 after %lld usecs\n",  			(long long)entry->cookie,  			entry->func,  			(long long)ktime_to_ns(delta) >> 10); @@ -270,7 +272,7 @@ void async_synchronize_cookie_domain(async_cookie_t cookie,  	ktime_t starttime, delta, endtime;  	if (initcall_debug && system_state == SYSTEM_BOOTING) { -		printk("async_waiting @ %i\n", task_pid_nr(current)); +		printk(KERN_DEBUG "async_waiting @ %i\n", task_pid_nr(current));  		starttime = ktime_get();  	} @@ -280,7 +282,7 @@ void async_synchronize_cookie_domain(async_cookie_t cookie,  		endtime = ktime_get();  		delta = ktime_sub(endtime, starttime); -		printk("async_continuing @ %i after %lli usec\n", +		printk(KERN_DEBUG "async_continuing @ %i after %lli usec\n",  			task_pid_nr(current),  			(long long)ktime_to_ns(delta) >> 10);  	} diff --git a/kernel/events/Makefile b/kernel/events/Makefile index 1ce23d3d8394..89e5e8aa4c36 100644 --- a/kernel/events/Makefile +++ b/kernel/events/Makefile @@ -2,5 +2,5 @@ ifdef CONFIG_FUNCTION_TRACER  CFLAGS_REMOVE_core.o = -pg  endif -obj-y := core.o +obj-y := core.o ring_buffer.o  obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o diff --git a/kernel/events/core.c b/kernel/events/core.c index 9efe7108ccaf..b8785e26ee1c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -36,6 +36,8 @@  #include <linux/ftrace_event.h>  #include <linux/hw_breakpoint.h> +#include "internal.h" +  #include <asm/irq_regs.h>  struct remote_function_call { @@ -200,6 +202,22 @@ __get_cpu_context(struct perf_event_context *ctx)  	return this_cpu_ptr(ctx->pmu->pmu_cpu_context);  } +static void perf_ctx_lock(struct perf_cpu_context *cpuctx, +			  struct perf_event_context *ctx) +{ +	raw_spin_lock(&cpuctx->ctx.lock); +	if (ctx) +		raw_spin_lock(&ctx->lock); +} + +static void perf_ctx_unlock(struct perf_cpu_context *cpuctx, +			    struct perf_event_context *ctx) +{ +	if (ctx) +		raw_spin_unlock(&ctx->lock); +	raw_spin_unlock(&cpuctx->ctx.lock); +} +  #ifdef CONFIG_CGROUP_PERF  /* @@ -340,11 +358,8 @@ void perf_cgroup_switch(struct task_struct *task, int mode)  	rcu_read_lock();  	list_for_each_entry_rcu(pmu, &pmus, entry) { -  		cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); -		perf_pmu_disable(cpuctx->ctx.pmu); -  		/*  		 * perf_cgroup_events says at least one  		 * context on this CPU has cgroup events. @@ -353,6 +368,8 @@ void perf_cgroup_switch(struct task_struct *task, int mode)  		 * events for a context.  		 
*/  		if (cpuctx->ctx.nr_cgroups > 0) { +			perf_ctx_lock(cpuctx, cpuctx->task_ctx); +			perf_pmu_disable(cpuctx->ctx.pmu);  			if (mode & PERF_CGROUP_SWOUT) {  				cpu_ctx_sched_out(cpuctx, EVENT_ALL); @@ -372,9 +389,9 @@ void perf_cgroup_switch(struct task_struct *task, int mode)  				cpuctx->cgrp = perf_cgroup_from_task(task);  				cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);  			} +			perf_pmu_enable(cpuctx->ctx.pmu); +			perf_ctx_unlock(cpuctx, cpuctx->task_ctx);  		} - -		perf_pmu_enable(cpuctx->ctx.pmu);  	}  	rcu_read_unlock(); @@ -731,6 +748,7 @@ static u64 perf_event_time(struct perf_event *event)  /*   * Update the total_time_enabled and total_time_running fields for a event. + * The caller of this function needs to hold the ctx->lock.   */  static void update_event_times(struct perf_event *event)  { @@ -1105,6 +1123,10 @@ static int __perf_remove_from_context(void *info)  	raw_spin_lock(&ctx->lock);  	event_sched_out(event, cpuctx, ctx);  	list_del_event(event, ctx); +	if (!ctx->nr_events && cpuctx->task_ctx == ctx) { +		ctx->is_active = 0; +		cpuctx->task_ctx = NULL; +	}  	raw_spin_unlock(&ctx->lock);  	return 0; @@ -1454,8 +1476,24 @@ static void add_event_to_ctx(struct perf_event *event,  	event->tstamp_stopped = tstamp;  } -static void perf_event_context_sched_in(struct perf_event_context *ctx, -					struct task_struct *tsk); +static void task_ctx_sched_out(struct perf_event_context *ctx); +static void +ctx_sched_in(struct perf_event_context *ctx, +	     struct perf_cpu_context *cpuctx, +	     enum event_type_t event_type, +	     struct task_struct *task); + +static void perf_event_sched_in(struct perf_cpu_context *cpuctx, +				struct perf_event_context *ctx, +				struct task_struct *task) +{ +	cpu_ctx_sched_in(cpuctx, EVENT_PINNED, task); +	if (ctx) +		ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task); +	cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task); +	if (ctx) +		ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task); +}  /*   * Cross CPU call to install and enable a performance event @@ -1466,20 +1504,37 @@ static int  __perf_install_in_context(void *info)  {  	struct perf_event *event = info;  	struct perf_event_context *ctx = event->ctx; -	struct perf_event *leader = event->group_leader;  	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); -	int err; +	struct perf_event_context *task_ctx = cpuctx->task_ctx; +	struct task_struct *task = current; + +	perf_ctx_lock(cpuctx, task_ctx); +	perf_pmu_disable(cpuctx->ctx.pmu);  	/* -	 * In case we're installing a new context to an already running task, -	 * could also happen before perf_event_task_sched_in() on architectures -	 * which do context switches with IRQs enabled. +	 * If there was an active task_ctx schedule it out.  	 */ -	if (ctx->task && !cpuctx->task_ctx) -		perf_event_context_sched_in(ctx, ctx->task); +	if (task_ctx) +		task_ctx_sched_out(task_ctx); + +	/* +	 * If the context we're installing events in is not the +	 * active task_ctx, flip them. 
+	 */ +	if (ctx->task && task_ctx != ctx) { +		if (task_ctx) +			raw_spin_unlock(&task_ctx->lock); +		raw_spin_lock(&ctx->lock); +		task_ctx = ctx; +	} + +	if (task_ctx) { +		cpuctx->task_ctx = task_ctx; +		task = task_ctx->task; +	} + +	cpu_ctx_sched_out(cpuctx, EVENT_ALL); -	raw_spin_lock(&ctx->lock); -	ctx->is_active = 1;  	update_context_time(ctx);  	/*  	 * update cgrp time only if current cgrp @@ -1490,43 +1545,13 @@ static int  __perf_install_in_context(void *info)  	add_event_to_ctx(event, ctx); -	if (!event_filter_match(event)) -		goto unlock; - -	/* -	 * Don't put the event on if it is disabled or if -	 * it is in a group and the group isn't on. -	 */ -	if (event->state != PERF_EVENT_STATE_INACTIVE || -	    (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE)) -		goto unlock; -  	/* -	 * An exclusive event can't go on if there are already active -	 * hardware events, and no hardware event can go on if there -	 * is already an exclusive event on. +	 * Schedule everything back in  	 */ -	if (!group_can_go_on(event, cpuctx, 1)) -		err = -EEXIST; -	else -		err = event_sched_in(event, cpuctx, ctx); - -	if (err) { -		/* -		 * This event couldn't go on.  If it is in a group -		 * then we have to pull the whole group off. -		 * If the event group is pinned then put it in error state. -		 */ -		if (leader != event) -			group_sched_out(leader, cpuctx, ctx); -		if (leader->attr.pinned) { -			update_group_times(leader); -			leader->state = PERF_EVENT_STATE_ERROR; -		} -	} +	perf_event_sched_in(cpuctx, task_ctx, task); -unlock: -	raw_spin_unlock(&ctx->lock); +	perf_pmu_enable(cpuctx->ctx.pmu); +	perf_ctx_unlock(cpuctx, task_ctx);  	return 0;  } @@ -1739,7 +1764,7 @@ out:  	raw_spin_unlock_irq(&ctx->lock);  } -static int perf_event_refresh(struct perf_event *event, int refresh) +int perf_event_refresh(struct perf_event *event, int refresh)  {  	/*  	 * not supported on inherited events @@ -1752,36 +1777,35 @@ static int perf_event_refresh(struct perf_event *event, int refresh)  	return 0;  } +EXPORT_SYMBOL_GPL(perf_event_refresh);  static void ctx_sched_out(struct perf_event_context *ctx,  			  struct perf_cpu_context *cpuctx,  			  enum event_type_t event_type)  {  	struct perf_event *event; +	int is_active = ctx->is_active; -	raw_spin_lock(&ctx->lock); -	perf_pmu_disable(ctx->pmu); -	ctx->is_active = 0; +	ctx->is_active &= ~event_type;  	if (likely(!ctx->nr_events)) -		goto out; +		return; +  	update_context_time(ctx);  	update_cgrp_time_from_cpuctx(cpuctx); -  	if (!ctx->nr_active) -		goto out; +		return; -	if (event_type & EVENT_PINNED) { +	perf_pmu_disable(ctx->pmu); +	if ((is_active & EVENT_PINNED) && (event_type & EVENT_PINNED)) {  		list_for_each_entry(event, &ctx->pinned_groups, group_entry)  			group_sched_out(event, cpuctx, ctx);  	} -	if (event_type & EVENT_FLEXIBLE) { +	if ((is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE)) {  		list_for_each_entry(event, &ctx->flexible_groups, group_entry)  			group_sched_out(event, cpuctx, ctx);  	} -out:  	perf_pmu_enable(ctx->pmu); -	raw_spin_unlock(&ctx->lock);  }  /* @@ -1929,8 +1953,10 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,  	rcu_read_unlock();  	if (do_switch) { +		raw_spin_lock(&ctx->lock);  		ctx_sched_out(ctx, cpuctx, EVENT_ALL);  		cpuctx->task_ctx = NULL; +		raw_spin_unlock(&ctx->lock);  	}  } @@ -1965,8 +1991,7 @@ void __perf_event_task_sched_out(struct task_struct *task,  		perf_cgroup_sched_out(task);  } -static void task_ctx_sched_out(struct perf_event_context *ctx, -			
       enum event_type_t event_type) +static void task_ctx_sched_out(struct perf_event_context *ctx)  {  	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); @@ -1976,7 +2001,7 @@ static void task_ctx_sched_out(struct perf_event_context *ctx,  	if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))  		return; -	ctx_sched_out(ctx, cpuctx, event_type); +	ctx_sched_out(ctx, cpuctx, EVENT_ALL);  	cpuctx->task_ctx = NULL;  } @@ -2055,11 +2080,11 @@ ctx_sched_in(struct perf_event_context *ctx,  	     struct task_struct *task)  {  	u64 now; +	int is_active = ctx->is_active; -	raw_spin_lock(&ctx->lock); -	ctx->is_active = 1; +	ctx->is_active |= event_type;  	if (likely(!ctx->nr_events)) -		goto out; +		return;  	now = perf_clock();  	ctx->timestamp = now; @@ -2068,15 +2093,12 @@ ctx_sched_in(struct perf_event_context *ctx,  	 * First go through the list and put on any pinned groups  	 * in order to give them the best chance of going on.  	 */ -	if (event_type & EVENT_PINNED) +	if (!(is_active & EVENT_PINNED) && (event_type & EVENT_PINNED))  		ctx_pinned_sched_in(ctx, cpuctx);  	/* Then walk through the lower prio flexible groups */ -	if (event_type & EVENT_FLEXIBLE) +	if (!(is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE))  		ctx_flexible_sched_in(ctx, cpuctx); - -out: -	raw_spin_unlock(&ctx->lock);  }  static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, @@ -2088,19 +2110,6 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,  	ctx_sched_in(ctx, cpuctx, event_type, task);  } -static void task_ctx_sched_in(struct perf_event_context *ctx, -			      enum event_type_t event_type) -{ -	struct perf_cpu_context *cpuctx; - -	cpuctx = __get_cpu_context(ctx); -	if (cpuctx->task_ctx == ctx) -		return; - -	ctx_sched_in(ctx, cpuctx, event_type, NULL); -	cpuctx->task_ctx = ctx; -} -  static void perf_event_context_sched_in(struct perf_event_context *ctx,  					struct task_struct *task)  { @@ -2110,6 +2119,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,  	if (cpuctx->task_ctx == ctx)  		return; +	perf_ctx_lock(cpuctx, ctx);  	perf_pmu_disable(ctx->pmu);  	/*  	 * We want to keep the following priority order: @@ -2118,18 +2128,18 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,  	 */  	cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); -	ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task); -	cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task); -	ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task); +	perf_event_sched_in(cpuctx, ctx, task);  	cpuctx->task_ctx = ctx; +	perf_pmu_enable(ctx->pmu); +	perf_ctx_unlock(cpuctx, ctx); +  	/*  	 * Since these rotations are per-cpu, we need to ensure the  	 * cpu-context we got scheduled on is actually rotating.  	 
*/  	perf_pmu_rotate_start(ctx->pmu); -	perf_pmu_enable(ctx->pmu);  }  /* @@ -2269,7 +2279,6 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)  	u64 interrupts, now;  	s64 delta; -	raw_spin_lock(&ctx->lock);  	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {  		if (event->state != PERF_EVENT_STATE_ACTIVE)  			continue; @@ -2301,7 +2310,6 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)  		if (delta > 0)  			perf_adjust_period(event, period, delta);  	} -	raw_spin_unlock(&ctx->lock);  }  /* @@ -2309,16 +2317,12 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)   */  static void rotate_ctx(struct perf_event_context *ctx)  { -	raw_spin_lock(&ctx->lock); -  	/*  	 * Rotate the first entry last of non-pinned groups. Rotation might be  	 * disabled by the inheritance code.  	 */  	if (!ctx->rotate_disable)  		list_rotate_left(&ctx->flexible_groups); - -	raw_spin_unlock(&ctx->lock);  }  /* @@ -2345,6 +2349,7 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)  			rotate = 1;  	} +	perf_ctx_lock(cpuctx, cpuctx->task_ctx);  	perf_pmu_disable(cpuctx->ctx.pmu);  	perf_ctx_adjust_freq(&cpuctx->ctx, interval);  	if (ctx) @@ -2355,21 +2360,20 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)  	cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);  	if (ctx) -		task_ctx_sched_out(ctx, EVENT_FLEXIBLE); +		ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);  	rotate_ctx(&cpuctx->ctx);  	if (ctx)  		rotate_ctx(ctx); -	cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, current); -	if (ctx) -		task_ctx_sched_in(ctx, EVENT_FLEXIBLE); +	perf_event_sched_in(cpuctx, ctx, current);  done:  	if (remove)  		list_del_init(&cpuctx->rotation_list);  	perf_pmu_enable(cpuctx->ctx.pmu); +	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);  }  void perf_event_task_tick(void) @@ -2424,9 +2428,9 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx)  	 * in.  	 
*/  	perf_cgroup_sched_out(current); -	task_ctx_sched_out(ctx, EVENT_ALL);  	raw_spin_lock(&ctx->lock); +	task_ctx_sched_out(ctx);  	list_for_each_entry(event, &ctx->pinned_groups, group_entry) {  		ret = event_enable_on_exec(event, ctx); @@ -2835,16 +2839,12 @@ retry:  		unclone_ctx(ctx);  		++ctx->pin_count;  		raw_spin_unlock_irqrestore(&ctx->lock, flags); -	} - -	if (!ctx) { +	} else {  		ctx = alloc_perf_context(pmu, task);  		err = -ENOMEM;  		if (!ctx)  			goto errout; -		get_ctx(ctx); -  		err = 0;  		mutex_lock(&task->perf_event_mutex);  		/* @@ -2856,14 +2856,14 @@ retry:  		else if (task->perf_event_ctxp[ctxn])  			err = -EAGAIN;  		else { +			get_ctx(ctx);  			++ctx->pin_count;  			rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx);  		}  		mutex_unlock(&task->perf_event_mutex);  		if (unlikely(err)) { -			put_task_struct(task); -			kfree(ctx); +			put_ctx(ctx);  			if (err == -EAGAIN)  				goto retry; @@ -2890,7 +2890,7 @@ static void free_event_rcu(struct rcu_head *head)  	kfree(event);  } -static void perf_buffer_put(struct perf_buffer *buffer); +static void ring_buffer_put(struct ring_buffer *rb);  static void free_event(struct perf_event *event)  { @@ -2913,9 +2913,9 @@ static void free_event(struct perf_event *event)  		}  	} -	if (event->buffer) { -		perf_buffer_put(event->buffer); -		event->buffer = NULL; +	if (event->rb) { +		ring_buffer_put(event->rb); +		event->rb = NULL;  	}  	if (is_cgroup_event(event)) @@ -2934,12 +2934,6 @@ int perf_event_release_kernel(struct perf_event *event)  {  	struct perf_event_context *ctx = event->ctx; -	/* -	 * Remove from the PMU, can't get re-enabled since we got -	 * here because the last ref went. -	 */ -	perf_event_disable(event); -  	WARN_ON_ONCE(ctx->parent_ctx);  	/*  	 * There are two ways this annotation is useful: @@ -2956,8 +2950,8 @@ int perf_event_release_kernel(struct perf_event *event)  	mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);  	raw_spin_lock_irq(&ctx->lock);  	perf_group_detach(event); -	list_del_event(event, ctx);  	raw_spin_unlock_irq(&ctx->lock); +	perf_remove_from_context(event);  	mutex_unlock(&ctx->mutex);  	free_event(event); @@ -3149,13 +3143,13 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)  static unsigned int perf_poll(struct file *file, poll_table *wait)  {  	struct perf_event *event = file->private_data; -	struct perf_buffer *buffer; +	struct ring_buffer *rb;  	unsigned int events = POLL_HUP;  	rcu_read_lock(); -	buffer = rcu_dereference(event->buffer); -	if (buffer) -		events = atomic_xchg(&buffer->poll, 0); +	rb = rcu_dereference(event->rb); +	if (rb) +		events = atomic_xchg(&rb->poll, 0);  	rcu_read_unlock();  	poll_wait(file, &event->waitq, wait); @@ -3358,6 +3352,18 @@ static int perf_event_index(struct perf_event *event)  	return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET;  } +static void calc_timer_values(struct perf_event *event, +				u64 *running, +				u64 *enabled) +{ +	u64 now, ctx_time; + +	now = perf_clock(); +	ctx_time = event->shadow_ctx_time + now; +	*enabled = ctx_time - event->tstamp_enabled; +	*running = ctx_time - event->tstamp_running; +} +  /*   * Callers need to ensure there can be no nesting of this function, otherwise   * the seqlock logic goes bad. 
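
The enabled/running times that calc_timer_values() derives from event->shadow_ctx_time feed the mmap control page and the sampled read records below. A user-space consumer that asked for PERF_FORMAT_TOTAL_TIME_ENABLED and PERF_FORMAT_TOTAL_TIME_RUNNING can use those two times to scale a multiplexed count. A minimal sketch, assuming the conventional non-group read layout (value, time_enabled, time_running); the struct and function names are illustrative:

#include <stdint.h>
#include <unistd.h>

struct read_fmt {
	uint64_t value;		/* raw count */
	uint64_t time_enabled;	/* ns the event was enabled */
	uint64_t time_running;	/* ns it actually ran on the PMU */
};

/* Extrapolate a multiplexed count: value * enabled / running. */
static uint64_t read_scaled(int perf_fd)
{
	struct read_fmt rf;

	if (read(perf_fd, &rf, sizeof(rf)) != sizeof(rf))
		return 0;
	if (!rf.time_running)
		return 0;
	return (uint64_t)((double)rf.value * rf.time_enabled / rf.time_running);
}
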
We can not serialize this because the arch @@ -3366,14 +3372,25 @@ static int perf_event_index(struct perf_event *event)  void perf_event_update_userpage(struct perf_event *event)  {  	struct perf_event_mmap_page *userpg; -	struct perf_buffer *buffer; +	struct ring_buffer *rb; +	u64 enabled, running;  	rcu_read_lock(); -	buffer = rcu_dereference(event->buffer); -	if (!buffer) +	/* +	 * compute total_time_enabled, total_time_running +	 * based on snapshot values taken when the event +	 * was last scheduled in. +	 * +	 * we cannot simply called update_context_time() +	 * because of locking issue as we can be called in +	 * NMI context +	 */ +	calc_timer_values(event, &enabled, &running); +	rb = rcu_dereference(event->rb); +	if (!rb)  		goto unlock; -	userpg = buffer->user_page; +	userpg = rb->user_page;  	/*  	 * Disable preemption so as to not let the corresponding user-space @@ -3387,10 +3404,10 @@ void perf_event_update_userpage(struct perf_event *event)  	if (event->state == PERF_EVENT_STATE_ACTIVE)  		userpg->offset -= local64_read(&event->hw.prev_count); -	userpg->time_enabled = event->total_time_enabled + +	userpg->time_enabled = enabled +  			atomic64_read(&event->child_total_time_enabled); -	userpg->time_running = event->total_time_running + +	userpg->time_running = running +  			atomic64_read(&event->child_total_time_running);  	barrier(); @@ -3400,220 +3417,10 @@ unlock:  	rcu_read_unlock();  } -static unsigned long perf_data_size(struct perf_buffer *buffer); - -static void -perf_buffer_init(struct perf_buffer *buffer, long watermark, int flags) -{ -	long max_size = perf_data_size(buffer); - -	if (watermark) -		buffer->watermark = min(max_size, watermark); - -	if (!buffer->watermark) -		buffer->watermark = max_size / 2; - -	if (flags & PERF_BUFFER_WRITABLE) -		buffer->writable = 1; - -	atomic_set(&buffer->refcount, 1); -} - -#ifndef CONFIG_PERF_USE_VMALLOC - -/* - * Back perf_mmap() with regular GFP_KERNEL-0 pages. - */ - -static struct page * -perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff) -{ -	if (pgoff > buffer->nr_pages) -		return NULL; - -	if (pgoff == 0) -		return virt_to_page(buffer->user_page); - -	return virt_to_page(buffer->data_pages[pgoff - 1]); -} - -static void *perf_mmap_alloc_page(int cpu) -{ -	struct page *page; -	int node; - -	node = (cpu == -1) ? 
cpu : cpu_to_node(cpu); -	page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); -	if (!page) -		return NULL; - -	return page_address(page); -} - -static struct perf_buffer * -perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags) -{ -	struct perf_buffer *buffer; -	unsigned long size; -	int i; - -	size = sizeof(struct perf_buffer); -	size += nr_pages * sizeof(void *); - -	buffer = kzalloc(size, GFP_KERNEL); -	if (!buffer) -		goto fail; - -	buffer->user_page = perf_mmap_alloc_page(cpu); -	if (!buffer->user_page) -		goto fail_user_page; - -	for (i = 0; i < nr_pages; i++) { -		buffer->data_pages[i] = perf_mmap_alloc_page(cpu); -		if (!buffer->data_pages[i]) -			goto fail_data_pages; -	} - -	buffer->nr_pages = nr_pages; - -	perf_buffer_init(buffer, watermark, flags); - -	return buffer; - -fail_data_pages: -	for (i--; i >= 0; i--) -		free_page((unsigned long)buffer->data_pages[i]); - -	free_page((unsigned long)buffer->user_page); - -fail_user_page: -	kfree(buffer); - -fail: -	return NULL; -} - -static void perf_mmap_free_page(unsigned long addr) -{ -	struct page *page = virt_to_page((void *)addr); - -	page->mapping = NULL; -	__free_page(page); -} - -static void perf_buffer_free(struct perf_buffer *buffer) -{ -	int i; - -	perf_mmap_free_page((unsigned long)buffer->user_page); -	for (i = 0; i < buffer->nr_pages; i++) -		perf_mmap_free_page((unsigned long)buffer->data_pages[i]); -	kfree(buffer); -} - -static inline int page_order(struct perf_buffer *buffer) -{ -	return 0; -} - -#else - -/* - * Back perf_mmap() with vmalloc memory. - * - * Required for architectures that have d-cache aliasing issues. - */ - -static inline int page_order(struct perf_buffer *buffer) -{ -	return buffer->page_order; -} - -static struct page * -perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff) -{ -	if (pgoff > (1UL << page_order(buffer))) -		return NULL; - -	return vmalloc_to_page((void *)buffer->user_page + pgoff * PAGE_SIZE); -} - -static void perf_mmap_unmark_page(void *addr) -{ -	struct page *page = vmalloc_to_page(addr); - -	page->mapping = NULL; -} - -static void perf_buffer_free_work(struct work_struct *work) -{ -	struct perf_buffer *buffer; -	void *base; -	int i, nr; - -	buffer = container_of(work, struct perf_buffer, work); -	nr = 1 << page_order(buffer); - -	base = buffer->user_page; -	for (i = 0; i < nr + 1; i++) -		perf_mmap_unmark_page(base + (i * PAGE_SIZE)); - -	vfree(base); -	kfree(buffer); -} - -static void perf_buffer_free(struct perf_buffer *buffer) -{ -	schedule_work(&buffer->work); -} - -static struct perf_buffer * -perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags) -{ -	struct perf_buffer *buffer; -	unsigned long size; -	void *all_buf; - -	size = sizeof(struct perf_buffer); -	size += sizeof(void *); - -	buffer = kzalloc(size, GFP_KERNEL); -	if (!buffer) -		goto fail; - -	INIT_WORK(&buffer->work, perf_buffer_free_work); - -	all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE); -	if (!all_buf) -		goto fail_all_buf; - -	buffer->user_page = all_buf; -	buffer->data_pages[0] = all_buf + PAGE_SIZE; -	buffer->page_order = ilog2(nr_pages); -	buffer->nr_pages = 1; - -	perf_buffer_init(buffer, watermark, flags); - -	return buffer; - -fail_all_buf: -	kfree(buffer); - -fail: -	return NULL; -} - -#endif - -static unsigned long perf_data_size(struct perf_buffer *buffer) -{ -	return buffer->nr_pages << (PAGE_SHIFT + page_order(buffer)); -} -  static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)  {  	struct perf_event *event = 
vma->vm_file->private_data; -	struct perf_buffer *buffer; +	struct ring_buffer *rb;  	int ret = VM_FAULT_SIGBUS;  	if (vmf->flags & FAULT_FLAG_MKWRITE) { @@ -3623,14 +3430,14 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)  	}  	rcu_read_lock(); -	buffer = rcu_dereference(event->buffer); -	if (!buffer) +	rb = rcu_dereference(event->rb); +	if (!rb)  		goto unlock;  	if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))  		goto unlock; -	vmf->page = perf_mmap_to_page(buffer, vmf->pgoff); +	vmf->page = perf_mmap_to_page(rb, vmf->pgoff);  	if (!vmf->page)  		goto unlock; @@ -3645,35 +3452,35 @@ unlock:  	return ret;  } -static void perf_buffer_free_rcu(struct rcu_head *rcu_head) +static void rb_free_rcu(struct rcu_head *rcu_head)  { -	struct perf_buffer *buffer; +	struct ring_buffer *rb; -	buffer = container_of(rcu_head, struct perf_buffer, rcu_head); -	perf_buffer_free(buffer); +	rb = container_of(rcu_head, struct ring_buffer, rcu_head); +	rb_free(rb);  } -static struct perf_buffer *perf_buffer_get(struct perf_event *event) +static struct ring_buffer *ring_buffer_get(struct perf_event *event)  { -	struct perf_buffer *buffer; +	struct ring_buffer *rb;  	rcu_read_lock(); -	buffer = rcu_dereference(event->buffer); -	if (buffer) { -		if (!atomic_inc_not_zero(&buffer->refcount)) -			buffer = NULL; +	rb = rcu_dereference(event->rb); +	if (rb) { +		if (!atomic_inc_not_zero(&rb->refcount)) +			rb = NULL;  	}  	rcu_read_unlock(); -	return buffer; +	return rb;  } -static void perf_buffer_put(struct perf_buffer *buffer) +static void ring_buffer_put(struct ring_buffer *rb)  { -	if (!atomic_dec_and_test(&buffer->refcount)) +	if (!atomic_dec_and_test(&rb->refcount))  		return; -	call_rcu(&buffer->rcu_head, perf_buffer_free_rcu); +	call_rcu(&rb->rcu_head, rb_free_rcu);  }  static void perf_mmap_open(struct vm_area_struct *vma) @@ -3688,16 +3495,16 @@ static void perf_mmap_close(struct vm_area_struct *vma)  	struct perf_event *event = vma->vm_file->private_data;  	if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { -		unsigned long size = perf_data_size(event->buffer); +		unsigned long size = perf_data_size(event->rb);  		struct user_struct *user = event->mmap_user; -		struct perf_buffer *buffer = event->buffer; +		struct ring_buffer *rb = event->rb;  		atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);  		vma->vm_mm->locked_vm -= event->mmap_locked; -		rcu_assign_pointer(event->buffer, NULL); +		rcu_assign_pointer(event->rb, NULL);  		mutex_unlock(&event->mmap_mutex); -		perf_buffer_put(buffer); +		ring_buffer_put(rb);  		free_uid(user);  	}  } @@ -3715,7 +3522,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)  	unsigned long user_locked, user_lock_limit;  	struct user_struct *user = current_user();  	unsigned long locked, lock_limit; -	struct perf_buffer *buffer; +	struct ring_buffer *rb;  	unsigned long vma_size;  	unsigned long nr_pages;  	long user_extra, extra; @@ -3724,7 +3531,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)  	/*  	 * Don't allow mmap() of inherited per-task counters. This would  	 * create a performance issue due to all children writing to the -	 * same buffer. +	 * same rb.  	 
*/  	if (event->cpu == -1 && event->attr.inherit)  		return -EINVAL; @@ -3736,7 +3543,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)  	nr_pages = (vma_size / PAGE_SIZE) - 1;  	/* -	 * If we have buffer pages ensure they're a power-of-two number, so we +	 * If we have rb pages ensure they're a power-of-two number, so we  	 * can do bitmasks instead of modulo.  	 */  	if (nr_pages != 0 && !is_power_of_2(nr_pages)) @@ -3750,9 +3557,9 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)  	WARN_ON_ONCE(event->ctx->parent_ctx);  	mutex_lock(&event->mmap_mutex); -	if (event->buffer) { -		if (event->buffer->nr_pages == nr_pages) -			atomic_inc(&event->buffer->refcount); +	if (event->rb) { +		if (event->rb->nr_pages == nr_pages) +			atomic_inc(&event->rb->refcount);  		else  			ret = -EINVAL;  		goto unlock; @@ -3782,18 +3589,20 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)  		goto unlock;  	} -	WARN_ON(event->buffer); +	WARN_ON(event->rb);  	if (vma->vm_flags & VM_WRITE) -		flags |= PERF_BUFFER_WRITABLE; +		flags |= RING_BUFFER_WRITABLE; -	buffer = perf_buffer_alloc(nr_pages, event->attr.wakeup_watermark, -				   event->cpu, flags); -	if (!buffer) { +	rb = rb_alloc(nr_pages,  +		event->attr.watermark ? event->attr.wakeup_watermark : 0, +		event->cpu, flags); + +	if (!rb) {  		ret = -ENOMEM;  		goto unlock;  	} -	rcu_assign_pointer(event->buffer, buffer); +	rcu_assign_pointer(event->rb, rb);  	atomic_long_add(user_extra, &user->locked_vm);  	event->mmap_locked = extra; @@ -3892,117 +3701,6 @@ int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)  }  EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks); -/* - * Output - */ -static bool perf_output_space(struct perf_buffer *buffer, unsigned long tail, -			      unsigned long offset, unsigned long head) -{ -	unsigned long mask; - -	if (!buffer->writable) -		return true; - -	mask = perf_data_size(buffer) - 1; - -	offset = (offset - tail) & mask; -	head   = (head   - tail) & mask; - -	if ((int)(head - offset) < 0) -		return false; - -	return true; -} - -static void perf_output_wakeup(struct perf_output_handle *handle) -{ -	atomic_set(&handle->buffer->poll, POLL_IN); - -	if (handle->nmi) { -		handle->event->pending_wakeup = 1; -		irq_work_queue(&handle->event->pending); -	} else -		perf_event_wakeup(handle->event); -} - -/* - * We need to ensure a later event_id doesn't publish a head when a former - * event isn't done writing. However since we need to deal with NMIs we - * cannot fully serialize things. - * - * We only publish the head (and generate a wakeup) when the outer-most - * event completes. - */ -static void perf_output_get_handle(struct perf_output_handle *handle) -{ -	struct perf_buffer *buffer = handle->buffer; - -	preempt_disable(); -	local_inc(&buffer->nest); -	handle->wakeup = local_read(&buffer->wakeup); -} - -static void perf_output_put_handle(struct perf_output_handle *handle) -{ -	struct perf_buffer *buffer = handle->buffer; -	unsigned long head; - -again: -	head = local_read(&buffer->head); - -	/* -	 * IRQ/NMI can happen here, which means we can miss a head update. -	 */ - -	if (!local_dec_and_test(&buffer->nest)) -		goto out; - -	/* -	 * Publish the known good head. Rely on the full barrier implied -	 * by atomic_dec_and_test() order the buffer->head read and this -	 * write. 
-	 */ -	buffer->user_page->data_head = head; - -	/* -	 * Now check if we missed an update, rely on the (compiler) -	 * barrier in atomic_dec_and_test() to re-read buffer->head. -	 */ -	if (unlikely(head != local_read(&buffer->head))) { -		local_inc(&buffer->nest); -		goto again; -	} - -	if (handle->wakeup != local_read(&buffer->wakeup)) -		perf_output_wakeup(handle); - -out: -	preempt_enable(); -} - -__always_inline void perf_output_copy(struct perf_output_handle *handle, -		      const void *buf, unsigned int len) -{ -	do { -		unsigned long size = min_t(unsigned long, handle->size, len); - -		memcpy(handle->addr, buf, size); - -		len -= size; -		handle->addr += size; -		buf += size; -		handle->size -= size; -		if (!handle->size) { -			struct perf_buffer *buffer = handle->buffer; - -			handle->page++; -			handle->page &= buffer->nr_pages - 1; -			handle->addr = buffer->data_pages[handle->page]; -			handle->size = PAGE_SIZE << page_order(buffer); -		} -	} while (len); -} -  static void __perf_event_header__init_id(struct perf_event_header *header,  					 struct perf_sample_data *data,  					 struct perf_event *event) @@ -4033,9 +3731,9 @@ static void __perf_event_header__init_id(struct perf_event_header *header,  	}  } -static void perf_event_header__init_id(struct perf_event_header *header, -				       struct perf_sample_data *data, -				       struct perf_event *event) +void perf_event_header__init_id(struct perf_event_header *header, +				struct perf_sample_data *data, +				struct perf_event *event)  {  	if (event->attr.sample_id_all)  		__perf_event_header__init_id(header, data, event); @@ -4062,121 +3760,14 @@ static void __perf_event__output_id_sample(struct perf_output_handle *handle,  		perf_output_put(handle, data->cpu_entry);  } -static void perf_event__output_id_sample(struct perf_event *event, -					 struct perf_output_handle *handle, -					 struct perf_sample_data *sample) +void perf_event__output_id_sample(struct perf_event *event, +				  struct perf_output_handle *handle, +				  struct perf_sample_data *sample)  {  	if (event->attr.sample_id_all)  		__perf_event__output_id_sample(handle, sample);  } -int perf_output_begin(struct perf_output_handle *handle, -		      struct perf_event *event, unsigned int size, -		      int nmi, int sample) -{ -	struct perf_buffer *buffer; -	unsigned long tail, offset, head; -	int have_lost; -	struct perf_sample_data sample_data; -	struct { -		struct perf_event_header header; -		u64			 id; -		u64			 lost; -	} lost_event; - -	rcu_read_lock(); -	/* -	 * For inherited events we send all the output towards the parent. -	 */ -	if (event->parent) -		event = event->parent; - -	buffer = rcu_dereference(event->buffer); -	if (!buffer) -		goto out; - -	handle->buffer	= buffer; -	handle->event	= event; -	handle->nmi	= nmi; -	handle->sample	= sample; - -	if (!buffer->nr_pages) -		goto out; - -	have_lost = local_read(&buffer->lost); -	if (have_lost) { -		lost_event.header.size = sizeof(lost_event); -		perf_event_header__init_id(&lost_event.header, &sample_data, -					   event); -		size += lost_event.header.size; -	} - -	perf_output_get_handle(handle); - -	do { -		/* -		 * Userspace could choose to issue a mb() before updating the -		 * tail pointer. So that all reads will be completed before the -		 * write is issued. 
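
The comment above states the user-space side of the contract: the kernel publishes data_head, and a reader must finish copying records before it stores a new data_tail. A minimal reader sketch, assuming the fd was mmap()ed as one control page plus a power-of-two number of data pages (as perf_mmap() enforces) and a hypothetical consume_record() callback; records that wrap the end of the buffer are ignored for brevity:

#include <linux/perf_event.h>
#include <stddef.h>
#include <stdint.h>

/* Drain complete records from a mapping whose data area is `size' bytes. */
static void drain_ring(struct perf_event_mmap_page *up, char *data, size_t size,
		       void (*consume_record)(const struct perf_event_header *rec))
{
	uint64_t head = up->data_head;		/* advanced by the kernel */
	uint64_t tail = up->data_tail;		/* advanced by the reader */

	__sync_synchronize();			/* read head before the data */

	while (tail < head) {
		const struct perf_event_header *hdr =
			(const void *)(data + (tail & (size - 1)));

		consume_record(hdr);		/* sketch only: assumes no wrap */
		tail += hdr->size;
	}

	__sync_synchronize();			/* finish reads, then free the space */
	up->data_tail = tail;
}
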
-		 */ -		tail = ACCESS_ONCE(buffer->user_page->data_tail); -		smp_rmb(); -		offset = head = local_read(&buffer->head); -		head += size; -		if (unlikely(!perf_output_space(buffer, tail, offset, head))) -			goto fail; -	} while (local_cmpxchg(&buffer->head, offset, head) != offset); - -	if (head - local_read(&buffer->wakeup) > buffer->watermark) -		local_add(buffer->watermark, &buffer->wakeup); - -	handle->page = offset >> (PAGE_SHIFT + page_order(buffer)); -	handle->page &= buffer->nr_pages - 1; -	handle->size = offset & ((PAGE_SIZE << page_order(buffer)) - 1); -	handle->addr = buffer->data_pages[handle->page]; -	handle->addr += handle->size; -	handle->size = (PAGE_SIZE << page_order(buffer)) - handle->size; - -	if (have_lost) { -		lost_event.header.type = PERF_RECORD_LOST; -		lost_event.header.misc = 0; -		lost_event.id          = event->id; -		lost_event.lost        = local_xchg(&buffer->lost, 0); - -		perf_output_put(handle, lost_event); -		perf_event__output_id_sample(event, handle, &sample_data); -	} - -	return 0; - -fail: -	local_inc(&buffer->lost); -	perf_output_put_handle(handle); -out: -	rcu_read_unlock(); - -	return -ENOSPC; -} - -void perf_output_end(struct perf_output_handle *handle) -{ -	struct perf_event *event = handle->event; -	struct perf_buffer *buffer = handle->buffer; - -	int wakeup_events = event->attr.wakeup_events; - -	if (handle->sample && wakeup_events) { -		int events = local_inc_return(&buffer->events); -		if (events >= wakeup_events) { -			local_sub(wakeup_events, &buffer->events); -			local_inc(&buffer->wakeup); -		} -	} - -	perf_output_put_handle(handle); -	rcu_read_unlock(); -} -  static void perf_output_read_one(struct perf_output_handle *handle,  				 struct perf_event *event,  				 u64 enabled, u64 running) @@ -4197,7 +3788,7 @@ static void perf_output_read_one(struct perf_output_handle *handle,  	if (read_format & PERF_FORMAT_ID)  		values[n++] = primary_event_id(event); -	perf_output_copy(handle, values, n * sizeof(u64)); +	__output_copy(handle, values, n * sizeof(u64));  }  /* @@ -4227,7 +3818,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,  	if (read_format & PERF_FORMAT_ID)  		values[n++] = primary_event_id(leader); -	perf_output_copy(handle, values, n * sizeof(u64)); +	__output_copy(handle, values, n * sizeof(u64));  	list_for_each_entry(sub, &leader->sibling_list, group_entry) {  		n = 0; @@ -4239,7 +3830,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,  		if (read_format & PERF_FORMAT_ID)  			values[n++] = primary_event_id(sub); -		perf_output_copy(handle, values, n * sizeof(u64)); +		__output_copy(handle, values, n * sizeof(u64));  	}  } @@ -4249,7 +3840,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,  static void perf_output_read(struct perf_output_handle *handle,  			     struct perf_event *event)  { -	u64 enabled = 0, running = 0, now, ctx_time; +	u64 enabled = 0, running = 0;  	u64 read_format = event->attr.read_format;  	/* @@ -4261,12 +3852,8 @@ static void perf_output_read(struct perf_output_handle *handle,  	 * because of locking issue as we are called in  	 * NMI context  	 */ -	if (read_format & PERF_FORMAT_TOTAL_TIMES) { -		now = perf_clock(); -		ctx_time = event->shadow_ctx_time + now; -		enabled = ctx_time - event->tstamp_enabled; -		running = ctx_time - event->tstamp_running; -	} +	if (read_format & PERF_FORMAT_TOTAL_TIMES) +		calc_timer_values(event, &enabled, &running);  	if (event->attr.read_format & PERF_FORMAT_GROUP)  		perf_output_read_group(handle, 
event, enabled, running); @@ -4319,7 +3906,7 @@ void perf_output_sample(struct perf_output_handle *handle,  			size *= sizeof(u64); -			perf_output_copy(handle, data->callchain, size); +			__output_copy(handle, data->callchain, size);  		} else {  			u64 nr = 0;  			perf_output_put(handle, nr); @@ -4329,8 +3916,8 @@ void perf_output_sample(struct perf_output_handle *handle,  	if (sample_type & PERF_SAMPLE_RAW) {  		if (data->raw) {  			perf_output_put(handle, data->raw->size); -			perf_output_copy(handle, data->raw->data, -					 data->raw->size); +			__output_copy(handle, data->raw->data, +					   data->raw->size);  		} else {  			struct {  				u32	size; @@ -4342,6 +3929,20 @@ void perf_output_sample(struct perf_output_handle *handle,  			perf_output_put(handle, raw);  		}  	} + +	if (!event->attr.watermark) { +		int wakeup_events = event->attr.wakeup_events; + +		if (wakeup_events) { +			struct ring_buffer *rb = handle->rb; +			int events = local_inc_return(&rb->events); + +			if (events >= wakeup_events) { +				local_sub(wakeup_events, &rb->events); +				local_inc(&rb->wakeup); +			} +		} +	}  }  void perf_prepare_sample(struct perf_event_header *header, @@ -4386,7 +3987,7 @@ void perf_prepare_sample(struct perf_event_header *header,  	}  } -static void perf_event_output(struct perf_event *event, int nmi, +static void perf_event_output(struct perf_event *event,  				struct perf_sample_data *data,  				struct pt_regs *regs)  { @@ -4398,7 +3999,7 @@ static void perf_event_output(struct perf_event *event, int nmi,  	perf_prepare_sample(&header, data, event, regs); -	if (perf_output_begin(&handle, event, header.size, nmi, 1)) +	if (perf_output_begin(&handle, event, header.size))  		goto exit;  	perf_output_sample(&handle, &header, data, event); @@ -4438,7 +4039,7 @@ perf_event_read_event(struct perf_event *event,  	int ret;  	perf_event_header__init_id(&read_event.header, &sample, event); -	ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0); +	ret = perf_output_begin(&handle, event, read_event.header.size);  	if (ret)  		return; @@ -4481,7 +4082,7 @@ static void perf_event_task_output(struct perf_event *event,  	perf_event_header__init_id(&task_event->event_id.header, &sample, event);  	ret = perf_output_begin(&handle, event, -				task_event->event_id.header.size, 0, 0); +				task_event->event_id.header.size);  	if (ret)  		goto out; @@ -4618,7 +4219,7 @@ static void perf_event_comm_output(struct perf_event *event,  	perf_event_header__init_id(&comm_event->event_id.header, &sample, event);  	ret = perf_output_begin(&handle, event, -				comm_event->event_id.header.size, 0, 0); +				comm_event->event_id.header.size);  	if (ret)  		goto out; @@ -4627,7 +4228,7 @@ static void perf_event_comm_output(struct perf_event *event,  	comm_event->event_id.tid = perf_event_tid(event, comm_event->task);  	perf_output_put(&handle, comm_event->event_id); -	perf_output_copy(&handle, comm_event->comm, +	__output_copy(&handle, comm_event->comm,  				   comm_event->comm_size);  	perf_event__output_id_sample(event, &handle, &sample); @@ -4765,7 +4366,7 @@ static void perf_event_mmap_output(struct perf_event *event,  	perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);  	ret = perf_output_begin(&handle, event, -				mmap_event->event_id.header.size, 0, 0); +				mmap_event->event_id.header.size);  	if (ret)  		goto out; @@ -4773,7 +4374,7 @@ static void perf_event_mmap_output(struct perf_event *event,  	mmap_event->event_id.tid = perf_event_tid(event, current);  	
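
Each of these call sites shows the same conversion: perf_output_begin() now takes only the handle, the event and the record size, the nmi flag is gone because wakeups are always deferred through irq_work, and the wakeup_events accounting has moved into perf_output_sample(). A hedged kernel-side sketch of emitting a record through the reworked interface, modeled on the perf_event_*_output() sequences above; the record type and payload field are invented for illustration:

static void emit_example_record(struct perf_event *event, u64 payload)
{
	struct perf_output_handle handle;
	struct perf_sample_data sample;
	struct {
		struct perf_event_header header;
		u64 payload;
	} rec;

	rec.header.type = PERF_RECORD_MAX;	/* placeholder record type */
	rec.header.misc = 0;
	rec.header.size = sizeof(rec);
	rec.payload     = payload;

	perf_event_header__init_id(&rec.header, &sample, event);

	if (perf_output_begin(&handle, event, rec.header.size))
		return;

	perf_output_put(&handle, rec);
	perf_event__output_id_sample(event, &handle, &sample);
	perf_output_end(&handle);
}
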
perf_output_put(&handle, mmap_event->event_id); -	perf_output_copy(&handle, mmap_event->file_name, +	__output_copy(&handle, mmap_event->file_name,  				   mmap_event->file_size);  	perf_event__output_id_sample(event, &handle, &sample); @@ -4829,7 +4430,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)  	if (file) {  		/* -		 * d_path works from the end of the buffer backwards, so we +		 * d_path works from the end of the rb backwards, so we  		 * need to add enough zero bytes after the string to handle  		 * the 64bit alignment we do later.  		 */ @@ -4960,7 +4561,7 @@ static void perf_log_throttle(struct perf_event *event, int enable)  	perf_event_header__init_id(&throttle_event.header, &sample, event);  	ret = perf_output_begin(&handle, event, -				throttle_event.header.size, 1, 0); +				throttle_event.header.size);  	if (ret)  		return; @@ -4973,7 +4574,7 @@ static void perf_log_throttle(struct perf_event *event, int enable)   * Generic event overflow handling, sampling.   */ -static int __perf_event_overflow(struct perf_event *event, int nmi, +static int __perf_event_overflow(struct perf_event *event,  				   int throttle, struct perf_sample_data *data,  				   struct pt_regs *regs)  { @@ -5016,34 +4617,28 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,  	if (events && atomic_dec_and_test(&event->event_limit)) {  		ret = 1;  		event->pending_kill = POLL_HUP; -		if (nmi) { -			event->pending_disable = 1; -			irq_work_queue(&event->pending); -		} else -			perf_event_disable(event); +		event->pending_disable = 1; +		irq_work_queue(&event->pending);  	}  	if (event->overflow_handler) -		event->overflow_handler(event, nmi, data, regs); +		event->overflow_handler(event, data, regs);  	else -		perf_event_output(event, nmi, data, regs); +		perf_event_output(event, data, regs);  	if (event->fasync && event->pending_kill) { -		if (nmi) { -			event->pending_wakeup = 1; -			irq_work_queue(&event->pending); -		} else -			perf_event_wakeup(event); +		event->pending_wakeup = 1; +		irq_work_queue(&event->pending);  	}  	return ret;  } -int perf_event_overflow(struct perf_event *event, int nmi, +int perf_event_overflow(struct perf_event *event,  			  struct perf_sample_data *data,  			  struct pt_regs *regs)  { -	return __perf_event_overflow(event, nmi, 1, data, regs); +	return __perf_event_overflow(event, 1, data, regs);  }  /* @@ -5092,7 +4687,7 @@ again:  }  static void perf_swevent_overflow(struct perf_event *event, u64 overflow, -				    int nmi, struct perf_sample_data *data, +				    struct perf_sample_data *data,  				    struct pt_regs *regs)  {  	struct hw_perf_event *hwc = &event->hw; @@ -5106,7 +4701,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow,  		return;  	for (; overflow; overflow--) { -		if (__perf_event_overflow(event, nmi, throttle, +		if (__perf_event_overflow(event, throttle,  					    data, regs)) {  			/*  			 * We inhibit the overflow from happening when @@ -5119,7 +4714,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow,  }  static void perf_swevent_event(struct perf_event *event, u64 nr, -			       int nmi, struct perf_sample_data *data, +			       struct perf_sample_data *data,  			       struct pt_regs *regs)  {  	struct hw_perf_event *hwc = &event->hw; @@ -5133,12 +4728,12 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,  		return;  	if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) -		return perf_swevent_overflow(event, 1, nmi, data, 
regs); +		return perf_swevent_overflow(event, 1, data, regs);  	if (local64_add_negative(nr, &hwc->period_left))  		return; -	perf_swevent_overflow(event, 0, nmi, data, regs); +	perf_swevent_overflow(event, 0, data, regs);  }  static int perf_exclude_event(struct perf_event *event, @@ -5226,7 +4821,7 @@ find_swevent_head(struct swevent_htable *swhash, struct perf_event *event)  }  static void do_perf_sw_event(enum perf_type_id type, u32 event_id, -				    u64 nr, int nmi, +				    u64 nr,  				    struct perf_sample_data *data,  				    struct pt_regs *regs)  { @@ -5242,7 +4837,7 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id,  	hlist_for_each_entry_rcu(event, node, head, hlist_entry) {  		if (perf_swevent_match(event, type, event_id, data, regs)) -			perf_swevent_event(event, nr, nmi, data, regs); +			perf_swevent_event(event, nr, data, regs);  	}  end:  	rcu_read_unlock(); @@ -5263,8 +4858,7 @@ inline void perf_swevent_put_recursion_context(int rctx)  	put_recursion_context(swhash->recursion, rctx);  } -void __perf_sw_event(u32 event_id, u64 nr, int nmi, -			    struct pt_regs *regs, u64 addr) +void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)  {  	struct perf_sample_data data;  	int rctx; @@ -5276,7 +4870,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi,  	perf_sample_data_init(&data, addr); -	do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs); +	do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs);  	perf_swevent_put_recursion_context(rctx);  	preempt_enable_notrace(); @@ -5524,7 +5118,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,  	hlist_for_each_entry_rcu(event, node, head, hlist_entry) {  		if (perf_tp_event_match(event, &data, regs)) -			perf_swevent_event(event, count, 1, &data, regs); +			perf_swevent_event(event, count, &data, regs);  	}  	perf_swevent_put_recursion_context(rctx); @@ -5617,7 +5211,7 @@ void perf_bp_event(struct perf_event *bp, void *data)  	perf_sample_data_init(&sample, bp->attr.bp_addr);  	if (!bp->hw.state && !perf_exclude_event(bp, regs)) -		perf_swevent_event(bp, 1, 1, &sample, regs); +		perf_swevent_event(bp, 1, &sample, regs);  }  #endif @@ -5646,7 +5240,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)  	if (regs && !perf_exclude_event(event, regs)) {  		if (!(event->attr.exclude_idle && current->pid == 0)) -			if (perf_event_overflow(event, 0, &data, regs)) +			if (perf_event_overflow(event, &data, regs))  				ret = HRTIMER_NORESTART;  	} @@ -5986,6 +5580,7 @@ free_dev:  }  static struct lock_class_key cpuctx_mutex; +static struct lock_class_key cpuctx_lock;  int perf_pmu_register(struct pmu *pmu, char *name, int type)  { @@ -6036,6 +5631,7 @@ skip_type:  		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);  		__perf_event_init_context(&cpuctx->ctx);  		lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex); +		lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);  		cpuctx->ctx.type = cpu_context;  		cpuctx->ctx.pmu = pmu;  		cpuctx->jiffies_interval = 1; @@ -6150,7 +5746,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,  		 struct task_struct *task,  		 struct perf_event *group_leader,  		 struct perf_event *parent_event, -		 perf_overflow_handler_t overflow_handler) +		 perf_overflow_handler_t overflow_handler, +		 void *context)  {  	struct pmu *pmu;  	struct perf_event *event; @@ -6208,10 +5805,13 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,  #endif  	} -	if (!overflow_handler && 
parent_event) +	if (!overflow_handler && parent_event) {  		overflow_handler = parent_event->overflow_handler; +		context = parent_event->overflow_handler_context; +	}  	event->overflow_handler	= overflow_handler; +	event->overflow_handler_context = context;  	if (attr->disabled)  		event->state = PERF_EVENT_STATE_OFF; @@ -6326,13 +5926,6 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,  	if (ret)  		return -EFAULT; -	/* -	 * If the type exists, the corresponding creation will verify -	 * the attr->config. -	 */ -	if (attr->type >= PERF_TYPE_MAX) -		return -EINVAL; -  	if (attr->__reserved_1)  		return -EINVAL; @@ -6354,7 +5947,7 @@ err_size:  static int  perf_event_set_output(struct perf_event *event, struct perf_event *output_event)  { -	struct perf_buffer *buffer = NULL, *old_buffer = NULL; +	struct ring_buffer *rb = NULL, *old_rb = NULL;  	int ret = -EINVAL;  	if (!output_event) @@ -6371,7 +5964,7 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)  		goto out;  	/* -	 * If its not a per-cpu buffer, it must be the same task. +	 * If its not a per-cpu rb, it must be the same task.  	 */  	if (output_event->cpu == -1 && output_event->ctx != event->ctx)  		goto out; @@ -6383,20 +5976,20 @@ set:  		goto unlock;  	if (output_event) { -		/* get the buffer we want to redirect to */ -		buffer = perf_buffer_get(output_event); -		if (!buffer) +		/* get the rb we want to redirect to */ +		rb = ring_buffer_get(output_event); +		if (!rb)  			goto unlock;  	} -	old_buffer = event->buffer; -	rcu_assign_pointer(event->buffer, buffer); +	old_rb = event->rb; +	rcu_assign_pointer(event->rb, rb);  	ret = 0;  unlock:  	mutex_unlock(&event->mmap_mutex); -	if (old_buffer) -		perf_buffer_put(old_buffer); +	if (old_rb) +		ring_buffer_put(old_rb);  out:  	return ret;  } @@ -6478,7 +6071,8 @@ SYSCALL_DEFINE5(perf_event_open,  		}  	} -	event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, NULL); +	event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, +				 NULL, NULL);  	if (IS_ERR(event)) {  		err = PTR_ERR(event);  		goto err_task; @@ -6663,7 +6257,8 @@ err_fd:  struct perf_event *  perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,  				 struct task_struct *task, -				 perf_overflow_handler_t overflow_handler) +				 perf_overflow_handler_t overflow_handler, +				 void *context)  {  	struct perf_event_context *ctx;  	struct perf_event *event; @@ -6673,7 +6268,8 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,  	 * Get the target context (task or percpu):  	 */ -	event = perf_event_alloc(attr, cpu, task, NULL, NULL, overflow_handler); +	event = perf_event_alloc(attr, cpu, task, NULL, NULL, +				 overflow_handler, context);  	if (IS_ERR(event)) {  		err = PTR_ERR(event);  		goto err; @@ -6780,7 +6376,6 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)  	 * our context.  	 */  	child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]); -	task_ctx_sched_out(child_ctx, EVENT_ALL);  	/*  	 * Take the context lock here so that if find_get_context is @@ -6788,6 +6383,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)  	 * incremented the context's refcount before we do put_ctx below.  	 
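
The void *context threaded through perf_event_alloc(), perf_event_create_kernel_counter() and the register_*_hw_breakpoint() wrappers below lets in-kernel users hand private state to their overflow handler via event->overflow_handler_context, and the handler itself has lost the nmi argument. A minimal sketch of a per-cpu sampling counter under the new signatures; the attr values and my_state type are illustrative:

#include <linux/atomic.h>
#include <linux/perf_event.h>

struct my_state {
	atomic_t hits;
};

static void my_overflow(struct perf_event *event,
			struct perf_sample_data *data,
			struct pt_regs *regs)
{
	struct my_state *st = event->overflow_handler_context;

	atomic_inc(&st->hits);
}

static struct perf_event *start_sampling(int cpu, struct my_state *st)
{
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_SOFTWARE,
		.config		= PERF_COUNT_SW_CPU_CLOCK,
		.size		= sizeof(attr),
		.sample_period	= 1000000,	/* cpu-clock periods are in ns */
	};

	return perf_event_create_kernel_counter(&attr, cpu, NULL,
						my_overflow, st);
}

Since perf_event_refresh() is now exported, such a user could also create the event disabled and arm it for a bounded number of overflows.
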
*/  	raw_spin_lock(&child_ctx->lock); +	task_ctx_sched_out(child_ctx);  	child->perf_event_ctxp[ctxn] = NULL;  	/*  	 * If this context is a clone; unclone it so it can't get @@ -6957,7 +6553,7 @@ inherit_event(struct perf_event *parent_event,  					   parent_event->cpu,  					   child,  					   group_leader, parent_event, -					   NULL); +				           NULL, NULL);  	if (IS_ERR(child_event))  		return child_event;  	get_ctx(child_ctx); @@ -6984,6 +6580,8 @@ inherit_event(struct perf_event *parent_event,  	child_event->ctx = child_ctx;  	child_event->overflow_handler = parent_event->overflow_handler; +	child_event->overflow_handler_context +		= parent_event->overflow_handler_context;  	/*  	 * Precalculate sample_data sizes diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index 086adf25a55e..b7971d6f38bf 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c @@ -431,9 +431,11 @@ int register_perf_hw_breakpoint(struct perf_event *bp)  struct perf_event *  register_user_hw_breakpoint(struct perf_event_attr *attr,  			    perf_overflow_handler_t triggered, +			    void *context,  			    struct task_struct *tsk)  { -	return perf_event_create_kernel_counter(attr, -1, tsk, triggered); +	return perf_event_create_kernel_counter(attr, -1, tsk, triggered, +						context);  }  EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); @@ -502,7 +504,8 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);   */  struct perf_event * __percpu *  register_wide_hw_breakpoint(struct perf_event_attr *attr, -			    perf_overflow_handler_t triggered) +			    perf_overflow_handler_t triggered, +			    void *context)  {  	struct perf_event * __percpu *cpu_events, **pevent, *bp;  	long err; @@ -515,7 +518,8 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,  	get_online_cpus();  	for_each_online_cpu(cpu) {  		pevent = per_cpu_ptr(cpu_events, cpu); -		bp = perf_event_create_kernel_counter(attr, cpu, NULL, triggered); +		bp = perf_event_create_kernel_counter(attr, cpu, NULL, +						      triggered, context);  		*pevent = bp; diff --git a/kernel/events/internal.h b/kernel/events/internal.h new file mode 100644 index 000000000000..09097dd8116c --- /dev/null +++ b/kernel/events/internal.h @@ -0,0 +1,96 @@ +#ifndef _KERNEL_EVENTS_INTERNAL_H +#define _KERNEL_EVENTS_INTERNAL_H + +#define RING_BUFFER_WRITABLE		0x01 + +struct ring_buffer { +	atomic_t			refcount; +	struct rcu_head			rcu_head; +#ifdef CONFIG_PERF_USE_VMALLOC +	struct work_struct		work; +	int				page_order;	/* allocation order  */ +#endif +	int				nr_pages;	/* nr of data pages  */ +	int				writable;	/* are we writable   */ + +	atomic_t			poll;		/* POLL_ for wakeups */ + +	local_t				head;		/* write position    */ +	local_t				nest;		/* nested writers    */ +	local_t				events;		/* event limit       */ +	local_t				wakeup;		/* wakeup stamp      */ +	local_t				lost;		/* nr records lost   */ + +	long				watermark;	/* wakeup watermark  */ + +	struct perf_event_mmap_page	*user_page; +	void				*data_pages[0]; +}; + +extern void rb_free(struct ring_buffer *rb); +extern struct ring_buffer * +rb_alloc(int nr_pages, long watermark, int cpu, int flags); +extern void perf_event_wakeup(struct perf_event *event); + +extern void +perf_event_header__init_id(struct perf_event_header *header, +			   struct perf_sample_data *data, +			   struct perf_event *event); +extern void +perf_event__output_id_sample(struct perf_event *event, +			     struct perf_output_handle *handle, +			     struct perf_sample_data *sample); + +extern struct 
page * +perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff); + +#ifdef CONFIG_PERF_USE_VMALLOC +/* + * Back perf_mmap() with vmalloc memory. + * + * Required for architectures that have d-cache aliasing issues. + */ + +static inline int page_order(struct ring_buffer *rb) +{ +	return rb->page_order; +} + +#else + +static inline int page_order(struct ring_buffer *rb) +{ +	return 0; +} +#endif + +static unsigned long perf_data_size(struct ring_buffer *rb) +{ +	return rb->nr_pages << (PAGE_SHIFT + page_order(rb)); +} + +static inline void +__output_copy(struct perf_output_handle *handle, +		   const void *buf, unsigned int len) +{ +	do { +		unsigned long size = min_t(unsigned long, handle->size, len); + +		memcpy(handle->addr, buf, size); + +		len -= size; +		handle->addr += size; +		buf += size; +		handle->size -= size; +		if (!handle->size) { +			struct ring_buffer *rb = handle->rb; + +			handle->page++; +			handle->page &= rb->nr_pages - 1; +			handle->addr = rb->data_pages[handle->page]; +			handle->size = PAGE_SIZE << page_order(rb); +		} +	} while (len); +} + +#endif /* _KERNEL_EVENTS_INTERNAL_H */ diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c new file mode 100644 index 000000000000..a2a29205cc0f --- /dev/null +++ b/kernel/events/ring_buffer.c @@ -0,0 +1,380 @@ +/* + * Performance events ring-buffer code: + * + *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> + *  Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar + *  Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> + *  Copyright  ©  2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> + * + * For licensing details see kernel-base/COPYING + */ + +#include <linux/perf_event.h> +#include <linux/vmalloc.h> +#include <linux/slab.h> + +#include "internal.h" + +static bool perf_output_space(struct ring_buffer *rb, unsigned long tail, +			      unsigned long offset, unsigned long head) +{ +	unsigned long mask; + +	if (!rb->writable) +		return true; + +	mask = perf_data_size(rb) - 1; + +	offset = (offset - tail) & mask; +	head   = (head   - tail) & mask; + +	if ((int)(head - offset) < 0) +		return false; + +	return true; +} + +static void perf_output_wakeup(struct perf_output_handle *handle) +{ +	atomic_set(&handle->rb->poll, POLL_IN); + +	handle->event->pending_wakeup = 1; +	irq_work_queue(&handle->event->pending); +} + +/* + * We need to ensure a later event_id doesn't publish a head when a former + * event isn't done writing. However since we need to deal with NMIs we + * cannot fully serialize things. + * + * We only publish the head (and generate a wakeup) when the outer-most + * event completes. + */ +static void perf_output_get_handle(struct perf_output_handle *handle) +{ +	struct ring_buffer *rb = handle->rb; + +	preempt_disable(); +	local_inc(&rb->nest); +	handle->wakeup = local_read(&rb->wakeup); +} + +static void perf_output_put_handle(struct perf_output_handle *handle) +{ +	struct ring_buffer *rb = handle->rb; +	unsigned long head; + +again: +	head = local_read(&rb->head); + +	/* +	 * IRQ/NMI can happen here, which means we can miss a head update. +	 */ + +	if (!local_dec_and_test(&rb->nest)) +		goto out; + +	/* +	 * Publish the known good head. Rely on the full barrier implied +	 * by atomic_dec_and_test() order the rb->head read and this +	 * write. +	 */ +	rb->user_page->data_head = head; + +	/* +	 * Now check if we missed an update, rely on the (compiler) +	 * barrier in atomic_dec_and_test() to re-read rb->head. 
+	 */ +	if (unlikely(head != local_read(&rb->head))) { +		local_inc(&rb->nest); +		goto again; +	} + +	if (handle->wakeup != local_read(&rb->wakeup)) +		perf_output_wakeup(handle); + +out: +	preempt_enable(); +} + +int perf_output_begin(struct perf_output_handle *handle, +		      struct perf_event *event, unsigned int size) +{ +	struct ring_buffer *rb; +	unsigned long tail, offset, head; +	int have_lost; +	struct perf_sample_data sample_data; +	struct { +		struct perf_event_header header; +		u64			 id; +		u64			 lost; +	} lost_event; + +	rcu_read_lock(); +	/* +	 * For inherited events we send all the output towards the parent. +	 */ +	if (event->parent) +		event = event->parent; + +	rb = rcu_dereference(event->rb); +	if (!rb) +		goto out; + +	handle->rb	= rb; +	handle->event	= event; + +	if (!rb->nr_pages) +		goto out; + +	have_lost = local_read(&rb->lost); +	if (have_lost) { +		lost_event.header.size = sizeof(lost_event); +		perf_event_header__init_id(&lost_event.header, &sample_data, +					   event); +		size += lost_event.header.size; +	} + +	perf_output_get_handle(handle); + +	do { +		/* +		 * Userspace could choose to issue a mb() before updating the +		 * tail pointer. So that all reads will be completed before the +		 * write is issued. +		 */ +		tail = ACCESS_ONCE(rb->user_page->data_tail); +		smp_rmb(); +		offset = head = local_read(&rb->head); +		head += size; +		if (unlikely(!perf_output_space(rb, tail, offset, head))) +			goto fail; +	} while (local_cmpxchg(&rb->head, offset, head) != offset); + +	if (head - local_read(&rb->wakeup) > rb->watermark) +		local_add(rb->watermark, &rb->wakeup); + +	handle->page = offset >> (PAGE_SHIFT + page_order(rb)); +	handle->page &= rb->nr_pages - 1; +	handle->size = offset & ((PAGE_SIZE << page_order(rb)) - 1); +	handle->addr = rb->data_pages[handle->page]; +	handle->addr += handle->size; +	handle->size = (PAGE_SIZE << page_order(rb)) - handle->size; + +	if (have_lost) { +		lost_event.header.type = PERF_RECORD_LOST; +		lost_event.header.misc = 0; +		lost_event.id          = event->id; +		lost_event.lost        = local_xchg(&rb->lost, 0); + +		perf_output_put(handle, lost_event); +		perf_event__output_id_sample(event, handle, &sample_data); +	} + +	return 0; + +fail: +	local_inc(&rb->lost); +	perf_output_put_handle(handle); +out: +	rcu_read_unlock(); + +	return -ENOSPC; +} + +void perf_output_copy(struct perf_output_handle *handle, +		      const void *buf, unsigned int len) +{ +	__output_copy(handle, buf, len); +} + +void perf_output_end(struct perf_output_handle *handle) +{ +	perf_output_put_handle(handle); +	rcu_read_unlock(); +} + +static void +ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) +{ +	long max_size = perf_data_size(rb); + +	if (watermark) +		rb->watermark = min(max_size, watermark); + +	if (!rb->watermark) +		rb->watermark = max_size / 2; + +	if (flags & RING_BUFFER_WRITABLE) +		rb->writable = 1; + +	atomic_set(&rb->refcount, 1); +} + +#ifndef CONFIG_PERF_USE_VMALLOC + +/* + * Back perf_mmap() with regular GFP_KERNEL-0 pages. + */ + +struct page * +perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) +{ +	if (pgoff > rb->nr_pages) +		return NULL; + +	if (pgoff == 0) +		return virt_to_page(rb->user_page); + +	return virt_to_page(rb->data_pages[pgoff - 1]); +} + +static void *perf_mmap_alloc_page(int cpu) +{ +	struct page *page; +	int node; + +	node = (cpu == -1) ? 
cpu : cpu_to_node(cpu); +	page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); +	if (!page) +		return NULL; + +	return page_address(page); +} + +struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) +{ +	struct ring_buffer *rb; +	unsigned long size; +	int i; + +	size = sizeof(struct ring_buffer); +	size += nr_pages * sizeof(void *); + +	rb = kzalloc(size, GFP_KERNEL); +	if (!rb) +		goto fail; + +	rb->user_page = perf_mmap_alloc_page(cpu); +	if (!rb->user_page) +		goto fail_user_page; + +	for (i = 0; i < nr_pages; i++) { +		rb->data_pages[i] = perf_mmap_alloc_page(cpu); +		if (!rb->data_pages[i]) +			goto fail_data_pages; +	} + +	rb->nr_pages = nr_pages; + +	ring_buffer_init(rb, watermark, flags); + +	return rb; + +fail_data_pages: +	for (i--; i >= 0; i--) +		free_page((unsigned long)rb->data_pages[i]); + +	free_page((unsigned long)rb->user_page); + +fail_user_page: +	kfree(rb); + +fail: +	return NULL; +} + +static void perf_mmap_free_page(unsigned long addr) +{ +	struct page *page = virt_to_page((void *)addr); + +	page->mapping = NULL; +	__free_page(page); +} + +void rb_free(struct ring_buffer *rb) +{ +	int i; + +	perf_mmap_free_page((unsigned long)rb->user_page); +	for (i = 0; i < rb->nr_pages; i++) +		perf_mmap_free_page((unsigned long)rb->data_pages[i]); +	kfree(rb); +} + +#else + +struct page * +perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) +{ +	if (pgoff > (1UL << page_order(rb))) +		return NULL; + +	return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE); +} + +static void perf_mmap_unmark_page(void *addr) +{ +	struct page *page = vmalloc_to_page(addr); + +	page->mapping = NULL; +} + +static void rb_free_work(struct work_struct *work) +{ +	struct ring_buffer *rb; +	void *base; +	int i, nr; + +	rb = container_of(work, struct ring_buffer, work); +	nr = 1 << page_order(rb); + +	base = rb->user_page; +	for (i = 0; i < nr + 1; i++) +		perf_mmap_unmark_page(base + (i * PAGE_SIZE)); + +	vfree(base); +	kfree(rb); +} + +void rb_free(struct ring_buffer *rb) +{ +	schedule_work(&rb->work); +} + +struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) +{ +	struct ring_buffer *rb; +	unsigned long size; +	void *all_buf; + +	size = sizeof(struct ring_buffer); +	size += sizeof(void *); + +	rb = kzalloc(size, GFP_KERNEL); +	if (!rb) +		goto fail; + +	INIT_WORK(&rb->work, rb_free_work); + +	all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE); +	if (!all_buf) +		goto fail_all_buf; + +	rb->user_page = all_buf; +	rb->data_pages[0] = all_buf + PAGE_SIZE; +	rb->page_order = ilog2(nr_pages); +	rb->nr_pages = 1; + +	ring_buffer_init(rb, watermark, flags); + +	return rb; + +fail_all_buf: +	kfree(rb); + +fail: +	return NULL; +} + +#endif diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 77981813a1e7..b30fd54eb985 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1255,19 +1255,29 @@ static int __kprobes in_kprobes_functions(unsigned long addr)  /*   * If we have a symbol_name argument, look it up and add the offset field   * to it. This way, we can specify a relative address to a symbol. + * This returns encoded errors if it fails to look up symbol or invalid + * combination of parameters.   
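
With kprobe_addr() returning ERR_PTR() codes, register_kprobe() and register_kretprobe() can now tell a missing symbol (-ENOENT) apart from a malformed specification (-EINVAL), and register_kprobe() no longer collapses every failure in the module-text path into -EINVAL. A minimal sketch of a symbol-based probe that surfaces the specific error; the target symbol and handler body are illustrative:

#include <linux/kprobes.h>
#include <linux/module.h>

static int my_pre(struct kprobe *p, struct pt_regs *regs)
{
	pr_info("hit %s+0x%x\n", p->symbol_name, p->offset);
	return 0;
}

static struct kprobe my_probe = {
	.symbol_name	= "do_fork",	/* illustrative target */
	.pre_handler	= my_pre,
};

static int __init my_probe_init(void)
{
	int ret = register_kprobe(&my_probe);

	if (ret)	/* -ENOENT: symbol not found; -EINVAL: bad addr/symbol combination */
		pr_err("register_kprobe failed: %d\n", ret);
	return ret;
}

static void __exit my_probe_exit(void)
{
	unregister_kprobe(&my_probe);
}

module_init(my_probe_init);
module_exit(my_probe_exit);
MODULE_LICENSE("GPL");
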
*/  static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)  {  	kprobe_opcode_t *addr = p->addr; + +	if ((p->symbol_name && p->addr) || +	    (!p->symbol_name && !p->addr)) +		goto invalid; +  	if (p->symbol_name) { -		if (addr) -			return NULL;  		kprobe_lookup_name(p->symbol_name, addr); +		if (!addr) +			return ERR_PTR(-ENOENT);  	} -	if (!addr) -		return NULL; -	return (kprobe_opcode_t *)(((char *)addr) + p->offset); +	addr = (kprobe_opcode_t *)(((char *)addr) + p->offset); +	if (addr) +		return addr; + +invalid: +	return ERR_PTR(-EINVAL);  }  /* Check passed kprobe is valid and return kprobe in kprobe_table. */ @@ -1311,8 +1321,8 @@ int __kprobes register_kprobe(struct kprobe *p)  	kprobe_opcode_t *addr;  	addr = kprobe_addr(p); -	if (!addr) -		return -EINVAL; +	if (IS_ERR(addr)) +		return PTR_ERR(addr);  	p->addr = addr;  	ret = check_kprobe_rereg(p); @@ -1335,6 +1345,8 @@ int __kprobes register_kprobe(struct kprobe *p)  	 */  	probed_mod = __module_text_address((unsigned long) p->addr);  	if (probed_mod) { +		/* Return -ENOENT if fail. */ +		ret = -ENOENT;  		/*  		 * We must hold a refcount of the probed module while updating  		 * its code to prohibit unexpected unloading. @@ -1351,6 +1363,7 @@ int __kprobes register_kprobe(struct kprobe *p)  			module_put(probed_mod);  			goto fail_with_jump_label;  		} +		/* ret will be updated by following code */  	}  	preempt_enable();  	jump_label_unlock(); @@ -1399,7 +1412,7 @@ out:  fail_with_jump_label:  	preempt_enable();  	jump_label_unlock(); -	return -EINVAL; +	return ret;  }  EXPORT_SYMBOL_GPL(register_kprobe); @@ -1686,8 +1699,8 @@ int __kprobes register_kretprobe(struct kretprobe *rp)  	if (kretprobe_blacklist_size) {  		addr = kprobe_addr(&rp->kp); -		if (!addr) -			return -EINVAL; +		if (IS_ERR(addr)) +			return PTR_ERR(addr);  		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {  			if (kretprobe_blacklist[i].addr == addr) diff --git a/kernel/sched.c b/kernel/sched.c index c518b05fd062..84b9e076812e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2220,7 +2220,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)  	if (task_cpu(p) != new_cpu) {  		p->se.nr_migrations++; -		perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0); +		perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0);  	}  	__set_task_cpu(p, new_cpu); diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c index eb212f8f8bc8..d20c6983aad9 100644 --- a/kernel/stacktrace.c +++ b/kernel/stacktrace.c @@ -26,12 +26,18 @@ void print_stack_trace(struct stack_trace *trace, int spaces)  EXPORT_SYMBOL_GPL(print_stack_trace);  /* - * Architectures that do not implement save_stack_trace_tsk get this - * weak alias and a once-per-bootup warning (whenever this facility - * is utilized - for example by procfs): + * Architectures that do not implement save_stack_trace_tsk or + * save_stack_trace_regs get this weak alias and a once-per-bootup warning + * (whenever this facility is utilized - for example by procfs):   */  __weak void  save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)  {  	WARN_ONCE(1, KERN_INFO "save_stack_trace_tsk() not implemented yet.\n");  } + +__weak void +save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) +{ +	WARN_ONCE(1, KERN_INFO "save_stack_trace_regs() not implemented yet.\n"); +} diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 908038f57440..c3e4575e7829 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -32,7 +32,6 @@  #include 
<trace/events/sched.h> -#include <asm/ftrace.h>  #include <asm/setup.h>  #include "trace_output.h" @@ -82,14 +81,14 @@ static int ftrace_disabled __read_mostly;  static DEFINE_MUTEX(ftrace_lock); -static struct ftrace_ops ftrace_list_end __read_mostly = -{ +static struct ftrace_ops ftrace_list_end __read_mostly = {  	.func		= ftrace_stub,  };  static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end;  static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;  ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; +static ftrace_func_t __ftrace_trace_function_delay __read_mostly = ftrace_stub;  ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;  ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;  static struct ftrace_ops global_ops; @@ -148,9 +147,11 @@ void clear_ftrace_function(void)  {  	ftrace_trace_function = ftrace_stub;  	__ftrace_trace_function = ftrace_stub; +	__ftrace_trace_function_delay = ftrace_stub;  	ftrace_pid_function = ftrace_stub;  } +#undef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST  #ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST  /*   * For those archs that do not test ftrace_trace_stop in their @@ -210,7 +211,12 @@ static void update_ftrace_function(void)  #ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST  	ftrace_trace_function = func;  #else +#ifdef CONFIG_DYNAMIC_FTRACE +	/* do not update till all functions have been modified */ +	__ftrace_trace_function_delay = func; +#else  	__ftrace_trace_function = func; +#endif  	ftrace_trace_function = ftrace_test_stop_func;  #endif  } @@ -785,8 +791,7 @@ static void unregister_ftrace_profiler(void)  	unregister_ftrace_graph();  }  #else -static struct ftrace_ops ftrace_profile_ops __read_mostly = -{ +static struct ftrace_ops ftrace_profile_ops __read_mostly = {  	.func		= function_profile_call,  }; @@ -806,19 +811,10 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,  		     size_t cnt, loff_t *ppos)  {  	unsigned long val; -	char buf[64];		/* big enough to hold a number */  	int ret; -	if (cnt >= sizeof(buf)) -		return -EINVAL; - -	if (copy_from_user(&buf, ubuf, cnt)) -		return -EFAULT; - -	buf[cnt] = 0; - -	ret = strict_strtoul(buf, 10, &val); -	if (ret < 0) +	ret = kstrtoul_from_user(ubuf, cnt, 10, &val); +	if (ret)  		return ret;  	val = !!val; @@ -1182,8 +1178,14 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash)  	return NULL;  } +static void +ftrace_hash_rec_disable(struct ftrace_ops *ops, int filter_hash); +static void +ftrace_hash_rec_enable(struct ftrace_ops *ops, int filter_hash); +  static int -ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src) +ftrace_hash_move(struct ftrace_ops *ops, int enable, +		 struct ftrace_hash **dst, struct ftrace_hash *src)  {  	struct ftrace_func_entry *entry;  	struct hlist_node *tp, *tn; @@ -1193,9 +1195,16 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src)  	unsigned long key;  	int size = src->count;  	int bits = 0; +	int ret;  	int i;  	/* +	 * Remove the current set, update the hash and add +	 * them back. +	 */ +	ftrace_hash_rec_disable(ops, enable); + +	/*  	 * If the new source is empty, just free dst and assign it  	 * the empty_hash.  	 
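/*
 * Illustrative aside, not part of the patch: the ftrace_profile_write()
 * hunk above is the first of many in this series that fold the open-coded
 * copy_from_user() + strict_strtoul() boilerplate into a single
 * kstrtoul_from_user() call.  Sketch of the resulting write-handler shape
 * (demo_write() and its backing file are hypothetical):
 */
static ssize_t demo_write(struct file *filp, const char __user *ubuf,
			  size_t cnt, loff_t *ppos)
{
	unsigned long val;
	int ret;

	/* copies a bounded snapshot of ubuf, NUL-terminates it, parses base 10 */
	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	/* ... act on val ... */
	*ppos += cnt;
	return cnt;
}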
*/ @@ -1215,9 +1224,10 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src)  	if (bits > FTRACE_HASH_MAX_BITS)  		bits = FTRACE_HASH_MAX_BITS; +	ret = -ENOMEM;  	new_hash = alloc_ftrace_hash(bits);  	if (!new_hash) -		return -ENOMEM; +		goto out;  	size = 1 << src->size_bits;  	for (i = 0; i < size; i++) { @@ -1236,7 +1246,16 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src)  	rcu_assign_pointer(*dst, new_hash);  	free_ftrace_hash_rcu(old_hash); -	return 0; +	ret = 0; + out: +	/* +	 * Enable regardless of ret: +	 *  On success, we enable the new hash. +	 *  On failure, we re-enable the original hash. +	 */ +	ftrace_hash_rec_enable(ops, enable); + +	return ret;  }  /* @@ -1596,6 +1615,12 @@ static int __ftrace_modify_code(void *data)  {  	int *command = data; +	/* +	 * Do not call function tracer while we update the code. +	 * We are in stop machine, no worrying about races. +	 */ +	function_trace_stop++; +  	if (*command & FTRACE_ENABLE_CALLS)  		ftrace_replace_code(1);  	else if (*command & FTRACE_DISABLE_CALLS) @@ -1609,6 +1634,18 @@ static int __ftrace_modify_code(void *data)  	else if (*command & FTRACE_STOP_FUNC_RET)  		ftrace_disable_ftrace_graph_caller(); +#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST +	/* +	 * For archs that call ftrace_test_stop_func(), we must +	 * wait till after we update all the function callers +	 * before we update the callback. This keeps different +	 * ops that record different functions from corrupting +	 * each other. +	 */ +	__ftrace_trace_function = __ftrace_trace_function_delay; +#endif +	function_trace_stop--; +  	return 0;  } @@ -1744,10 +1781,36 @@ static cycle_t		ftrace_update_time;  static unsigned long	ftrace_update_cnt;  unsigned long		ftrace_update_tot_cnt; +static int ops_traces_mod(struct ftrace_ops *ops) +{ +	struct ftrace_hash *hash; + +	hash = ops->filter_hash; +	return !!(!hash || !hash->count); +} +  static int ftrace_update_code(struct module *mod)  {  	struct dyn_ftrace *p;  	cycle_t start, stop; +	unsigned long ref = 0; + +	/* +	 * When adding a module, we need to check if tracers are +	 * currently enabled and if they are set to trace all functions. +	 * If they are, we need to enable the module functions as well +	 * as update the reference counts for those function records. +	 */ +	if (mod) { +		struct ftrace_ops *ops; + +		for (ops = ftrace_ops_list; +		     ops != &ftrace_list_end; ops = ops->next) { +			if (ops->flags & FTRACE_OPS_FL_ENABLED && +			    ops_traces_mod(ops)) +				ref++; +		} +	}  	start = ftrace_now(raw_smp_processor_id());  	ftrace_update_cnt = 0; @@ -1760,7 +1823,7 @@ static int ftrace_update_code(struct module *mod)  		p = ftrace_new_addrs;  		ftrace_new_addrs = p->newlist; -		p->flags = 0L; +		p->flags = ref;  		/*  		 * Do the initial record conversion from mcount jump @@ -1783,7 +1846,7 @@ static int ftrace_update_code(struct module *mod)  		 * conversion puts the module to the correct state, thus  		 * passing the ftrace_make_call check.  		 
*/ -		if (ftrace_start_up) { +		if (ftrace_start_up && ref) {  			int failed = __ftrace_replace_code(p, 1);  			if (failed) {  				ftrace_bug(failed, p->ip); @@ -2407,10 +2470,9 @@ ftrace_match_module_records(struct ftrace_hash *hash, char *buff, char *mod)   */  static int -ftrace_mod_callback(char *func, char *cmd, char *param, int enable) +ftrace_mod_callback(struct ftrace_hash *hash, +		    char *func, char *cmd, char *param, int enable)  { -	struct ftrace_ops *ops = &global_ops; -	struct ftrace_hash *hash;  	char *mod;  	int ret = -EINVAL; @@ -2430,11 +2492,6 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable)  	if (!strlen(mod))  		return ret; -	if (enable) -		hash = ops->filter_hash; -	else -		hash = ops->notrace_hash; -  	ret = ftrace_match_module_records(hash, func, mod);  	if (!ret)  		ret = -EINVAL; @@ -2760,7 +2817,7 @@ static int ftrace_process_regex(struct ftrace_hash *hash,  	mutex_lock(&ftrace_cmd_mutex);  	list_for_each_entry(p, &ftrace_commands, list) {  		if (strcmp(p->name, command) == 0) { -			ret = p->func(func, command, next, enable); +			ret = p->func(hash, func, command, next, enable);  			goto out_unlock;  		}  	} @@ -2857,7 +2914,11 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,  		ftrace_match_records(hash, buf, len);  	mutex_lock(&ftrace_lock); -	ret = ftrace_hash_move(orig_hash, hash); +	ret = ftrace_hash_move(ops, enable, orig_hash, hash); +	if (!ret && ops->flags & FTRACE_OPS_FL_ENABLED +	    && ftrace_enabled) +		ftrace_run_update_code(FTRACE_ENABLE_CALLS); +  	mutex_unlock(&ftrace_lock);  	mutex_unlock(&ftrace_regex_lock); @@ -3040,18 +3101,12 @@ ftrace_regex_release(struct inode *inode, struct file *file)  			orig_hash = &iter->ops->notrace_hash;  		mutex_lock(&ftrace_lock); -		/* -		 * Remove the current set, update the hash and add -		 * them back. -		 */ -		ftrace_hash_rec_disable(iter->ops, filter_hash); -		ret = ftrace_hash_move(orig_hash, iter->hash); -		if (!ret) { -			ftrace_hash_rec_enable(iter->ops, filter_hash); -			if (iter->ops->flags & FTRACE_OPS_FL_ENABLED -			    && ftrace_enabled) -				ftrace_run_update_code(FTRACE_ENABLE_CALLS); -		} +		ret = ftrace_hash_move(iter->ops, filter_hash, +				       orig_hash, iter->hash); +		if (!ret && (iter->ops->flags & FTRACE_OPS_FL_ENABLED) +		    && ftrace_enabled) +			ftrace_run_update_code(FTRACE_ENABLE_CALLS); +  		mutex_unlock(&ftrace_lock);  	}  	free_ftrace_hash(iter->hash); @@ -3330,7 +3385,7 @@ static int ftrace_process_locs(struct module *mod,  {  	unsigned long *p;  	unsigned long addr; -	unsigned long flags; +	unsigned long flags = 0; /* Shut up gcc */  	mutex_lock(&ftrace_lock);  	p = start; @@ -3348,12 +3403,18 @@ static int ftrace_process_locs(struct module *mod,  	}  	/* -	 * Disable interrupts to prevent interrupts from executing -	 * code that is being modified. +	 * We only need to disable interrupts on start up +	 * because we are modifying code that an interrupt +	 * may execute, and the modification is not atomic. +	 * But for modules, nothing runs the code we modify +	 * until we are finished with it, and there's no +	 * reason to cause large interrupt latencies while we do it.  	 
*/ -	local_irq_save(flags); +	if (!mod) +		local_irq_save(flags);  	ftrace_update_code(mod); -	local_irq_restore(flags); +	if (!mod) +		local_irq_restore(flags);  	mutex_unlock(&ftrace_lock);  	return 0; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index b0c7aa407943..731201bf4acc 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -997,15 +997,21 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,  			     unsigned nr_pages)  {  	struct buffer_page *bpage, *tmp; -	unsigned long addr;  	LIST_HEAD(pages);  	unsigned i;  	WARN_ON(!nr_pages);  	for (i = 0; i < nr_pages; i++) { +		struct page *page; +		/* +		 * __GFP_NORETRY flag makes sure that the allocation fails +		 * gracefully without invoking oom-killer and the system is +		 * not destabilized. +		 */  		bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), -				    GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); +				    GFP_KERNEL | __GFP_NORETRY, +				    cpu_to_node(cpu_buffer->cpu));  		if (!bpage)  			goto free_pages; @@ -1013,10 +1019,11 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,  		list_add(&bpage->list, &pages); -		addr = __get_free_page(GFP_KERNEL); -		if (!addr) +		page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu), +					GFP_KERNEL | __GFP_NORETRY, 0); +		if (!page)  			goto free_pages; -		bpage->page = (void *)addr; +		bpage->page = page_address(page);  		rb_init_page(bpage->page);  	} @@ -1045,7 +1052,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)  {  	struct ring_buffer_per_cpu *cpu_buffer;  	struct buffer_page *bpage; -	unsigned long addr; +	struct page *page;  	int ret;  	cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()), @@ -1067,10 +1074,10 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)  	rb_check_bpage(cpu_buffer, bpage);  	cpu_buffer->reader_page = bpage; -	addr = __get_free_page(GFP_KERNEL); -	if (!addr) +	page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0); +	if (!page)  		goto fail_free_reader; -	bpage->page = (void *)addr; +	bpage->page = page_address(page);  	rb_init_page(bpage->page);  	INIT_LIST_HEAD(&cpu_buffer->reader_page->list); @@ -1314,7 +1321,6 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)  	unsigned nr_pages, rm_pages, new_pages;  	struct buffer_page *bpage, *tmp;  	unsigned long buffer_size; -	unsigned long addr;  	LIST_HEAD(pages);  	int i, cpu; @@ -1375,16 +1381,24 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)  	for_each_buffer_cpu(buffer, cpu) {  		for (i = 0; i < new_pages; i++) { +			struct page *page; +			/* +			 * __GFP_NORETRY flag makes sure that the allocation +			 * fails gracefully without invoking oom-killer and +			 * the system is not destabilized. +			 */  			bpage = kzalloc_node(ALIGN(sizeof(*bpage),  						  cache_line_size()), -					    GFP_KERNEL, cpu_to_node(cpu)); +					    GFP_KERNEL | __GFP_NORETRY, +					    cpu_to_node(cpu));  			if (!bpage)  				goto free_pages;  			list_add(&bpage->list, &pages); -			addr = __get_free_page(GFP_KERNEL); -			if (!addr) +			page = alloc_pages_node(cpu_to_node(cpu), +						GFP_KERNEL | __GFP_NORETRY, 0); +			if (!page)  				goto free_pages; -			bpage->page = (void *)addr; +			bpage->page = page_address(page);  			rb_init_page(bpage->page);  		}  	} @@ -3730,16 +3744,17 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);   * Returns:   *  The page allocated, or NULL on error.   
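/*
 * Illustrative aside, not part of the patch: the ring-buffer hunks above
 * switch from __get_free_page() to node-local page allocation, most of
 * them also passing __GFP_NORETRY so a failed allocation returns NULL
 * instead of invoking the OOM killer.  The common shape, with a
 * hypothetical helper name:
 */
static void *demo_alloc_buffer_page(int cpu)
{
	struct page *page;

	page = alloc_pages_node(cpu_to_node(cpu),
				GFP_KERNEL | __GFP_NORETRY, 0);
	if (!page)
		return NULL;		/* caller falls back or reports -ENOMEM   */

	return page_address(page);	/* order-0 page -> usable kernel address  */
}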
*/ -void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) +void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu)  {  	struct buffer_data_page *bpage; -	unsigned long addr; +	struct page *page; -	addr = __get_free_page(GFP_KERNEL); -	if (!addr) +	page = alloc_pages_node(cpu_to_node(cpu), +				GFP_KERNEL | __GFP_NORETRY, 0); +	if (!page)  		return NULL; -	bpage = (void *)addr; +	bpage = page_address(page);  	rb_init_page(bpage); @@ -3978,20 +3993,11 @@ rb_simple_write(struct file *filp, const char __user *ubuf,  		size_t cnt, loff_t *ppos)  {  	unsigned long *p = filp->private_data; -	char buf[64];  	unsigned long val;  	int ret; -	if (cnt >= sizeof(buf)) -		return -EINVAL; - -	if (copy_from_user(&buf, ubuf, cnt)) -		return -EFAULT; - -	buf[cnt] = 0; - -	ret = strict_strtoul(buf, 10, &val); -	if (ret < 0) +	ret = kstrtoul_from_user(ubuf, cnt, 10, &val); +	if (ret)  		return ret;  	if (val) diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index 302f8a614635..a5457d577b98 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -106,7 +106,7 @@ static enum event_status read_page(int cpu)  	int inc;  	int i; -	bpage = ring_buffer_alloc_read_page(buffer); +	bpage = ring_buffer_alloc_read_page(buffer, cpu);  	if (!bpage)  		return EVENT_DROPPED; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ee9c921d7f21..e5df02c69b1d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -343,26 +343,27 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |  static int trace_stop_count;  static DEFINE_SPINLOCK(tracing_start_lock); +static void wakeup_work_handler(struct work_struct *work) +{ +	wake_up(&trace_wait); +} + +static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler); +  /**   * trace_wake_up - wake up tasks waiting for trace input   * - * Simply wakes up any task that is blocked on the trace_wait - * queue. These is used with trace_poll for tasks polling the trace. + * Schedules a delayed work to wake up any task that is blocked on the + * trace_wait queue. These is used with trace_poll for tasks polling the + * trace.   
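/*
 * Illustrative aside, not part of the patch: trace_wake_up(), shown next,
 * no longer peeks at the runqueue lock; it defers the wake-up to a
 * workqueue so the call is safe from any context.  Generic shape of that
 * pattern (the demo_* names are hypothetical):
 */
static DECLARE_WAIT_QUEUE_HEAD(demo_wait);

static void demo_wake_fn(struct work_struct *work)
{
	wake_up(&demo_wait);			/* runs from process context */
}
static DECLARE_DELAYED_WORK(demo_wake_work, demo_wake_fn);

static void demo_poke(void)	/* callable where a direct wake_up() is risky */
{
	schedule_delayed_work(&demo_wake_work, msecs_to_jiffies(2));
}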
*/  void trace_wake_up(void)  { -	int cpu; +	const unsigned long delay = msecs_to_jiffies(2);  	if (trace_flags & TRACE_ITER_BLOCK)  		return; -	/* -	 * The runqueue_is_locked() can fail, but this is the best we -	 * have for now: -	 */ -	cpu = get_cpu(); -	if (!runqueue_is_locked(cpu)) -		wake_up(&trace_wait); -	put_cpu(); +	schedule_delayed_work(&wakeup_work, delay);  }  static int __init set_buf_size(char *str) @@ -424,6 +425,7 @@ static const char *trace_options[] = {  	"graph-time",  	"record-cmd",  	"overwrite", +	"disable_on_free",  	NULL  }; @@ -1191,6 +1193,18 @@ void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer,  }  EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit); +void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer, +					    struct ring_buffer_event *event, +					    unsigned long flags, int pc, +					    struct pt_regs *regs) +{ +	ring_buffer_unlock_commit(buffer, event); + +	ftrace_trace_stack_regs(buffer, flags, 0, pc, regs); +	ftrace_trace_userstack(buffer, flags, pc); +} +EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit_regs); +  void trace_current_buffer_discard_commit(struct ring_buffer *buffer,  					 struct ring_buffer_event *event)  { @@ -1234,30 +1248,103 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,  }  #ifdef CONFIG_STACKTRACE + +#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long)) +struct ftrace_stack { +	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES]; +}; + +static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack); +static DEFINE_PER_CPU(int, ftrace_stack_reserve); +  static void __ftrace_trace_stack(struct ring_buffer *buffer,  				 unsigned long flags, -				 int skip, int pc) +				 int skip, int pc, struct pt_regs *regs)  {  	struct ftrace_event_call *call = &event_kernel_stack;  	struct ring_buffer_event *event;  	struct stack_entry *entry;  	struct stack_trace trace; +	int use_stack; +	int size = FTRACE_STACK_ENTRIES; + +	trace.nr_entries	= 0; +	trace.skip		= skip; + +	/* +	 * Since events can happen in NMIs there's no safe way to +	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt +	 * or NMI comes in, it will just have to use the default +	 * FTRACE_STACK_SIZE. +	 */ +	preempt_disable_notrace(); + +	use_stack = ++__get_cpu_var(ftrace_stack_reserve); +	/* +	 * We don't need any atomic variables, just a barrier. +	 * If an interrupt comes in, we don't care, because it would +	 * have exited and put the counter back to what we want. +	 * We just need a barrier to keep gcc from moving things +	 * around. 
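/*
 * Illustrative aside, not part of the patch: the reservation counter used
 * below boils down to this pattern - the outermost user on a CPU gets the
 * large per-cpu scratch area, a nested irq/NMI falls back to a small local
 * buffer, and a compiler barrier is enough because every access is
 * CPU-local (the demo_* names are hypothetical):
 */
static DEFINE_PER_CPU(int, demo_reserve);

static void demo_use_scratch(void)
{
	int outermost;

	preempt_disable_notrace();
	outermost = (++__get_cpu_var(demo_reserve) == 1);
	barrier();		/* keep gcc from reordering around the count */

	if (outermost) {
		/* ... use the large per-cpu scratch buffer ... */
	} else {
		/* ... nested context: use a small fallback buffer ... */
	}

	barrier();
	__get_cpu_var(demo_reserve)--;
	preempt_enable_notrace();
}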
+	 */ +	barrier(); +	if (use_stack == 1) { +		trace.entries		= &__get_cpu_var(ftrace_stack).calls[0]; +		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES; + +		if (regs) +			save_stack_trace_regs(regs, &trace); +		else +			save_stack_trace(&trace); + +		if (trace.nr_entries > size) +			size = trace.nr_entries; +	} else +		/* From now on, use_stack is a boolean */ +		use_stack = 0; + +	size *= sizeof(unsigned long);  	event = trace_buffer_lock_reserve(buffer, TRACE_STACK, -					  sizeof(*entry), flags, pc); +					  sizeof(*entry) + size, flags, pc);  	if (!event) -		return; -	entry	= ring_buffer_event_data(event); -	memset(&entry->caller, 0, sizeof(entry->caller)); +		goto out; +	entry = ring_buffer_event_data(event); -	trace.nr_entries	= 0; -	trace.max_entries	= FTRACE_STACK_ENTRIES; -	trace.skip		= skip; -	trace.entries		= entry->caller; +	memset(&entry->caller, 0, size); + +	if (use_stack) +		memcpy(&entry->caller, trace.entries, +		       trace.nr_entries * sizeof(unsigned long)); +	else { +		trace.max_entries	= FTRACE_STACK_ENTRIES; +		trace.entries		= entry->caller; +		if (regs) +			save_stack_trace_regs(regs, &trace); +		else +			save_stack_trace(&trace); +	} + +	entry->size = trace.nr_entries; -	save_stack_trace(&trace);  	if (!filter_check_discard(call, entry, buffer, event))  		ring_buffer_unlock_commit(buffer, event); + + out: +	/* Again, don't let gcc optimize things here */ +	barrier(); +	__get_cpu_var(ftrace_stack_reserve)--; +	preempt_enable_notrace(); + +} + +void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags, +			     int skip, int pc, struct pt_regs *regs) +{ +	if (!(trace_flags & TRACE_ITER_STACKTRACE)) +		return; + +	__ftrace_trace_stack(buffer, flags, skip, pc, regs);  }  void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, @@ -1266,13 +1353,13 @@ void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,  	if (!(trace_flags & TRACE_ITER_STACKTRACE))  		return; -	__ftrace_trace_stack(buffer, flags, skip, pc); +	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);  }  void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,  		   int pc)  { -	__ftrace_trace_stack(tr->buffer, flags, skip, pc); +	__ftrace_trace_stack(tr->buffer, flags, skip, pc, NULL);  }  /** @@ -1288,7 +1375,7 @@ void trace_dump_stack(void)  	local_save_flags(flags);  	/* skipping 3 traces, seems to get us at the caller of this function */ -	__ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count()); +	__ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count(), NULL);  }  static DEFINE_PER_CPU(int, user_stack_count); @@ -1536,7 +1623,12 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,  	ftrace_enable_cpu(); -	return event ? 
ring_buffer_event_data(event) : NULL; +	if (event) { +		iter->ent_size = ring_buffer_event_length(event); +		return ring_buffer_event_data(event); +	} +	iter->ent_size = 0; +	return NULL;  }  static struct trace_entry * @@ -2051,6 +2143,9 @@ void trace_default_header(struct seq_file *m)  {  	struct trace_iterator *iter = m->private; +	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO)) +		return; +  	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {  		/* print nothing if the buffers are empty */  		if (trace_empty(iter)) @@ -2701,20 +2796,11 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,  		   size_t cnt, loff_t *ppos)  {  	struct trace_array *tr = filp->private_data; -	char buf[64];  	unsigned long val;  	int ret; -	if (cnt >= sizeof(buf)) -		return -EINVAL; - -	if (copy_from_user(&buf, ubuf, cnt)) -		return -EFAULT; - -	buf[cnt] = 0; - -	ret = strict_strtoul(buf, 10, &val); -	if (ret < 0) +	ret = kstrtoul_from_user(ubuf, cnt, 10, &val); +	if (ret)  		return ret;  	val = !!val; @@ -2767,7 +2853,7 @@ int tracer_init(struct tracer *t, struct trace_array *tr)  	return t->init(tr);  } -static int tracing_resize_ring_buffer(unsigned long size) +static int __tracing_resize_ring_buffer(unsigned long size)  {  	int ret; @@ -2819,6 +2905,41 @@ static int tracing_resize_ring_buffer(unsigned long size)  	return ret;  } +static ssize_t tracing_resize_ring_buffer(unsigned long size) +{ +	int cpu, ret = size; + +	mutex_lock(&trace_types_lock); + +	tracing_stop(); + +	/* disable all cpu buffers */ +	for_each_tracing_cpu(cpu) { +		if (global_trace.data[cpu]) +			atomic_inc(&global_trace.data[cpu]->disabled); +		if (max_tr.data[cpu]) +			atomic_inc(&max_tr.data[cpu]->disabled); +	} + +	if (size != global_trace.entries) +		ret = __tracing_resize_ring_buffer(size); + +	if (ret < 0) +		ret = -ENOMEM; + +	for_each_tracing_cpu(cpu) { +		if (global_trace.data[cpu]) +			atomic_dec(&global_trace.data[cpu]->disabled); +		if (max_tr.data[cpu]) +			atomic_dec(&max_tr.data[cpu]->disabled); +	} + +	tracing_start(); +	mutex_unlock(&trace_types_lock); + +	return ret; +} +  /**   * tracing_update_buffers - used by tracing facility to expand ring buffers @@ -2836,7 +2957,7 @@ int tracing_update_buffers(void)  	mutex_lock(&trace_types_lock);  	if (!ring_buffer_expanded) -		ret = tracing_resize_ring_buffer(trace_buf_size); +		ret = __tracing_resize_ring_buffer(trace_buf_size);  	mutex_unlock(&trace_types_lock);  	return ret; @@ -2860,7 +2981,7 @@ static int tracing_set_tracer(const char *buf)  	mutex_lock(&trace_types_lock);  	if (!ring_buffer_expanded) { -		ret = tracing_resize_ring_buffer(trace_buf_size); +		ret = __tracing_resize_ring_buffer(trace_buf_size);  		if (ret < 0)  			goto out;  		ret = 0; @@ -2966,20 +3087,11 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf,  		      size_t cnt, loff_t *ppos)  {  	unsigned long *ptr = filp->private_data; -	char buf[64];  	unsigned long val;  	int ret; -	if (cnt >= sizeof(buf)) -		return -EINVAL; - -	if (copy_from_user(&buf, ubuf, cnt)) -		return -EFAULT; - -	buf[cnt] = 0; - -	ret = strict_strtoul(buf, 10, &val); -	if (ret < 0) +	ret = kstrtoul_from_user(ubuf, cnt, 10, &val); +	if (ret)  		return ret;  	*ptr = val * 1000; @@ -3434,67 +3546,54 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,  		      size_t cnt, loff_t *ppos)  {  	unsigned long val; -	char buf[64]; -	int ret, cpu; - -	if (cnt >= sizeof(buf)) -		return -EINVAL; - -	if (copy_from_user(&buf, ubuf, cnt)) -		return -EFAULT; - -	buf[cnt] = 0; +	int ret; -	ret = 
strict_strtoul(buf, 10, &val); -	if (ret < 0) +	ret = kstrtoul_from_user(ubuf, cnt, 10, &val); +	if (ret)  		return ret;  	/* must have at least 1 entry */  	if (!val)  		return -EINVAL; -	mutex_lock(&trace_types_lock); - -	tracing_stop(); - -	/* disable all cpu buffers */ -	for_each_tracing_cpu(cpu) { -		if (global_trace.data[cpu]) -			atomic_inc(&global_trace.data[cpu]->disabled); -		if (max_tr.data[cpu]) -			atomic_inc(&max_tr.data[cpu]->disabled); -	} -  	/* value is in KB */  	val <<= 10; -	if (val != global_trace.entries) { -		ret = tracing_resize_ring_buffer(val); -		if (ret < 0) { -			cnt = ret; -			goto out; -		} -	} +	ret = tracing_resize_ring_buffer(val); +	if (ret < 0) +		return ret;  	*ppos += cnt; -	/* If check pages failed, return ENOMEM */ -	if (tracing_disabled) -		cnt = -ENOMEM; - out: -	for_each_tracing_cpu(cpu) { -		if (global_trace.data[cpu]) -			atomic_dec(&global_trace.data[cpu]->disabled); -		if (max_tr.data[cpu]) -			atomic_dec(&max_tr.data[cpu]->disabled); -	} +	return cnt; +} -	tracing_start(); -	mutex_unlock(&trace_types_lock); +static ssize_t +tracing_free_buffer_write(struct file *filp, const char __user *ubuf, +			  size_t cnt, loff_t *ppos) +{ +	/* +	 * There is no need to read what the user has written, this function +	 * is just to make sure that there is no error when "echo" is used +	 */ + +	*ppos += cnt;  	return cnt;  } +static int +tracing_free_buffer_release(struct inode *inode, struct file *filp) +{ +	/* disable tracing ? */ +	if (trace_flags & TRACE_ITER_STOP_ON_FREE) +		tracing_off(); +	/* resize the ring buffer to 0 */ +	tracing_resize_ring_buffer(0); + +	return 0; +} +  static int mark_printk(const char *fmt, ...)  {  	int ret; @@ -3640,6 +3739,11 @@ static const struct file_operations tracing_entries_fops = {  	.llseek		= generic_file_llseek,  }; +static const struct file_operations tracing_free_buffer_fops = { +	.write		= tracing_free_buffer_write, +	.release	= tracing_free_buffer_release, +}; +  static const struct file_operations tracing_mark_fops = {  	.open		= tracing_open_generic,  	.write		= tracing_mark_write, @@ -3696,7 +3800,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,  		return 0;  	if (!info->spare) -		info->spare = ring_buffer_alloc_read_page(info->tr->buffer); +		info->spare = ring_buffer_alloc_read_page(info->tr->buffer, info->cpu);  	if (!info->spare)  		return -ENOMEM; @@ -3853,7 +3957,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,  		ref->ref = 1;  		ref->buffer = info->tr->buffer; -		ref->page = ring_buffer_alloc_read_page(ref->buffer); +		ref->page = ring_buffer_alloc_read_page(ref->buffer, info->cpu);  		if (!ref->page) {  			kfree(ref);  			break; @@ -3862,8 +3966,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,  		r = ring_buffer_read_page(ref->buffer, &ref->page,  					  len, info->cpu, 1);  		if (r < 0) { -			ring_buffer_free_read_page(ref->buffer, -						   ref->page); +			ring_buffer_free_read_page(ref->buffer, ref->page);  			kfree(ref);  			break;  		} @@ -4099,19 +4202,10 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,  {  	struct trace_option_dentry *topt = filp->private_data;  	unsigned long val; -	char buf[64];  	int ret; -	if (cnt >= sizeof(buf)) -		return -EINVAL; - -	if (copy_from_user(&buf, ubuf, cnt)) -		return -EFAULT; - -	buf[cnt] = 0; - -	ret = strict_strtoul(buf, 10, &val); -	if (ret < 0) +	ret = kstrtoul_from_user(ubuf, cnt, 10, &val); +	if (ret)  		return ret;  	if (val != 0 && val != 1) @@ -4159,20 +4253,11 @@ 
trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,  			 loff_t *ppos)  {  	long index = (long)filp->private_data; -	char buf[64];  	unsigned long val;  	int ret; -	if (cnt >= sizeof(buf)) -		return -EINVAL; - -	if (copy_from_user(&buf, ubuf, cnt)) -		return -EFAULT; - -	buf[cnt] = 0; - -	ret = strict_strtoul(buf, 10, &val); -	if (ret < 0) +	ret = kstrtoul_from_user(ubuf, cnt, 10, &val); +	if (ret)  		return ret;  	if (val != 0 && val != 1) @@ -4365,6 +4450,9 @@ static __init int tracer_init_debugfs(void)  	trace_create_file("buffer_size_kb", 0644, d_tracer,  			&global_trace, &tracing_entries_fops); +	trace_create_file("free_buffer", 0644, d_tracer, +			&global_trace, &tracing_free_buffer_fops); +  	trace_create_file("trace_marker", 0220, d_tracer,  			NULL, &tracing_mark_fops); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 229f8591f61d..3f381d0b20a8 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -278,6 +278,29 @@ struct tracer {  }; +/* Only current can touch trace_recursion */ +#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0) +#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0) + +/* Ring buffer has the 10 LSB bits to count */ +#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff) + +/* for function tracing recursion */ +#define TRACE_INTERNAL_BIT		(1<<11) +#define TRACE_GLOBAL_BIT		(1<<12) +/* + * Abuse of the trace_recursion. + * As we need a way to maintain state if we are tracing the function + * graph in irq because we want to trace a particular function that + * was called in irq context but we have irq tracing off. Since this + * can only be modified by current, we can reuse trace_recursion. + */ +#define TRACE_IRQ_BIT			(1<<13) + +#define trace_recursion_set(bit)	do { (current)->trace_recursion |= (bit); } while (0) +#define trace_recursion_clear(bit)	do { (current)->trace_recursion &= ~(bit); } while (0) +#define trace_recursion_test(bit)	((current)->trace_recursion & (bit)) +  #define TRACE_PIPE_ALL_CPU	-1  int tracer_init(struct tracer *t, struct trace_array *tr); @@ -389,6 +412,9 @@ void update_max_tr_single(struct trace_array *tr,  void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,  			int skip, int pc); +void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags, +			     int skip, int pc, struct pt_regs *regs); +  void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags,  			    int pc); @@ -400,6 +426,12 @@ static inline void ftrace_trace_stack(struct ring_buffer *buffer,  {  } +static inline void ftrace_trace_stack_regs(struct ring_buffer *buffer, +					   unsigned long flags, int skip, +					   int pc, struct pt_regs *regs) +{ +} +  static inline void ftrace_trace_userstack(struct ring_buffer *buffer,  					  unsigned long flags, int pc)  { @@ -507,8 +539,18 @@ static inline int ftrace_graph_addr(unsigned long addr)  		return 1;  	for (i = 0; i < ftrace_graph_count; i++) { -		if (addr == ftrace_graph_funcs[i]) +		if (addr == ftrace_graph_funcs[i]) { +			/* +			 * If no irqs are to be traced, but a set_graph_function +			 * is set, and called by an interrupt handler, we still +			 * want to trace it. 
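/*
 * Aside, not part of this hunk: the consumer of TRACE_IRQ_BIT appears
 * later in this same series (trace_functions_graph.c); the per-task bit
 * set below lets an explicitly requested function be graphed even when
 * graphing of irq context is otherwise skipped:
 *
 *	static inline int ftrace_graph_ignore_irqs(void)
 *	{
 *		if (!ftrace_graph_skip_irqs || trace_recursion_test(TRACE_IRQ_BIT))
 *			return 0;
 *		return in_irq();
 *	}
 */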
+			 */ +			if (in_irq()) +				trace_recursion_set(TRACE_IRQ_BIT); +			else +				trace_recursion_clear(TRACE_IRQ_BIT);  			return 1; +		}  	}  	return 0; @@ -609,6 +651,7 @@ enum trace_iterator_flags {  	TRACE_ITER_GRAPH_TIME		= 0x80000,  	TRACE_ITER_RECORD_CMD		= 0x100000,  	TRACE_ITER_OVERWRITE		= 0x200000, +	TRACE_ITER_STOP_ON_FREE		= 0x400000,  };  /* @@ -677,6 +720,7 @@ struct event_subsystem {  	struct dentry		*entry;  	struct event_filter	*filter;  	int			nr_events; +	int			ref_count;  };  #define FILTER_PRED_INVALID	((unsigned short)-1) @@ -784,19 +828,4 @@ extern const char *__stop___trace_bprintk_fmt[];  	FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))  #include "trace_entries.h" -/* Only current can touch trace_recursion */ -#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0) -#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0) - -/* Ring buffer has the 10 LSB bits to count */ -#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff) - -/* for function tracing recursion */ -#define TRACE_INTERNAL_BIT		(1<<11) -#define TRACE_GLOBAL_BIT		(1<<12) - -#define trace_recursion_set(bit)	do { (current)->trace_recursion |= (bit); } while (0) -#define trace_recursion_clear(bit)	do { (current)->trace_recursion &= ~(bit); } while (0) -#define trace_recursion_test(bit)	((current)->trace_recursion & (bit)) -  #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index e32744c84d94..93365907f219 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -161,7 +161,8 @@ FTRACE_ENTRY(kernel_stack, stack_entry,  	TRACE_STACK,  	F_STRUCT( -		__array(	unsigned long,	caller, FTRACE_STACK_ENTRIES	) +		__field(	int,		size	) +		__dynamic_array(unsigned long,	caller	)  	),  	F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 686ec399f2a8..581876f9f387 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -244,6 +244,35 @@ static void ftrace_clear_events(void)  	mutex_unlock(&event_mutex);  } +static void __put_system(struct event_subsystem *system) +{ +	struct event_filter *filter = system->filter; + +	WARN_ON_ONCE(system->ref_count == 0); +	if (--system->ref_count) +		return; + +	if (filter) { +		kfree(filter->filter_string); +		kfree(filter); +	} +	kfree(system->name); +	kfree(system); +} + +static void __get_system(struct event_subsystem *system) +{ +	WARN_ON_ONCE(system->ref_count == 0); +	system->ref_count++; +} + +static void put_system(struct event_subsystem *system) +{ +	mutex_lock(&event_mutex); +	__put_system(system); +	mutex_unlock(&event_mutex); +} +  /*   * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.   
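/*
 * Illustrative aside, not part of the patch: __get_system()/__put_system()
 * above are paired later in this series by subsystem_open() and
 * subsystem_release(), so the "enable" and "filter" files keep the
 * subsystem (and its name string) alive while they are open.  Simplified
 * sketch of that pairing (locking and the existence check the real
 * subsystem_open() performs are omitted):
 */
static int demo_subsystem_open(struct inode *inode, struct file *filp)
{
	struct event_subsystem *system = inode->i_private;

	__get_system(system);		/* hold a ref for this open file */
	return tracing_open_generic(inode, filp);
}

static int demo_subsystem_release(struct inode *inode, struct file *file)
{
	put_system(inode->i_private);	/* drops the ref; may free the subsystem */
	return 0;
}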
*/ @@ -486,20 +515,11 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,  		   loff_t *ppos)  {  	struct ftrace_event_call *call = filp->private_data; -	char buf[64];  	unsigned long val;  	int ret; -	if (cnt >= sizeof(buf)) -		return -EINVAL; - -	if (copy_from_user(&buf, ubuf, cnt)) -		return -EFAULT; - -	buf[cnt] = 0; - -	ret = strict_strtoul(buf, 10, &val); -	if (ret < 0) +	ret = kstrtoul_from_user(ubuf, cnt, 10, &val); +	if (ret)  		return ret;  	ret = tracing_update_buffers(); @@ -528,7 +548,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,  		   loff_t *ppos)  {  	const char set_to_char[4] = { '?', '0', '1', 'X' }; -	const char *system = filp->private_data; +	struct event_subsystem *system = filp->private_data;  	struct ftrace_event_call *call;  	char buf[2];  	int set = 0; @@ -539,7 +559,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,  		if (!call->name || !call->class || !call->class->reg)  			continue; -		if (system && strcmp(call->class->system, system) != 0) +		if (system && strcmp(call->class->system, system->name) != 0)  			continue;  		/* @@ -569,21 +589,13 @@ static ssize_t  system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,  		    loff_t *ppos)  { -	const char *system = filp->private_data; +	struct event_subsystem *system = filp->private_data; +	const char *name = NULL;  	unsigned long val; -	char buf[64];  	ssize_t ret; -	if (cnt >= sizeof(buf)) -		return -EINVAL; - -	if (copy_from_user(&buf, ubuf, cnt)) -		return -EFAULT; - -	buf[cnt] = 0; - -	ret = strict_strtoul(buf, 10, &val); -	if (ret < 0) +	ret = kstrtoul_from_user(ubuf, cnt, 10, &val); +	if (ret)  		return ret;  	ret = tracing_update_buffers(); @@ -593,7 +605,14 @@ system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,  	if (val != 0 && val != 1)  		return -EINVAL; -	ret = __ftrace_set_clr_event(NULL, system, NULL, val); +	/* +	 * Opening of "enable" adds a ref count to system, +	 * so the name is safe to use. 
+	 */ +	if (system) +		name = system->name; + +	ret = __ftrace_set_clr_event(NULL, name, NULL, val);  	if (ret)  		goto out; @@ -826,6 +845,52 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,  	return cnt;  } +static LIST_HEAD(event_subsystems); + +static int subsystem_open(struct inode *inode, struct file *filp) +{ +	struct event_subsystem *system = NULL; +	int ret; + +	if (!inode->i_private) +		goto skip_search; + +	/* Make sure the system still exists */ +	mutex_lock(&event_mutex); +	list_for_each_entry(system, &event_subsystems, list) { +		if (system == inode->i_private) { +			/* Don't open systems with no events */ +			if (!system->nr_events) { +				system = NULL; +				break; +			} +			__get_system(system); +			break; +		} +	} +	mutex_unlock(&event_mutex); + +	if (system != inode->i_private) +		return -ENODEV; + + skip_search: +	ret = tracing_open_generic(inode, filp); +	if (ret < 0 && system) +		put_system(system); + +	return ret; +} + +static int subsystem_release(struct inode *inode, struct file *file) +{ +	struct event_subsystem *system = inode->i_private; + +	if (system) +		put_system(system); + +	return 0; +} +  static ssize_t  subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,  		      loff_t *ppos) @@ -963,17 +1028,19 @@ static const struct file_operations ftrace_event_filter_fops = {  };  static const struct file_operations ftrace_subsystem_filter_fops = { -	.open = tracing_open_generic, +	.open = subsystem_open,  	.read = subsystem_filter_read,  	.write = subsystem_filter_write,  	.llseek = default_llseek, +	.release = subsystem_release,  };  static const struct file_operations ftrace_system_enable_fops = { -	.open = tracing_open_generic, +	.open = subsystem_open,  	.read = system_enable_read,  	.write = system_enable_write,  	.llseek = default_llseek, +	.release = subsystem_release,  };  static const struct file_operations ftrace_show_header_fops = { @@ -1002,8 +1069,6 @@ static struct dentry *event_trace_events_dir(void)  	return d_events;  } -static LIST_HEAD(event_subsystems); -  static struct dentry *  event_subsystem_dir(const char *name, struct dentry *d_events)  { @@ -1013,6 +1078,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events)  	/* First see if we did not already create this dir */  	list_for_each_entry(system, &event_subsystems, list) {  		if (strcmp(system->name, name) == 0) { +			__get_system(system);  			system->nr_events++;  			return system->entry;  		} @@ -1035,6 +1101,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events)  	}  	system->nr_events = 1; +	system->ref_count = 1;  	system->name = kstrdup(name, GFP_KERNEL);  	if (!system->name) {  		debugfs_remove(system->entry); @@ -1062,8 +1129,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events)  			   "'%s/filter' entry\n", name);  	} -	trace_create_file("enable", 0644, system->entry, -			  (void *)system->name, +	trace_create_file("enable", 0644, system->entry, system,  			  &ftrace_system_enable_fops);  	return system->entry; @@ -1184,16 +1250,9 @@ static void remove_subsystem_dir(const char *name)  	list_for_each_entry(system, &event_subsystems, list) {  		if (strcmp(system->name, name) == 0) {  			if (!--system->nr_events) { -				struct event_filter *filter = system->filter; -  				debugfs_remove_recursive(system->entry);  				list_del(&system->list); -				if (filter) { -					kfree(filter->filter_string); -					kfree(filter); -				} -				kfree(system->name); -				kfree(system); +				__put_system(system);  			} 
 			break;  		} diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 8008ddcfbf20..256764ecccd6 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1886,6 +1886,12 @@ int apply_subsystem_event_filter(struct event_subsystem *system,  	mutex_lock(&event_mutex); +	/* Make sure the system still has events */ +	if (!system->nr_events) { +		err = -ENODEV; +		goto out_unlock; +	} +  	if (!strcmp(strstrip(filter_string), "0")) {  		filter_free_subsystem_preds(system);  		remove_filter_string(system->filter); diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 8d0e1cc4e974..c7b0c6a7db09 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -324,7 +324,8 @@ ftrace_trace_onoff_unreg(char *glob, char *cmd, char *param)  }  static int -ftrace_trace_onoff_callback(char *glob, char *cmd, char *param, int enable) +ftrace_trace_onoff_callback(struct ftrace_hash *hash, +			    char *glob, char *cmd, char *param, int enable)  {  	struct ftrace_probe_ops *ops;  	void *count = (void *)-1; diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 962cdb24ed81..a7d2a4c653d8 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -74,6 +74,20 @@ static struct tracer_flags tracer_flags = {  static struct trace_array *graph_array; +/* + * DURATION column is being also used to display IRQ signs, + * following values are used by print_graph_irq and others + * to fill in space into DURATION column. + */ +enum { +	DURATION_FILL_FULL  = -1, +	DURATION_FILL_START = -2, +	DURATION_FILL_END   = -3, +}; + +static enum print_line_t +print_graph_duration(unsigned long long duration, struct trace_seq *s, +		     u32 flags);  /* Add a function return address to the trace stack on thread info.*/  int @@ -213,7 +227,7 @@ int __trace_graph_entry(struct trace_array *tr,  static inline int ftrace_graph_ignore_irqs(void)  { -	if (!ftrace_graph_skip_irqs) +	if (!ftrace_graph_skip_irqs || trace_recursion_test(TRACE_IRQ_BIT))  		return 0;  	return in_irq(); @@ -577,32 +591,6 @@ get_return_for_leaf(struct trace_iterator *iter,  	return next;  } -/* Signal a overhead of time execution to the output */ -static int -print_graph_overhead(unsigned long long duration, struct trace_seq *s, -		     u32 flags) -{ -	/* If duration disappear, we don't need anything */ -	if (!(flags & TRACE_GRAPH_PRINT_DURATION)) -		return 1; - -	/* Non nested entry or return */ -	if (duration == -1) -		return trace_seq_printf(s, "  "); - -	if (flags & TRACE_GRAPH_PRINT_OVERHEAD) { -		/* Duration exceeded 100 msecs */ -		if (duration > 100000ULL) -			return trace_seq_printf(s, "! 
"); - -		/* Duration exceeded 10 msecs */ -		if (duration > 10000ULL) -			return trace_seq_printf(s, "+ "); -	} - -	return trace_seq_printf(s, "  "); -} -  static int print_graph_abs_time(u64 t, struct trace_seq *s)  {  	unsigned long usecs_rem; @@ -625,34 +613,36 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,  		addr >= (unsigned long)__irqentry_text_end)  		return TRACE_TYPE_UNHANDLED; -	/* Absolute time */ -	if (flags & TRACE_GRAPH_PRINT_ABS_TIME) { -		ret = print_graph_abs_time(iter->ts, s); -		if (!ret) -			return TRACE_TYPE_PARTIAL_LINE; -	} +	if (trace_flags & TRACE_ITER_CONTEXT_INFO) { +		/* Absolute time */ +		if (flags & TRACE_GRAPH_PRINT_ABS_TIME) { +			ret = print_graph_abs_time(iter->ts, s); +			if (!ret) +				return TRACE_TYPE_PARTIAL_LINE; +		} -	/* Cpu */ -	if (flags & TRACE_GRAPH_PRINT_CPU) { -		ret = print_graph_cpu(s, cpu); -		if (ret == TRACE_TYPE_PARTIAL_LINE) -			return TRACE_TYPE_PARTIAL_LINE; -	} +		/* Cpu */ +		if (flags & TRACE_GRAPH_PRINT_CPU) { +			ret = print_graph_cpu(s, cpu); +			if (ret == TRACE_TYPE_PARTIAL_LINE) +				return TRACE_TYPE_PARTIAL_LINE; +		} -	/* Proc */ -	if (flags & TRACE_GRAPH_PRINT_PROC) { -		ret = print_graph_proc(s, pid); -		if (ret == TRACE_TYPE_PARTIAL_LINE) -			return TRACE_TYPE_PARTIAL_LINE; -		ret = trace_seq_printf(s, " | "); -		if (!ret) -			return TRACE_TYPE_PARTIAL_LINE; +		/* Proc */ +		if (flags & TRACE_GRAPH_PRINT_PROC) { +			ret = print_graph_proc(s, pid); +			if (ret == TRACE_TYPE_PARTIAL_LINE) +				return TRACE_TYPE_PARTIAL_LINE; +			ret = trace_seq_printf(s, " | "); +			if (!ret) +				return TRACE_TYPE_PARTIAL_LINE; +		}  	}  	/* No overhead */ -	ret = print_graph_overhead(-1, s, flags); -	if (!ret) -		return TRACE_TYPE_PARTIAL_LINE; +	ret = print_graph_duration(DURATION_FILL_START, s, flags); +	if (ret != TRACE_TYPE_HANDLED) +		return ret;  	if (type == TRACE_GRAPH_ENT)  		ret = trace_seq_printf(s, "==========>"); @@ -662,9 +652,10 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,  	if (!ret)  		return TRACE_TYPE_PARTIAL_LINE; -	/* Don't close the duration column if haven't one */ -	if (flags & TRACE_GRAPH_PRINT_DURATION) -		trace_seq_printf(s, " |"); +	ret = print_graph_duration(DURATION_FILL_END, s, flags); +	if (ret != TRACE_TYPE_HANDLED) +		return ret; +  	ret = trace_seq_printf(s, "\n");  	if (!ret) @@ -716,9 +707,49 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)  }  static enum print_line_t -print_graph_duration(unsigned long long duration, struct trace_seq *s) +print_graph_duration(unsigned long long duration, struct trace_seq *s, +		     u32 flags)  { -	int ret; +	int ret = -1; + +	if (!(flags & TRACE_GRAPH_PRINT_DURATION) || +	    !(trace_flags & TRACE_ITER_CONTEXT_INFO)) +			return TRACE_TYPE_HANDLED; + +	/* No real adata, just filling the column with spaces */ +	switch (duration) { +	case DURATION_FILL_FULL: +		ret = trace_seq_printf(s, "              |  "); +		return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; +	case DURATION_FILL_START: +		ret = trace_seq_printf(s, "  "); +		return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; +	case DURATION_FILL_END: +		ret = trace_seq_printf(s, " |"); +		return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; +	} + +	/* Signal a overhead of time execution to the output */ +	if (flags & TRACE_GRAPH_PRINT_OVERHEAD) { +		/* Duration exceeded 100 msecs */ +		if (duration > 100000ULL) +			ret = trace_seq_printf(s, "! 
"); +		/* Duration exceeded 10 msecs */ +		else if (duration > 10000ULL) +			ret = trace_seq_printf(s, "+ "); +	} + +	/* +	 * The -1 means we either did not exceed the duration tresholds +	 * or we dont want to print out the overhead. Either way we need +	 * to fill out the space. +	 */ +	if (ret == -1) +		ret = trace_seq_printf(s, "  "); + +	/* Catching here any failure happenned above */ +	if (!ret) +		return TRACE_TYPE_PARTIAL_LINE;  	ret = trace_print_graph_duration(duration, s);  	if (ret != TRACE_TYPE_HANDLED) @@ -767,18 +798,11 @@ print_graph_entry_leaf(struct trace_iterator *iter,  			cpu_data->enter_funcs[call->depth] = 0;  	} -	/* Overhead */ -	ret = print_graph_overhead(duration, s, flags); -	if (!ret) +	/* Overhead and duration */ +	ret = print_graph_duration(duration, s, flags); +	if (ret == TRACE_TYPE_PARTIAL_LINE)  		return TRACE_TYPE_PARTIAL_LINE; -	/* Duration */ -	if (flags & TRACE_GRAPH_PRINT_DURATION) { -		ret = print_graph_duration(duration, s); -		if (ret == TRACE_TYPE_PARTIAL_LINE) -			return TRACE_TYPE_PARTIAL_LINE; -	} -  	/* Function */  	for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {  		ret = trace_seq_printf(s, " "); @@ -815,17 +839,10 @@ print_graph_entry_nested(struct trace_iterator *iter,  			cpu_data->enter_funcs[call->depth] = call->func;  	} -	/* No overhead */ -	ret = print_graph_overhead(-1, s, flags); -	if (!ret) -		return TRACE_TYPE_PARTIAL_LINE; -  	/* No time */ -	if (flags & TRACE_GRAPH_PRINT_DURATION) { -		ret = trace_seq_printf(s, "            |  "); -		if (!ret) -			return TRACE_TYPE_PARTIAL_LINE; -	} +	ret = print_graph_duration(DURATION_FILL_FULL, s, flags); +	if (ret != TRACE_TYPE_HANDLED) +		return ret;  	/* Function */  	for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { @@ -865,6 +882,9 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,  			return TRACE_TYPE_PARTIAL_LINE;  	} +	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO)) +		return 0; +  	/* Absolute time */  	if (flags & TRACE_GRAPH_PRINT_ABS_TIME) {  		ret = print_graph_abs_time(iter->ts, s); @@ -1078,18 +1098,11 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,  	if (print_graph_prologue(iter, s, 0, 0, flags))  		return TRACE_TYPE_PARTIAL_LINE; -	/* Overhead */ -	ret = print_graph_overhead(duration, s, flags); -	if (!ret) +	/* Overhead and duration */ +	ret = print_graph_duration(duration, s, flags); +	if (ret == TRACE_TYPE_PARTIAL_LINE)  		return TRACE_TYPE_PARTIAL_LINE; -	/* Duration */ -	if (flags & TRACE_GRAPH_PRINT_DURATION) { -		ret = print_graph_duration(duration, s); -		if (ret == TRACE_TYPE_PARTIAL_LINE) -			return TRACE_TYPE_PARTIAL_LINE; -	} -  	/* Closing brace */  	for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) {  		ret = trace_seq_printf(s, " "); @@ -1146,17 +1159,10 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,  	if (print_graph_prologue(iter, s, 0, 0, flags))  		return TRACE_TYPE_PARTIAL_LINE; -	/* No overhead */ -	ret = print_graph_overhead(-1, s, flags); -	if (!ret) -		return TRACE_TYPE_PARTIAL_LINE; -  	/* No time */ -	if (flags & TRACE_GRAPH_PRINT_DURATION) { -		ret = trace_seq_printf(s, "            |  "); -		if (!ret) -			return TRACE_TYPE_PARTIAL_LINE; -	} +	ret = print_graph_duration(DURATION_FILL_FULL, s, flags); +	if (ret != TRACE_TYPE_HANDLED) +		return ret;  	/* Indentation */  	if (depth > 0) @@ -1207,7 +1213,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,  enum print_line_t -__print_graph_function_flags(struct trace_iterator *iter, u32 
flags) +print_graph_function_flags(struct trace_iterator *iter, u32 flags)  {  	struct ftrace_graph_ent_entry *field;  	struct fgraph_data *data = iter->private; @@ -1270,18 +1276,7 @@ __print_graph_function_flags(struct trace_iterator *iter, u32 flags)  static enum print_line_t  print_graph_function(struct trace_iterator *iter)  { -	return __print_graph_function_flags(iter, tracer_flags.val); -} - -enum print_line_t print_graph_function_flags(struct trace_iterator *iter, -					     u32 flags) -{ -	if (trace_flags & TRACE_ITER_LATENCY_FMT) -		flags |= TRACE_GRAPH_PRINT_DURATION; -	else -		flags |= TRACE_GRAPH_PRINT_ABS_TIME; - -	return __print_graph_function_flags(iter, flags); +	return print_graph_function_flags(iter, tracer_flags.val);  }  static enum print_line_t @@ -1309,8 +1304,7 @@ static void print_lat_header(struct seq_file *s, u32 flags)  	seq_printf(s, "#%.*s / _----=> need-resched    \n", size, spaces);  	seq_printf(s, "#%.*s| / _---=> hardirq/softirq \n", size, spaces);  	seq_printf(s, "#%.*s|| / _--=> preempt-depth   \n", size, spaces); -	seq_printf(s, "#%.*s||| / _-=> lock-depth      \n", size, spaces); -	seq_printf(s, "#%.*s|||| /                     \n", size, spaces); +	seq_printf(s, "#%.*s||| /                      \n", size, spaces);  }  static void __print_graph_headers_flags(struct seq_file *s, u32 flags) @@ -1329,7 +1323,7 @@ static void __print_graph_headers_flags(struct seq_file *s, u32 flags)  	if (flags & TRACE_GRAPH_PRINT_PROC)  		seq_printf(s, "  TASK/PID       ");  	if (lat) -		seq_printf(s, "|||||"); +		seq_printf(s, "||||");  	if (flags & TRACE_GRAPH_PRINT_DURATION)  		seq_printf(s, "  DURATION   ");  	seq_printf(s, "               FUNCTION CALLS\n"); @@ -1343,7 +1337,7 @@ static void __print_graph_headers_flags(struct seq_file *s, u32 flags)  	if (flags & TRACE_GRAPH_PRINT_PROC)  		seq_printf(s, "   |    |        ");  	if (lat) -		seq_printf(s, "|||||"); +		seq_printf(s, "||||");  	if (flags & TRACE_GRAPH_PRINT_DURATION)  		seq_printf(s, "   |   |      ");  	seq_printf(s, "               |   |   |   |\n"); @@ -1358,15 +1352,16 @@ void print_graph_headers_flags(struct seq_file *s, u32 flags)  {  	struct trace_iterator *iter = s->private; +	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO)) +		return; +  	if (trace_flags & TRACE_ITER_LATENCY_FMT) {  		/* print nothing if the buffers are empty */  		if (trace_empty(iter))  			return;  		print_trace_header(s, iter); -		flags |= TRACE_GRAPH_PRINT_DURATION; -	} else -		flags |= TRACE_GRAPH_PRINT_ABS_TIME; +	}  	__print_graph_headers_flags(s, flags);  } diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index c77424be284d..667aa8cc0cfc 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -226,7 +226,9 @@ static void irqsoff_trace_close(struct trace_iterator *iter)  }  #define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_CPU | \ -			    TRACE_GRAPH_PRINT_PROC) +			    TRACE_GRAPH_PRINT_PROC | \ +			    TRACE_GRAPH_PRINT_ABS_TIME | \ +			    TRACE_GRAPH_PRINT_DURATION)  static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)  { diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 27d13b36b8be..5fb3697bf0e5 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -343,6 +343,14 @@ DEFINE_BASIC_FETCH_FUNCS(deref)  DEFINE_FETCH_deref(string)  DEFINE_FETCH_deref(string_size) +static __kprobes void update_deref_fetch_param(struct deref_fetch_param *data) +{ +	if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) +		
update_deref_fetch_param(data->orig.data); +	else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) +		update_symbol_cache(data->orig.data); +} +  static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)  {  	if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) @@ -377,6 +385,19 @@ DEFINE_BASIC_FETCH_FUNCS(bitfield)  #define fetch_bitfield_string_size NULL  static __kprobes void +update_bitfield_fetch_param(struct bitfield_fetch_param *data) +{ +	/* +	 * Don't check the bitfield itself, because this must be the +	 * last fetch function. +	 */ +	if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) +		update_deref_fetch_param(data->orig.data); +	else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) +		update_symbol_cache(data->orig.data); +} + +static __kprobes void  free_bitfield_fetch_param(struct bitfield_fetch_param *data)  {  	/* @@ -389,6 +410,7 @@ free_bitfield_fetch_param(struct bitfield_fetch_param *data)  		free_symbol_cache(data->orig.data);  	kfree(data);  } +  /* Default (unsigned long) fetch type */  #define __DEFAULT_FETCH_TYPE(t) u##t  #define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t) @@ -536,6 +558,7 @@ struct probe_arg {  /* Flags for trace_probe */  #define TP_FLAG_TRACE	1  #define TP_FLAG_PROFILE	2 +#define TP_FLAG_REGISTERED 4  struct trace_probe {  	struct list_head	list; @@ -555,16 +578,49 @@ struct trace_probe {  	(sizeof(struct probe_arg) * (n))) -static __kprobes int probe_is_return(struct trace_probe *tp) +static __kprobes int trace_probe_is_return(struct trace_probe *tp)  {  	return tp->rp.handler != NULL;  } -static __kprobes const char *probe_symbol(struct trace_probe *tp) +static __kprobes const char *trace_probe_symbol(struct trace_probe *tp)  {  	return tp->symbol ? tp->symbol : "unknown";  } +static __kprobes unsigned long trace_probe_offset(struct trace_probe *tp) +{ +	return tp->rp.kp.offset; +} + +static __kprobes bool trace_probe_is_enabled(struct trace_probe *tp) +{ +	return !!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE)); +} + +static __kprobes bool trace_probe_is_registered(struct trace_probe *tp) +{ +	return !!(tp->flags & TP_FLAG_REGISTERED); +} + +static __kprobes bool trace_probe_has_gone(struct trace_probe *tp) +{ +	return !!(kprobe_gone(&tp->rp.kp)); +} + +static __kprobes bool trace_probe_within_module(struct trace_probe *tp, +						struct module *mod) +{ +	int len = strlen(mod->name); +	const char *name = trace_probe_symbol(tp); +	return strncmp(mod->name, name, len) == 0 && name[len] == ':'; +} + +static __kprobes bool trace_probe_is_on_module(struct trace_probe *tp) +{ +	return !!strchr(trace_probe_symbol(tp), ':'); +} +  static int register_probe_event(struct trace_probe *tp);  static void unregister_probe_event(struct trace_probe *tp); @@ -646,6 +702,16 @@ error:  	return ERR_PTR(ret);  } +static void update_probe_arg(struct probe_arg *arg) +{ +	if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn)) +		update_bitfield_fetch_param(arg->fetch.data); +	else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn)) +		update_deref_fetch_param(arg->fetch.data); +	else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn)) +		update_symbol_cache(arg->fetch.data); +} +  static void free_probe_arg(struct probe_arg *arg)  {  	if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn)) @@ -671,7 +737,7 @@ static void free_trace_probe(struct trace_probe *tp)  	kfree(tp);  } -static struct trace_probe *find_probe_event(const char *event, +static struct trace_probe *find_trace_probe(const char *event,  					    const char *group)  {  	struct trace_probe *tp; @@ -683,13 +749,96 @@ static 
struct trace_probe *find_probe_event(const char *event,  	return NULL;  } +/* Enable trace_probe - @flag must be TP_FLAG_TRACE or TP_FLAG_PROFILE */ +static int enable_trace_probe(struct trace_probe *tp, int flag) +{ +	int ret = 0; + +	tp->flags |= flag; +	if (trace_probe_is_enabled(tp) && trace_probe_is_registered(tp) && +	    !trace_probe_has_gone(tp)) { +		if (trace_probe_is_return(tp)) +			ret = enable_kretprobe(&tp->rp); +		else +			ret = enable_kprobe(&tp->rp.kp); +	} + +	return ret; +} + +/* Disable trace_probe - @flag must be TP_FLAG_TRACE or TP_FLAG_PROFILE */ +static void disable_trace_probe(struct trace_probe *tp, int flag) +{ +	tp->flags &= ~flag; +	if (!trace_probe_is_enabled(tp) && trace_probe_is_registered(tp)) { +		if (trace_probe_is_return(tp)) +			disable_kretprobe(&tp->rp); +		else +			disable_kprobe(&tp->rp.kp); +	} +} + +/* Internal register function - just handle k*probes and flags */ +static int __register_trace_probe(struct trace_probe *tp) +{ +	int i, ret; + +	if (trace_probe_is_registered(tp)) +		return -EINVAL; + +	for (i = 0; i < tp->nr_args; i++) +		update_probe_arg(&tp->args[i]); + +	/* Set/clear disabled flag according to tp->flag */ +	if (trace_probe_is_enabled(tp)) +		tp->rp.kp.flags &= ~KPROBE_FLAG_DISABLED; +	else +		tp->rp.kp.flags |= KPROBE_FLAG_DISABLED; + +	if (trace_probe_is_return(tp)) +		ret = register_kretprobe(&tp->rp); +	else +		ret = register_kprobe(&tp->rp.kp); + +	if (ret == 0) +		tp->flags |= TP_FLAG_REGISTERED; +	else { +		pr_warning("Could not insert probe at %s+%lu: %d\n", +			   trace_probe_symbol(tp), trace_probe_offset(tp), ret); +		if (ret == -ENOENT && trace_probe_is_on_module(tp)) { +			pr_warning("This probe might be able to register after" +				   "target module is loaded. Continue.\n"); +			ret = 0; +		} else if (ret == -EILSEQ) { +			pr_warning("Probing address(0x%p) is not an " +				   "instruction boundary.\n", +				   tp->rp.kp.addr); +			ret = -EINVAL; +		} +	} + +	return ret; +} + +/* Internal unregister function - just handle k*probes and flags */ +static void __unregister_trace_probe(struct trace_probe *tp) +{ +	if (trace_probe_is_registered(tp)) { +		if (trace_probe_is_return(tp)) +			unregister_kretprobe(&tp->rp); +		else +			unregister_kprobe(&tp->rp.kp); +		tp->flags &= ~TP_FLAG_REGISTERED; +		/* Cleanup kprobe for reuse */ +		if (tp->rp.kp.symbol_name) +			tp->rp.kp.addr = NULL; +	} +} +  /* Unregister a trace_probe and probe_event: call with locking probe_lock */  static void unregister_trace_probe(struct trace_probe *tp)  { -	if (probe_is_return(tp)) -		unregister_kretprobe(&tp->rp); -	else -		unregister_kprobe(&tp->rp.kp); +	__unregister_trace_probe(tp);  	list_del(&tp->list);  	unregister_probe_event(tp);  } @@ -702,41 +851,65 @@ static int register_trace_probe(struct trace_probe *tp)  	mutex_lock(&probe_lock); -	/* register as an event */ -	old_tp = find_probe_event(tp->call.name, tp->call.class->system); +	/* Delete old (same name) event if exist */ +	old_tp = find_trace_probe(tp->call.name, tp->call.class->system);  	if (old_tp) { -		/* delete old event */  		unregister_trace_probe(old_tp);  		free_trace_probe(old_tp);  	} + +	/* Register new event */  	ret = register_probe_event(tp);  	if (ret) {  		pr_warning("Failed to register probe event(%d)\n", ret);  		goto end;  	} -	tp->rp.kp.flags |= KPROBE_FLAG_DISABLED; -	if (probe_is_return(tp)) -		ret = register_kretprobe(&tp->rp); -	else -		ret = register_kprobe(&tp->rp.kp); - -	if (ret) { -		pr_warning("Could not insert probe(%d)\n", ret); -		if (ret == 
-EILSEQ) { -			pr_warning("Probing address(0x%p) is not an " -				   "instruction boundary.\n", -				   tp->rp.kp.addr); -			ret = -EINVAL; -		} +	/* Register k*probe */ +	ret = __register_trace_probe(tp); +	if (ret < 0)  		unregister_probe_event(tp); -	} else +	else  		list_add_tail(&tp->list, &probe_list); +  end:  	mutex_unlock(&probe_lock);  	return ret;  } +/* Module notifier call back, checking event on the module */ +static int trace_probe_module_callback(struct notifier_block *nb, +				       unsigned long val, void *data) +{ +	struct module *mod = data; +	struct trace_probe *tp; +	int ret; + +	if (val != MODULE_STATE_COMING) +		return NOTIFY_DONE; + +	/* Update probes on coming module */ +	mutex_lock(&probe_lock); +	list_for_each_entry(tp, &probe_list, list) { +		if (trace_probe_within_module(tp, mod)) { +			__unregister_trace_probe(tp); +			ret = __register_trace_probe(tp); +			if (ret) +				pr_warning("Failed to re-register probe %s on" +					   "%s: %d\n", +					   tp->call.name, mod->name, ret); +		} +	} +	mutex_unlock(&probe_lock); + +	return NOTIFY_DONE; +} + +static struct notifier_block trace_probe_module_nb = { +	.notifier_call = trace_probe_module_callback, +	.priority = 1	/* Invoked after kprobe module callback */ +}; +  /* Split symbol and offset. */  static int split_symbol_offset(char *symbol, unsigned long *offset)  { @@ -962,8 +1135,8 @@ static int create_trace_probe(int argc, char **argv)  {  	/*  	 * Argument syntax: -	 *  - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS] -	 *  - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS] +	 *  - Add kprobe: p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS] +	 *  - Add kretprobe: r[:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS]  	 * Fetch args:  	 *  $retval	: fetch return value  	 *  $stack	: fetch stack address @@ -1025,7 +1198,7 @@ static int create_trace_probe(int argc, char **argv)  			return -EINVAL;  		}  		mutex_lock(&probe_lock); -		tp = find_probe_event(event, group); +		tp = find_trace_probe(event, group);  		if (!tp) {  			mutex_unlock(&probe_lock);  			pr_info("Event %s/%s doesn't exist.\n", group, event); @@ -1144,7 +1317,7 @@ error:  	return ret;  } -static void cleanup_all_probes(void) +static void release_all_trace_probes(void)  {  	struct trace_probe *tp; @@ -1158,7 +1331,6 @@ static void cleanup_all_probes(void)  	mutex_unlock(&probe_lock);  } -  /* Probes listing interfaces */  static void *probes_seq_start(struct seq_file *m, loff_t *pos)  { @@ -1181,15 +1353,16 @@ static int probes_seq_show(struct seq_file *m, void *v)  	struct trace_probe *tp = v;  	int i; -	seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p'); +	seq_printf(m, "%c", trace_probe_is_return(tp) ? 
'r' : 'p');  	seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name);  	if (!tp->symbol)  		seq_printf(m, " 0x%p", tp->rp.kp.addr);  	else if (tp->rp.kp.offset) -		seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset); +		seq_printf(m, " %s+%u", trace_probe_symbol(tp), +			   tp->rp.kp.offset);  	else -		seq_printf(m, " %s", probe_symbol(tp)); +		seq_printf(m, " %s", trace_probe_symbol(tp));  	for (i = 0; i < tp->nr_args; i++)  		seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm); @@ -1209,7 +1382,7 @@ static int probes_open(struct inode *inode, struct file *file)  {  	if ((file->f_mode & FMODE_WRITE) &&  	    (file->f_flags & O_TRUNC)) -		cleanup_all_probes(); +		release_all_trace_probes();  	return seq_open(file, &probes_seq_op);  } @@ -1397,7 +1570,8 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)  	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);  	if (!filter_current_check_discard(buffer, call, entry, event)) -		trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); +		trace_nowake_buffer_unlock_commit_regs(buffer, event, +						       irq_flags, pc, regs);  }  /* Kretprobe handler */ @@ -1429,7 +1603,8 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,  	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);  	if (!filter_current_check_discard(buffer, call, entry, event)) -		trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); +		trace_nowake_buffer_unlock_commit_regs(buffer, event, +						       irq_flags, pc, regs);  }  /* Event entry printers */ @@ -1511,30 +1686,6 @@ partial:  	return TRACE_TYPE_PARTIAL_LINE;  } -static int probe_event_enable(struct ftrace_event_call *call) -{ -	struct trace_probe *tp = (struct trace_probe *)call->data; - -	tp->flags |= TP_FLAG_TRACE; -	if (probe_is_return(tp)) -		return enable_kretprobe(&tp->rp); -	else -		return enable_kprobe(&tp->rp.kp); -} - -static void probe_event_disable(struct ftrace_event_call *call) -{ -	struct trace_probe *tp = (struct trace_probe *)call->data; - -	tp->flags &= ~TP_FLAG_TRACE; -	if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) { -		if (probe_is_return(tp)) -			disable_kretprobe(&tp->rp); -		else -			disable_kprobe(&tp->rp.kp); -	} -} -  #undef DEFINE_FIELD  #define DEFINE_FIELD(type, item, name, is_signed)			\  	do {								\ @@ -1596,7 +1747,7 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)  	const char *fmt, *arg; -	if (!probe_is_return(tp)) { +	if (!trace_probe_is_return(tp)) {  		fmt = "(%lx)";  		arg = "REC->" FIELD_STRING_IP;  	} else { @@ -1713,49 +1864,25 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,  	head = this_cpu_ptr(call->perf_events);  	perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);  } - -static int probe_perf_enable(struct ftrace_event_call *call) -{ -	struct trace_probe *tp = (struct trace_probe *)call->data; - -	tp->flags |= TP_FLAG_PROFILE; - -	if (probe_is_return(tp)) -		return enable_kretprobe(&tp->rp); -	else -		return enable_kprobe(&tp->rp.kp); -} - -static void probe_perf_disable(struct ftrace_event_call *call) -{ -	struct trace_probe *tp = (struct trace_probe *)call->data; - -	tp->flags &= ~TP_FLAG_PROFILE; - -	if (!(tp->flags & TP_FLAG_TRACE)) { -		if (probe_is_return(tp)) -			disable_kretprobe(&tp->rp); -		else -			disable_kprobe(&tp->rp.kp); -	} -}  #endif	/* CONFIG_PERF_EVENTS */  static __kprobes  int kprobe_register(struct ftrace_event_call *event, enum trace_reg 
type)  { +	struct trace_probe *tp = (struct trace_probe *)event->data; +  	switch (type) {  	case TRACE_REG_REGISTER: -		return probe_event_enable(event); +		return enable_trace_probe(tp, TP_FLAG_TRACE);  	case TRACE_REG_UNREGISTER: -		probe_event_disable(event); +		disable_trace_probe(tp, TP_FLAG_TRACE);  		return 0;  #ifdef CONFIG_PERF_EVENTS  	case TRACE_REG_PERF_REGISTER: -		return probe_perf_enable(event); +		return enable_trace_probe(tp, TP_FLAG_PROFILE);  	case TRACE_REG_PERF_UNREGISTER: -		probe_perf_disable(event); +		disable_trace_probe(tp, TP_FLAG_PROFILE);  		return 0;  #endif  	} @@ -1805,7 +1932,7 @@ static int register_probe_event(struct trace_probe *tp)  	/* Initialize ftrace_event_call */  	INIT_LIST_HEAD(&call->class->fields); -	if (probe_is_return(tp)) { +	if (trace_probe_is_return(tp)) {  		call->event.funcs = &kretprobe_funcs;  		call->class->define_fields = kretprobe_event_define_fields;  	} else { @@ -1844,6 +1971,9 @@ static __init int init_kprobe_trace(void)  	struct dentry *d_tracer;  	struct dentry *entry; +	if (register_module_notifier(&trace_probe_module_nb)) +		return -EINVAL; +  	d_tracer = tracing_init_dentry();  	if (!d_tracer)  		return 0; @@ -1897,12 +2027,12 @@ static __init int kprobe_trace_self_tests_init(void)  		warn++;  	} else {  		/* Enable trace point */ -		tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM); +		tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM);  		if (WARN_ON_ONCE(tp == NULL)) {  			pr_warning("error on getting new probe.\n");  			warn++;  		} else -			probe_event_enable(&tp->call); +			enable_trace_probe(tp, TP_FLAG_TRACE);  	}  	ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " @@ -1912,12 +2042,12 @@ static __init int kprobe_trace_self_tests_init(void)  		warn++;  	} else {  		/* Enable trace point */ -		tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM); +		tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM);  		if (WARN_ON_ONCE(tp == NULL)) {  			pr_warning("error on getting new probe.\n");  			warn++;  		} else -			probe_event_enable(&tp->call); +			enable_trace_probe(tp, TP_FLAG_TRACE);  	}  	if (warn) @@ -1938,7 +2068,7 @@ static __init int kprobe_trace_self_tests_init(void)  	}  end: -	cleanup_all_probes(); +	release_all_trace_probes();  	if (warn)  		pr_cont("NG: Some tests are failed. 
Please check them.\n");  	else diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index e37de492a9e1..51999309a6cf 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -1107,19 +1107,20 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,  {  	struct stack_entry *field;  	struct trace_seq *s = &iter->seq; -	int i; +	unsigned long *p; +	unsigned long *end;  	trace_assign_type(field, iter->ent); +	end = (unsigned long *)((long)iter->ent + iter->ent_size);  	if (!trace_seq_puts(s, "<stack trace>\n"))  		goto partial; -	for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { -		if (!field->caller[i] || (field->caller[i] == ULONG_MAX)) -			break; + +	for (p = field->caller; p && *p != ULONG_MAX && p < end; p++) {  		if (!trace_seq_puts(s, " => "))  			goto partial; -		if (!seq_print_ip_sym(s, field->caller[i], flags)) +		if (!seq_print_ip_sym(s, *p, flags))  			goto partial;  		if (!trace_seq_puts(s, "\n"))  			goto partial; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index f029dd4fd2ca..e4a70c0c71b6 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -227,7 +227,9 @@ static void wakeup_trace_close(struct trace_iterator *iter)  		graph_trace_close(iter);  } -#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC) +#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC | \ +			    TRACE_GRAPH_PRINT_ABS_TIME | \ +			    TRACE_GRAPH_PRINT_DURATION)  static enum print_line_t wakeup_print_line(struct trace_iterator *iter)  { diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index b0b53b8e4c25..77575b386d97 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -156,20 +156,11 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,  {  	long *ptr = filp->private_data;  	unsigned long val, flags; -	char buf[64];  	int ret;  	int cpu; -	if (count >= sizeof(buf)) -		return -EINVAL; - -	if (copy_from_user(&buf, ubuf, count)) -		return -EFAULT; - -	buf[count] = 0; - -	ret = strict_strtoul(buf, 10, &val); -	if (ret < 0) +	ret = kstrtoul_from_user(ubuf, count, 10, &val); +	if (ret)  		return ret;  	local_irq_save(flags); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 3d0c56ad4792..36491cd5b7d4 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -200,6 +200,7 @@ static int is_softlockup(unsigned long touch_ts)  }  #ifdef CONFIG_HARDLOCKUP_DETECTOR +  static struct perf_event_attr wd_hw_attr = {  	.type		= PERF_TYPE_HARDWARE,  	.config		= PERF_COUNT_HW_CPU_CYCLES, @@ -209,7 +210,7 @@ static struct perf_event_attr wd_hw_attr = {  };  /* Callback function for perf event subsystem */ -static void watchdog_overflow_callback(struct perf_event *event, int nmi, +static void watchdog_overflow_callback(struct perf_event *event,  		 struct perf_sample_data *data,  		 struct pt_regs *regs)  { @@ -368,10 +369,11 @@ static int watchdog_nmi_enable(int cpu)  	if (event != NULL)  		goto out_enable; -	/* Try to register using hardware perf events */  	wd_attr = &wd_hw_attr;  	wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); -	event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback); + +	/* Try to register using hardware perf events */ +	event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);  	if (!IS_ERR(event)) {  		printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n");  		goto out_save; diff --git 
a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c index 063653955f9f..ef7f32291852 100644 --- a/samples/hw_breakpoint/data_breakpoint.c +++ b/samples/hw_breakpoint/data_breakpoint.c @@ -41,7 +41,7 @@ module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO);  MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any"  			" write operations on the kernel symbol"); -static void sample_hbp_handler(struct perf_event *bp, int nmi, +static void sample_hbp_handler(struct perf_event *bp,  			       struct perf_sample_data *data,  			       struct pt_regs *regs)  { @@ -60,7 +60,7 @@ static int __init hw_break_module_init(void)  	attr.bp_len = HW_BREAKPOINT_LEN_4;  	attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; -	sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler); +	sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler, NULL);  	if (IS_ERR((void __force *)sample_hbp)) {  		ret = PTR_ERR((void __force *)sample_hbp);  		goto fail; diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index 6f5a498608b2..85c5f026930d 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -66,6 +66,12 @@ OPTIONS  	used. This interfaces starts by centering on the line with more  	samples, TAB/UNTAB cycles through the lines with more samples. +-c:: +--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can +	be provided as a comma-separated list with no space: 0,1. Ranges of +	CPUs are specified with -: 0-2. Default is to report samples on all +	CPUs. +  SEE ALSO  --------  linkperf:perf-record[1], linkperf:perf-report[1] diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index 02bafce4b341..2780d9ce48bf 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -34,9 +34,11 @@ OPTIONS  	Specify vmlinux path which has debuginfo (Dwarf binary).  -m:: ---module=MODNAME:: +--module=MODNAME|PATH::  	Specify module name in which perf-probe searches probe points -	or lines. +	or lines. If a path of module file is passed, perf-probe +	treat it as an offline module (this means you can add a probe on +        a module which has not been loaded yet).  -s::  --source=PATH:: diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 8ba03d6e5398..04253c07d19a 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -80,15 +80,24 @@ OPTIONS  --dump-raw-trace::          Dump raw trace in ASCII. --g [type,min]:: +-g [type,min,order]::  --call-graph:: -        Display call chains using type and min percent threshold. +        Display call chains using type, min percent threshold and order.  	type can be either:  	- flat: single column, linear exposure of call chains.  	- graph: use a graph tree, displaying absolute overhead rates.  	- fractal: like graph, but displays relative rates. Each branch of  		 the tree is considered as a new profiled object. + -	Default: fractal,0.5. + +	order can be either: +	- callee: callee based call graph. +	- caller: inverted caller based call graph. + +	Default: fractal,0.5,callee. + +-G:: +--inverted:: +        alias for inverted caller based call graph.  --pretty=<key>::          Pretty printing style.  
key: normal, raw @@ -119,6 +128,12 @@ OPTIONS  --symfs=<directory>::          Look for files with symbols relative to this directory. +-c:: +--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can +	be provided as a comma-separated list with no space: 0,1. Ranges of +	CPUs are specified with -: 0-2. Default is to report samples on all +	CPUs. +  SEE ALSO  --------  linkperf:perf-stat[1] diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 86c87e214b11..db017867d9e8 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -115,10 +115,10 @@ OPTIONS  -f::  --fields::          Comma separated list of fields to print. Options are: -        comm, tid, pid, time, cpu, event, trace, sym. Field -        list can be prepended with the type, trace, sw or hw, +        comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr. +        Field list can be prepended with the type, trace, sw or hw,          to indicate to which event type the field list applies. -        e.g., -f sw:comm,tid,time,sym  and -f trace:time,cpu,trace +        e.g., -f sw:comm,tid,time,ip,sym  and -f trace:time,cpu,trace  		perf script -f <fields> @@ -132,17 +132,17 @@ OPTIONS  	The arguments are processed in the order received. A later usage can  	reset a prior request. e.g.: -		-f trace: -f comm,tid,time,sym +		-f trace: -f comm,tid,time,ip,sym  	The first -f suppresses trace events (field list is ""), but then the -	second invocation sets the fields to comm,tid,time,sym. In this case a +	second invocation sets the fields to comm,tid,time,ip,sym. In this case a  	warning is given to the user:  		"Overriding previous field request for all events."  	Alternativey, consider the order: -		-f comm,tid,time,sym -f trace: +		-f comm,tid,time,ip,sym -f trace:  	The first -f sets the fields for all events and the second -f  	suppresses trace events. The user is given a warning message about @@ -182,6 +182,12 @@ OPTIONS  --hide-call-graph::          When printing symbols do not display call chain. +-c:: +--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can +	be provided as a comma-separated list with no space: 0,1. Ranges of +	CPUs are specified with -: 0-2. Default is to report samples on all +	CPUs. 
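
For illustration only — these example invocations are not part of the patch — the selection options documented above can be combined on one command line, assuming an existing perf.data recorded with call chains:

	perf report -g fractal,0.5,caller -c 0,2-3    # caller-ordered call graph, samples on CPUs 0, 2 and 3 only
	perf report -G                                # shorthand for the inverted (caller-based) call graph
	perf script -f sw:comm,tid,time,ip,sym,dso -c 1
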
+  SEE ALSO  --------  linkperf:perf-record[1], linkperf:perf-script-perl[1], diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 940257b5774e..d0861bbd1d94 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -279,6 +279,7 @@ LIB_H += util/thread.h  LIB_H += util/thread_map.h  LIB_H += util/trace-event.h  LIB_H += util/probe-finder.h +LIB_H += util/dwarf-aux.h  LIB_H += util/probe-event.h  LIB_H += util/pstack.h  LIB_H += util/cpumap.h @@ -435,6 +436,7 @@ else  	BASIC_CFLAGS += -DDWARF_SUPPORT  	EXTLIBS += -lelf -ldw  	LIB_OBJS += $(OUTPUT)util/probe-finder.o +	LIB_OBJS += $(OUTPUT)util/dwarf-aux.o  endif # PERF_HAVE_DWARF_REGS  endif # NO_DWARF diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 7b139e1e7e86..555aefd7fe01 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -28,6 +28,8 @@  #include "util/hist.h"  #include "util/session.h" +#include <linux/bitmap.h> +  static char		const *input_name = "perf.data";  static bool		force, use_tui, use_stdio; @@ -38,6 +40,9 @@ static bool		print_line;  static const char *sym_hist_filter; +static const char	*cpu_list; +static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); +  static int perf_evlist__add_sample(struct perf_evlist *evlist,  				   struct perf_sample *sample,  				   struct perf_evsel *evsel, @@ -90,6 +95,9 @@ static int process_sample_event(union perf_event *event,  		return -1;  	} +	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) +		return 0; +  	if (!al.filtered &&  	    perf_evlist__add_sample(session->evlist, sample, evsel, &al)) {  		pr_warning("problem incrementing symbol count, " @@ -177,6 +185,12 @@ static int __cmd_annotate(void)  	if (session == NULL)  		return -ENOMEM; +	if (cpu_list) { +		ret = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap); +		if (ret) +			goto out_delete; +	} +  	ret = perf_session__process_events(session, &event_ops);  	if (ret)  		goto out_delete; @@ -252,6 +266,7 @@ static const struct option options[] = {  		    "print matching source lines (may be slow)"),  	OPT_BOOLEAN('P', "full-paths", &full_paths,  		    "Don't shorten the displayed pathnames"), +	OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),  	OPT_END()  }; diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 2c0e64d0b4aa..5f2a5c7046df 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -242,7 +242,8 @@ static const struct option options[] = {  	OPT_STRING('s', "source", &symbol_conf.source_prefix,  		   "directory", "path to kernel source"),  	OPT_STRING('m', "module", ¶ms.target_module, -		   "modname", "target module name"), +		   "modname|path", +		   "target module name (for online) or path (for offline)"),  #endif  	OPT__DRY_RUN(&probe_event_dry_run),  	OPT_INTEGER('\0', "max-probes", ¶ms.max_probe_points, diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 8e2c85798185..80dc5b790e47 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -740,7 +740,7 @@ static bool force, append_file;  const struct option record_options[] = {  	OPT_CALLBACK('e', "event", &evsel_list, "event",  		     "event selector. 
use 'perf list' to list available events", -		     parse_events), +		     parse_events_option),  	OPT_CALLBACK(0, "filter", &evsel_list, "filter",  		     "event filter", parse_filter),  	OPT_INTEGER('p', "pid", &target_pid, diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 287a173523a7..f854efda7686 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -33,6 +33,8 @@  #include "util/sort.h"  #include "util/hist.h" +#include <linux/bitmap.h> +  static char		const *input_name = "perf.data";  static bool		force, use_tui, use_stdio; @@ -45,9 +47,13 @@ static struct perf_read_values	show_threads_values;  static const char	default_pretty_printing_style[] = "normal";  static const char	*pretty_printing_style = default_pretty_printing_style; -static char		callchain_default_opt[] = "fractal,0.5"; +static char		callchain_default_opt[] = "fractal,0.5,callee"; +static bool		inverted_callchain;  static symbol_filter_t	annotate_init; +static const char	*cpu_list; +static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); +  static int perf_session__add_hist_entry(struct perf_session *session,  					struct addr_location *al,  					struct perf_sample *sample, @@ -116,6 +122,9 @@ static int process_sample_event(union perf_event *event,  	if (al.filtered || (hide_unresolved && al.sym == NULL))  		return 0; +	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) +		return 0; +  	if (al.map != NULL)  		al.map->dso->hit = 1; @@ -262,6 +271,12 @@ static int __cmd_report(void)  	if (session == NULL)  		return -ENOMEM; +	if (cpu_list) { +		ret = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap); +		if (ret) +			goto out_delete; +	} +  	if (show_threads)  		perf_read_values_init(&show_threads_values); @@ -386,13 +401,29 @@ parse_callchain_opt(const struct option *opt __used, const char *arg,  	if (!tok)  		goto setup; -	tok2 = strtok(NULL, ",");  	callchain_param.min_percent = strtod(tok, &endptr);  	if (tok == endptr)  		return -1; -	if (tok2) +	/* get the print limit */ +	tok2 = strtok(NULL, ","); +	if (!tok2) +		goto setup; + +	if (tok2[0] != 'c') {  		callchain_param.print_limit = strtod(tok2, &endptr); +		tok2 = strtok(NULL, ","); +		if (!tok2) +			goto setup; +	} + +	/* get the call chain order */ +	if (!strcmp(tok2, "caller")) +		callchain_param.order = ORDER_CALLER; +	else if (!strcmp(tok2, "callee")) +		callchain_param.order = ORDER_CALLEE; +	else +		return -1;  setup:  	if (callchain_register_param(&callchain_param) < 0) {  		fprintf(stderr, "Can't register callchain params\n"); @@ -436,9 +467,10 @@ static const struct option options[] = {  		   "regex filter to identify parent, see: '--sort parent'"),  	OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,  		    "Only display entries with parent-match"), -	OPT_CALLBACK_DEFAULT('g', "call-graph", NULL, "output_type,min_percent", -		     "Display callchains using output_type (graph, flat, fractal, or none) and min percent threshold. " -		     "Default: fractal,0.5", &parse_callchain_opt, callchain_default_opt), +	OPT_CALLBACK_DEFAULT('g', "call-graph", NULL, "output_type,min_percent, call_order", +		     "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold and callchain order. 
" +		     "Default: fractal,0.5,callee", &parse_callchain_opt, callchain_default_opt), +	OPT_BOOLEAN('G', "inverted", &inverted_callchain, "alias for inverted call graph"),  	OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",  		   "only consider symbols in these dsos"),  	OPT_STRING('C', "comms", &symbol_conf.comm_list_str, "comm[,comm...]", @@ -455,6 +487,7 @@ static const struct option options[] = {  		    "Only display entries resolved to a symbol"),  	OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",  		    "Look for files with symbols relative to this directory"), +	OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),  	OPT_END()  }; @@ -467,6 +500,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)  	else if (use_tui)  		use_browser = 1; +	if (inverted_callchain) +		callchain_param.order = ORDER_CALLER; +  	if (strcmp(input_name, "-") != 0)  		setup_browser(true);  	else @@ -504,7 +540,14 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)  	if (parent_pattern != default_parent_pattern) {  		if (sort_dimension__add("parent") < 0)  			return -1; -		sort_parent.elide = 1; + +		/* +		 * Only show the parent fields if we explicitly +		 * sort that way. If we only use parent machinery +		 * for filtering, we don't want it. +		 */ +		if (!strstr(sort_order, "parent")) +			sort_parent.elide = 1;  	} else  		symbol_conf.exclude_other = false; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 22747de7234b..09024ec2ab2e 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -13,6 +13,7 @@  #include "util/util.h"  #include "util/evlist.h"  #include "util/evsel.h" +#include <linux/bitmap.h>  static char const		*script_name;  static char const		*generate_script_lang; @@ -21,6 +22,8 @@ static u64			last_timestamp;  static u64			nr_unordered;  extern const struct option	record_options[];  static bool			no_callchain; +static const char		*cpu_list; +static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);  enum perf_output_field {  	PERF_OUTPUT_COMM            = 1U << 0, @@ -30,7 +33,10 @@ enum perf_output_field {  	PERF_OUTPUT_CPU             = 1U << 4,  	PERF_OUTPUT_EVNAME          = 1U << 5,  	PERF_OUTPUT_TRACE           = 1U << 6, -	PERF_OUTPUT_SYM             = 1U << 7, +	PERF_OUTPUT_IP              = 1U << 7, +	PERF_OUTPUT_SYM             = 1U << 8, +	PERF_OUTPUT_DSO             = 1U << 9, +	PERF_OUTPUT_ADDR            = 1U << 10,  };  struct output_option { @@ -44,7 +50,10 @@ struct output_option {  	{.str = "cpu",   .field = PERF_OUTPUT_CPU},  	{.str = "event", .field = PERF_OUTPUT_EVNAME},  	{.str = "trace", .field = PERF_OUTPUT_TRACE}, +	{.str = "ip",    .field = PERF_OUTPUT_IP},  	{.str = "sym",   .field = PERF_OUTPUT_SYM}, +	{.str = "dso",   .field = PERF_OUTPUT_DSO}, +	{.str = "addr",  .field = PERF_OUTPUT_ADDR},  };  /* default set to maintain compatibility with current format */ @@ -60,7 +69,8 @@ static struct {  		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |  			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | -			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_SYM, +			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | +				  PERF_OUTPUT_SYM | PERF_OUTPUT_DSO,  		.invalid_fields = PERF_OUTPUT_TRACE,  	}, @@ -70,7 +80,8 @@ static struct {  		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |  			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | -			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_SYM, +			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | +				  PERF_OUTPUT_SYM | PERF_OUTPUT_DSO,  		
.invalid_fields = PERF_OUTPUT_TRACE,  	}, @@ -88,7 +99,8 @@ static struct {  		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |  			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | -			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_SYM, +			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | +				  PERF_OUTPUT_SYM | PERF_OUTPUT_DSO,  		.invalid_fields = PERF_OUTPUT_TRACE,  	}, @@ -157,9 +169,9 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,  		!perf_session__has_traces(session, "record -R"))  		return -EINVAL; -	if (PRINT_FIELD(SYM)) { +	if (PRINT_FIELD(IP)) {  		if (perf_event_attr__check_stype(attr, PERF_SAMPLE_IP, "IP", -					   PERF_OUTPUT_SYM)) +					   PERF_OUTPUT_IP))  			return -EINVAL;  		if (!no_callchain && @@ -167,6 +179,24 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,  			symbol_conf.use_callchain = false;  	} +	if (PRINT_FIELD(ADDR) && +		perf_event_attr__check_stype(attr, PERF_SAMPLE_ADDR, "ADDR", +				       PERF_OUTPUT_ADDR)) +		return -EINVAL; + +	if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) { +		pr_err("Display of symbols requested but neither sample IP nor " +			   "sample address\nis selected. Hence, no addresses to convert " +		       "to symbols.\n"); +		return -EINVAL; +	} +	if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) { +		pr_err("Display of DSO requested but neither sample IP nor " +			   "sample address\nis selected. Hence, no addresses to convert " +		       "to DSO.\n"); +		return -EINVAL; +	} +  	if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) &&  		perf_event_attr__check_stype(attr, PERF_SAMPLE_TID, "TID",  				       PERF_OUTPUT_TID|PERF_OUTPUT_PID)) @@ -230,7 +260,7 @@ static void print_sample_start(struct perf_sample *sample,  	if (PRINT_FIELD(COMM)) {  		if (latency_format)  			printf("%8.8s ", thread->comm); -		else if (PRINT_FIELD(SYM) && symbol_conf.use_callchain) +		else if (PRINT_FIELD(IP) && symbol_conf.use_callchain)  			printf("%s ", thread->comm);  		else  			printf("%16s ", thread->comm); @@ -271,6 +301,63 @@ static void print_sample_start(struct perf_sample *sample,  	}  } +static bool sample_addr_correlates_sym(struct perf_event_attr *attr) +{ +	if ((attr->type == PERF_TYPE_SOFTWARE) && +	    ((attr->config == PERF_COUNT_SW_PAGE_FAULTS) || +	     (attr->config == PERF_COUNT_SW_PAGE_FAULTS_MIN) || +	     (attr->config == PERF_COUNT_SW_PAGE_FAULTS_MAJ))) +		return true; + +	return false; +} + +static void print_sample_addr(union perf_event *event, +			  struct perf_sample *sample, +			  struct perf_session *session, +			  struct thread *thread, +			  struct perf_event_attr *attr) +{ +	struct addr_location al; +	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; +	const char *symname, *dsoname; + +	printf("%16" PRIx64, sample->addr); + +	if (!sample_addr_correlates_sym(attr)) +		return; + +	thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION, +			      event->ip.pid, sample->addr, &al); +	if (!al.map) +		thread__find_addr_map(thread, session, cpumode, MAP__VARIABLE, +				      event->ip.pid, sample->addr, &al); + +	al.cpu = sample->cpu; +	al.sym = NULL; + +	if (al.map) +		al.sym = map__find_symbol(al.map, al.addr, NULL); + +	if (PRINT_FIELD(SYM)) { +		if (al.sym && al.sym->name) +			symname = al.sym->name; +		else +			symname = ""; + +		printf(" %16s", symname); +	} + +	if (PRINT_FIELD(DSO)) { +		if (al.map && al.map->dso && al.map->dso->name) +			dsoname = al.map->dso->name; +		else +			dsoname = ""; + +		printf(" (%s)", dsoname); +	} +} +  static void process_event(union 
perf_event *event __unused,  			  struct perf_sample *sample,  			  struct perf_evsel *evsel, @@ -288,12 +375,16 @@ static void process_event(union perf_event *event __unused,  		print_trace_event(sample->cpu, sample->raw_data,  				  sample->raw_size); -	if (PRINT_FIELD(SYM)) { +	if (PRINT_FIELD(ADDR)) +		print_sample_addr(event, sample, session, thread, attr); + +	if (PRINT_FIELD(IP)) {  		if (!symbol_conf.use_callchain)  			printf(" ");  		else  			printf("\n"); -		perf_session__print_symbols(event, sample, session); +		perf_session__print_ip(event, sample, session, +					      PRINT_FIELD(SYM), PRINT_FIELD(DSO));  	}  	printf("\n"); @@ -365,6 +456,10 @@ static int process_sample_event(union perf_event *event,  		last_timestamp = sample->time;  		return 0;  	} + +	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) +		return 0; +  	scripting_ops->process_event(event, sample, evsel, session, thread);  	session->hists.stats.total_period += sample->period; @@ -985,8 +1080,9 @@ static const struct option options[] = {  	OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",  		    "Look for files with symbols relative to this directory"),  	OPT_CALLBACK('f', "fields", NULL, "str", -		     "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,sym", +		     "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr",  		     parse_output_fields), +	OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),  	OPT_END()  }; @@ -1167,6 +1263,11 @@ int cmd_script(int argc, const char **argv, const char *prefix __used)  	if (session == NULL)  		return -ENOMEM; +	if (cpu_list) { +		if (perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap)) +			return -1; +	} +  	if (!no_callchain)  		symbol_conf.use_callchain = true;  	else diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a9f06715e44d..1ad04ce29c34 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -61,6 +61,8 @@  #include <locale.h>  #define DEFAULT_SEPARATOR	" " +#define CNTR_NOT_SUPPORTED	"<not supported>" +#define CNTR_NOT_COUNTED	"<not counted>"  static struct perf_event_attr default_attrs[] = { @@ -448,6 +450,7 @@ static int run_perf_stat(int argc __used, const char **argv)  				if (verbose)  					ui__warning("%s event is not supported by the kernel.\n",  						    event_name(counter)); +				counter->supported = false;  				continue;  			} @@ -466,6 +469,7 @@ static int run_perf_stat(int argc __used, const char **argv)  			die("Not all events could be opened.\n");  			return -1;  		} +		counter->supported = true;  	}  	if (perf_evlist__set_filters(evsel_list)) { @@ -513,7 +517,10 @@ static void print_noise_pct(double total, double avg)  	if (avg)  		pct = 100.0*total/avg; -	fprintf(stderr, "  ( +-%6.2f%% )", pct); +	if (csv_output) +		fprintf(stderr, "%s%.2f%%", csv_sep, pct); +	else +		fprintf(stderr, "  ( +-%6.2f%% )", pct);  }  static void print_noise(struct perf_evsel *evsel, double avg) @@ -861,7 +868,7 @@ static void print_counter_aggr(struct perf_evsel *counter)  	if (scaled == -1) {  		fprintf(stderr, "%*s%s%*s",  			csv_output ? 0 : 18, -			"<not counted>", +			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,  			csv_sep,  			csv_output ? 
0 : -24,  			event_name(counter)); @@ -878,13 +885,13 @@ static void print_counter_aggr(struct perf_evsel *counter)  	else  		abs_printout(-1, counter, avg); +	print_noise(counter, avg); +  	if (csv_output) {  		fputc('\n', stderr);  		return;  	} -	print_noise(counter, avg); -  	if (scaled) {  		double avg_enabled, avg_running; @@ -914,7 +921,8 @@ static void print_counter(struct perf_evsel *counter)  				csv_output ? 0 : -4,  				evsel_list->cpus->map[cpu], csv_sep,  				csv_output ? 0 : 18, -				"<not counted>", csv_sep, +				counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, +				csv_sep,  				csv_output ? 0 : -24,  				event_name(counter)); @@ -1024,7 +1032,7 @@ static int stat__set_big_num(const struct option *opt __used,  static const struct option options[] = {  	OPT_CALLBACK('e', "event", &evsel_list, "event",  		     "event selector. use 'perf list' to list available events", -		     parse_events), +		     parse_events_option),  	OPT_CALLBACK(0, "filter", &evsel_list, "filter",  		     "event filter", parse_filter),  	OPT_BOOLEAN('i', "no-inherit", &no_inherit, diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 2da9162262b0..55f4c76f2821 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -12,6 +12,7 @@  #include "util/parse-events.h"  #include "util/symbol.h"  #include "util/thread_map.h" +#include "../../include/linux/hw_breakpoint.h"  static long page_size; @@ -245,8 +246,8 @@ static int trace_event__id(const char *evname)  	int err = -1, fd;  	if (asprintf(&filename, -		     "/sys/kernel/debug/tracing/events/syscalls/%s/id", -		     evname) < 0) +		     "%s/syscalls/%s/id", +		     debugfs_path, evname) < 0)  		return -1;  	fd = open(filename, O_RDONLY); @@ -600,6 +601,246 @@ out_free_threads:  #undef nsyscalls  } +#define TEST_ASSERT_VAL(text, cond) \ +do { \ +	if (!cond) { \ +		pr_debug("FAILED %s:%d %s\n", __FILE__, __LINE__, text); \ +		return -1; \ +	} \ +} while (0) + +static int test__checkevent_tracepoint(struct perf_evlist *evlist) +{ +	struct perf_evsel *evsel = list_entry(evlist->entries.next, +					      struct perf_evsel, node); + +	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); +	TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type); +	TEST_ASSERT_VAL("wrong sample_type", +		(PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | PERF_SAMPLE_CPU) == +		evsel->attr.sample_type); +	TEST_ASSERT_VAL("wrong sample_period", 1 == evsel->attr.sample_period); +	return 0; +} + +static int test__checkevent_tracepoint_multi(struct perf_evlist *evlist) +{ +	struct perf_evsel *evsel; + +	TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries > 1); + +	list_for_each_entry(evsel, &evlist->entries, node) { +		TEST_ASSERT_VAL("wrong type", +			PERF_TYPE_TRACEPOINT == evsel->attr.type); +		TEST_ASSERT_VAL("wrong sample_type", +			(PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | PERF_SAMPLE_CPU) +			== evsel->attr.sample_type); +		TEST_ASSERT_VAL("wrong sample_period", +			1 == evsel->attr.sample_period); +	} +	return 0; +} + +static int test__checkevent_raw(struct perf_evlist *evlist) +{ +	struct perf_evsel *evsel = list_entry(evlist->entries.next, +					      struct perf_evsel, node); + +	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); +	TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); +	TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config); +	return 0; +} + +static int test__checkevent_numeric(struct perf_evlist *evlist) +{ +	struct perf_evsel *evsel = 
list_entry(evlist->entries.next, +					      struct perf_evsel, node); + +	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); +	TEST_ASSERT_VAL("wrong type", 1 == evsel->attr.type); +	TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config); +	return 0; +} + +static int test__checkevent_symbolic_name(struct perf_evlist *evlist) +{ +	struct perf_evsel *evsel = list_entry(evlist->entries.next, +					      struct perf_evsel, node); + +	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); +	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); +	TEST_ASSERT_VAL("wrong config", +			PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config); +	return 0; +} + +static int test__checkevent_symbolic_alias(struct perf_evlist *evlist) +{ +	struct perf_evsel *evsel = list_entry(evlist->entries.next, +					      struct perf_evsel, node); + +	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); +	TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->attr.type); +	TEST_ASSERT_VAL("wrong config", +			PERF_COUNT_SW_PAGE_FAULTS == evsel->attr.config); +	return 0; +} + +static int test__checkevent_genhw(struct perf_evlist *evlist) +{ +	struct perf_evsel *evsel = list_entry(evlist->entries.next, +					      struct perf_evsel, node); + +	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); +	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->attr.type); +	TEST_ASSERT_VAL("wrong config", (1 << 16) == evsel->attr.config); +	return 0; +} + +static int test__checkevent_breakpoint(struct perf_evlist *evlist) +{ +	struct perf_evsel *evsel = list_entry(evlist->entries.next, +					      struct perf_evsel, node); + +	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); +	TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->attr.type); +	TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config); +	TEST_ASSERT_VAL("wrong bp_type", (HW_BREAKPOINT_R | HW_BREAKPOINT_W) == +					 evsel->attr.bp_type); +	TEST_ASSERT_VAL("wrong bp_len", HW_BREAKPOINT_LEN_4 == +					evsel->attr.bp_len); +	return 0; +} + +static int test__checkevent_breakpoint_x(struct perf_evlist *evlist) +{ +	struct perf_evsel *evsel = list_entry(evlist->entries.next, +					      struct perf_evsel, node); + +	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); +	TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->attr.type); +	TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config); +	TEST_ASSERT_VAL("wrong bp_type", +			HW_BREAKPOINT_X == evsel->attr.bp_type); +	TEST_ASSERT_VAL("wrong bp_len", sizeof(long) == evsel->attr.bp_len); +	return 0; +} + +static int test__checkevent_breakpoint_r(struct perf_evlist *evlist) +{ +	struct perf_evsel *evsel = list_entry(evlist->entries.next, +					      struct perf_evsel, node); + +	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); +	TEST_ASSERT_VAL("wrong type", +			PERF_TYPE_BREAKPOINT == evsel->attr.type); +	TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config); +	TEST_ASSERT_VAL("wrong bp_type", +			HW_BREAKPOINT_R == evsel->attr.bp_type); +	TEST_ASSERT_VAL("wrong bp_len", +			HW_BREAKPOINT_LEN_4 == evsel->attr.bp_len); +	return 0; +} + +static int test__checkevent_breakpoint_w(struct perf_evlist *evlist) +{ +	struct perf_evsel *evsel = list_entry(evlist->entries.next, +					      struct perf_evsel, node); + +	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); +	TEST_ASSERT_VAL("wrong type", +			PERF_TYPE_BREAKPOINT == evsel->attr.type); +	
TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config); +	TEST_ASSERT_VAL("wrong bp_type", +			HW_BREAKPOINT_W == evsel->attr.bp_type); +	TEST_ASSERT_VAL("wrong bp_len", +			HW_BREAKPOINT_LEN_4 == evsel->attr.bp_len); +	return 0; +} + +static struct test__event_st { +	const char *name; +	__u32 type; +	int (*check)(struct perf_evlist *evlist); +} test__events[] = { +	{ +		.name  = "syscalls:sys_enter_open", +		.check = test__checkevent_tracepoint, +	}, +	{ +		.name  = "syscalls:*", +		.check = test__checkevent_tracepoint_multi, +	}, +	{ +		.name  = "r1", +		.check = test__checkevent_raw, +	}, +	{ +		.name  = "1:1", +		.check = test__checkevent_numeric, +	}, +	{ +		.name  = "instructions", +		.check = test__checkevent_symbolic_name, +	}, +	{ +		.name  = "faults", +		.check = test__checkevent_symbolic_alias, +	}, +	{ +		.name  = "L1-dcache-load-miss", +		.check = test__checkevent_genhw, +	}, +	{ +		.name  = "mem:0", +		.check = test__checkevent_breakpoint, +	}, +	{ +		.name  = "mem:0:x", +		.check = test__checkevent_breakpoint_x, +	}, +	{ +		.name  = "mem:0:r", +		.check = test__checkevent_breakpoint_r, +	}, +	{ +		.name  = "mem:0:w", +		.check = test__checkevent_breakpoint_w, +	}, +}; + +#define TEST__EVENTS_CNT (sizeof(test__events) / sizeof(struct test__event_st)) + +static int test__parse_events(void) +{ +	struct perf_evlist *evlist; +	u_int i; +	int ret = 0; + +	for (i = 0; i < TEST__EVENTS_CNT; i++) { +		struct test__event_st *e = &test__events[i]; + +		evlist = perf_evlist__new(NULL, NULL); +		if (evlist == NULL) +			break; + +		ret = parse_events(evlist, e->name, 0); +		if (ret) { +			pr_debug("failed to parse event '%s', err %d\n", +				 e->name, ret); +			break; +		} + +		ret = e->check(evlist); +		if (ret) +			break; + +		perf_evlist__delete(evlist); +	} + +	return ret; +}  static struct test {  	const char *desc;  	int (*func)(void); @@ -621,6 +862,10 @@ static struct test {  		.func = test__basic_mmap,  	},  	{ +		.desc = "parse events tests", +		.func = test__parse_events, +	}, +	{  		.func = NULL,  	},  }; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index f2f3f4937aa2..a43433f08300 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -990,7 +990,7 @@ static const char * const top_usage[] = {  static const struct option options[] = {  	OPT_CALLBACK('e', "event", &top.evlist, "event",  		     "event selector. 
use 'perf list' to list available events", -		     parse_events), +		     parse_events_option),  	OPT_INTEGER('c', "count", &default_interval,  		    "event period to sample"),  	OPT_INTEGER('p', "pid", &top.target_pid, diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 1a79df9f739f..9b4ff16cac96 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -14,6 +14,11 @@ enum chain_mode {  	CHAIN_GRAPH_REL  }; +enum chain_order { +	ORDER_CALLER, +	ORDER_CALLEE +}; +  struct callchain_node {  	struct callchain_node	*parent;  	struct list_head	siblings; @@ -41,6 +46,7 @@ struct callchain_param {  	u32			print_limit;  	double			min_percent;  	sort_chain_func_t	sort; +	enum chain_order	order;  };  struct callchain_list { diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c new file mode 100644 index 000000000000..fddf40f30d3e --- /dev/null +++ b/tools/perf/util/dwarf-aux.c @@ -0,0 +1,663 @@ +/* + * dwarf-aux.c : libdw auxiliary interfaces + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ + +#include <stdbool.h> +#include "util.h" +#include "debug.h" +#include "dwarf-aux.h" + +/** + * cu_find_realpath - Find the realpath of the target file + * @cu_die: A DIE(dwarf information entry) of CU(compilation Unit) + * @fname:  The tail filename of the target file + * + * Find the real(long) path of @fname in @cu_die. + */ +const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname) +{ +	Dwarf_Files *files; +	size_t nfiles, i; +	const char *src = NULL; +	int ret; + +	if (!fname) +		return NULL; + +	ret = dwarf_getsrcfiles(cu_die, &files, &nfiles); +	if (ret != 0) +		return NULL; + +	for (i = 0; i < nfiles; i++) { +		src = dwarf_filesrc(files, i, NULL, NULL); +		if (strtailcmp(src, fname) == 0) +			break; +	} +	if (i == nfiles) +		return NULL; +	return src; +} + +/** + * cu_get_comp_dir - Get the path of compilation directory + * @cu_die: a CU DIE + * + * Get the path of compilation directory of given @cu_die. + * Since this depends on DW_AT_comp_dir, older gcc will not + * embedded it. In that case, this returns NULL. + */ +const char *cu_get_comp_dir(Dwarf_Die *cu_die) +{ +	Dwarf_Attribute attr; +	if (dwarf_attr(cu_die, DW_AT_comp_dir, &attr) == NULL) +		return NULL; +	return dwarf_formstring(&attr); +} + +/** + * cu_find_lineinfo - Get a line number and file name for given address + * @cu_die: a CU DIE + * @addr: An address + * @fname: a pointer which returns the file name string + * @lineno: a pointer which returns the line number + * + * Find a line number and file name for @addr in @cu_die. 
+ */ +int cu_find_lineinfo(Dwarf_Die *cu_die, unsigned long addr, +		    const char **fname, int *lineno) +{ +	Dwarf_Line *line; +	Dwarf_Addr laddr; + +	line = dwarf_getsrc_die(cu_die, (Dwarf_Addr)addr); +	if (line && dwarf_lineaddr(line, &laddr) == 0 && +	    addr == (unsigned long)laddr && dwarf_lineno(line, lineno) == 0) { +		*fname = dwarf_linesrc(line, NULL, NULL); +		if (!*fname) +			/* line number is useless without filename */ +			*lineno = 0; +	} + +	return *lineno ?: -ENOENT; +} + +/** + * die_compare_name - Compare diename and tname + * @dw_die: a DIE + * @tname: a string of target name + * + * Compare the name of @dw_die and @tname. Return false if @dw_die has no name. + */ +bool die_compare_name(Dwarf_Die *dw_die, const char *tname) +{ +	const char *name; +	name = dwarf_diename(dw_die); +	return name ? (strcmp(tname, name) == 0) : false; +} + +/** + * die_get_call_lineno - Get callsite line number of inline-function instance + * @in_die: a DIE of an inlined function instance + * + * Get call-site line number of @in_die. This means from where the inline + * function is called. + */ +int die_get_call_lineno(Dwarf_Die *in_die) +{ +	Dwarf_Attribute attr; +	Dwarf_Word ret; + +	if (!dwarf_attr(in_die, DW_AT_call_line, &attr)) +		return -ENOENT; + +	dwarf_formudata(&attr, &ret); +	return (int)ret; +} + +/** + * die_get_type - Get type DIE + * @vr_die: a DIE of a variable + * @die_mem: where to store a type DIE + * + * Get a DIE of the type of given variable (@vr_die), and store + * it to die_mem. Return NULL if fails to get a type DIE. + */ +Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem) +{ +	Dwarf_Attribute attr; + +	if (dwarf_attr_integrate(vr_die, DW_AT_type, &attr) && +	    dwarf_formref_die(&attr, die_mem)) +		return die_mem; +	else +		return NULL; +} + +/* Get a type die, but skip qualifiers */ +static Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem) +{ +	int tag; + +	do { +		vr_die = die_get_type(vr_die, die_mem); +		if (!vr_die) +			break; +		tag = dwarf_tag(vr_die); +	} while (tag == DW_TAG_const_type || +		 tag == DW_TAG_restrict_type || +		 tag == DW_TAG_volatile_type || +		 tag == DW_TAG_shared_type); + +	return vr_die; +} + +/** + * die_get_real_type - Get a type die, but skip qualifiers and typedef + * @vr_die: a DIE of a variable + * @die_mem: where to store a type DIE + * + * Get a DIE of the type of given variable (@vr_die), and store + * it to die_mem. Return NULL if fails to get a type DIE. + * If the type is qualifiers (e.g. const) or typedef, this skips it + * and tries to find real type (structure or basic types, e.g. int). + */ +Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem) +{ +	do { +		vr_die = __die_get_real_type(vr_die, die_mem); +	} while (vr_die && dwarf_tag(vr_die) == DW_TAG_typedef); + +	return vr_die; +} + +/* Get attribute and translate it as a udata */ +static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name, +			      Dwarf_Word *result) +{ +	Dwarf_Attribute attr; + +	if (dwarf_attr(tp_die, attr_name, &attr) == NULL || +	    dwarf_formudata(&attr, result) != 0) +		return -ENOENT; + +	return 0; +} + +/** + * die_is_signed_type - Check whether a type DIE is signed or not + * @tp_die: a DIE of a type + * + * Get the encoding of @tp_die and return true if the encoding + * is signed. 
+ */ +bool die_is_signed_type(Dwarf_Die *tp_die) +{ +	Dwarf_Word ret; + +	if (die_get_attr_udata(tp_die, DW_AT_encoding, &ret)) +		return false; + +	return (ret == DW_ATE_signed_char || ret == DW_ATE_signed || +		ret == DW_ATE_signed_fixed); +} + +/** + * die_get_data_member_location - Get the data-member offset + * @mb_die: a DIE of a member of a data structure + * @offs: The offset of the member in the data structure + * + * Get the offset of @mb_die in the data structure including @mb_die, and + * stores result offset to @offs. If any error occurs this returns errno. + */ +int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs) +{ +	Dwarf_Attribute attr; +	Dwarf_Op *expr; +	size_t nexpr; +	int ret; + +	if (dwarf_attr(mb_die, DW_AT_data_member_location, &attr) == NULL) +		return -ENOENT; + +	if (dwarf_formudata(&attr, offs) != 0) { +		/* DW_AT_data_member_location should be DW_OP_plus_uconst */ +		ret = dwarf_getlocation(&attr, &expr, &nexpr); +		if (ret < 0 || nexpr == 0) +			return -ENOENT; + +		if (expr[0].atom != DW_OP_plus_uconst || nexpr != 1) { +			pr_debug("Unable to get offset:Unexpected OP %x (%zd)\n", +				 expr[0].atom, nexpr); +			return -ENOTSUP; +		} +		*offs = (Dwarf_Word)expr[0].number; +	} +	return 0; +} + +/** + * die_find_child - Generic DIE search function in DIE tree + * @rt_die: a root DIE + * @callback: a callback function + * @data: a user data passed to the callback function + * @die_mem: a buffer for result DIE + * + * Trace DIE tree from @rt_die and call @callback for each child DIE. + * If @callback returns DIE_FIND_CB_END, this stores the DIE into + * @die_mem and returns it. If @callback returns DIE_FIND_CB_CONTINUE, + * this continues to trace the tree. Optionally, @callback can return + * DIE_FIND_CB_CHILD and DIE_FIND_CB_SIBLING, those means trace only + * the children and trace only the siblings respectively. + * Returns NULL if @callback can't find any appropriate DIE. + */ +Dwarf_Die *die_find_child(Dwarf_Die *rt_die, +			  int (*callback)(Dwarf_Die *, void *), +			  void *data, Dwarf_Die *die_mem) +{ +	Dwarf_Die child_die; +	int ret; + +	ret = dwarf_child(rt_die, die_mem); +	if (ret != 0) +		return NULL; + +	do { +		ret = callback(die_mem, data); +		if (ret == DIE_FIND_CB_END) +			return die_mem; + +		if ((ret & DIE_FIND_CB_CHILD) && +		    die_find_child(die_mem, callback, data, &child_die)) { +			memcpy(die_mem, &child_die, sizeof(Dwarf_Die)); +			return die_mem; +		} +	} while ((ret & DIE_FIND_CB_SIBLING) && +		 dwarf_siblingof(die_mem, die_mem) == 0); + +	return NULL; +} + +struct __addr_die_search_param { +	Dwarf_Addr	addr; +	Dwarf_Die	*die_mem; +}; + +/* die_find callback for non-inlined function search */ +static int __die_search_func_cb(Dwarf_Die *fn_die, void *data) +{ +	struct __addr_die_search_param *ad = data; + +	if (dwarf_tag(fn_die) == DW_TAG_subprogram && +	    dwarf_haspc(fn_die, ad->addr)) { +		memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die)); +		return DWARF_CB_ABORT; +	} +	return DWARF_CB_OK; +} + +/** + * die_find_realfunc - Search a non-inlined function at given address + * @cu_die: a CU DIE which including @addr + * @addr: target address + * @die_mem: a buffer for result DIE + * + * Search a non-inlined function DIE which includes @addr. Stores the + * DIE to @die_mem and returns it if found. Returns NULl if failed. 
+ */ +Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr, +				    Dwarf_Die *die_mem) +{ +	struct __addr_die_search_param ad; +	ad.addr = addr; +	ad.die_mem = die_mem; +	/* dwarf_getscopes can't find subprogram. */ +	if (!dwarf_getfuncs(cu_die, __die_search_func_cb, &ad, 0)) +		return NULL; +	else +		return die_mem; +} + +/* die_find callback for inline function search */ +static int __die_find_inline_cb(Dwarf_Die *die_mem, void *data) +{ +	Dwarf_Addr *addr = data; + +	if (dwarf_tag(die_mem) == DW_TAG_inlined_subroutine && +	    dwarf_haspc(die_mem, *addr)) +		return DIE_FIND_CB_END; + +	return DIE_FIND_CB_CONTINUE; +} + +/** + * die_find_inlinefunc - Search an inlined function at given address + * @cu_die: a CU DIE which including @addr + * @addr: target address + * @die_mem: a buffer for result DIE + * + * Search an inlined function DIE which includes @addr. Stores the + * DIE to @die_mem and returns it if found. Returns NULl if failed. + * If several inlined functions are expanded recursively, this trace + * it and returns deepest one. + */ +Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, +			       Dwarf_Die *die_mem) +{ +	Dwarf_Die tmp_die; + +	sp_die = die_find_child(sp_die, __die_find_inline_cb, &addr, &tmp_die); +	if (!sp_die) +		return NULL; + +	/* Inlined function could be recursive. Trace it until fail */ +	while (sp_die) { +		memcpy(die_mem, sp_die, sizeof(Dwarf_Die)); +		sp_die = die_find_child(sp_die, __die_find_inline_cb, &addr, +					&tmp_die); +	} + +	return die_mem; +} + +/* Line walker internal parameters */ +struct __line_walk_param { +	const char *fname; +	line_walk_callback_t callback; +	void *data; +	int retval; +}; + +static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data) +{ +	struct __line_walk_param *lw = data; +	Dwarf_Addr addr; +	int lineno; + +	if (dwarf_tag(in_die) == DW_TAG_inlined_subroutine) { +		lineno = die_get_call_lineno(in_die); +		if (lineno > 0 && dwarf_entrypc(in_die, &addr) == 0) { +			lw->retval = lw->callback(lw->fname, lineno, addr, +						  lw->data); +			if (lw->retval != 0) +				return DIE_FIND_CB_END; +		} +	} +	return DIE_FIND_CB_SIBLING; +} + +/* Walk on lines of blocks included in given DIE */ +static int __die_walk_funclines(Dwarf_Die *sp_die, +				line_walk_callback_t callback, void *data) +{ +	struct __line_walk_param lw = { +		.callback = callback, +		.data = data, +		.retval = 0, +	}; +	Dwarf_Die die_mem; +	Dwarf_Addr addr; +	int lineno; + +	/* Handle function declaration line */ +	lw.fname = dwarf_decl_file(sp_die); +	if (lw.fname && dwarf_decl_line(sp_die, &lineno) == 0 && +	    dwarf_entrypc(sp_die, &addr) == 0) { +		lw.retval = callback(lw.fname, lineno, addr, data); +		if (lw.retval != 0) +			goto done; +	} +	die_find_child(sp_die, __die_walk_funclines_cb, &lw, &die_mem); +done: +	return lw.retval; +} + +static int __die_walk_culines_cb(Dwarf_Die *sp_die, void *data) +{ +	struct __line_walk_param *lw = data; + +	lw->retval = __die_walk_funclines(sp_die, lw->callback, lw->data); +	if (lw->retval != 0) +		return DWARF_CB_ABORT; + +	return DWARF_CB_OK; +} + +/** + * die_walk_lines - Walk on lines inside given DIE + * @rt_die: a root DIE (CU or subprogram) + * @callback: callback routine + * @data: user data + * + * Walk on all lines inside given @rt_die and call @callback on each line. + * If the @rt_die is a function, walk only on the lines inside the function, + * otherwise @rt_die must be a CU DIE. 
+ * Note that this walks not only dwarf line list, but also function entries
+ * and inline call-site.
+ */
+int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data)
+{
+	Dwarf_Lines *lines;
+	Dwarf_Line *line;
+	Dwarf_Addr addr;
+	const char *fname;
+	int lineno, ret = 0;
+	Dwarf_Die die_mem, *cu_die;
+	size_t nlines, i;
+
+	/* Get the CU die */
+	if (dwarf_tag(rt_die) == DW_TAG_subprogram)
+		cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL);
+	else
+		cu_die = rt_die;
+	if (!cu_die) {
+		pr_debug2("Failed to get CU from subprogram\n");
+		return -EINVAL;
+	}
+
+	/* Get lines list in the CU */
+	if (dwarf_getsrclines(cu_die, &lines, &nlines) != 0) {
+		pr_debug2("Failed to get source lines on this CU.\n");
+		return -ENOENT;
+	}
+	pr_debug2("Get %zd lines from this CU\n", nlines);
+
+	/* Walk on the lines on lines list */
+	for (i = 0; i < nlines; i++) {
+		line = dwarf_onesrcline(lines, i);
+		if (line == NULL ||
+		    dwarf_lineno(line, &lineno) != 0 ||
+		    dwarf_lineaddr(line, &addr) != 0) {
+			pr_debug2("Failed to get line info. "
+				  "Possible error in debuginfo.\n");
+			continue;
+		}
+		/* Filter lines based on address */
+		if (rt_die != cu_die)
+			/*
+			 * Address filtering
+			 * The line is included in given function, and
+			 * no inline block includes it.
+			 */
+			if (!dwarf_haspc(rt_die, addr) ||
+			    die_find_inlinefunc(rt_die, addr, &die_mem))
+				continue;
+		/* Get source line */
+		fname = dwarf_linesrc(line, NULL, NULL);
+
+		ret = callback(fname, lineno, addr, data);
+		if (ret != 0)
+			return ret;
+	}
+
+	/*
+	 * Dwarf lines doesn't include function declarations and inlined
+	 * subroutines. We have to check functions list or given function.
+	 */
+	if (rt_die != cu_die)
+		ret = __die_walk_funclines(rt_die, callback, data);
+	else {
+		struct __line_walk_param param = {
+			.callback = callback,
+			.data = data,
+			.retval = 0,
+		};
+		dwarf_getfuncs(cu_die, __die_walk_culines_cb, &param, 0);
+		ret = param.retval;
+	}
+
+	return ret;
+}
+
+struct __find_variable_param {
+	const char *name;
+	Dwarf_Addr addr;
+};
+
+static int __die_find_variable_cb(Dwarf_Die *die_mem, void *data)
+{
+	struct __find_variable_param *fvp = data;
+	int tag;
+
+	tag = dwarf_tag(die_mem);
+	if ((tag == DW_TAG_formal_parameter ||
+	     tag == DW_TAG_variable) &&
+	    die_compare_name(die_mem, fvp->name))
+		return DIE_FIND_CB_END;
+
+	if (dwarf_haspc(die_mem, fvp->addr))
+		return DIE_FIND_CB_CONTINUE;
+	else
+		return DIE_FIND_CB_SIBLING;
+}
+
+/**
+ * die_find_variable_at - Find a given name variable at given address
+ * @sp_die: a function DIE
+ * @name: variable name
+ * @addr: address
+ * @die_mem: a buffer for result DIE
+ *
+ * Find a variable DIE called @name at @addr in @sp_die.
+ */
+Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name,
+				Dwarf_Addr addr, Dwarf_Die *die_mem)
+{
+	struct __find_variable_param fvp = { .name = name, .addr = addr};
+
+	return die_find_child(sp_die, __die_find_variable_cb, (void *)&fvp,
+			      die_mem);
+}
+
+static int __die_find_member_cb(Dwarf_Die *die_mem, void *data)
+{
+	const char *name = data;
+
+	if ((dwarf_tag(die_mem) == DW_TAG_member) &&
+	    die_compare_name(die_mem, name))
+		return DIE_FIND_CB_END;
+
+	return DIE_FIND_CB_SIBLING;
+}
+
+/**
+ * die_find_member - Find a given name member in a data structure
+ * @st_die: a data structure type DIE
+ * @name: member name
+ * @die_mem: a buffer for result DIE
+ *
+ * Find a member DIE called @name in @st_die. 
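A line_walk_callback_t receives the file name, line number and address of every line, and any non-zero return value aborts the walk and is handed back to the caller of die_walk_lines(). The dumper below is a sketch under that contract; the 32-line cap and the output format are arbitrary choices, not part of the patch.

#include <stdio.h>
#include <errno.h>
#include "dwarf-aux.h"

struct line_dump {
	int seen;
	int limit;
};

static int dump_line_cb(const char *fname, int lineno, Dwarf_Addr addr,
			void *data)
{
	struct line_dump *ld = data;

	printf("%s:%d\t%#llx\n", fname, lineno, (unsigned long long)addr);

	/* Non-zero stops the walk; the value comes back from die_walk_lines() */
	return (++ld->seen >= ld->limit) ? -ERANGE : 0;
}

static int dump_function_lines(Dwarf_Die *sp_die)
{
	struct line_dump ld = { .seen = 0, .limit = 32 };

	return die_walk_lines(sp_die, dump_line_cb, &ld);
}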
+ */ +Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name, +			   Dwarf_Die *die_mem) +{ +	return die_find_child(st_die, __die_find_member_cb, (void *)name, +			      die_mem); +} + +/** + * die_get_typename - Get the name of given variable DIE + * @vr_die: a variable DIE + * @buf: a buffer for result type name + * @len: a max-length of @buf + * + * Get the name of @vr_die and stores it to @buf. Return the actual length + * of type name if succeeded. Return -E2BIG if @len is not enough long, and + * Return -ENOENT if failed to find type name. + * Note that the result will stores typedef name if possible, and stores + * "*(function_type)" if the type is a function pointer. + */ +int die_get_typename(Dwarf_Die *vr_die, char *buf, int len) +{ +	Dwarf_Die type; +	int tag, ret, ret2; +	const char *tmp = ""; + +	if (__die_get_real_type(vr_die, &type) == NULL) +		return -ENOENT; + +	tag = dwarf_tag(&type); +	if (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type) +		tmp = "*"; +	else if (tag == DW_TAG_subroutine_type) { +		/* Function pointer */ +		ret = snprintf(buf, len, "(function_type)"); +		return (ret >= len) ? -E2BIG : ret; +	} else { +		if (!dwarf_diename(&type)) +			return -ENOENT; +		if (tag == DW_TAG_union_type) +			tmp = "union "; +		else if (tag == DW_TAG_structure_type) +			tmp = "struct "; +		/* Write a base name */ +		ret = snprintf(buf, len, "%s%s", tmp, dwarf_diename(&type)); +		return (ret >= len) ? -E2BIG : ret; +	} +	ret = die_get_typename(&type, buf, len); +	if (ret > 0) { +		ret2 = snprintf(buf + ret, len - ret, "%s", tmp); +		ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret; +	} +	return ret; +} + +/** + * die_get_varname - Get the name and type of given variable DIE + * @vr_die: a variable DIE + * @buf: a buffer for type and variable name + * @len: the max-length of @buf + * + * Get the name and type of @vr_die and stores it in @buf as "type\tname". + */ +int die_get_varname(Dwarf_Die *vr_die, char *buf, int len) +{ +	int ret, ret2; + +	ret = die_get_typename(vr_die, buf, len); +	if (ret < 0) { +		pr_debug("Failed to get type, make it unknown.\n"); +		ret = snprintf(buf, len, "(unknown_type)"); +	} +	if (ret > 0) { +		ret2 = snprintf(buf + ret, len - ret, "\t%s", +				dwarf_diename(vr_die)); +		ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret; +	} +	return ret; +} + diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h new file mode 100644 index 000000000000..bc3b21167e70 --- /dev/null +++ b/tools/perf/util/dwarf-aux.h @@ -0,0 +1,100 @@ +#ifndef _DWARF_AUX_H +#define _DWARF_AUX_H +/* + * dwarf-aux.h : libdw auxiliary interfaces + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
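die_get_varname() fills the buffer as "type\tname", so a caller that wants a C-style declaration only needs to swap the tab for a space. A small illustrative wrapper, assuming dwarf-aux.h is on the include path and a 128-byte buffer is large enough:

#include <stdio.h>
#include <string.h>
#include "dwarf-aux.h"

static void print_variable_decl(Dwarf_Die *vr_die)
{
	char buf[128];
	char *tab;

	if (die_get_varname(vr_die, buf, sizeof(buf)) < 0)
		return;

	tab = strchr(buf, '\t');
	if (tab)
		*tab = ' ';	/* "long\tcount" becomes "long count" */
	printf("%s;\n", buf);
}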
+ * + */ + +#include <dwarf.h> +#include <elfutils/libdw.h> +#include <elfutils/libdwfl.h> +#include <elfutils/version.h> + +/* Find the realpath of the target file */ +extern const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname); + +/* Get DW_AT_comp_dir (should be NULL with older gcc) */ +extern const char *cu_get_comp_dir(Dwarf_Die *cu_die); + +/* Get a line number and file name for given address */ +extern int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr, +			    const char **fname, int *lineno); + +/* Compare diename and tname */ +extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname); + +/* Get callsite line number of inline-function instance */ +extern int die_get_call_lineno(Dwarf_Die *in_die); + +/* Get type die */ +extern Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem); + +/* Get a type die, but skip qualifiers and typedef */ +extern Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem); + +/* Check whether the DIE is signed or not */ +extern bool die_is_signed_type(Dwarf_Die *tp_die); + +/* Get data_member_location offset */ +extern int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs); + +/* Return values for die_find_child() callbacks */ +enum { +	DIE_FIND_CB_END = 0,		/* End of Search */ +	DIE_FIND_CB_CHILD = 1,		/* Search only children */ +	DIE_FIND_CB_SIBLING = 2,	/* Search only siblings */ +	DIE_FIND_CB_CONTINUE = 3,	/* Search children and siblings */ +}; + +/* Search child DIEs */ +extern Dwarf_Die *die_find_child(Dwarf_Die *rt_die, +				 int (*callback)(Dwarf_Die *, void *), +				 void *data, Dwarf_Die *die_mem); + +/* Search a non-inlined function including given address */ +extern Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr, +				    Dwarf_Die *die_mem); + +/* Search an inlined function including given address */ +extern Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, +				      Dwarf_Die *die_mem); + +/* Walker on lines (Note: line number will not be sorted) */ +typedef int (* line_walk_callback_t) (const char *fname, int lineno, +				      Dwarf_Addr addr, void *data); + +/* + * Walk on lines inside given DIE. If the DIE is a subprogram, walk only on + * the lines inside the subprogram, otherwise the DIE must be a CU DIE. 
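The DIE_FIND_CB_* values form a small bit mask: CHILD (1) and SIBLING (2) can be combined, and CONTINUE (3) is simply both, which is why die_find_child() tests the callback's return value with '&'. Returning only DIE_FIND_CB_SIBLING therefore keeps the walk on one level, as in this sketch of a parameter lookup; apart from die_find_child(), die_compare_name() and the constants themselves, the names here are hypothetical.

#include <dwarf.h>
#include "dwarf-aux.h"

/* Look only at the direct children of a subprogram DIE, never descend */
static int __find_param_cb(Dwarf_Die *die_mem, void *data)
{
	const char *name = data;

	if (dwarf_tag(die_mem) == DW_TAG_formal_parameter &&
	    die_compare_name(die_mem, name))
		return DIE_FIND_CB_END;		/* 0: stop, found it */

	return DIE_FIND_CB_SIBLING;		/* 2: try the next sibling only */
}

static Dwarf_Die *find_parameter(Dwarf_Die *sp_die, const char *name,
				 Dwarf_Die *die_mem)
{
	return die_find_child(sp_die, __find_param_cb, (void *)name, die_mem);
}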
+ */ +extern int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, +			  void *data); + +/* Find a variable called 'name' at given address */ +extern Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name, +				       Dwarf_Addr addr, Dwarf_Die *die_mem); + +/* Find a member called 'name' */ +extern Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name, +				  Dwarf_Die *die_mem); + +/* Get the name of given variable DIE */ +extern int die_get_typename(Dwarf_Die *vr_die, char *buf, int len); + +/* Get the name and type of given variable DIE, stored as "type\tname" */ +extern int die_get_varname(Dwarf_Die *vr_die, char *buf, int len); +#endif diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 0239eb87b232..a03a36b7908a 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -377,6 +377,7 @@ int perf_event__parse_sample(const union perf_event *event, u64 type,  		array++;  	} +	data->addr = 0;  	if (type & PERF_SAMPLE_ADDR) {  		data->addr = *array;  		array++; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 7e9366e4490b..e9a31554e265 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -61,6 +61,7 @@ struct perf_evsel {  		off_t		id_offset;  	};  	struct cgroup_sel	*cgrp; +	bool 			supported;  };  struct cpu_map; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index afb0849fe530..cb2959a3fb43 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -877,9 +877,12 @@ int perf_session__read_header(struct perf_session *session, int fd)  		struct perf_evsel *evsel;  		off_t tmp; -		if (perf_header__getbuffer64(header, fd, &f_attr, sizeof(f_attr))) +		if (readn(fd, &f_attr, sizeof(f_attr)) <= 0)  			goto out_errno; +		if (header->needs_swap) +			perf_event__attr_swap(&f_attr.attr); +  		tmp = lseek(fd, 0, SEEK_CUR);  		evsel = perf_evsel__new(&f_attr.attr, i); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 627a02e03c57..677e1da6bb3e 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -14,7 +14,8 @@ enum hist_filter {  struct callchain_param	callchain_param = {  	.mode	= CHAIN_GRAPH_REL, -	.min_percent = 0.5 +	.min_percent = 0.5, +	.order  = ORDER_CALLEE  };  u16 hists__col_len(struct hists *self, enum hist_column col) @@ -846,6 +847,9 @@ print_entries:  	for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) {  		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); +		if (h->filtered) +			continue; +  		if (show_displacement) {  			if (h->pair != NULL)  				displacement = ((long)h->pair->position - diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 41982c373faf..4ea7e19f5251 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -86,22 +86,24 @@ static const char *sw_event_names[PERF_COUNT_SW_MAX] = {  #define MAX_ALIASES 8 -static const char *hw_cache[][MAX_ALIASES] = { +static const char *hw_cache[PERF_COUNT_HW_CACHE_MAX][MAX_ALIASES] = {   { "L1-dcache",	"l1-d",		"l1d",		"L1-data",		},   { "L1-icache",	"l1-i",		"l1i",		"L1-instruction",	}, - { "LLC",	"L2"							}, + { "LLC",	"L2",							},   { "dTLB",	"d-tlb",	"Data-TLB",				},   { "iTLB",	"i-tlb",	"Instruction-TLB",			},   { "branch",	"branches",	"bpu",		"btb",		"bpc",	}, + { "node",								},  }; -static const char *hw_cache_op[][MAX_ALIASES] = { +static const char *hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][MAX_ALIASES] = {   { "load",	"loads",	"read",					},   { "store",	"stores",	"write",				}, 
  { "prefetch",	"prefetches",	"speculative-read", "speculative-load",	},  }; -static const char *hw_cache_result[][MAX_ALIASES] = { +static const char *hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX] +				  [MAX_ALIASES] = {   { "refs",	"Reference",	"ops",		"access",		},   { "misses",	"miss",							},  }; @@ -124,6 +126,7 @@ static unsigned long hw_cache_stat[C(MAX)] = {   [C(DTLB)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),   [C(ITLB)]	= (CACHE_READ),   [C(BPU)]	= (CACHE_READ), + [C(NODE)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),  };  #define for_each_subsystem(sys_dir, sys_dirent, sys_next)	       \ @@ -393,7 +396,7 @@ parse_generic_hw_event(const char **str, struct perf_event_attr *attr)  						PERF_COUNT_HW_CACHE_OP_MAX);  			if (cache_op >= 0) {  				if (!is_cache_op_valid(cache_type, cache_op)) -					return 0; +					return EVT_FAILED;  				continue;  			}  		} @@ -475,7 +478,7 @@ parse_single_tracepoint_event(char *sys_name,  /* sys + ':' + event + ':' + flags*/  #define MAX_EVOPT_LEN	(MAX_EVENT_LENGTH * 2 + 2 + 128)  static enum event_result -parse_multiple_tracepoint_event(const struct option *opt, char *sys_name, +parse_multiple_tracepoint_event(struct perf_evlist *evlist, char *sys_name,  				const char *evt_exp, char *flags)  {  	char evt_path[MAXPATHLEN]; @@ -509,7 +512,7 @@ parse_multiple_tracepoint_event(const struct option *opt, char *sys_name,  		if (len < 0)  			return EVT_FAILED; -		if (parse_events(opt, event_opt, 0)) +		if (parse_events(evlist, event_opt, 0))  			return EVT_FAILED;  	} @@ -517,7 +520,7 @@ parse_multiple_tracepoint_event(const struct option *opt, char *sys_name,  }  static enum event_result -parse_tracepoint_event(const struct option *opt, const char **strp, +parse_tracepoint_event(struct perf_evlist *evlist, const char **strp,  		       struct perf_event_attr *attr)  {  	const char *evt_name; @@ -557,8 +560,8 @@ parse_tracepoint_event(const struct option *opt, const char **strp,  		return EVT_FAILED;  	if (strpbrk(evt_name, "*?")) {  		*strp += strlen(sys_name) + evt_length + 1; /* 1 == the ':' */ -		return parse_multiple_tracepoint_event(opt, sys_name, evt_name, -						       flags); +		return parse_multiple_tracepoint_event(evlist, sys_name, +						       evt_name, flags);  	} else {  		return parse_single_tracepoint_event(sys_name, evt_name,  						     evt_length, attr, strp); @@ -778,12 +781,12 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr)   * Symbolic names are (almost) exactly matched.   
*/  static enum event_result -parse_event_symbols(const struct option *opt, const char **str, +parse_event_symbols(struct perf_evlist *evlist, const char **str,  		    struct perf_event_attr *attr)  {  	enum event_result ret; -	ret = parse_tracepoint_event(opt, str, attr); +	ret = parse_tracepoint_event(evlist, str, attr);  	if (ret != EVT_FAILED)  		goto modifier; @@ -822,9 +825,8 @@ modifier:  	return ret;  } -int parse_events(const struct option *opt, const char *str, int unset __used) +int parse_events(struct perf_evlist *evlist , const char *str, int unset __used)  { -	struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;  	struct perf_event_attr attr;  	enum event_result ret;  	const char *ostr; @@ -832,7 +834,7 @@ int parse_events(const struct option *opt, const char *str, int unset __used)  	for (;;) {  		ostr = str;  		memset(&attr, 0, sizeof(attr)); -		ret = parse_event_symbols(opt, &str, &attr); +		ret = parse_event_symbols(evlist, &str, &attr);  		if (ret == EVT_FAILED)  			return -1; @@ -863,6 +865,13 @@ int parse_events(const struct option *opt, const char *str, int unset __used)  	return 0;  } +int parse_events_option(const struct option *opt, const char *str, +			int unset __used) +{ +	struct perf_evlist *evlist = *(struct perf_evlist **)opt->value; +	return parse_events(evlist, str, unset); +} +  int parse_filter(const struct option *opt, const char *str,  		 int unset __used)  { diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 746d3fcbfc2a..2f8e375e038d 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -8,6 +8,7 @@  struct list_head;  struct perf_evsel; +struct perf_evlist;  struct option; @@ -24,7 +25,10 @@ const char *event_type(int type);  const char *event_name(struct perf_evsel *event);  extern const char *__event_name(int type, u64 config); -extern int parse_events(const struct option *opt, const char *str, int unset); +extern int parse_events_option(const struct option *opt, const char *str, +			       int unset); +extern int parse_events(struct perf_evlist *evlist, const char *str, +			int unset);  extern int parse_filter(const struct option *opt, const char *str, int unset);  #define EVENTS_HELP_MAX (128*1024) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index f0223166e761..b82d54fa2c56 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -117,6 +117,10 @@ static struct map *kernel_get_module_map(const char *module)  	struct rb_node *nd;  	struct map_groups *grp = &machine.kmaps; +	/* A file path -- this is an offline module */ +	if (module && strchr(module, '/')) +		return machine__new_module(&machine, 0, module); +  	if (!module)  		module = "kernel"; @@ -170,16 +174,24 @@ const char *kernel_get_module_path(const char *module)  }  #ifdef DWARF_SUPPORT -static int open_vmlinux(const char *module) +/* Open new debuginfo of given module */ +static struct debuginfo *open_debuginfo(const char *module)  { -	const char *path = kernel_get_module_path(module); -	if (!path) { -		pr_err("Failed to find path of %s module.\n", -		       module ?: "kernel"); -		return -ENOENT; +	const char *path; + +	/* A file path -- this is an offline module */ +	if (module && strchr(module, '/')) +		path = module; +	else { +		path = kernel_get_module_path(module); + +		if (!path) { +			pr_err("Failed to find path of %s module.\n", +			       module ?: "kernel"); +			return NULL; +		}  	} -	pr_debug("Try to open %s\n", path); -	return open(path, 
O_RDONLY); +	return debuginfo__new(path);  }  /* @@ -193,13 +205,24 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,  	struct map *map;  	u64 addr;  	int ret = -ENOENT; +	struct debuginfo *dinfo;  	sym = __find_kernel_function_by_name(tp->symbol, &map);  	if (sym) {  		addr = map->unmap_ip(map, sym->start + tp->offset);  		pr_debug("try to find %s+%ld@%" PRIx64 "\n", tp->symbol,  			 tp->offset, addr); -		ret = find_perf_probe_point((unsigned long)addr, pp); + +		dinfo = debuginfo__new_online_kernel(addr); +		if (dinfo) { +			ret = debuginfo__find_probe_point(dinfo, +						 (unsigned long)addr, pp); +			debuginfo__delete(dinfo); +		} else { +			pr_debug("Failed to open debuginfo at 0x%" PRIx64 "\n", +				 addr); +			ret = -ENOENT; +		}  	}  	if (ret <= 0) {  		pr_debug("Failed to find corresponding probes from " @@ -214,30 +237,70 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,  	return 0;  } +static int add_module_to_probe_trace_events(struct probe_trace_event *tevs, +					    int ntevs, const char *module) +{ +	int i, ret = 0; +	char *tmp; + +	if (!module) +		return 0; + +	tmp = strrchr(module, '/'); +	if (tmp) { +		/* This is a module path -- get the module name */ +		module = strdup(tmp + 1); +		if (!module) +			return -ENOMEM; +		tmp = strchr(module, '.'); +		if (tmp) +			*tmp = '\0'; +		tmp = (char *)module;	/* For free() */ +	} + +	for (i = 0; i < ntevs; i++) { +		tevs[i].point.module = strdup(module); +		if (!tevs[i].point.module) { +			ret = -ENOMEM; +			break; +		} +	} + +	if (tmp) +		free(tmp); + +	return ret; +} +  /* Try to find perf_probe_event with debuginfo */  static int try_to_find_probe_trace_events(struct perf_probe_event *pev, -					   struct probe_trace_event **tevs, -					   int max_tevs, const char *module) +					  struct probe_trace_event **tevs, +					  int max_tevs, const char *module)  {  	bool need_dwarf = perf_probe_event_need_dwarf(pev); -	int fd, ntevs; +	struct debuginfo *dinfo = open_debuginfo(module); +	int ntevs, ret = 0; -	fd = open_vmlinux(module); -	if (fd < 0) { +	if (!dinfo) {  		if (need_dwarf) {  			pr_warning("Failed to open debuginfo file.\n"); -			return fd; +			return -ENOENT;  		} -		pr_debug("Could not open vmlinux. Try to use symbols.\n"); +		pr_debug("Could not open debuginfo. Try to use symbols.\n");  		return 0;  	} -	/* Searching trace events corresponding to probe event */ -	ntevs = find_probe_trace_events(fd, pev, tevs, max_tevs); +	/* Searching trace events corresponding to a probe event */ +	ntevs = debuginfo__find_trace_events(dinfo, pev, tevs, max_tevs); + +	debuginfo__delete(dinfo);  	if (ntevs > 0) {	/* Succeeded to find trace events */  		pr_debug("find %d probe_trace_events.\n", ntevs); -		return ntevs; +		if (module) +			ret = add_module_to_probe_trace_events(*tevs, ntevs, +							       module); +		return ret < 0 ? ret : ntevs;  	}  	if (ntevs == 0)	{	/* No error but failed to find probe point. 
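add_module_to_probe_trace_events() accepts either a plain module name or a file path; for a path it keeps only the basename with the extension cut off. The same derivation in isolation, with a made-up helper name and example path:

#include <stdlib.h>
#include <string.h>

/* "/tmp/drivers/foo/bar_mod.ko" -> "bar_mod" (caller frees the result) */
static char *module_name_from_path(const char *path)
{
	const char *base = strrchr(path, '/');
	char *name, *dot;

	name = strdup(base ? base + 1 : path);
	if (!name)
		return NULL;

	dot = strchr(name, '.');
	if (dot)
		*dot = '\0';
	return name;
}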
*/ @@ -371,8 +434,9 @@ int show_line_range(struct line_range *lr, const char *module)  {  	int l = 1;  	struct line_node *ln; +	struct debuginfo *dinfo;  	FILE *fp; -	int fd, ret; +	int ret;  	char *tmp;  	/* Search a line range */ @@ -380,13 +444,14 @@ int show_line_range(struct line_range *lr, const char *module)  	if (ret < 0)  		return ret; -	fd = open_vmlinux(module); -	if (fd < 0) { +	dinfo = open_debuginfo(module); +	if (!dinfo) {  		pr_warning("Failed to open debuginfo file.\n"); -		return fd; +		return -ENOENT;  	} -	ret = find_line_range(fd, lr); +	ret = debuginfo__find_line_range(dinfo, lr); +	debuginfo__delete(dinfo);  	if (ret == 0) {  		pr_warning("Specified source line is not found.\n");  		return -ENOENT; @@ -448,7 +513,8 @@ end:  	return ret;  } -static int show_available_vars_at(int fd, struct perf_probe_event *pev, +static int show_available_vars_at(struct debuginfo *dinfo, +				  struct perf_probe_event *pev,  				  int max_vls, struct strfilter *_filter,  				  bool externs)  { @@ -463,7 +529,8 @@ static int show_available_vars_at(int fd, struct perf_probe_event *pev,  		return -EINVAL;  	pr_debug("Searching variables at %s\n", buf); -	ret = find_available_vars_at(fd, pev, &vls, max_vls, externs); +	ret = debuginfo__find_available_vars_at(dinfo, pev, &vls, +						max_vls, externs);  	if (ret <= 0) {  		pr_err("Failed to find variables at %s (%d)\n", buf, ret);  		goto end; @@ -504,24 +571,26 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs,  			int max_vls, const char *module,  			struct strfilter *_filter, bool externs)  { -	int i, fd, ret = 0; +	int i, ret = 0; +	struct debuginfo *dinfo;  	ret = init_vmlinux();  	if (ret < 0)  		return ret; +	dinfo = open_debuginfo(module); +	if (!dinfo) { +		pr_warning("Failed to open debuginfo file.\n"); +		return -ENOENT; +	} +  	setup_pager(); -	for (i = 0; i < npevs && ret >= 0; i++) { -		fd = open_vmlinux(module); -		if (fd < 0) { -			pr_warning("Failed to open debug information file.\n"); -			ret = fd; -			break; -		} -		ret = show_available_vars_at(fd, &pevs[i], max_vls, _filter, +	for (i = 0; i < npevs && ret >= 0; i++) +		ret = show_available_vars_at(dinfo, &pevs[i], max_vls, _filter,  					     externs); -	} + +	debuginfo__delete(dinfo);  	return ret;  } @@ -990,7 +1059,7 @@ bool perf_probe_event_need_dwarf(struct perf_probe_event *pev)  /* Parse probe_events event into struct probe_point */  static int parse_probe_trace_command(const char *cmd, -					struct probe_trace_event *tev) +				     struct probe_trace_event *tev)  {  	struct probe_trace_point *tp = &tev->point;  	char pr; @@ -1023,8 +1092,14 @@ static int parse_probe_trace_command(const char *cmd,  	tp->retprobe = (pr == 'r'); -	/* Scan function name and offset */ -	ret = sscanf(argv[1], "%a[^+]+%lu", (float *)(void *)&tp->symbol, +	/* Scan module name(if there), function name and offset */ +	p = strchr(argv[1], ':'); +	if (p) { +		tp->module = strndup(argv[1], p - argv[1]); +		p++; +	} else +		p = argv[1]; +	ret = sscanf(p, "%a[^+]+%lu", (float *)(void *)&tp->symbol,  		     &tp->offset);  	if (ret == 1)  		tp->offset = 0; @@ -1269,9 +1344,10 @@ char *synthesize_probe_trace_command(struct probe_trace_event *tev)  	if (buf == NULL)  		return NULL; -	len = e_snprintf(buf, MAX_CMDLEN, "%c:%s/%s %s+%lu", +	len = e_snprintf(buf, MAX_CMDLEN, "%c:%s/%s %s%s%s+%lu",  			 tp->retprobe ? 'r' : 'p',  			 tev->group, tev->event, +			 tp->module ?: "", tp->module ? 
":" : "",  			 tp->symbol, tp->offset);  	if (len <= 0)  		goto error; @@ -1378,6 +1454,8 @@ static void clear_probe_trace_event(struct probe_trace_event *tev)  		free(tev->group);  	if (tev->point.symbol)  		free(tev->point.symbol); +	if (tev->point.module) +		free(tev->point.module);  	for (i = 0; i < tev->nargs; i++) {  		if (tev->args[i].name)  			free(tev->args[i].name); @@ -1729,7 +1807,7 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,  	/* Convert perf_probe_event with debuginfo */  	ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, module);  	if (ret != 0) -		return ret; +		return ret;	/* Found in debuginfo or got an error */  	/* Allocate trace event buffer */  	tev = *tevs = zalloc(sizeof(struct probe_trace_event)); @@ -1742,6 +1820,11 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,  		ret = -ENOMEM;  		goto error;  	} +	tev->point.module = strdup(module); +	if (tev->point.module == NULL) { +		ret = -ENOMEM; +		goto error; +	}  	tev->point.offset = pev->point.offset;  	tev->point.retprobe = pev->point.retprobe;  	tev->nargs = pev->nargs; diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 3434fc9d79d5..a7dee835f49c 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -10,6 +10,7 @@ extern bool probe_event_dry_run;  /* kprobe-tracer tracing point */  struct probe_trace_point {  	char		*symbol;	/* Base symbol */ +	char		*module;	/* Module name */  	unsigned long	offset;		/* Offset from symbol */  	bool		retprobe;	/* Return probe flag */  }; diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 3b9d0b800d5c..3e44a3e36519 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -43,21 +43,6 @@  /* Kprobe tracer basic type is up to u64 */  #define MAX_BASIC_TYPE_BITS	64 -/* - * Compare the tail of two strings. - * Return 0 if whole of either string is same as another's tail part. 
- */ -static int strtailcmp(const char *s1, const char *s2) -{ -	int i1 = strlen(s1); -	int i2 = strlen(s2); -	while (--i1 >= 0 && --i2 >= 0) { -		if (s1[i1] != s2[i2]) -			return s1[i1] - s2[i2]; -	} -	return 0; -} -  /* Line number list operations */  /* Add a line to line number list */ @@ -131,29 +116,37 @@ static const Dwfl_Callbacks offline_callbacks = {  };  /* Get a Dwarf from offline image */ -static Dwarf *dwfl_init_offline_dwarf(int fd, Dwfl **dwflp, Dwarf_Addr *bias) +static int debuginfo__init_offline_dwarf(struct debuginfo *self, +					 const char *path)  {  	Dwfl_Module *mod; -	Dwarf *dbg = NULL; +	int fd; -	if (!dwflp) -		return NULL; +	fd = open(path, O_RDONLY); +	if (fd < 0) +		return fd; -	*dwflp = dwfl_begin(&offline_callbacks); -	if (!*dwflp) -		return NULL; +	self->dwfl = dwfl_begin(&offline_callbacks); +	if (!self->dwfl) +		goto error; -	mod = dwfl_report_offline(*dwflp, "", "", fd); +	mod = dwfl_report_offline(self->dwfl, "", "", fd);  	if (!mod)  		goto error; -	dbg = dwfl_module_getdwarf(mod, bias); -	if (!dbg) { +	self->dbg = dwfl_module_getdwarf(mod, &self->bias); +	if (!self->dbg) +		goto error; + +	return 0;  error: -		dwfl_end(*dwflp); -		*dwflp = NULL; -	} -	return dbg; +	if (self->dwfl) +		dwfl_end(self->dwfl); +	else +		close(fd); +	memset(self, 0, sizeof(*self)); + +	return -ENOENT;  }  #if _ELFUTILS_PREREQ(0, 148) @@ -189,597 +182,81 @@ static const Dwfl_Callbacks kernel_callbacks = {  };  /* Get a Dwarf from live kernel image */ -static Dwarf *dwfl_init_live_kernel_dwarf(Dwarf_Addr addr, Dwfl **dwflp, -					  Dwarf_Addr *bias) +static int debuginfo__init_online_kernel_dwarf(struct debuginfo *self, +					       Dwarf_Addr addr)  { -	Dwarf *dbg; - -	if (!dwflp) -		return NULL; - -	*dwflp = dwfl_begin(&kernel_callbacks); -	if (!*dwflp) -		return NULL; +	self->dwfl = dwfl_begin(&kernel_callbacks); +	if (!self->dwfl) +		return -EINVAL;  	/* Load the kernel dwarves: Don't care the result here */ -	dwfl_linux_kernel_report_kernel(*dwflp); -	dwfl_linux_kernel_report_modules(*dwflp); +	dwfl_linux_kernel_report_kernel(self->dwfl); +	dwfl_linux_kernel_report_modules(self->dwfl); -	dbg = dwfl_addrdwarf(*dwflp, addr, bias); +	self->dbg = dwfl_addrdwarf(self->dwfl, addr, &self->bias);  	/* Here, check whether we could get a real dwarf */ -	if (!dbg) { +	if (!self->dbg) {  		pr_debug("Failed to find kernel dwarf at %lx\n",  			 (unsigned long)addr); -		dwfl_end(*dwflp); -		*dwflp = NULL; +		dwfl_end(self->dwfl); +		memset(self, 0, sizeof(*self)); +		return -ENOENT;  	} -	return dbg; + +	return 0;  }  #else  /* With older elfutils, this just support kernel module... */ -static Dwarf *dwfl_init_live_kernel_dwarf(Dwarf_Addr addr __used, Dwfl **dwflp, -					  Dwarf_Addr *bias) +static int debuginfo__init_online_kernel_dwarf(struct debuginfo *self, +					       Dwarf_Addr addr __used)  { -	int fd;  	const char *path = kernel_get_module_path("kernel");  	if (!path) {  		pr_err("Failed to find vmlinux path\n"); -		return NULL; +		return -ENOENT;  	}  	pr_debug2("Use file %s for debuginfo\n", path); -	fd = open(path, O_RDONLY); -	if (fd < 0) -		return NULL; - -	return dwfl_init_offline_dwarf(fd, dwflp, bias); +	return debuginfo__init_offline_dwarf(self, path);  }  #endif -/* Dwarf wrappers */ - -/* Find the realpath of the target file. 
*/ -static const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname) -{ -	Dwarf_Files *files; -	size_t nfiles, i; -	const char *src = NULL; -	int ret; - -	if (!fname) -		return NULL; - -	ret = dwarf_getsrcfiles(cu_die, &files, &nfiles); -	if (ret != 0) -		return NULL; - -	for (i = 0; i < nfiles; i++) { -		src = dwarf_filesrc(files, i, NULL, NULL); -		if (strtailcmp(src, fname) == 0) -			break; -	} -	if (i == nfiles) -		return NULL; -	return src; -} - -/* Get DW_AT_comp_dir (should be NULL with older gcc) */ -static const char *cu_get_comp_dir(Dwarf_Die *cu_die) -{ -	Dwarf_Attribute attr; -	if (dwarf_attr(cu_die, DW_AT_comp_dir, &attr) == NULL) -		return NULL; -	return dwarf_formstring(&attr); -} - -/* Get a line number and file name for given address */ -static int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr, -			    const char **fname, int *lineno) -{ -	Dwarf_Line *line; -	Dwarf_Addr laddr; - -	line = dwarf_getsrc_die(cudie, (Dwarf_Addr)addr); -	if (line && dwarf_lineaddr(line, &laddr) == 0 && -	    addr == (unsigned long)laddr && dwarf_lineno(line, lineno) == 0) { -		*fname = dwarf_linesrc(line, NULL, NULL); -		if (!*fname) -			/* line number is useless without filename */ -			*lineno = 0; -	} - -	return *lineno ?: -ENOENT; -} - -/* Compare diename and tname */ -static bool die_compare_name(Dwarf_Die *dw_die, const char *tname) -{ -	const char *name; -	name = dwarf_diename(dw_die); -	return name ? (strcmp(tname, name) == 0) : false; -} - -/* Get callsite line number of inline-function instance */ -static int die_get_call_lineno(Dwarf_Die *in_die) -{ -	Dwarf_Attribute attr; -	Dwarf_Word ret; - -	if (!dwarf_attr(in_die, DW_AT_call_line, &attr)) -		return -ENOENT; - -	dwarf_formudata(&attr, &ret); -	return (int)ret; -} - -/* Get type die */ -static Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem) -{ -	Dwarf_Attribute attr; - -	if (dwarf_attr_integrate(vr_die, DW_AT_type, &attr) && -	    dwarf_formref_die(&attr, die_mem)) -		return die_mem; -	else -		return NULL; -} - -/* Get a type die, but skip qualifiers */ -static Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem) -{ -	int tag; - -	do { -		vr_die = die_get_type(vr_die, die_mem); -		if (!vr_die) -			break; -		tag = dwarf_tag(vr_die); -	} while (tag == DW_TAG_const_type || -		 tag == DW_TAG_restrict_type || -		 tag == DW_TAG_volatile_type || -		 tag == DW_TAG_shared_type); - -	return vr_die; -} - -/* Get a type die, but skip qualifiers and typedef */ -static Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem) -{ -	do { -		vr_die = __die_get_real_type(vr_die, die_mem); -	} while (vr_die && dwarf_tag(vr_die) == DW_TAG_typedef); - -	return vr_die; -} - -static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name, -			      Dwarf_Word *result) -{ -	Dwarf_Attribute attr; - -	if (dwarf_attr(tp_die, attr_name, &attr) == NULL || -	    dwarf_formudata(&attr, result) != 0) -		return -ENOENT; - -	return 0; -} - -static bool die_is_signed_type(Dwarf_Die *tp_die) -{ -	Dwarf_Word ret; - -	if (die_get_attr_udata(tp_die, DW_AT_encoding, &ret)) -		return false; - -	return (ret == DW_ATE_signed_char || ret == DW_ATE_signed || -		ret == DW_ATE_signed_fixed); -} - -static int die_get_byte_size(Dwarf_Die *tp_die) -{ -	Dwarf_Word ret; - -	if (die_get_attr_udata(tp_die, DW_AT_byte_size, &ret)) -		return 0; - -	return (int)ret; -} - -static int die_get_bit_size(Dwarf_Die *tp_die) -{ -	Dwarf_Word ret; - -	if (die_get_attr_udata(tp_die, DW_AT_bit_size, &ret)) -		return 0; - -	return 
(int)ret; -} - -static int die_get_bit_offset(Dwarf_Die *tp_die) -{ -	Dwarf_Word ret; - -	if (die_get_attr_udata(tp_die, DW_AT_bit_offset, &ret)) -		return 0; - -	return (int)ret; -} - -/* Get data_member_location offset */ -static int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs) -{ -	Dwarf_Attribute attr; -	Dwarf_Op *expr; -	size_t nexpr; -	int ret; - -	if (dwarf_attr(mb_die, DW_AT_data_member_location, &attr) == NULL) -		return -ENOENT; - -	if (dwarf_formudata(&attr, offs) != 0) { -		/* DW_AT_data_member_location should be DW_OP_plus_uconst */ -		ret = dwarf_getlocation(&attr, &expr, &nexpr); -		if (ret < 0 || nexpr == 0) -			return -ENOENT; - -		if (expr[0].atom != DW_OP_plus_uconst || nexpr != 1) { -			pr_debug("Unable to get offset:Unexpected OP %x (%zd)\n", -				 expr[0].atom, nexpr); -			return -ENOTSUP; -		} -		*offs = (Dwarf_Word)expr[0].number; -	} -	return 0; -} - -/* Return values for die_find callbacks */ -enum { -	DIE_FIND_CB_FOUND = 0,		/* End of Search */ -	DIE_FIND_CB_CHILD = 1,		/* Search only children */ -	DIE_FIND_CB_SIBLING = 2,	/* Search only siblings */ -	DIE_FIND_CB_CONTINUE = 3,	/* Search children and siblings */ -}; - -/* Search a child die */ -static Dwarf_Die *die_find_child(Dwarf_Die *rt_die, -				 int (*callback)(Dwarf_Die *, void *), -				 void *data, Dwarf_Die *die_mem) +struct debuginfo *debuginfo__new(const char *path)  { -	Dwarf_Die child_die; -	int ret; - -	ret = dwarf_child(rt_die, die_mem); -	if (ret != 0) +	struct debuginfo *self = zalloc(sizeof(struct debuginfo)); +	if (!self)  		return NULL; -	do { -		ret = callback(die_mem, data); -		if (ret == DIE_FIND_CB_FOUND) -			return die_mem; - -		if ((ret & DIE_FIND_CB_CHILD) && -		    die_find_child(die_mem, callback, data, &child_die)) { -			memcpy(die_mem, &child_die, sizeof(Dwarf_Die)); -			return die_mem; -		} -	} while ((ret & DIE_FIND_CB_SIBLING) && -		 dwarf_siblingof(die_mem, die_mem) == 0); - -	return NULL; -} - -struct __addr_die_search_param { -	Dwarf_Addr	addr; -	Dwarf_Die	*die_mem; -}; - -static int __die_search_func_cb(Dwarf_Die *fn_die, void *data) -{ -	struct __addr_die_search_param *ad = data; - -	if (dwarf_tag(fn_die) == DW_TAG_subprogram && -	    dwarf_haspc(fn_die, ad->addr)) { -		memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die)); -		return DWARF_CB_ABORT; +	if (debuginfo__init_offline_dwarf(self, path) < 0) { +		free(self); +		self = NULL;  	} -	return DWARF_CB_OK; -} - -/* Search a real subprogram including this line, */ -static Dwarf_Die *die_find_real_subprogram(Dwarf_Die *cu_die, Dwarf_Addr addr, -					   Dwarf_Die *die_mem) -{ -	struct __addr_die_search_param ad; -	ad.addr = addr; -	ad.die_mem = die_mem; -	/* dwarf_getscopes can't find subprogram. */ -	if (!dwarf_getfuncs(cu_die, __die_search_func_cb, &ad, 0)) -		return NULL; -	else -		return die_mem; -} - -/* die_find callback for inline function search */ -static int __die_find_inline_cb(Dwarf_Die *die_mem, void *data) -{ -	Dwarf_Addr *addr = data; - -	if (dwarf_tag(die_mem) == DW_TAG_inlined_subroutine && -	    dwarf_haspc(die_mem, *addr)) -		return DIE_FIND_CB_FOUND; -	return DIE_FIND_CB_CONTINUE; +	return self;  } -/* Similar to dwarf_getfuncs, but returns inlined_subroutine if exists. 
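The open-coded Dwfl/Dwarf/bias triple is now wrapped in struct debuginfo with constructor and destructor pairs, so callers follow a simple new/use/delete pattern. A usage sketch, assuming it is built inside the perf tree where probe-finder.h and the probe-event definitions it pulls in are available; the wrapper function itself is hypothetical.

#include <errno.h>
#include "probe-finder.h"

static int count_trace_events(const char *debuginfo_path,
			      struct perf_probe_event *pev,
			      struct probe_trace_event **tevs, int max_tevs)
{
	struct debuginfo *dinfo;
	int ntevs;

	dinfo = debuginfo__new(debuginfo_path);	/* NULL if no usable dwarf */
	if (!dinfo)
		return -ENOENT;

	ntevs = debuginfo__find_trace_events(dinfo, pev, tevs, max_tevs);
	debuginfo__delete(dinfo);		/* ends the dwfl session too */

	return ntevs;
}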
*/ -static Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, -				      Dwarf_Die *die_mem) +struct debuginfo *debuginfo__new_online_kernel(unsigned long addr)  { -	Dwarf_Die tmp_die; - -	sp_die = die_find_child(sp_die, __die_find_inline_cb, &addr, &tmp_die); -	if (!sp_die) +	struct debuginfo *self = zalloc(sizeof(struct debuginfo)); +	if (!self)  		return NULL; -	/* Inlined function could be recursive. Trace it until fail */ -	while (sp_die) { -		memcpy(die_mem, sp_die, sizeof(Dwarf_Die)); -		sp_die = die_find_child(sp_die, __die_find_inline_cb, &addr, -					&tmp_die); -	} - -	return die_mem; -} - -/* Walker on lines (Note: line number will not be sorted) */ -typedef int (* line_walk_handler_t) (const char *fname, int lineno, -				     Dwarf_Addr addr, void *data); - -struct __line_walk_param { -	const char *fname; -	line_walk_handler_t handler; -	void *data; -	int retval; -}; - -static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data) -{ -	struct __line_walk_param *lw = data; -	Dwarf_Addr addr; -	int lineno; - -	if (dwarf_tag(in_die) == DW_TAG_inlined_subroutine) { -		lineno = die_get_call_lineno(in_die); -		if (lineno > 0 && dwarf_entrypc(in_die, &addr) == 0) { -			lw->retval = lw->handler(lw->fname, lineno, addr, -						 lw->data); -			if (lw->retval != 0) -				return DIE_FIND_CB_FOUND; -		} -	} -	return DIE_FIND_CB_SIBLING; -} - -/* Walk on lines of blocks included in given DIE */ -static int __die_walk_funclines(Dwarf_Die *sp_die, -				line_walk_handler_t handler, void *data) -{ -	struct __line_walk_param lw = { -		.handler = handler, -		.data = data, -		.retval = 0, -	}; -	Dwarf_Die die_mem; -	Dwarf_Addr addr; -	int lineno; - -	/* Handle function declaration line */ -	lw.fname = dwarf_decl_file(sp_die); -	if (lw.fname && dwarf_decl_line(sp_die, &lineno) == 0 && -	    dwarf_entrypc(sp_die, &addr) == 0) { -		lw.retval = handler(lw.fname, lineno, addr, data); -		if (lw.retval != 0) -			goto done; -	} -	die_find_child(sp_die, __die_walk_funclines_cb, &lw, &die_mem); -done: -	return lw.retval; -} - -static int __die_walk_culines_cb(Dwarf_Die *sp_die, void *data) -{ -	struct __line_walk_param *lw = data; - -	lw->retval = __die_walk_funclines(sp_die, lw->handler, lw->data); -	if (lw->retval != 0) -		return DWARF_CB_ABORT; - -	return DWARF_CB_OK; -} - -/* - * Walk on lines inside given PDIE. If the PDIE is subprogram, walk only on - * the lines inside the subprogram, otherwise PDIE must be a CU DIE. - */ -static int die_walk_lines(Dwarf_Die *pdie, line_walk_handler_t handler, -			  void *data) -{ -	Dwarf_Lines *lines; -	Dwarf_Line *line; -	Dwarf_Addr addr; -	const char *fname; -	int lineno, ret = 0; -	Dwarf_Die die_mem, *cu_die; -	size_t nlines, i; - -	/* Get the CU die */ -	if (dwarf_tag(pdie) == DW_TAG_subprogram) -		cu_die = dwarf_diecu(pdie, &die_mem, NULL, NULL); -	else -		cu_die = pdie; -	if (!cu_die) { -		pr_debug2("Failed to get CU from subprogram\n"); -		return -EINVAL; -	} - -	/* Get lines list in the CU */ -	if (dwarf_getsrclines(cu_die, &lines, &nlines) != 0) { -		pr_debug2("Failed to get source lines on this CU.\n"); -		return -ENOENT; -	} -	pr_debug2("Get %zd lines from this CU\n", nlines); - -	/* Walk on the lines on lines list */ -	for (i = 0; i < nlines; i++) { -		line = dwarf_onesrcline(lines, i); -		if (line == NULL || -		    dwarf_lineno(line, &lineno) != 0 || -		    dwarf_lineaddr(line, &addr) != 0) { -			pr_debug2("Failed to get line info. 
" -				  "Possible error in debuginfo.\n"); -			continue; -		} -		/* Filter lines based on address */ -		if (pdie != cu_die) -			/* -			 * Address filtering -			 * The line is included in given function, and -			 * no inline block includes it. -			 */ -			if (!dwarf_haspc(pdie, addr) || -			    die_find_inlinefunc(pdie, addr, &die_mem)) -				continue; -		/* Get source line */ -		fname = dwarf_linesrc(line, NULL, NULL); - -		ret = handler(fname, lineno, addr, data); -		if (ret != 0) -			return ret; -	} - -	/* -	 * Dwarf lines doesn't include function declarations and inlined -	 * subroutines. We have to check functions list or given function. -	 */ -	if (pdie != cu_die) -		ret = __die_walk_funclines(pdie, handler, data); -	else { -		struct __line_walk_param param = { -			.handler = handler, -			.data = data, -			.retval = 0, -		}; -		dwarf_getfuncs(cu_die, __die_walk_culines_cb, ¶m, 0); -		ret = param.retval; +	if (debuginfo__init_online_kernel_dwarf(self, (Dwarf_Addr)addr) < 0) { +		free(self); +		self = NULL;  	} -	return ret; -} - -struct __find_variable_param { -	const char *name; -	Dwarf_Addr addr; -}; - -static int __die_find_variable_cb(Dwarf_Die *die_mem, void *data) -{ -	struct __find_variable_param *fvp = data; -	int tag; - -	tag = dwarf_tag(die_mem); -	if ((tag == DW_TAG_formal_parameter || -	     tag == DW_TAG_variable) && -	    die_compare_name(die_mem, fvp->name)) -		return DIE_FIND_CB_FOUND; - -	if (dwarf_haspc(die_mem, fvp->addr)) -		return DIE_FIND_CB_CONTINUE; -	else -		return DIE_FIND_CB_SIBLING; -} - -/* Find a variable called 'name' at given address */ -static Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name, -				       Dwarf_Addr addr, Dwarf_Die *die_mem) -{ -	struct __find_variable_param fvp = { .name = name, .addr = addr}; - -	return die_find_child(sp_die, __die_find_variable_cb, (void *)&fvp, -			      die_mem); -} - -static int __die_find_member_cb(Dwarf_Die *die_mem, void *data) -{ -	const char *name = data; - -	if ((dwarf_tag(die_mem) == DW_TAG_member) && -	    die_compare_name(die_mem, name)) -		return DIE_FIND_CB_FOUND; - -	return DIE_FIND_CB_SIBLING; -} - -/* Find a member called 'name' */ -static Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name, -				  Dwarf_Die *die_mem) -{ -	return die_find_child(st_die, __die_find_member_cb, (void *)name, -			      die_mem); -} - -/* Get the name of given variable DIE */ -static int die_get_typename(Dwarf_Die *vr_die, char *buf, int len) -{ -	Dwarf_Die type; -	int tag, ret, ret2; -	const char *tmp = ""; - -	if (__die_get_real_type(vr_die, &type) == NULL) -		return -ENOENT; - -	tag = dwarf_tag(&type); -	if (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type) -		tmp = "*"; -	else if (tag == DW_TAG_subroutine_type) { -		/* Function pointer */ -		ret = snprintf(buf, len, "(function_type)"); -		return (ret >= len) ? -E2BIG : ret; -	} else { -		if (!dwarf_diename(&type)) -			return -ENOENT; -		if (tag == DW_TAG_union_type) -			tmp = "union "; -		else if (tag == DW_TAG_structure_type) -			tmp = "struct "; -		/* Write a base name */ -		ret = snprintf(buf, len, "%s%s", tmp, dwarf_diename(&type)); -		return (ret >= len) ? -E2BIG : ret; -	} -	ret = die_get_typename(&type, buf, len); -	if (ret > 0) { -		ret2 = snprintf(buf + ret, len - ret, "%s", tmp); -		ret = (ret2 >= len - ret) ? 
-E2BIG : ret2 + ret; -	} -	return ret; +	return self;  } -/* Get the name and type of given variable DIE, stored as "type\tname" */ -static int die_get_varname(Dwarf_Die *vr_die, char *buf, int len) +void debuginfo__delete(struct debuginfo *self)  { -	int ret, ret2; - -	ret = die_get_typename(vr_die, buf, len); -	if (ret < 0) { -		pr_debug("Failed to get type, make it unknown.\n"); -		ret = snprintf(buf, len, "(unknown_type)"); +	if (self) { +		if (self->dwfl) +			dwfl_end(self->dwfl); +		free(self);  	} -	if (ret > 0) { -		ret2 = snprintf(buf + ret, len - ret, "\t%s", -				dwarf_diename(vr_die)); -		ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret; -	} -	return ret;  }  /* @@ -897,6 +374,7 @@ static int convert_variable_type(Dwarf_Die *vr_die,  	struct probe_trace_arg_ref **ref_ptr = &tvar->ref;  	Dwarf_Die type;  	char buf[16]; +	int bsize, boffs, total;  	int ret;  	/* TODO: check all types */ @@ -906,11 +384,15 @@ static int convert_variable_type(Dwarf_Die *vr_die,  		return (tvar->type == NULL) ? -ENOMEM : 0;  	} -	if (die_get_bit_size(vr_die) != 0) { +	bsize = dwarf_bitsize(vr_die); +	if (bsize > 0) {  		/* This is a bitfield */ -		ret = snprintf(buf, 16, "b%d@%d/%zd", die_get_bit_size(vr_die), -				die_get_bit_offset(vr_die), -				BYTES_TO_BITS(die_get_byte_size(vr_die))); +		boffs = dwarf_bitoffset(vr_die); +		total = dwarf_bytesize(vr_die); +		if (boffs < 0 || total < 0) +			return -ENOENT; +		ret = snprintf(buf, 16, "b%d@%d/%zd", bsize, boffs, +				BYTES_TO_BITS(total));  		goto formatted;  	} @@ -958,10 +440,11 @@ static int convert_variable_type(Dwarf_Die *vr_die,  		return (tvar->type == NULL) ? -ENOMEM : 0;  	} -	ret = BYTES_TO_BITS(die_get_byte_size(&type)); -	if (!ret) +	ret = dwarf_bytesize(&type); +	if (ret <= 0)  		/* No size ... try to use default type */  		return 0; +	ret = BYTES_TO_BITS(ret);  	/* Check the bitwidth */  	if (ret > MAX_BASIC_TYPE_BITS) { @@ -1025,7 +508,7 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,  			else  				*ref_ptr = ref;  		} -		ref->offset += die_get_byte_size(&type) * field->index; +		ref->offset += dwarf_bytesize(&type) * field->index;  		if (!field->next)  			/* Save vr_die for converting types */  			memcpy(die_mem, vr_die, sizeof(*die_mem)); @@ -1245,8 +728,7 @@ static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf)  	/* If no real subprogram, find a real one */  	if (!sp_die || dwarf_tag(sp_die) != DW_TAG_subprogram) { -		sp_die = die_find_real_subprogram(&pf->cu_die, -						  pf->addr, &die_mem); +		sp_die = die_find_realfunc(&pf->cu_die, pf->addr, &die_mem);  		if (!sp_die) {  			pr_warning("Failed to find probe point in any "  				   "functions.\n"); @@ -1504,28 +986,18 @@ static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data)  }  /* Find probe points from debuginfo */ -static int find_probes(int fd, struct probe_finder *pf) +static int debuginfo__find_probes(struct debuginfo *self, +				  struct probe_finder *pf)  {  	struct perf_probe_point *pp = &pf->pev->point;  	Dwarf_Off off, noff;  	size_t cuhl;  	Dwarf_Die *diep; -	Dwarf *dbg = NULL; -	Dwfl *dwfl; -	Dwarf_Addr bias;	/* Currently ignored */  	int ret = 0; -	dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias); -	if (!dbg) { -		pr_warning("No debug information found in the vmlinux - " -			"please rebuild with CONFIG_DEBUG_INFO=y.\n"); -		close(fd);	/* Without dwfl_end(), fd isn't closed. 
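convert_variable_type() now asks libdw directly for the bitfield geometry instead of re-reading the raw DW_AT_* attributes, and the resulting fetch-arg type string is b<bit-size>@<bit-offset>/<container-bits>. A standalone sketch of that formatting, using only the libdw calls the patch itself relies on (dwarf_bitsize, dwarf_bitoffset, dwarf_bytesize); the helper name and buffer handling are made up.

#include <stdio.h>
#include <elfutils/libdw.h>

static int format_bitfield_type(Dwarf_Die *vr_die, char *buf, size_t len)
{
	int bsize = dwarf_bitsize(vr_die);	/* <= 0: not a bitfield */
	int boffs = dwarf_bitoffset(vr_die);
	int bytes = dwarf_bytesize(vr_die);	/* size of the containing word */

	if (bsize <= 0 || boffs < 0 || bytes < 0)
		return -1;

	/* e.g. a 3-bit field at bit 5 of a 4-byte member yields "b3@5/32" */
	return snprintf(buf, len, "b%d@%d/%d", bsize, boffs, bytes * 8);
}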
*/ -		return -EBADF; -	} -  #if _ELFUTILS_PREREQ(0, 142)  	/* Get the call frame information from this dwarf */ -	pf->cfi = dwarf_getcfi(dbg); +	pf->cfi = dwarf_getcfi(self->dbg);  #endif  	off = 0; @@ -1544,7 +1016,8 @@ static int find_probes(int fd, struct probe_finder *pf)  			.data = pf,  		}; -		dwarf_getpubnames(dbg, pubname_search_cb, &pubname_param, 0); +		dwarf_getpubnames(self->dbg, pubname_search_cb, +				  &pubname_param, 0);  		if (pubname_param.found) {  			ret = probe_point_search_cb(&pf->sp_die, &probe_param);  			if (ret) @@ -1553,9 +1026,9 @@ static int find_probes(int fd, struct probe_finder *pf)  	}  	/* Loop on CUs (Compilation Unit) */ -	while (!dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL)) { +	while (!dwarf_nextcu(self->dbg, off, &noff, &cuhl, NULL, NULL, NULL)) {  		/* Get the DIE(Debugging Information Entry) of this CU */ -		diep = dwarf_offdie(dbg, off + cuhl, &pf->cu_die); +		diep = dwarf_offdie(self->dbg, off + cuhl, &pf->cu_die);  		if (!diep)  			continue; @@ -1582,8 +1055,6 @@ static int find_probes(int fd, struct probe_finder *pf)  found:  	line_list__free(&pf->lcache); -	if (dwfl) -		dwfl_end(dwfl);  	return ret;  } @@ -1629,8 +1100,9 @@ static int add_probe_trace_event(Dwarf_Die *sp_die, struct probe_finder *pf)  }  /* Find probe_trace_events specified by perf_probe_event from debuginfo */ -int find_probe_trace_events(int fd, struct perf_probe_event *pev, -			    struct probe_trace_event **tevs, int max_tevs) +int debuginfo__find_trace_events(struct debuginfo *self, +				 struct perf_probe_event *pev, +				 struct probe_trace_event **tevs, int max_tevs)  {  	struct trace_event_finder tf = {  			.pf = {.pev = pev, .callback = add_probe_trace_event}, @@ -1645,7 +1117,7 @@ int find_probe_trace_events(int fd, struct perf_probe_event *pev,  	tf.tevs = *tevs;  	tf.ntevs = 0; -	ret = find_probes(fd, &tf.pf); +	ret = debuginfo__find_probes(self, &tf.pf);  	if (ret < 0) {  		free(*tevs);  		*tevs = NULL; @@ -1739,9 +1211,10 @@ out:  }  /* Find available variables at given probe point */ -int find_available_vars_at(int fd, struct perf_probe_event *pev, -			   struct variable_list **vls, int max_vls, -			   bool externs) +int debuginfo__find_available_vars_at(struct debuginfo *self, +				      struct perf_probe_event *pev, +				      struct variable_list **vls, +				      int max_vls, bool externs)  {  	struct available_var_finder af = {  			.pf = {.pev = pev, .callback = add_available_vars}, @@ -1756,7 +1229,7 @@ int find_available_vars_at(int fd, struct perf_probe_event *pev,  	af.vls = *vls;  	af.nvls = 0; -	ret = find_probes(fd, &af.pf); +	ret = debuginfo__find_probes(self, &af.pf);  	if (ret < 0) {  		/* Free vlist for error */  		while (af.nvls--) { @@ -1774,28 +1247,19 @@ int find_available_vars_at(int fd, struct perf_probe_event *pev,  }  /* Reverse search */ -int find_perf_probe_point(unsigned long addr, struct perf_probe_point *ppt) +int debuginfo__find_probe_point(struct debuginfo *self, unsigned long addr, +				struct perf_probe_point *ppt)  {  	Dwarf_Die cudie, spdie, indie; -	Dwarf *dbg = NULL; -	Dwfl *dwfl = NULL; -	Dwarf_Addr _addr, baseaddr, bias = 0; +	Dwarf_Addr _addr, baseaddr;  	const char *fname = NULL, *func = NULL, *tmp;  	int baseline = 0, lineno = 0, ret = 0; -	/* Open the live linux kernel */ -	dbg = dwfl_init_live_kernel_dwarf(addr, &dwfl, &bias); -	if (!dbg) { -		pr_warning("No debug information found in the vmlinux - " -			"please rebuild with CONFIG_DEBUG_INFO=y.\n"); -		ret = -EINVAL; -		goto end; -	} -  	/* Adjust address 
with bias */ -	addr += bias; +	addr += self->bias; +  	/* Find cu die */ -	if (!dwarf_addrdie(dbg, (Dwarf_Addr)addr - bias, &cudie)) { +	if (!dwarf_addrdie(self->dbg, (Dwarf_Addr)addr - self->bias, &cudie)) {  		pr_warning("Failed to find debug information for address %lx\n",  			   addr);  		ret = -EINVAL; @@ -1807,7 +1271,7 @@ int find_perf_probe_point(unsigned long addr, struct perf_probe_point *ppt)  	/* Don't care whether it failed or not */  	/* Find a corresponding function (name, baseline and baseaddr) */ -	if (die_find_real_subprogram(&cudie, (Dwarf_Addr)addr, &spdie)) { +	if (die_find_realfunc(&cudie, (Dwarf_Addr)addr, &spdie)) {  		/* Get function entry information */  		tmp = dwarf_diename(&spdie);  		if (!tmp || @@ -1871,8 +1335,6 @@ post:  		}  	}  end: -	if (dwfl) -		dwfl_end(dwfl);  	if (ret == 0 && (fname || func))  		ret = 1;	/* Found a point */  	return ret; @@ -1982,26 +1444,15 @@ static int find_line_range_by_func(struct line_finder *lf)  	return param.retval;  } -int find_line_range(int fd, struct line_range *lr) +int debuginfo__find_line_range(struct debuginfo *self, struct line_range *lr)  {  	struct line_finder lf = {.lr = lr, .found = 0};  	int ret = 0;  	Dwarf_Off off = 0, noff;  	size_t cuhl;  	Dwarf_Die *diep; -	Dwarf *dbg = NULL; -	Dwfl *dwfl; -	Dwarf_Addr bias;	/* Currently ignored */  	const char *comp_dir; -	dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias); -	if (!dbg) { -		pr_warning("No debug information found in the vmlinux - " -			"please rebuild with CONFIG_DEBUG_INFO=y.\n"); -		close(fd);	/* Without dwfl_end(), fd isn't closed. */ -		return -EBADF; -	} -  	/* Fastpath: lookup by function name from .debug_pubnames section */  	if (lr->function) {  		struct pubname_callback_param pubname_param = { @@ -2010,7 +1461,8 @@ int find_line_range(int fd, struct line_range *lr)  		struct dwarf_callback_param line_range_param = {  			.data = (void *)&lf, .retval = 0}; -		dwarf_getpubnames(dbg, pubname_search_cb, &pubname_param, 0); +		dwarf_getpubnames(self->dbg, pubname_search_cb, +				  &pubname_param, 0);  		if (pubname_param.found) {  			line_range_search_cb(&lf.sp_die, &line_range_param);  			if (lf.found) @@ -2020,11 +1472,12 @@ int find_line_range(int fd, struct line_range *lr)  	/* Loop on CUs (Compilation Unit) */  	while (!lf.found && ret >= 0) { -		if (dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL) != 0) +		if (dwarf_nextcu(self->dbg, off, &noff, &cuhl, +				 NULL, NULL, NULL) != 0)  			break;  		/* Get the DIE(Debugging Information Entry) of this CU */ -		diep = dwarf_offdie(dbg, off + cuhl, &lf.cu_die); +		diep = dwarf_offdie(self->dbg, off + cuhl, &lf.cu_die);  		if (!diep)  			continue; @@ -2058,7 +1511,6 @@ found:  	}  	pr_debug("path: %s\n", lr->path); -	dwfl_end(dwfl);  	return (ret < 0) ? 
ret : lf.found;  } diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 605730a366db..c478b42a2473 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -16,27 +16,42 @@ static inline int is_c_varname(const char *name)  }  #ifdef DWARF_SUPPORT + +#include "dwarf-aux.h" + +/* TODO: export debuginfo data structure even if no dwarf support */ + +/* debug information structure */ +struct debuginfo { +	Dwarf		*dbg; +	Dwfl		*dwfl; +	Dwarf_Addr	bias; +}; + +extern struct debuginfo *debuginfo__new(const char *path); +extern struct debuginfo *debuginfo__new_online_kernel(unsigned long addr); +extern void debuginfo__delete(struct debuginfo *self); +  /* Find probe_trace_events specified by perf_probe_event from debuginfo */ -extern int find_probe_trace_events(int fd, struct perf_probe_event *pev, -				    struct probe_trace_event **tevs, -				    int max_tevs); +extern int debuginfo__find_trace_events(struct debuginfo *self, +					struct perf_probe_event *pev, +					struct probe_trace_event **tevs, +					int max_tevs);  /* Find a perf_probe_point from debuginfo */ -extern int find_perf_probe_point(unsigned long addr, -				 struct perf_probe_point *ppt); +extern int debuginfo__find_probe_point(struct debuginfo *self, +				       unsigned long addr, +				       struct perf_probe_point *ppt);  /* Find a line range */ -extern int find_line_range(int fd, struct line_range *lr); +extern int debuginfo__find_line_range(struct debuginfo *self, +				      struct line_range *lr);  /* Find available variables */ -extern int find_available_vars_at(int fd, struct perf_probe_event *pev, -				  struct variable_list **vls, int max_points, -				  bool externs); - -#include <dwarf.h> -#include <elfutils/libdw.h> -#include <elfutils/libdwfl.h> -#include <elfutils/version.h> +extern int debuginfo__find_available_vars_at(struct debuginfo *self, +					     struct perf_probe_event *pev, +					     struct variable_list **vls, +					     int max_points, bool externs);  struct probe_finder {  	struct perf_probe_event	*pev;		/* Target probe event */ diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index a9ac0504aabd..8e0b5a39d8a7 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -247,7 +247,7 @@ struct pyrf_cpu_map {  static int pyrf_cpu_map__init(struct pyrf_cpu_map *pcpus,  			      PyObject *args, PyObject *kwargs)  { -	static char *kwlist[] = { "cpustr", NULL, NULL, }; +	static char *kwlist[] = { "cpustr", NULL };  	char *cpustr = NULL;  	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|s", @@ -316,7 +316,7 @@ struct pyrf_thread_map {  static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads,  				 PyObject *args, PyObject *kwargs)  { -	static char *kwlist[] = { "pid", "tid", NULL, NULL, }; +	static char *kwlist[] = { "pid", "tid", NULL };  	int pid = -1, tid = -1;  	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", @@ -418,7 +418,9 @@ static int pyrf_evsel__init(struct pyrf_evsel *pevsel,  		"wakeup_events",  		"bp_type",  		"bp_addr", -		"bp_len", NULL, NULL, }; +		"bp_len", +		 NULL +	};  	u64 sample_period = 0;  	u32 disabled = 0,  	    inherit = 0, @@ -499,7 +501,7 @@ static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel,  	struct thread_map *threads = NULL;  	PyObject *pcpus = NULL, *pthreads = NULL;  	int group = 0, inherit = 0; -	static char *kwlist[] = {"cpus", "threads", "group", "inherit", NULL, NULL}; +	static char *kwlist[] = { "cpus", "threads", "group", "inherit", NULL };  	if 
(!PyArg_ParseTupleAndKeywords(args, kwargs, "|OOii", kwlist,  					 &pcpus, &pthreads, &group, &inherit)) @@ -582,8 +584,7 @@ static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist,  				   PyObject *args, PyObject *kwargs)  {  	struct perf_evlist *evlist = &pevlist->evlist; -	static char *kwlist[] = {"pages", "overwrite", -				  NULL, NULL}; +	static char *kwlist[] = { "pages", "overwrite", NULL };  	int pages = 128, overwrite = false;  	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", kwlist, @@ -603,7 +604,7 @@ static PyObject *pyrf_evlist__poll(struct pyrf_evlist *pevlist,  				   PyObject *args, PyObject *kwargs)  {  	struct perf_evlist *evlist = &pevlist->evlist; -	static char *kwlist[] = {"timeout", NULL, NULL}; +	static char *kwlist[] = { "timeout", NULL };  	int timeout = -1, n;  	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &timeout)) @@ -674,7 +675,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,  	struct perf_evlist *evlist = &pevlist->evlist;  	union perf_event *event;  	int sample_id_all = 1, cpu; -	static char *kwlist[] = {"cpu", "sample_id_all", NULL, NULL}; +	static char *kwlist[] = { "cpu", "sample_id_all", NULL };  	int err;  	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", kwlist, diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index f5a8fbdd3f76..72458d9da5b1 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -12,6 +12,7 @@  #include "session.h"  #include "sort.h"  #include "util.h" +#include "cpumap.h"  static int perf_session__open(struct perf_session *self, bool force)  { @@ -247,9 +248,14 @@ int perf_session__resolve_callchain(struct perf_session *self,  	callchain_cursor_reset(&self->callchain_cursor);  	for (i = 0; i < chain->nr; i++) { -		u64 ip = chain->ips[i]; +		u64 ip;  		struct addr_location al; +		if (callchain_param.order == ORDER_CALLEE) +			ip = chain->ips[i]; +		else +			ip = chain->ips[chain->nr - i - 1]; +  		if (ip >= PERF_CONTEXT_MAX) {  			switch (ip) {  			case PERF_CONTEXT_HV: @@ -407,20 +413,26 @@ static void perf_event__read_swap(union perf_event *event)  	event->read.id		 = bswap_64(event->read.id);  } -static void perf_event__attr_swap(union perf_event *event) +/* exported for swapping attributes in file header */ +void perf_event__attr_swap(struct perf_event_attr *attr) +{ +	attr->type		= bswap_32(attr->type); +	attr->size		= bswap_32(attr->size); +	attr->config		= bswap_64(attr->config); +	attr->sample_period	= bswap_64(attr->sample_period); +	attr->sample_type	= bswap_64(attr->sample_type); +	attr->read_format	= bswap_64(attr->read_format); +	attr->wakeup_events	= bswap_32(attr->wakeup_events); +	attr->bp_type		= bswap_32(attr->bp_type); +	attr->bp_addr		= bswap_64(attr->bp_addr); +	attr->bp_len		= bswap_64(attr->bp_len); +} + +static void perf_event__hdr_attr_swap(union perf_event *event)  {  	size_t size; -	event->attr.attr.type		= bswap_32(event->attr.attr.type); -	event->attr.attr.size		= bswap_32(event->attr.attr.size); -	event->attr.attr.config		= bswap_64(event->attr.attr.config); -	event->attr.attr.sample_period	= bswap_64(event->attr.attr.sample_period); -	event->attr.attr.sample_type	= bswap_64(event->attr.attr.sample_type); -	event->attr.attr.read_format	= bswap_64(event->attr.attr.read_format); -	event->attr.attr.wakeup_events	= bswap_32(event->attr.attr.wakeup_events); -	event->attr.attr.bp_type	= bswap_32(event->attr.attr.bp_type); -	event->attr.attr.bp_addr	= bswap_64(event->attr.attr.bp_addr); -	event->attr.attr.bp_len		
= bswap_64(event->attr.attr.bp_len); +	perf_event__attr_swap(&event->attr.attr);  	size = event->header.size;  	size -= (void *)&event->attr.id - (void *)event; @@ -448,7 +460,7 @@ static perf_event__swap_op perf_event__swap_ops[] = {  	[PERF_RECORD_LOST]		  = perf_event__all64_swap,  	[PERF_RECORD_READ]		  = perf_event__read_swap,  	[PERF_RECORD_SAMPLE]		  = perf_event__all64_swap, -	[PERF_RECORD_HEADER_ATTR]	  = perf_event__attr_swap, +	[PERF_RECORD_HEADER_ATTR]	  = perf_event__hdr_attr_swap,  	[PERF_RECORD_HEADER_EVENT_TYPE]	  = perf_event__event_type_swap,  	[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,  	[PERF_RECORD_HEADER_BUILD_ID]	  = NULL, @@ -708,9 +720,9 @@ static void dump_sample(struct perf_session *session, union perf_event *event,  	if (!dump_trace)  		return; -	printf("(IP, %d): %d/%d: %#" PRIx64 " period: %" PRIu64 "\n", +	printf("(IP, %d): %d/%d: %#" PRIx64 " period: %" PRIu64 " addr: %#" PRIx64 "\n",  	       event->header.misc, sample->pid, sample->tid, sample->ip, -	       sample->period); +	       sample->period, sample->addr);  	if (session->sample_type & PERF_SAMPLE_CALLCHAIN)  		callchain__printf(sample); @@ -1202,9 +1214,10 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,  	return NULL;  } -void perf_session__print_symbols(union perf_event *event, -				struct perf_sample *sample, -				struct perf_session *session) +void perf_session__print_ip(union perf_event *event, +			    struct perf_sample *sample, +			    struct perf_session *session, +			    int print_sym, int print_dso)  {  	struct addr_location al;  	const char *symname, *dsoname; @@ -1233,32 +1246,83 @@ void perf_session__print_symbols(union perf_event *event,  			if (!node)  				break; -			if (node->sym && node->sym->name) -				symname = node->sym->name; +			printf("\t%16" PRIx64, node->ip); +			if (print_sym) { +				if (node->sym && node->sym->name) +					symname = node->sym->name; +				else +					symname = ""; + +				printf(" %s", symname); +			} +			if (print_dso) { +				if (node->map && node->map->dso && node->map->dso->name) +					dsoname = node->map->dso->name; +				else +					dsoname = ""; + +				printf(" (%s)", dsoname); +			} +			printf("\n"); + +			callchain_cursor_advance(cursor); +		} + +	} else { +		printf("%16" PRIx64, sample->ip); +		if (print_sym) { +			if (al.sym && al.sym->name) +				symname = al.sym->name;  			else  				symname = ""; -			if (node->map && node->map->dso && node->map->dso->name) -				dsoname = node->map->dso->name; +			printf(" %s", symname); +		} + +		if (print_dso) { +			if (al.map && al.map->dso && al.map->dso->name) +				dsoname = al.map->dso->name;  			else  				dsoname = ""; -			printf("\t%16" PRIx64 " %s (%s)\n", node->ip, symname, dsoname); +			printf(" (%s)", dsoname); +		} +	} +} -			callchain_cursor_advance(cursor); +int perf_session__cpu_bitmap(struct perf_session *session, +			     const char *cpu_list, unsigned long *cpu_bitmap) +{ +	int i; +	struct cpu_map *map; + +	for (i = 0; i < PERF_TYPE_MAX; ++i) { +		struct perf_evsel *evsel; + +		evsel = perf_session__find_first_evtype(session, i); +		if (!evsel) +			continue; + +		if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) { +			pr_err("File does not contain CPU events. 
" +			       "Remove -c option to proceed.\n"); +			return -1;  		} +	} -	} else { -		if (al.sym && al.sym->name) -			symname = al.sym->name; -		else -			symname = ""; +	map = cpu_map__new(cpu_list); -		if (al.map && al.map->dso && al.map->dso->name) -			dsoname = al.map->dso->name; -		else -			dsoname = ""; +	for (i = 0; i < map->nr; i++) { +		int cpu = map->map[i]; + +		if (cpu >= MAX_NR_CPUS) { +			pr_err("Requested CPU %d too large. " +			       "Consider raising MAX_NR_CPUS\n", cpu); +			return -1; +		} -		printf("%16" PRIx64 " %s (%s)", al.addr, symname, dsoname); +		set_bit(cpu, cpu_bitmap);  	} + +	return 0;  } diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 66d4e1490879..170601e67d6b 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -112,6 +112,7 @@ int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps,  					     u64 addr);  void mem_bswap_64(void *src, int byte_size); +void perf_event__attr_swap(struct perf_event_attr *attr);  int perf_session__create_kernel_maps(struct perf_session *self); @@ -167,8 +168,12 @@ static inline int perf_session__parse_sample(struct perf_session *session,  struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,  					    unsigned int type); -void perf_session__print_symbols(union perf_event *event, +void perf_session__print_ip(union perf_event *event,  				 struct perf_sample *sample, -				 struct perf_session *session); +				 struct perf_session *session, +				 int print_sym, int print_dso); + +int perf_session__cpu_bitmap(struct perf_session *session, +			     const char *cpu_list, unsigned long *cpu_bitmap);  #endif /* __PERF_SESSION_H */ diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index f44fa541d56e..401e220566fd 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -15,95 +15,6 @@ char * field_sep;  LIST_HEAD(hist_entry__sort_list); -static int hist_entry__thread_snprintf(struct hist_entry *self, char *bf, -				       size_t size, unsigned int width); -static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf, -				     size_t size, unsigned int width); -static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf, -				    size_t size, unsigned int width); -static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf, -				    size_t size, unsigned int width); -static int hist_entry__parent_snprintf(struct hist_entry *self, char *bf, -				       size_t size, unsigned int width); -static int hist_entry__cpu_snprintf(struct hist_entry *self, char *bf, -				    size_t size, unsigned int width); - -struct sort_entry sort_thread = { -	.se_header	= "Command:  Pid", -	.se_cmp		= sort__thread_cmp, -	.se_snprintf	= hist_entry__thread_snprintf, -	.se_width_idx	= HISTC_THREAD, -}; - -struct sort_entry sort_comm = { -	.se_header	= "Command", -	.se_cmp		= sort__comm_cmp, -	.se_collapse	= sort__comm_collapse, -	.se_snprintf	= hist_entry__comm_snprintf, -	.se_width_idx	= HISTC_COMM, -}; - -struct sort_entry sort_dso = { -	.se_header	= "Shared Object", -	.se_cmp		= sort__dso_cmp, -	.se_snprintf	= hist_entry__dso_snprintf, -	.se_width_idx	= HISTC_DSO, -}; - -struct sort_entry sort_sym = { -	.se_header	= "Symbol", -	.se_cmp		= sort__sym_cmp, -	.se_snprintf	= hist_entry__sym_snprintf, -	.se_width_idx	= HISTC_SYMBOL, -}; - -struct sort_entry sort_parent = { -	.se_header	= "Parent symbol", -	.se_cmp		= sort__parent_cmp, -	.se_snprintf	= hist_entry__parent_snprintf, -	.se_width_idx	= HISTC_PARENT, -}; -  -struct sort_entry 
sort_cpu = { -	.se_header      = "CPU", -	.se_cmp	        = sort__cpu_cmp, -	.se_snprintf    = hist_entry__cpu_snprintf, -	.se_width_idx	= HISTC_CPU, -}; - -struct sort_dimension { -	const char		*name; -	struct sort_entry	*entry; -	int			taken; -}; - -static struct sort_dimension sort_dimensions[] = { -	{ .name = "pid",	.entry = &sort_thread,	}, -	{ .name = "comm",	.entry = &sort_comm,	}, -	{ .name = "dso",	.entry = &sort_dso,	}, -	{ .name = "symbol",	.entry = &sort_sym,	}, -	{ .name = "parent",	.entry = &sort_parent,	}, -	{ .name = "cpu",	.entry = &sort_cpu,	}, -}; - -int64_t cmp_null(void *l, void *r) -{ -	if (!l && !r) -		return 0; -	else if (!l) -		return -1; -	else -		return 1; -} - -/* --sort pid */ - -int64_t -sort__thread_cmp(struct hist_entry *left, struct hist_entry *right) -{ -	return right->thread->pid - left->thread->pid; -} -  static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...)  {  	int n; @@ -125,6 +36,24 @@ static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...)  	return n;  } +static int64_t cmp_null(void *l, void *r) +{ +	if (!l && !r) +		return 0; +	else if (!l) +		return -1; +	else +		return 1; +} + +/* --sort pid */ + +static int64_t +sort__thread_cmp(struct hist_entry *left, struct hist_entry *right) +{ +	return right->thread->pid - left->thread->pid; +} +  static int hist_entry__thread_snprintf(struct hist_entry *self, char *bf,  				       size_t size, unsigned int width)  { @@ -132,15 +61,50 @@ static int hist_entry__thread_snprintf(struct hist_entry *self, char *bf,  			      self->thread->comm ?: "", self->thread->pid);  } +struct sort_entry sort_thread = { +	.se_header	= "Command:  Pid", +	.se_cmp		= sort__thread_cmp, +	.se_snprintf	= hist_entry__thread_snprintf, +	.se_width_idx	= HISTC_THREAD, +}; + +/* --sort comm */ + +static int64_t +sort__comm_cmp(struct hist_entry *left, struct hist_entry *right) +{ +	return right->thread->pid - left->thread->pid; +} + +static int64_t +sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) +{ +	char *comm_l = left->thread->comm; +	char *comm_r = right->thread->comm; + +	if (!comm_l || !comm_r) +		return cmp_null(comm_l, comm_r); + +	return strcmp(comm_l, comm_r); +} +  static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf,  				     size_t size, unsigned int width)  {  	return repsep_snprintf(bf, size, "%*s", width, self->thread->comm);  } +struct sort_entry sort_comm = { +	.se_header	= "Command", +	.se_cmp		= sort__comm_cmp, +	.se_collapse	= sort__comm_collapse, +	.se_snprintf	= hist_entry__comm_snprintf, +	.se_width_idx	= HISTC_COMM, +}; +  /* --sort dso */ -int64_t +static int64_t  sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)  {  	struct dso *dso_l = left->ms.map ? 
left->ms.map->dso : NULL; @@ -173,9 +137,16 @@ static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf,  	return repsep_snprintf(bf, size, "%-*s", width, "[unknown]");  } +struct sort_entry sort_dso = { +	.se_header	= "Shared Object", +	.se_cmp		= sort__dso_cmp, +	.se_snprintf	= hist_entry__dso_snprintf, +	.se_width_idx	= HISTC_DSO, +}; +  /* --sort symbol */ -int64_t +static int64_t  sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)  {  	u64 ip_l, ip_r; @@ -211,29 +182,16 @@ static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,  	return ret;  } -/* --sort comm */ - -int64_t -sort__comm_cmp(struct hist_entry *left, struct hist_entry *right) -{ -	return right->thread->pid - left->thread->pid; -} - -int64_t -sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) -{ -	char *comm_l = left->thread->comm; -	char *comm_r = right->thread->comm; - -	if (!comm_l || !comm_r) -		return cmp_null(comm_l, comm_r); - -	return strcmp(comm_l, comm_r); -} +struct sort_entry sort_sym = { +	.se_header	= "Symbol", +	.se_cmp		= sort__sym_cmp, +	.se_snprintf	= hist_entry__sym_snprintf, +	.se_width_idx	= HISTC_SYMBOL, +};  /* --sort parent */ -int64_t +static int64_t  sort__parent_cmp(struct hist_entry *left, struct hist_entry *right)  {  	struct symbol *sym_l = left->parent; @@ -252,9 +210,16 @@ static int hist_entry__parent_snprintf(struct hist_entry *self, char *bf,  			      self->parent ? self->parent->name : "[other]");  } +struct sort_entry sort_parent = { +	.se_header	= "Parent symbol", +	.se_cmp		= sort__parent_cmp, +	.se_snprintf	= hist_entry__parent_snprintf, +	.se_width_idx	= HISTC_PARENT, +}; +  /* --sort cpu */ -int64_t +static int64_t  sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right)  {  	return right->cpu - left->cpu; @@ -266,6 +231,28 @@ static int hist_entry__cpu_snprintf(struct hist_entry *self, char *bf,  	return repsep_snprintf(bf, size, "%-*d", width, self->cpu);  } +struct sort_entry sort_cpu = { +	.se_header      = "CPU", +	.se_cmp	        = sort__cpu_cmp, +	.se_snprintf    = hist_entry__cpu_snprintf, +	.se_width_idx	= HISTC_CPU, +}; + +struct sort_dimension { +	const char		*name; +	struct sort_entry	*entry; +	int			taken; +}; + +static struct sort_dimension sort_dimensions[] = { +	{ .name = "pid",	.entry = &sort_thread,	}, +	{ .name = "comm",	.entry = &sort_comm,	}, +	{ .name = "dso",	.entry = &sort_dso,	}, +	{ .name = "symbol",	.entry = &sort_sym,	}, +	{ .name = "parent",	.entry = &sort_parent,	}, +	{ .name = "cpu",	.entry = &sort_cpu,	}, +}; +  int sort_dimension__add(const char *tok)  {  	unsigned int i; @@ -273,15 +260,9 @@ int sort_dimension__add(const char *tok)  	for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) {  		struct sort_dimension *sd = &sort_dimensions[i]; -		if (sd->taken) -			continue; -  		if (strncasecmp(tok, sd->name, strlen(tok)))  			continue; -		if (sd->entry->se_collapse) -			sort__need_collapse = 1; -  		if (sd->entry == &sort_parent) {  			int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);  			if (ret) { @@ -294,6 +275,12 @@ int sort_dimension__add(const char *tok)  			sort__has_parent = 1;  		} +		if (sd->taken) +			return 0; + +		if (sd->entry->se_collapse) +			sort__need_collapse = 1; +  		if (list_empty(&hist_entry__sort_list)) {  			if (!strcmp(sd->name, "pid"))  				sort__first_dimension = SORT_PID; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 0b91053a7d11..77d0388ad415 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -103,20 
+103,6 @@ extern struct sort_entry sort_thread;  extern struct list_head hist_entry__sort_list;  void setup_sorting(const char * const usagestr[], const struct option *opts); - -extern size_t sort__thread_print(FILE *, struct hist_entry *, unsigned int); -extern size_t sort__comm_print(FILE *, struct hist_entry *, unsigned int); -extern size_t sort__dso_print(FILE *, struct hist_entry *, unsigned int); -extern size_t sort__sym_print(FILE *, struct hist_entry *, unsigned int __used); -extern int64_t cmp_null(void *, void *); -extern int64_t sort__thread_cmp(struct hist_entry *, struct hist_entry *); -extern int64_t sort__comm_cmp(struct hist_entry *, struct hist_entry *); -extern int64_t sort__comm_collapse(struct hist_entry *, struct hist_entry *); -extern int64_t sort__dso_cmp(struct hist_entry *, struct hist_entry *); -extern int64_t sort__sym_cmp(struct hist_entry *, struct hist_entry *); -extern int64_t sort__parent_cmp(struct hist_entry *, struct hist_entry *); -int64_t sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right); -extern size_t sort__parent_print(FILE *, struct hist_entry *, unsigned int);  extern int sort_dimension__add(const char *);  void sort_entry__setup_elide(struct sort_entry *self, struct strlist *list,  			     const char *list_name, FILE *fp); diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index b9a985dadd08..d5836382ff2c 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -294,3 +294,22 @@ bool strlazymatch(const char *str, const char *pat)  {  	return __match_glob(str, pat, true);  } + +/** + * strtailcmp - Compare the tail of two strings + * @s1: 1st string to be compared + * @s2: 2nd string to be compared + * + * Return 0 if whole of either string is same as another's tail part. 
+ */ +int strtailcmp(const char *s1, const char *s2) +{ +	int i1 = strlen(s1); +	int i2 = strlen(s2); +	while (--i1 >= 0 && --i2 >= 0) { +		if (s1[i1] != s2[i2]) +			return s1[i1] - s2[i2]; +	} +	return 0; +} + diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index 35729f4c40cb..3403f814ad72 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -183,106 +183,59 @@ int bigendian(void)  	return *ptr == 0x01020304;  } -static unsigned long long copy_file_fd(int fd) +/* unfortunately, you can not stat debugfs or proc files for size */ +static void record_file(const char *file, size_t hdr_sz)  {  	unsigned long long size = 0; -	char buf[BUFSIZ]; -	int r; - -	do { -		r = read(fd, buf, BUFSIZ); -		if (r > 0) { -			size += r; -			write_or_die(buf, r); -		} -	} while (r > 0); - -	return size; -} - -static unsigned long long copy_file(const char *file) -{ -	unsigned long long size = 0; -	int fd; +	char buf[BUFSIZ], *sizep; +	off_t hdr_pos = lseek(output_fd, 0, SEEK_CUR); +	int r, fd;  	fd = open(file, O_RDONLY);  	if (fd < 0)  		die("Can't read '%s'", file); -	size = copy_file_fd(fd); -	close(fd); -	return size; -} - -static unsigned long get_size_fd(int fd) -{ -	unsigned long long size = 0; -	char buf[BUFSIZ]; -	int r; +	/* put in zeros for file size, then fill true size later */ +	write_or_die(&size, hdr_sz);  	do {  		r = read(fd, buf, BUFSIZ); -		if (r > 0) +		if (r > 0) {  			size += r; +			write_or_die(buf, r); +		}  	} while (r > 0); - -	lseek(fd, 0, SEEK_SET); - -	return size; -} - -static unsigned long get_size(const char *file) -{ -	unsigned long long size = 0; -	int fd; - -	fd = open(file, O_RDONLY); -	if (fd < 0) -		die("Can't read '%s'", file); -	size = get_size_fd(fd);  	close(fd); -	return size; +	/* ugh, handle big-endian hdr_size == 4 */ +	sizep = (char*)&size; +	if (bigendian()) +		sizep += sizeof(u64) - hdr_sz; + +	if (pwrite(output_fd, sizep, hdr_sz, hdr_pos) < 0) +		die("writing to %s", output_file);  }  static void read_header_files(void)  { -	unsigned long long size, check_size;  	char *path; -	int fd; +	struct stat st;  	path = get_tracing_file("events/header_page"); -	fd = open(path, O_RDONLY); -	if (fd < 0) +	if (stat(path, &st) < 0)  		die("can't read '%s'", path); -	/* unfortunately, you can not stat debugfs files for size */ -	size = get_size_fd(fd); -  	write_or_die("header_page", 12); -	write_or_die(&size, 8); -	check_size = copy_file_fd(fd); -	close(fd); - -	if (size != check_size) -		die("wrong size for '%s' size=%lld read=%lld", -		    path, size, check_size); +	record_file(path, 8);  	put_tracing_file(path);  	path = get_tracing_file("events/header_event"); -	fd = open(path, O_RDONLY); -	if (fd < 0) +	if (stat(path, &st) < 0)  		die("can't read '%s'", path); -	size = get_size_fd(fd); -  	write_or_die("header_event", 13); -	write_or_die(&size, 8); -	check_size = copy_file_fd(fd); -	if (size != check_size) -		die("wrong size for '%s'", path); +	record_file(path, 8);  	put_tracing_file(path); -	close(fd);  }  static bool name_in_tp_list(char *sys, struct tracepoint_path *tps) @@ -298,7 +251,6 @@ static bool name_in_tp_list(char *sys, struct tracepoint_path *tps)  static void copy_event_system(const char *sys, struct tracepoint_path *tps)  { -	unsigned long long size, check_size;  	struct dirent *dent;  	struct stat st;  	char *format; @@ -338,14 +290,8 @@ static void copy_event_system(const char *sys, struct tracepoint_path *tps)  		sprintf(format, "%s/%s/format", sys, dent->d_name);  		ret = 
stat(format, &st); -		if (ret >= 0) { -			/* unfortunately, you can not stat debugfs files for size */ -			size = get_size(format); -			write_or_die(&size, 8); -			check_size = copy_file(format); -			if (size != check_size) -				die("error in size of file '%s'", format); -		} +		if (ret >= 0) +			record_file(format, 8);  		free(format);  	} @@ -426,7 +372,7 @@ static void read_event_files(struct tracepoint_path *tps)  static void read_proc_kallsyms(void)  { -	unsigned int size, check_size; +	unsigned int size;  	const char *path = "/proc/kallsyms";  	struct stat st;  	int ret; @@ -438,17 +384,12 @@ static void read_proc_kallsyms(void)  		write_or_die(&size, 4);  		return;  	} -	size = get_size(path); -	write_or_die(&size, 4); -	check_size = copy_file(path); -	if (size != check_size) -		die("error in size of file '%s'", path); - +	record_file(path, 4);  }  static void read_ftrace_printk(void)  { -	unsigned int size, check_size; +	unsigned int size;  	char *path;  	struct stat st;  	int ret; @@ -461,11 +402,8 @@ static void read_ftrace_printk(void)  		write_or_die(&size, 4);  		goto out;  	} -	size = get_size(path); -	write_or_die(&size, 4); -	check_size = copy_file(path); -	if (size != check_size) -		die("error in size of file '%s'", path); +	record_file(path, 4); +  out:  	put_tracing_file(path);  } diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index fc784284ac8b..0128906bac88 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -238,6 +238,7 @@ char **argv_split(const char *str, int *argcp);  void argv_free(char **argv);  bool strglobmatch(const char *str, const char *pat);  bool strlazymatch(const char *str, const char *pat); +int strtailcmp(const char *s1, const char *s2);  unsigned long convert_unit(unsigned long value, char *unit);  int readn(int fd, void *buf, size_t size);  |
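
The probe-finder conversion above replaces the old fd-based entry points (find_line_range(), find_perf_probe_point(), find_probe_trace_events(), ...) with a struct debuginfo handle. A minimal caller sketch of the new API follows; it is not part of the patch. Only the debuginfo__new()/debuginfo__find_line_range()/debuginfo__delete() declarations are taken from probe-finder.h above; the wrapper name, header locations, and error value are illustrative assumptions.

#include <errno.h>
#include "probe-event.h"	/* struct line_range (assumed location) */
#include "probe-finder.h"	/* struct debuginfo and its methods */

/* Illustrative wrapper: open debug info once, query it, release it. */
static int example_show_line_range(struct line_range *lr, const char *target)
{
	struct debuginfo *dinfo;
	int ret;

	/* One handle replaces the raw fd that find_line_range() used to take;
	 * it bundles the Dwarf *, Dwfl * and load bias shown in the struct. */
	dinfo = debuginfo__new(target);
	if (!dinfo)
		return -ENOENT;		/* assumed: NULL means no debug info found */

	/* Per the hunk above: < 0 on error, 0 if nothing matched, > 0 when found. */
	ret = debuginfo__find_line_range(dinfo, lr);

	/* dwfl cleanup now happens in one place instead of in every caller. */
	debuginfo__delete(dinfo);
	return ret;
}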
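
The new strtailcmp() helper in util/string.c walks both strings backwards from their last characters and stops as soon as the shorter one is exhausted, so a file name compares equal to any path that ends with it. A few illustrative assertions (not part of the patch), assuming the helper is linked in via util.h as declared above:

#include <assert.h>
#include "util.h"	/* declares strtailcmp() */

static void strtailcmp_examples(void)
{
	/* The shorter string matches the tail of the longer one -> 0. */
	assert(strtailcmp("/usr/src/linux/kernel/sched.c", "kernel/sched.c") == 0);
	assert(strtailcmp("kernel/sched.c", "/usr/src/linux/kernel/sched.c") == 0);

	/* The first mismatch, scanning from the end, decides the result. */
	assert(strtailcmp("kernel/sched.c", "kernel/fork.c") != 0);

	/* Degenerate case: an empty string trivially matches any tail. */
	assert(strtailcmp("", "anything") == 0);
}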
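
perf_session__cpu_bitmap() above only validates the recorded sample_type and fills a bitmap; the caller is expected to keep that bitmap and test each sample's CPU against it. A hypothetical caller sketch, not part of the patch: struct report_ctx and its fields are made up for the example, and the headers providing MAX_NR_CPUS, DECLARE_BITMAP() and test_bit() are assumed to be perf's usual ones.

#include "perf.h"		/* MAX_NR_CPUS (assumed) */
#include "session.h"
#include <linux/bitmap.h>	/* DECLARE_BITMAP, test_bit (tools copy, assumed) */

struct report_ctx {
	const char	*cpu_list;	/* e.g. "0,2-3" from the command line */
	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
};

static int example_setup_cpu_filter(struct report_ctx *rep,
				    struct perf_session *session)
{
	if (!rep->cpu_list)
		return 0;	/* no CPU filtering requested */

	/* Fails if the file lacks PERF_SAMPLE_CPU or a CPU exceeds MAX_NR_CPUS. */
	return perf_session__cpu_bitmap(session, rep->cpu_list, rep->cpu_bitmap);
}

/* Per-sample use would then look something like:
 *	if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
 *		return 0;	// skip samples from unrequested CPUs
 */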