diff options
Diffstat (limited to 'arch/sparc')
-rw-r--r-- | arch/sparc/Kconfig | 2 | ||||
-rw-r--r-- | arch/sparc/boot/btfixupprep.c | 8 | ||||
-rw-r--r-- | arch/sparc/boot/piggyback_32.c | 10 | ||||
-rw-r--r-- | arch/sparc/boot/piggyback_64.c | 2 | ||||
-rw-r--r-- | arch/sparc/include/asm/hardirq_32.h | 12 | ||||
-rw-r--r-- | arch/sparc/include/asm/irq_32.h | 4 | ||||
-rw-r--r-- | arch/sparc/include/asm/pgtable_64.h | 4 | ||||
-rw-r--r-- | arch/sparc/include/asm/system_64.h | 4 | ||||
-rw-r--r-- | arch/sparc/kernel/ktlb.S | 8 | ||||
-rw-r--r-- | arch/sparc/kernel/ldc.c | 4 | ||||
-rw-r--r-- | arch/sparc/kernel/perf_event.c | 579 | ||||
-rw-r--r-- | arch/sparc/kernel/prom_common.c | 4 | ||||
-rw-r--r-- | arch/sparc/kernel/visemul.c | 2 | ||||
-rw-r--r-- | arch/sparc/mm/init_64.c | 2 | ||||
-rw-r--r-- | arch/sparc/mm/init_64.h | 2 | ||||
-rw-r--r-- | arch/sparc/oprofile/init.c | 1 |
16 files changed, 571 insertions, 77 deletions
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index ac45aab741a5..05ef5380a687 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -26,6 +26,7 @@ config SPARC select RTC_CLASS select RTC_DRV_M48T59 select HAVE_PERF_EVENTS + select PERF_USE_VMALLOC select HAVE_DMA_ATTRS select HAVE_DMA_API_DEBUG @@ -48,6 +49,7 @@ config SPARC64 select RTC_DRV_SUN4V select RTC_DRV_STARFIRE select HAVE_PERF_EVENTS + select PERF_USE_VMALLOC config ARCH_DEFCONFIG string diff --git a/arch/sparc/boot/btfixupprep.c b/arch/sparc/boot/btfixupprep.c index 52a4208fe4f0..bbf91b9c3d39 100644 --- a/arch/sparc/boot/btfixupprep.c +++ b/arch/sparc/boot/btfixupprep.c @@ -61,14 +61,14 @@ unsigned long lastfoffset = -1; unsigned long lastfrelno; btfixup *lastf; -void fatal(void) __attribute__((noreturn)); -void fatal(void) +static void fatal(void) __attribute__((noreturn)); +static void fatal(void) { fprintf(stderr, "Malformed output from objdump\n%s\n", buffer); exit(1); } -btfixup *find(int type, char *name) +static btfixup *find(int type, char *name) { int i; for (i = 0; i < last; i++) { @@ -88,7 +88,7 @@ btfixup *find(int type, char *name) return array + last - 1; } -void set_mode (char *buffer) +static void set_mode (char *buffer) { for (mode = 0;; mode++) if (buffer[mode] < '0' || buffer[mode] > '9') diff --git a/arch/sparc/boot/piggyback_32.c b/arch/sparc/boot/piggyback_32.c index e8dc9adfcd61..ac944aec7301 100644 --- a/arch/sparc/boot/piggyback_32.c +++ b/arch/sparc/boot/piggyback_32.c @@ -35,17 +35,17 @@ * as PROM looks for a.out image only. */ -unsigned short ld2(char *p) +static unsigned short ld2(char *p) { return (p[0] << 8) | p[1]; } -unsigned int ld4(char *p) +static unsigned int ld4(char *p) { return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]; } -void st4(char *p, unsigned int x) +static void st4(char *p, unsigned int x) { p[0] = x >> 24; p[1] = x >> 16; @@ -53,7 +53,7 @@ void st4(char *p, unsigned int x) p[3] = x; } -void usage(void) +static void usage(void) { /* fs_img.gz is an image of initial ramdisk. */ fprintf(stderr, "Usage: piggyback vmlinux.aout System.map fs_img.gz\n"); @@ -61,7 +61,7 @@ void usage(void) exit(1); } -void die(char *str) +static void die(char *str) { perror (str); exit(1); diff --git a/arch/sparc/boot/piggyback_64.c b/arch/sparc/boot/piggyback_64.c index c63fd1b6bdd4..a26a686cb5aa 100644 --- a/arch/sparc/boot/piggyback_64.c +++ b/arch/sparc/boot/piggyback_64.c @@ -32,7 +32,7 @@ /* Note: run this on an a.out kernel (use elftoaout for it), as PROM looks for a.out image onlly usage: piggyback vmlinux System.map tail, where tail is gzipped fs of the initial ramdisk */ -void die(char *str) +static void die(char *str) { perror (str); exit(1); diff --git a/arch/sparc/include/asm/hardirq_32.h b/arch/sparc/include/asm/hardirq_32.h index 4f63ed8df551..162007643cdc 100644 --- a/arch/sparc/include/asm/hardirq_32.h +++ b/arch/sparc/include/asm/hardirq_32.h @@ -7,17 +7,7 @@ #ifndef __SPARC_HARDIRQ_H #define __SPARC_HARDIRQ_H -#include <linux/threads.h> -#include <linux/spinlock.h> -#include <linux/cache.h> - -/* entry.S is sensitive to the offsets of these fields */ /* XXX P3 Is it? */ -typedef struct { - unsigned int __softirq_pending; -} ____cacheline_aligned irq_cpustat_t; - -#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */ - #define HARDIRQ_BITS 8 +#include <asm-generic/hardirq.h> #endif /* __SPARC_HARDIRQ_H */ diff --git a/arch/sparc/include/asm/irq_32.h b/arch/sparc/include/asm/irq_32.h index ea43057d4763..cbf4801deaaf 100644 --- a/arch/sparc/include/asm/irq_32.h +++ b/arch/sparc/include/asm/irq_32.h @@ -6,10 +6,10 @@ #ifndef _SPARC_IRQ_H #define _SPARC_IRQ_H -#include <linux/interrupt.h> - #define NR_IRQS 16 +#include <linux/interrupt.h> + #define irq_canonicalize(irq) (irq) extern void __init init_IRQ(void); diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 0ff92fa22064..f3cb790fa2ae 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -41,8 +41,8 @@ #define LOW_OBP_ADDRESS _AC(0x00000000f0000000,UL) #define HI_OBP_ADDRESS _AC(0x0000000100000000,UL) #define VMALLOC_START _AC(0x0000000100000000,UL) -#define VMALLOC_END _AC(0x0000000200000000,UL) -#define VMEMMAP_BASE _AC(0x0000000200000000,UL) +#define VMALLOC_END _AC(0x0000010000000000,UL) +#define VMEMMAP_BASE _AC(0x0000010000000000,UL) #define vmemmap ((struct page *)VMEMMAP_BASE) diff --git a/arch/sparc/include/asm/system_64.h b/arch/sparc/include/asm/system_64.h index 25e848f0cad7..d47a98e66972 100644 --- a/arch/sparc/include/asm/system_64.h +++ b/arch/sparc/include/asm/system_64.h @@ -63,6 +63,10 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \ : : : "memory"); \ } while (0) +/* The kernel always executes in TSO memory model these days, + * and furthermore most sparc64 chips implement more stringent + * memory ordering than required by the specifications. + */ #define mb() membar_safe("#StoreLoad") #define rmb() __asm__ __volatile__("":::"memory") #define wmb() __asm__ __volatile__("":::"memory") diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S index 3ea6e8cde8c5..1d361477d7d6 100644 --- a/arch/sparc/kernel/ktlb.S +++ b/arch/sparc/kernel/ktlb.S @@ -280,8 +280,8 @@ kvmap_dtlb_nonlinear: #ifdef CONFIG_SPARSEMEM_VMEMMAP /* Do not use the TSB for vmemmap. */ - mov (VMEMMAP_BASE >> 24), %g5 - sllx %g5, 24, %g5 + mov (VMEMMAP_BASE >> 40), %g5 + sllx %g5, 40, %g5 cmp %g4,%g5 bgeu,pn %xcc, kvmap_vmemmap nop @@ -293,8 +293,8 @@ kvmap_dtlb_tsbmiss: sethi %hi(MODULES_VADDR), %g5 cmp %g4, %g5 blu,pn %xcc, kvmap_dtlb_longpath - mov (VMALLOC_END >> 24), %g5 - sllx %g5, 24, %g5 + mov (VMALLOC_END >> 40), %g5 + sllx %g5, 40, %g5 cmp %g4, %g5 bgeu,pn %xcc, kvmap_dtlb_longpath nop diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c index adf5f273868a..cb3c72c45aab 100644 --- a/arch/sparc/kernel/ldc.c +++ b/arch/sparc/kernel/ldc.c @@ -1242,13 +1242,13 @@ int ldc_bind(struct ldc_channel *lp, const char *name) snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name); err = request_irq(lp->cfg.rx_irq, ldc_rx, - IRQF_SAMPLE_RANDOM | IRQF_SHARED, + IRQF_SAMPLE_RANDOM | IRQF_DISABLED | IRQF_SHARED, lp->rx_irq_name, lp); if (err) return err; err = request_irq(lp->cfg.tx_irq, ldc_tx, - IRQF_SAMPLE_RANDOM | IRQF_SHARED, + IRQF_SAMPLE_RANDOM | IRQF_DISABLED | IRQF_SHARED, lp->tx_irq_name, lp); if (err) { free_irq(lp->cfg.rx_irq, lp); diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 2d6a1b10c81d..fa5936e1c3b9 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -56,7 +56,8 @@ struct cpu_hw_events { struct perf_event *events[MAX_HWEVENTS]; unsigned long used_mask[BITS_TO_LONGS(MAX_HWEVENTS)]; unsigned long active_mask[BITS_TO_LONGS(MAX_HWEVENTS)]; - int enabled; + u64 pcr; + int enabled; }; DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; @@ -68,8 +69,30 @@ struct perf_event_map { #define PIC_LOWER 0x02 }; +static unsigned long perf_event_encode(const struct perf_event_map *pmap) +{ + return ((unsigned long) pmap->encoding << 16) | pmap->pic_mask; +} + +static void perf_event_decode(unsigned long val, u16 *enc, u8 *msk) +{ + *msk = val & 0xff; + *enc = val >> 16; +} + +#define C(x) PERF_COUNT_HW_CACHE_##x + +#define CACHE_OP_UNSUPPORTED 0xfffe +#define CACHE_OP_NONSENSE 0xffff + +typedef struct perf_event_map cache_map_t + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + struct sparc_pmu { const struct perf_event_map *(*event_map)(int); + const cache_map_t *cache_map; int max_events; int upper_shift; int lower_shift; @@ -80,21 +103,109 @@ struct sparc_pmu { int lower_nop; }; -static const struct perf_event_map ultra3i_perfmon_event_map[] = { +static const struct perf_event_map ultra3_perfmon_event_map[] = { [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER }, [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER }, [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER }, [PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER }, }; -static const struct perf_event_map *ultra3i_event_map(int event_id) +static const struct perf_event_map *ultra3_event_map(int event_id) { - return &ultra3i_perfmon_event_map[event_id]; + return &ultra3_perfmon_event_map[event_id]; } -static const struct sparc_pmu ultra3i_pmu = { - .event_map = ultra3i_event_map, - .max_events = ARRAY_SIZE(ultra3i_perfmon_event_map), +static const cache_map_t ultra3_cache_map = { +[C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x09, PIC_UPPER, }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0x0a, PIC_LOWER }, + [C(RESULT_MISS)] = { 0x0a, PIC_UPPER }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x09, PIC_UPPER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE }, + [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x0c, PIC_UPPER, }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER }, + [C(RESULT_MISS)] = { 0x0c, PIC_UPPER }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x12, PIC_UPPER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x11, PIC_UPPER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +}; + +static const struct sparc_pmu ultra3_pmu = { + .event_map = ultra3_event_map, + .cache_map = &ultra3_cache_map, + .max_events = ARRAY_SIZE(ultra3_perfmon_event_map), .upper_shift = 11, .lower_shift = 4, .event_mask = 0x3f, @@ -102,6 +213,121 @@ static const struct sparc_pmu ultra3i_pmu = { .lower_nop = 0x14, }; +/* Niagara1 is very limited. The upper PIC is hard-locked to count + * only instructions, so it is free running which creates all kinds of + * problems. Some hardware designs make one wonder if the creator + * even looked at how this stuff gets used by software. + */ +static const struct perf_event_map niagara1_perfmon_event_map[] = { + [PERF_COUNT_HW_CPU_CYCLES] = { 0x00, PIC_UPPER }, + [PERF_COUNT_HW_INSTRUCTIONS] = { 0x00, PIC_UPPER }, + [PERF_COUNT_HW_CACHE_REFERENCES] = { 0, PIC_NONE }, + [PERF_COUNT_HW_CACHE_MISSES] = { 0x03, PIC_LOWER }, +}; + +static const struct perf_event_map *niagara1_event_map(int event_id) +{ + return &niagara1_perfmon_event_map[event_id]; +} + +static const cache_map_t niagara1_cache_map = { +[C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x03, PIC_LOWER, }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x03, PIC_LOWER, }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x00, PIC_UPPER }, + [C(RESULT_MISS)] = { 0x02, PIC_LOWER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE }, + [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x07, PIC_LOWER, }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x07, PIC_LOWER, }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x05, PIC_LOWER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x04, PIC_LOWER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +}; + +static const struct sparc_pmu niagara1_pmu = { + .event_map = niagara1_event_map, + .cache_map = &niagara1_cache_map, + .max_events = ARRAY_SIZE(niagara1_perfmon_event_map), + .upper_shift = 0, + .lower_shift = 4, + .event_mask = 0x7, + .upper_nop = 0x0, + .lower_nop = 0x0, +}; + static const struct perf_event_map niagara2_perfmon_event_map[] = { [PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER }, [PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER }, @@ -116,14 +342,102 @@ static const struct perf_event_map *niagara2_event_map(int event_id) return &niagara2_perfmon_event_map[event_id]; } +static const cache_map_t niagara2_cache_map = { +[C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x02ff, PIC_UPPER | PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x0301, PIC_UPPER | PIC_LOWER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE }, + [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x0330, PIC_UPPER | PIC_LOWER, }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x0320, PIC_UPPER | PIC_LOWER, }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x0b08, PIC_UPPER | PIC_LOWER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0xb04, PIC_UPPER | PIC_LOWER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +}; + static const struct sparc_pmu niagara2_pmu = { .event_map = niagara2_event_map, + .cache_map = &niagara2_cache_map, .max_events = ARRAY_SIZE(niagara2_perfmon_event_map), .upper_shift = 19, .lower_shift = 6, .event_mask = 0xfff, .hv_bit = 0x8, - .irq_bit = 0x03, + .irq_bit = 0x30, .upper_nop = 0x220, .lower_nop = 0x220, }; @@ -151,23 +465,30 @@ static u64 nop_for_index(int idx) sparc_pmu->lower_nop, idx); } -static inline void sparc_pmu_enable_event(struct hw_perf_event *hwc, - int idx) +static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) { u64 val, mask = mask_for_index(idx); - val = pcr_ops->read(); - pcr_ops->write((val & ~mask) | hwc->config); + val = cpuc->pcr; + val &= ~mask; + val |= hwc->config; + cpuc->pcr = val; + + pcr_ops->write(cpuc->pcr); } -static inline void sparc_pmu_disable_event(struct hw_perf_event *hwc, - int idx) +static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) { u64 mask = mask_for_index(idx); u64 nop = nop_for_index(idx); - u64 val = pcr_ops->read(); + u64 val; - pcr_ops->write((val & ~mask) | nop); + val = cpuc->pcr; + val &= ~mask; + val |= nop; + cpuc->pcr = val; + + pcr_ops->write(cpuc->pcr); } void hw_perf_enable(void) @@ -182,7 +503,7 @@ void hw_perf_enable(void) cpuc->enabled = 1; barrier(); - val = pcr_ops->read(); + val = cpuc->pcr; for (i = 0; i < MAX_HWEVENTS; i++) { struct perf_event *cp = cpuc->events[i]; @@ -194,7 +515,9 @@ void hw_perf_enable(void) val |= hwc->config_base; } - pcr_ops->write(val); + cpuc->pcr = val; + + pcr_ops->write(cpuc->pcr); } void hw_perf_disable(void) @@ -207,10 +530,12 @@ void hw_perf_disable(void) cpuc->enabled = 0; - val = pcr_ops->read(); + val = cpuc->pcr; val &= ~(PCR_UTRACE | PCR_STRACE | sparc_pmu->hv_bit | sparc_pmu->irq_bit); - pcr_ops->write(val); + cpuc->pcr = val; + + pcr_ops->write(cpuc->pcr); } static u32 read_pmc(int idx) @@ -242,7 +567,7 @@ static void write_pmc(int idx, u64 val) } static int sparc_perf_event_set_period(struct perf_event *event, - struct hw_perf_event *hwc, int idx) + struct hw_perf_event *hwc, int idx) { s64 left = atomic64_read(&hwc->period_left); s64 period = hwc->sample_period; @@ -282,19 +607,19 @@ static int sparc_pmu_enable(struct perf_event *event) if (test_and_set_bit(idx, cpuc->used_mask)) return -EAGAIN; - sparc_pmu_disable_event(hwc, idx); + sparc_pmu_disable_event(cpuc, hwc, idx); cpuc->events[idx] = event; set_bit(idx, cpuc->active_mask); sparc_perf_event_set_period(event, hwc, idx); - sparc_pmu_enable_event(hwc, idx); + sparc_pmu_enable_event(cpuc, hwc, idx); perf_event_update_userpage(event); return 0; } static u64 sparc_perf_event_update(struct perf_event *event, - struct hw_perf_event *hwc, int idx) + struct hw_perf_event *hwc, int idx) { int shift = 64 - 32; u64 prev_raw_count, new_raw_count; @@ -324,7 +649,7 @@ static void sparc_pmu_disable(struct perf_event *event) int idx = hwc->idx; clear_bit(idx, cpuc->active_mask); - sparc_pmu_disable_event(hwc, idx); + sparc_pmu_disable_event(cpuc, hwc, idx); barrier(); @@ -338,18 +663,29 @@ static void sparc_pmu_disable(struct perf_event *event) static void sparc_pmu_read(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; + sparc_perf_event_update(event, hwc, hwc->idx); } static void sparc_pmu_unthrottle(struct perf_event *event) { + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - sparc_pmu_enable_event(hwc, hwc->idx); + + sparc_pmu_enable_event(cpuc, hwc, hwc->idx); } static atomic_t active_events = ATOMIC_INIT(0); static DEFINE_MUTEX(pmc_grab_mutex); +static void perf_stop_nmi_watchdog(void *unused) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + stop_nmi_watchdog(NULL); + cpuc->pcr = pcr_ops->read(); +} + void perf_event_grab_pmc(void) { if (atomic_inc_not_zero(&active_events)) @@ -358,7 +694,7 @@ void perf_event_grab_pmc(void) mutex_lock(&pmc_grab_mutex); if (atomic_read(&active_events) == 0) { if (atomic_read(&nmi_active) > 0) { - on_each_cpu(stop_nmi_watchdog, NULL, 1); + on_each_cpu(perf_stop_nmi_watchdog, NULL, 1); BUG_ON(atomic_read(&nmi_active) != 0); } atomic_inc(&active_events); @@ -375,30 +711,160 @@ void perf_event_release_pmc(void) } } +static const struct perf_event_map *sparc_map_cache_event(u64 config) +{ + unsigned int cache_type, cache_op, cache_result; + const struct perf_event_map *pmap; + + if (!sparc_pmu->cache_map) + return ERR_PTR(-ENOENT); + + cache_type = (config >> 0) & 0xff; + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) + return ERR_PTR(-EINVAL); + + cache_op = (config >> 8) & 0xff; + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) + return ERR_PTR(-EINVAL); + + cache_result = (config >> 16) & 0xff; + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return ERR_PTR(-EINVAL); + + pmap = &((*sparc_pmu->cache_map)[cache_type][cache_op][cache_result]); + + if (pmap->encoding == CACHE_OP_UNSUPPORTED) + return ERR_PTR(-ENOENT); + + if (pmap->encoding == CACHE_OP_NONSENSE) + return ERR_PTR(-EINVAL); + + return pmap; +} + static void hw_perf_event_destroy(struct perf_event *event) { perf_event_release_pmc(); } +/* Make sure all events can be scheduled into the hardware at + * the same time. This is simplified by the fact that we only + * need to support 2 simultaneous HW events. + */ +static int sparc_check_constraints(unsigned long *events, int n_ev) +{ + if (n_ev <= perf_max_events) { + u8 msk1, msk2; + u16 dummy; + + if (n_ev == 1) + return 0; + BUG_ON(n_ev != 2); + perf_event_decode(events[0], &dummy, &msk1); + perf_event_decode(events[1], &dummy, &msk2); + + /* If both events can go on any counter, OK. */ + if (msk1 == (PIC_UPPER | PIC_LOWER) && + msk2 == (PIC_UPPER | PIC_LOWER)) + return 0; + + /* If one event is limited to a specific counter, + * and the other can go on both, OK. + */ + if ((msk1 == PIC_UPPER || msk1 == PIC_LOWER) && + msk2 == (PIC_UPPER | PIC_LOWER)) + return 0; + if ((msk2 == PIC_UPPER || msk2 == PIC_LOWER) && + msk1 == (PIC_UPPER | PIC_LOWER)) + return 0; + + /* If the events are fixed to different counters, OK. */ + if ((msk1 == PIC_UPPER && msk2 == PIC_LOWER) || + (msk1 == PIC_LOWER && msk2 == PIC_UPPER)) + return 0; + + /* Otherwise, there is a conflict. */ + } + + return -1; +} + +static int check_excludes(struct perf_event **evts, int n_prev, int n_new) +{ + int eu = 0, ek = 0, eh = 0; + struct perf_event *event; + int i, n, first; + + n = n_prev + n_new; + if (n <= 1) + return 0; + + first = 1; + for (i = 0; i < n; i++) { + event = evts[i]; + if (first) { + eu = event->attr.exclude_user; + ek = event->attr.exclude_kernel; + eh = event->attr.exclude_hv; + first = 0; + } else if (event->attr.exclude_user != eu || + event->attr.exclude_kernel != ek || + event->attr.exclude_hv != eh) { + return -EAGAIN; + } + } + + return 0; +} + +static int collect_events(struct perf_event *group, int max_count, + struct perf_event *evts[], unsigned long *events) +{ + struct perf_event *event; + int n = 0; + + if (!is_software_event(group)) { + if (n >= max_count) + return -1; + evts[n] = group; + events[n++] = group->hw.event_base; + } + list_for_each_entry(event, &group->sibling_list, group_entry) { + if (!is_software_event(event) && + event->state != PERF_EVENT_STATE_OFF) { + if (n >= max_count) + return -1; + evts[n] = event; + events[n++] = event->hw.event_base; + } + } + return n; +} + static int __hw_perf_event_init(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; + struct perf_event *evts[MAX_HWEVENTS]; struct hw_perf_event *hwc = &event->hw; + unsigned long events[MAX_HWEVENTS]; const struct perf_event_map *pmap; u64 enc; + int n; if (atomic_read(&nmi_active) < 0) return -ENODEV; - if (attr->type != PERF_TYPE_HARDWARE) + if (attr->type == PERF_TYPE_HARDWARE) { + if (attr->config >= sparc_pmu->max_events) + return -EINVAL; + pmap = sparc_pmu->event_map(attr->config); + } else if (attr->type == PERF_TYPE_HW_CACHE) { + pmap = sparc_map_cache_event(attr->config); + if (IS_ERR(pmap)) + return PTR_ERR(pmap); + } else return -EOPNOTSUPP; - if (attr->config >= sparc_pmu->max_events) - return -EINVAL; - - perf_event_grab_pmc(); - event->destroy = hw_perf_event_destroy; - /* We save the enable bits in the config_base. So to * turn off sampling just write 'config', and to enable * things write 'config | config_base'. @@ -411,15 +877,39 @@ static int __hw_perf_event_init(struct perf_event *event) if (!attr->exclude_hv) hwc->config_base |= sparc_pmu->hv_bit; + hwc->event_base = perf_event_encode(pmap); + + enc = pmap->encoding; + + n = 0; + if (event->group_leader != event) { + n = collect_events(event->group_leader, + perf_max_events - 1, + evts, events); + if (n < 0) + return -EINVAL; + } + events[n] = hwc->event_base; + evts[n] = event; + + if (check_excludes(evts, n, 1)) + return -EINVAL; + + if (sparc_check_constraints(events, n + 1)) + return -EINVAL; + + /* Try to do all error checking before this point, as unwinding + * state after grabbing the PMC is difficult. + */ + perf_event_grab_pmc(); + event->destroy = hw_perf_event_destroy; + if (!hwc->sample_period) { hwc->sample_period = MAX_PERIOD; hwc->last_period = hwc->sample_period; atomic64_set(&hwc->period_left, hwc->sample_period); } - pmap = sparc_pmu->event_map(attr->config); - - enc = pmap->encoding; if (pmap->pic_mask & PIC_UPPER) { hwc->idx = PIC_UPPER_INDEX; enc <<= sparc_pmu->upper_shift; @@ -472,7 +962,7 @@ void perf_event_print_debug(void) } static int __kprobes perf_event_nmi_handler(struct notifier_block *self, - unsigned long cmd, void *__args) + unsigned long cmd, void *__args) { struct die_args *args = __args; struct perf_sample_data data; @@ -513,7 +1003,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, continue; if (perf_event_overflow(event, 1, &data, regs)) - sparc_pmu_disable_event(hwc, idx); + sparc_pmu_disable_event(cpuc, hwc, idx); } return NOTIFY_STOP; @@ -525,8 +1015,15 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = { static bool __init supported_pmu(void) { - if (!strcmp(sparc_pmu_type, "ultra3i")) { - sparc_pmu = &ultra3i_pmu; + if (!strcmp(sparc_pmu_type, "ultra3") || + !strcmp(sparc_pmu_type, "ultra3+") || + !strcmp(sparc_pmu_type, "ultra3i") || + !strcmp(sparc_pmu_type, "ultra4+")) { + sparc_pmu = &ultra3_pmu; + return true; + } + if (!strcmp(sparc_pmu_type, "niagara")) { + sparc_pmu = &niagara1_pmu; return true; } if (!strcmp(sparc_pmu_type, "niagara2")) { diff --git a/arch/sparc/kernel/prom_common.c b/arch/sparc/kernel/prom_common.c index 138910c67206..d80a65d9e893 100644 --- a/arch/sparc/kernel/prom_common.c +++ b/arch/sparc/kernel/prom_common.c @@ -79,6 +79,7 @@ int of_set_property(struct device_node *dp, const char *name, void *val, int len err = -ENODEV; + mutex_lock(&of_set_property_mutex); write_lock(&devtree_lock); prevp = &dp->properties; while (*prevp) { @@ -88,9 +89,7 @@ int of_set_property(struct device_node *dp, const char *name, void *val, int len void *old_val = prop->value; int ret; - mutex_lock(&of_set_property_mutex); ret = prom_setprop(dp->node, name, val, len); - mutex_unlock(&of_set_property_mutex); err = -EINVAL; if (ret >= 0) { @@ -109,6 +108,7 @@ int of_set_property(struct device_node *dp, const char *name, void *val, int len prevp = &(*prevp)->next; } write_unlock(&devtree_lock); + mutex_unlock(&of_set_property_mutex); /* XXX Upate procfs if necessary... */ diff --git a/arch/sparc/kernel/visemul.c b/arch/sparc/kernel/visemul.c index b956fd71c131..d231cbd5c526 100644 --- a/arch/sparc/kernel/visemul.c +++ b/arch/sparc/kernel/visemul.c @@ -617,7 +617,7 @@ static void pmul(struct pt_regs *regs, unsigned int insn, unsigned int opf) rs2 = fps_regval(f, RS2(insn)); rd_val = 0; - src2 = (rs2 >> (opf == FMUL8x16AU_OPF) ? 16 : 0); + src2 = rs2 >> (opf == FMUL8x16AU_OPF ? 16 : 0); for (byte = 0; byte < 4; byte++) { u16 src1 = (rs1 >> (byte * 8)) & 0x00ff; u32 prod = src1 * src2; diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index a70a5e1904d9..1886d37d411b 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -265,7 +265,7 @@ static void flush_dcache(unsigned long pfn) struct page *page; page = pfn_to_page(pfn); - if (page && page_mapping(page)) { + if (page) { unsigned long pg_flags; pg_flags = page->flags; diff --git a/arch/sparc/mm/init_64.h b/arch/sparc/mm/init_64.h index c2f772dbd556..77d1b313e344 100644 --- a/arch/sparc/mm/init_64.h +++ b/arch/sparc/mm/init_64.h @@ -45,7 +45,7 @@ extern void free_initmem(void); #define VMEMMAP_ALIGN(x) (((x)+VMEMMAP_CHUNK-1UL)&VMEMMAP_CHUNK_MASK) #define VMEMMAP_SIZE ((((1UL << MAX_PHYSADDR_BITS) >> PAGE_SHIFT) * \ - sizeof(struct page *)) >> VMEMMAP_CHUNK_SHIFT) + sizeof(struct page)) >> VMEMMAP_CHUNK_SHIFT) extern unsigned long vmemmap_table[VMEMMAP_SIZE]; #endif diff --git a/arch/sparc/oprofile/init.c b/arch/sparc/oprofile/init.c index f97cb8b6ee5f..f9024bccff16 100644 --- a/arch/sparc/oprofile/init.c +++ b/arch/sparc/oprofile/init.c @@ -11,6 +11,7 @@ #include <linux/oprofile.h> #include <linux/errno.h> #include <linux/init.h> +#include <linux/param.h> /* for HZ */ #ifdef CONFIG_SPARC64 #include <linux/notifier.h> |