From 7d281b620d229486429d851b10a05da871d22e79 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 28 Jul 2016 19:37:21 +0100 Subject: ARM: 8589/1: asm/memory.h: remove dead definitions The last ad-hoc __phys_to_virt definition was removed in commit fd0053c9 ("ARM: realview: remove sparsemem hack"). Therefore we can remove the unneeded definitions and unduplicate the virt_to_pfn macro from asm/memory.h. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/include/asm/memory.h | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 31c07a2cc100..76cbd9c674df 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -159,13 +159,8 @@ * PFNs are used to describe any physical page; this means * PFN 0 == physical address 0. */ -#if defined(__virt_to_phys) -#define PHYS_OFFSET PLAT_PHYS_OFFSET -#define PHYS_PFN_OFFSET ((unsigned long)(PHYS_OFFSET >> PAGE_SHIFT)) - -#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) -#elif defined(CONFIG_ARM_PATCH_PHYS_VIRT) +#if defined(CONFIG_ARM_PATCH_PHYS_VIRT) /* * Constants used to force the right instruction encodings and shifts @@ -182,10 +177,6 @@ extern const void *__pv_table_begin, *__pv_table_end; #define PHYS_OFFSET ((phys_addr_t)__pv_phys_pfn_offset << PAGE_SHIFT) #define PHYS_PFN_OFFSET (__pv_phys_pfn_offset) -#define virt_to_pfn(kaddr) \ - ((((unsigned long)(kaddr) - PAGE_OFFSET) >> PAGE_SHIFT) + \ - PHYS_PFN_OFFSET) - #define __pv_stub(from,to,instr,type) \ __asm__("@ __pv_stub\n" \ "1: " instr " %0, %1, %2\n" \ @@ -257,12 +248,12 @@ static inline unsigned long __phys_to_virt(phys_addr_t x) return x - PHYS_OFFSET + PAGE_OFFSET; } +#endif + #define virt_to_pfn(kaddr) \ ((((unsigned long)(kaddr) - PAGE_OFFSET) >> PAGE_SHIFT) + \ PHYS_PFN_OFFSET) -#endif - /* * These are *only* valid on the kernel direct mapped RAM memory. * Note: Drivers should NOT use these. They are the wrong -- cgit v1.2.3 From d782e426b835bd2e79d868eb4af8510ed79e0aee Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 5 Aug 2016 06:03:41 +0100 Subject: ARM: 8594/1: enable binfmt_flat on systems with an MMU Now that the generic changes are in place, this can be enabled on ARM with the use of proper user space accessors in the flat_get_addr_from_rp() and flat_put_addr_at_rp() handlers as rp actually holds a user space address. 
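To make the change concrete, here is a hedged sketch (not part of the patch; the helper name and error handling are invented for illustration) of why the relocation handlers must go through user-space accessors once FLAT binaries load into user mappings on an MMU kernel:

	/*
	 * Illustrative only: on an MMU system rp points into the user mapping
	 * of the loaded binary, so a bare get_unaligned(rp) would dereference
	 * a user pointer directly from kernel context.
	 */
	static int flat_fixup_reloc(unsigned long __user *rp, unsigned long delta)
	{
		unsigned long addr;

		if (__get_user_unaligned(addr, rp))	/* faulting user read */
			return -EFAULT;

		addr += delta;				/* apply the relocation */

		return __put_user_unaligned(addr, rp);	/* user write, may fault */
	}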
Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/include/asm/flat.h | 5 +++-- fs/Kconfig.binfmt | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/flat.h b/arch/arm/include/asm/flat.h index e847d23351ed..acf1d14b89a6 100644 --- a/arch/arm/include/asm/flat.h +++ b/arch/arm/include/asm/flat.h @@ -8,8 +8,9 @@ #define flat_argvp_envp_on_stack() 1 #define flat_old_ram_flag(flags) (flags) #define flat_reloc_valid(reloc, size) ((reloc) <= (size)) -#define flat_get_addr_from_rp(rp, relval, flags, persistent) ((void)persistent,get_unaligned(rp)) -#define flat_put_addr_at_rp(rp, val, relval) put_unaligned(val,rp) +#define flat_get_addr_from_rp(rp, relval, flags, persistent) \ + ({ unsigned long __val; __get_user_unaligned(__val, rp); __val; }) +#define flat_put_addr_at_rp(rp, val, relval) __put_user_unaligned(val, rp) #define flat_get_relocate_addr(rel) (rel) #define flat_set_persistent(relval, p) 0 diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index c7efddf6e038..4c09d93d9569 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -89,7 +89,7 @@ config BINFMT_SCRIPT config BINFMT_FLAT bool "Kernel support for flat binaries" - depends on !MMU || M68K + depends on !MMU || ARM || M68K depends on !FRV || BROKEN help Support uClinux FLAT format binaries. -- cgit v1.2.3 From 10b52a49ac62cd8cedc08c50178dc6fbb8925b87 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 18 Aug 2016 11:45:18 +0100 Subject: ARM: 8598/1: remove traces of perf_ops_bp Even though perf_ops_bp was removed/renamed back in commit b0a873ebbf87bf38 ("perf: Register PMU implementations"), as part of v2.6.37, its definition still lives on in some arch headers. This patch removes the vestigial definition from arm.
Signed-off-by: Mark Rutland Acked-by: Will Deacon Cc: Russell King Signed-off-by: Russell King --- arch/arm/include/asm/hw_breakpoint.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/hw_breakpoint.h b/arch/arm/include/asm/hw_breakpoint.h index 8e427c7b4425..afcaf8bf971b 100644 --- a/arch/arm/include/asm/hw_breakpoint.h +++ b/arch/arm/include/asm/hw_breakpoint.h @@ -114,7 +114,6 @@ struct notifier_block; struct perf_event; struct pmu; -extern struct pmu perf_ops_bp; extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, int *gen_len, int *gen_type); extern int arch_check_bp_in_kernelspace(struct perf_event *bp); -- cgit v1.2.3 From 83809b90a6dbedbcd94fcb99c9cde9477534f3d3 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 19 Aug 2016 15:15:03 +0100 Subject: ARM: sa1100: move StrongARM CPU ID checks to cputype.h Move the StrongARM CPU ID checks out of the platform's hardware.h file into asm/cputype.h Acked-by: Viresh Kumar Signed-off-by: Russell King --- arch/arm/include/asm/cputype.h | 21 +++++++++++++++++++++ arch/arm/mach-sa1100/include/mach/hardware.h | 18 ------------------ drivers/cpufreq/sa1110-cpufreq.c | 2 +- 3 files changed, 22 insertions(+), 19 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index 1ee94c716a7f..d6a4902a75d7 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -60,6 +60,7 @@ ((mpidr >> (MPIDR_LEVEL_BITS * level)) & MPIDR_LEVEL_MASK) #define ARM_CPU_IMP_ARM 0x41 +#define ARM_CPU_IMP_DEC 0x44 #define ARM_CPU_IMP_INTEL 0x69 /* ARM implemented processors */ @@ -76,6 +77,17 @@ #define ARM_CPU_PART_CORTEX_A15 0x4100c0f0 #define ARM_CPU_PART_MASK 0xff00fff0 +/* DEC implemented cores */ +#define ARM_CPU_PART_SA1100 0x4400a110 + +/* Intel implemented cores */ +#define ARM_CPU_PART_SA1110 0x6900b110 +#define ARM_CPU_REV_SA1110_A0 0 +#define ARM_CPU_REV_SA1110_B0 4 +#define ARM_CPU_REV_SA1110_B1 5 +#define ARM_CPU_REV_SA1110_B2 6 +#define ARM_CPU_REV_SA1110_B4 8 + #define ARM_CPU_XSCALE_ARCH_MASK 0xe000 #define ARM_CPU_XSCALE_ARCH_V1 0x2000 #define ARM_CPU_XSCALE_ARCH_V2 0x4000 @@ -173,6 +185,11 @@ static inline unsigned int __attribute_const__ read_cpuid_implementor(void) return (read_cpuid_id() & 0xFF000000) >> 24; } +static inline unsigned int __attribute_const__ read_cpuid_revision(void) +{ + return read_cpuid_id() & 0x0000000f; +} + /* * The CPU part number is meaningless without referring to the CPU * implementer: implementers are free to define their own part numbers @@ -208,6 +225,10 @@ static inline unsigned int __attribute_const__ read_cpuid_mpidr(void) return read_cpuid(CPUID_MPIDR); } +/* StrongARM-11x0 CPUs */ +#define cpu_is_sa1100() (read_cpuid_part() == ARM_CPU_PART_SA1100) +#define cpu_is_sa1110() (read_cpuid_part() == ARM_CPU_PART_SA1110) + /* * Intel's XScale3 core supports some v6 features (supersections, L2) * but advertises itself as v5 as it does not support the v6 ISA. 
For this reason, we need a way to explicitly test for this type of CPU. */ diff --git a/arch/arm/mach-sa1100/include/mach/hardware.h b/arch/arm/mach-sa1100/include/mach/hardware.h index cbedd75a9d65..55c85ce7010d 100644 --- a/arch/arm/mach-sa1100/include/mach/hardware.h +++ b/arch/arm/mach-sa1100/include/mach/hardware.h @@ -36,28 +36,10 @@ #define io_v2p( x ) \ ( (((x)&0x00ffffff) | (((x)&(0x30000000>>VIO_SHIFT))<<VIO_SHIFT)) + 0xe0000000 ) -#define CPU_SA1110_A0 (0x6901b110) -#define CPU_SA1110_B0 (0x6901b114) -#define CPU_SA1110_B1 (0x6901b115) -#define CPU_SA1110_B2 (0x6901b116) -#define CPU_SA1110_B4 (0x6901b118) - -#define CPU_SA1100_ID (0x4401a110) -#define CPU_SA1100_MASK (0xfffffff0) -#define CPU_SA1110_ID (0x6901b110) -#define CPU_SA1110_MASK (0xfffffff0) - #ifndef __ASSEMBLY__ -#include <asm/cputype.h> - -#define CPU_REVISION (read_cpuid_id() & 15) - -#define cpu_is_sa1100() ((read_cpuid_id() & CPU_SA1100_MASK) == CPU_SA1100_ID) -#define cpu_is_sa1110() ((read_cpuid_id() & CPU_SA1110_MASK) == CPU_SA1110_ID) - # define __REG(x) (*((volatile unsigned long __iomem *)io_p2v(x))) # define __PREG(x) (io_v2p((unsigned long)&(x))) diff --git a/drivers/cpufreq/sa1110-cpufreq.c b/drivers/cpufreq/sa1110-cpufreq.c index b5befc211172..2bac9b6cfeea 100644 --- a/drivers/cpufreq/sa1110-cpufreq.c +++ b/drivers/cpufreq/sa1110-cpufreq.c @@ -159,7 +159,7 @@ sdram_calculate_timing(struct sdram_info *sd, u_int cpu_khz, * half speed or use delayed read latching (errata 13). */ if ((ns_to_cycles(sdram->tck, sd_khz) > 1) || - (CPU_REVISION < CPU_SA1110_B2 && sd_khz < 62000)) + (read_cpuid_revision() < ARM_CPU_REV_SA1110_B2 && sd_khz < 62000)) sd_khz /= 2; sd->mdcnfg = MDCNFG & 0x007f007f; -- cgit v1.2.3 From 5a0e0691147a7f841ddb54d067b165071dff592f Mon Sep 17 00:00:00 2001 From: Andy Gross Date: Tue, 23 Aug 2016 14:25:32 +0100 Subject: ARM: 8601/1: Remove unused secure_flush_area API This patch removes the unused secure_flush_area function. The only consumer of this function has moved to using the streaming DMA APIs. Signed-off-by: Andy Gross Signed-off-by: Russell King --- arch/arm/include/asm/cacheflush.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 9156fc303afd..bdd283bc5842 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -501,21 +501,4 @@ static inline void set_kernel_text_ro(void) { } void flush_uprobe_xol_access(struct page *page, unsigned long uaddr, void *kaddr, unsigned long len); -/** - * secure_flush_area - ensure coherency across the secure boundary - * @addr: virtual address - * @size: size of region - * - * Ensure that the specified area of memory is coherent across the secure - * boundary from the non-secure side. This is used when calling secure - * firmware where the secure firmware does not ensure coherency. - */ -static inline void secure_flush_area(const void *addr, size_t size) -{ - phys_addr_t phys = __pa(addr); - - __cpuc_flush_dcache_area((void *)addr, size); - outer_flush_range(phys, phys + size); -} - #endif -- cgit v1.2.3 From 35fa91eed817d2c65c59ef5a9737011313be6ac0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 16 Aug 2016 17:21:02 +0200 Subject: ARM: kernel: merge core and init PLTs The PLT code uses a separate .init.plt section to allocate PLT entries for jump and call instructions in __init code. However, even for fairly sizable modules like mac80211.ko, we only end up with a couple of PLT entries in the .init section, and so we can simplify the code significantly by emitting all PLT entries into the same section.
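For context, a condensed sketch of the consumer side, modelled on the R_ARM_CALL/R_ARM_JUMP24 handling in arch/arm/kernel/module.c (the wrapper name is invented; the +/-32MiB bounds are the ARM branch range, and loc + 8 accounts for the pipeline offset):

	/*
	 * Sketch: redirect an out-of-range module branch through a PLT veneer.
	 * get_module_plt() returns the address of an "ldr pc, [...]" slot whose
	 * literal holds the real destination, now always in the single .plt
	 * section.
	 */
	static s32 fixup_branch_offset(struct module *mod, unsigned long loc, s32 offset)
	{
		if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS) &&
		    (offset <= (s32)0xfe000000 || offset >= (s32)0x02000000))
			offset = get_module_plt(mod, loc, offset + loc + 8) - loc - 8;

		return offset;
	}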
Tested-by: Jongsung Kim Signed-off-by: Ard Biesheuvel --- arch/arm/include/asm/module.h | 6 ++-- arch/arm/kernel/module-plts.c | 68 ++++++++++++++----------------------------- arch/arm/kernel/module.lds | 3 +- 3 files changed, 25 insertions(+), 52 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h index e358b7966c06..464748b9fd7d 100644 --- a/arch/arm/include/asm/module.h +++ b/arch/arm/include/asm/module.h @@ -23,10 +23,8 @@ struct mod_arch_specific { struct unwind_table *unwind[ARM_SEC_MAX]; #endif #ifdef CONFIG_ARM_MODULE_PLTS - struct elf32_shdr *core_plt; - struct elf32_shdr *init_plt; - int core_plt_count; - int init_plt_count; + struct elf32_shdr *plt; + int plt_count; #endif }; diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c index 0c7efc3446c0..6832d1d6444e 100644 --- a/arch/arm/kernel/module-plts.c +++ b/arch/arm/kernel/module-plts.c @@ -30,28 +30,16 @@ struct plt_entries { u32 lit[PLT_ENT_COUNT]; }; -static bool in_init(const struct module *mod, u32 addr) -{ - return addr - (u32)mod->init_layout.base < mod->init_layout.size; -} - u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val) { struct plt_entries *plt, *plt_end; - int c, *count; - - if (in_init(mod, loc)) { - plt = (void *)mod->arch.init_plt->sh_addr; - plt_end = (void *)plt + mod->arch.init_plt->sh_size; - count = &mod->arch.init_plt_count; - } else { - plt = (void *)mod->arch.core_plt->sh_addr; - plt_end = (void *)plt + mod->arch.core_plt->sh_size; - count = &mod->arch.core_plt_count; - } + int c; + + plt = (void *)mod->arch.plt->sh_addr; + plt_end = (void *)plt + mod->arch.plt->sh_size; /* Look for an existing entry pointing to 'val' */ - for (c = *count; plt < plt_end; c -= PLT_ENT_COUNT, plt++) { + for (c = mod->arch.plt_count; plt < plt_end; c -= PLT_ENT_COUNT, plt++) { int i; if (!c) { @@ -60,13 +48,13 @@ u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val) { [0 ... PLT_ENT_COUNT - 1] = PLT_ENT_LDR, }, { val, } }; - ++*count; + mod->arch.plt_count++; return (u32)plt->ldr; } for (i = 0; i < PLT_ENT_COUNT; i++) { if (!plt->lit[i]) { plt->lit[i] = val; - ++*count; + mod->arch.plt_count++; } if (plt->lit[i] == val) return (u32)&plt->ldr[i]; @@ -132,21 +120,19 @@ static unsigned int count_plts(Elf32_Addr base, const Elf32_Rel *rel, int num) int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, struct module *mod) { - unsigned long core_plts = 0, init_plts = 0; + unsigned long plts = 0; Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum; /* * To store the PLTs, we expand the .text section for core module code - * and the .init.text section for initialization code. + * and for initialization code. 
*/ for (s = sechdrs; s < sechdrs_end; ++s) - if (strcmp(".core.plt", secstrings + s->sh_name) == 0) - mod->arch.core_plt = s; - else if (strcmp(".init.plt", secstrings + s->sh_name) == 0) - mod->arch.init_plt = s; + if (strcmp(".plt", secstrings + s->sh_name) == 0) + mod->arch.plt = s; - if (!mod->arch.core_plt || !mod->arch.init_plt) { - pr_err("%s: sections missing\n", mod->name); + if (!mod->arch.plt) { + pr_err("%s: module PLT section missing\n", mod->name); return -ENOEXEC; } @@ -158,26 +144,16 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, if (s->sh_type != SHT_REL) continue; - if (strstr(secstrings + s->sh_name, ".init")) - init_plts += count_plts(dstsec->sh_addr, rels, numrels); - else - core_plts += count_plts(dstsec->sh_addr, rels, numrels); + plts += count_plts(dstsec->sh_addr, rels, numrels); } - mod->arch.core_plt->sh_type = SHT_NOBITS; - mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; - mod->arch.core_plt->sh_addralign = L1_CACHE_BYTES; - mod->arch.core_plt->sh_size = round_up(core_plts * PLT_ENT_SIZE, - sizeof(struct plt_entries)); - mod->arch.core_plt_count = 0; - - mod->arch.init_plt->sh_type = SHT_NOBITS; - mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; - mod->arch.init_plt->sh_addralign = L1_CACHE_BYTES; - mod->arch.init_plt->sh_size = round_up(init_plts * PLT_ENT_SIZE, - sizeof(struct plt_entries)); - mod->arch.init_plt_count = 0; - pr_debug("%s: core.plt=%x, init.plt=%x\n", __func__, - mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size); + mod->arch.plt->sh_type = SHT_NOBITS; + mod->arch.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.plt->sh_addralign = L1_CACHE_BYTES; + mod->arch.plt->sh_size = round_up(plts * PLT_ENT_SIZE, + sizeof(struct plt_entries)); + mod->arch.plt_count = 0; + + pr_debug("%s: plt=%x\n", __func__, mod->arch.plt->sh_size); return 0; } diff --git a/arch/arm/kernel/module.lds b/arch/arm/kernel/module.lds index 3682fa107918..05881e2b414c 100644 --- a/arch/arm/kernel/module.lds +++ b/arch/arm/kernel/module.lds @@ -1,4 +1,3 @@ SECTIONS { - .core.plt : { BYTE(0) } - .init.plt : { BYTE(0) } + .plt : { BYTE(0) } } -- cgit v1.2.3 From 26150aa96d60298de669e3d81a5b7e132b653ce7 Mon Sep 17 00:00:00 2001 From: Jonathan Austin Date: Tue, 30 Aug 2016 17:24:34 +0100 Subject: ARM: 8602/1: factor out CSSELR/CCSIDR operations that use cp15 directly Currently we use raw cp15 operations to access the cache setup data. This patch abstracts the CSSELR and CCSIDR accessors out to a header so that the implementation for them can be switched out as we do with other cpu/cachetype operations. 
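As a usage sketch (the helper is invented; the field decoding matches the cpu_has_aliasing_icache() hunk in the diff below), callers can now select a cache and decode its geometry without open-coding the mcr/mrc pair:

	/* Sketch: size in bytes of one L1 D-cache line, via the new accessors. */
	static unsigned int l1_dcache_line_size(void)
	{
		unsigned int ccsidr;

		set_csselr(CSSELR_DCACHE | CSSELR_L1);	/* select the L1 data/unified cache */
		isb();					/* CSSELR write must land before the CCSIDR read */
		ccsidr = read_ccsidr();

		return 4 << ((ccsidr & 0x7) + 2);	/* LineSize field -> bytes */
	}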
Signed-off-by: Jonathan Austin Signed-off-by: Vladimir Murzin Tested-by: Andras Szemzo Tested-by: Joachim Eastwood Tested-by: Alexandre TORGUE Signed-off-by: Russell King --- arch/arm/include/asm/cachetype.h | 24 ++++++++++++++++++++++++ arch/arm/kernel/setup.c | 7 ++----- 2 files changed, 26 insertions(+), 5 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/cachetype.h b/arch/arm/include/asm/cachetype.h index 7ea78144ae22..8609de8e313a 100644 --- a/arch/arm/include/asm/cachetype.h +++ b/arch/arm/include/asm/cachetype.h @@ -56,4 +56,28 @@ static inline unsigned int __attribute__((pure)) cacheid_is(unsigned int mask) (~__CACHEID_NEVER & __CACHEID_ARCH_MIN & mask & cacheid); } +#define CSSELR_ICACHE 1 +#define CSSELR_DCACHE 0 + +#define CSSELR_L1 (0 << 1) +#define CSSELR_L2 (1 << 1) +#define CSSELR_L3 (2 << 1) +#define CSSELR_L4 (3 << 1) +#define CSSELR_L5 (4 << 1) +#define CSSELR_L6 (5 << 1) +#define CSSELR_L7 (6 << 1) + +static inline void set_csselr(unsigned int cache_selector) +{ + asm volatile("mcr p15, 2, %0, c0, c0, 0" : : "r" (cache_selector)); +} + +static inline unsigned int read_ccsidr(void) +{ + unsigned int val; + + asm volatile("mrc p15, 1, %0, c0, c0, 0" : "=r" (val)); + return val; +} + #endif diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index aca999e17184..d7568808eb7b 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -290,12 +290,9 @@ static int cpu_has_aliasing_icache(unsigned int arch) /* arch specifies the register format */ switch (arch) { case CPU_ARCH_ARMv7: - asm("mcr p15, 2, %0, c0, c0, 0 @ set CSSELR" - : /* No output operands */ - : "r" (1)); + set_csselr(CSSELR_ICACHE | CSSELR_L1); isb(); - asm("mrc p15, 1, %0, c0, c0, 0 @ read CCSIDR" - : "=r" (id_reg)); + id_reg = read_ccsidr(); line_size = 4 << ((id_reg & 0x7) + 2); num_sets = ((id_reg >> 13) & 0x7fff) + 1; aliasing_icache = (line_size * num_sets) > PAGE_SIZE; -- cgit v1.2.3 From 296909ee6d9cf98f68a61d5e9774ff5435df2c6a Mon Sep 17 00:00:00 2001 From: Jonathan Austin Date: Tue, 30 Aug 2016 17:25:59 +0100 Subject: ARM: 8603/1: V7M: Add addresses for mem-mapped V7M cache operations V7M implements cache operations similarly to V7A/R, however all operations are performed via memory-mapped IO instead of co-processor operations. This patch adds register definitions relevant to the V7M ARM architecture's cache architecture. 
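By way of illustration, these offsets turn cache maintenance into plain MMIO stores; a hedged sketch (helper name invented, the barrier sequence is the usual maintenance idiom, and BASEADDR_V7M_SCB comes from the same header):

	/* Sketch: invalidate the whole I-cache on V7M through the SCB. */
	static inline void v7m_icache_inval_all(void)
	{
		writel(0, BASEADDR_V7M_SCB + V7M_SCB_ICIALLU);	/* any value triggers the op */
		dsb(ish);					/* wait for completion */
		isb();						/* then refetch instructions */
	}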
Signed-off-by: Jonathan Austin Signed-off-by: Vladimir Murzin Tested-by: Andras Szemzo Tested-by: Joachim Eastwood Tested-by: Alexandre TORGUE Signed-off-by: Russell King --- arch/arm/include/asm/v7m.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/v7m.h b/arch/arm/include/asm/v7m.h index 615781c61627..1fd775c1bc5d 100644 --- a/arch/arm/include/asm/v7m.h +++ b/arch/arm/include/asm/v7m.h @@ -24,6 +24,9 @@ #define V7M_SCB_CCR 0x14 #define V7M_SCB_CCR_STKALIGN (1 << 9) +#define V7M_SCB_CCR_DC (1 << 16) +#define V7M_SCB_CCR_IC (1 << 17) +#define V7M_SCB_CCR_BP (1 << 18) #define V7M_SCB_SHPR2 0x1c #define V7M_SCB_SHPR3 0x20 @@ -47,6 +50,25 @@ #define EXC_RET_STACK_MASK 0x00000004 #define EXC_RET_THREADMODE_PROCESSSTACK 0xfffffffd +/* Cache related definitions */ + +#define V7M_SCB_CLIDR 0x78 /* Cache Level ID register */ +#define V7M_SCB_CTR 0x7c /* Cache Type register */ +#define V7M_SCB_CCSIDR 0x80 /* Cache size ID register */ +#define V7M_SCB_CSSELR 0x84 /* Cache size selection register */ + +/* Cache operations */ +#define V7M_SCB_ICIALLU 0x250 /* I-cache invalidate all to PoU */ +#define V7M_SCB_ICIMVAU 0x258 /* I-cache invalidate by MVA to PoU */ +#define V7M_SCB_DCIMVAC 0x25c /* D-cache invalidate by MVA to PoC */ +#define V7M_SCB_DCISW 0x260 /* D-cache invalidate by set-way */ +#define V7M_SCB_DCCMVAU 0x264 /* D-cache clean by MVA to PoU */ +#define V7M_SCB_DCCMVAC 0x268 /* D-cache clean by MVA to PoC */ +#define V7M_SCB_DCCSW 0x26c /* D-cache clean by set-way */ +#define V7M_SCB_DCCIMVAC 0x270 /* D-cache clean and invalidate by MVA to PoC */ +#define V7M_SCB_DCCISW 0x274 /* D-cache clean and invalidate by set-way */ +#define V7M_SCB_BPIALL 0x278 /* Branch predictor invalidate all */ + #ifndef __ASSEMBLY__ enum reboot_mode; -- cgit v1.2.3 From f5a5c89e36d0897b65e4e6bc2f646f75f8074263 Mon Sep 17 00:00:00 2001 From: Jonathan Austin Date: Tue, 30 Aug 2016 17:27:19 +0100 Subject: ARM: 8604/1: V7M: Add support for reading the CTR with read_cpuid_cachetype() With the addition of caches to the V7M architecture, a new Cache Type Register (CTR) is defined at 0xE000ED7C. This register serves the same purpose as the V7A/R version and is accessed via read_cpuid_cachetype().
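A small sketch of what this buys callers (helper invented; the format test mirrors the cacheid_init() hunk in the diff below): generic code can now read and decode the CTR identically on V7M and V7A/R:

	/* Sketch: does this CPU report the ARMv7 cache type register format? */
	static bool ctr_is_armv7_format(void)
	{
		unsigned int ctr = read_cpuid_cachetype();

		return (ctr & (7 << 29)) == (4 << 29);	/* bits [31:29] == 0b100 */
	}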
Signed-off-by: Jonathan Austin Signed-off-by: Vladimir Murzin Tested-by: Andras Szemzo Tested-by: Joachim Eastwood Tested-by: Alexandre TORGUE Signed-off-by: Russell King --- arch/arm/include/asm/cachetype.h | 15 +++++++++++++++ arch/arm/include/asm/cputype.h | 15 ++++++++++----- arch/arm/kernel/setup.c | 9 +++++---- 3 files changed, 30 insertions(+), 9 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/cachetype.h b/arch/arm/include/asm/cachetype.h index 8609de8e313a..01509ae0bbec 100644 --- a/arch/arm/include/asm/cachetype.h +++ b/arch/arm/include/asm/cachetype.h @@ -67,6 +67,7 @@ static inline unsigned int __attribute__((pure)) cacheid_is(unsigned int mask) #define CSSELR_L6 (5 << 1) #define CSSELR_L7 (6 << 1) +#ifndef CONFIG_CPU_V7M static inline void set_csselr(unsigned int cache_selector) { asm volatile("mcr p15, 2, %0, c0, c0, 0" : : "r" (cache_selector)); @@ -79,5 +80,19 @@ static inline unsigned int read_ccsidr(void) asm volatile("mrc p15, 1, %0, c0, c0, 0" : "=r" (val)); return val; } +#else /* CONFIG_CPU_V7M */ +#include <linux/io.h> +#include "asm/v7m.h" + +static inline void set_csselr(unsigned int cache_selector) +{ + writel(cache_selector, BASEADDR_V7M_SCB + V7M_SCB_CSSELR); +} + +static inline unsigned int read_ccsidr(void) +{ + return readl(BASEADDR_V7M_SCB + V7M_SCB_CCSIDR); +} +#endif #endif diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index d6a4902a75d7..754f86f667d4 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -164,6 +164,11 @@ static inline unsigned int __attribute_const__ read_cpuid_id(void) return read_cpuid(CPUID_ID); } +static inline unsigned int __attribute_const__ read_cpuid_cachetype(void) +{ + return read_cpuid(CPUID_CACHETYPE); +} + #elif defined(CONFIG_CPU_V7M) static inline unsigned int __attribute_const__ read_cpuid_id(void) @@ -171,6 +176,11 @@ static inline unsigned int __attribute_const__ read_cpuid_id(void) { return readl(BASEADDR_V7M_SCB + V7M_SCB_CPUID); } +static inline unsigned int __attribute_const__ read_cpuid_cachetype(void) +{ + return readl(BASEADDR_V7M_SCB + V7M_SCB_CTR); +} + #else /* ifdef CONFIG_CPU_CP15 / elif defined(CONFIG_CPU_V7M) */ static inline unsigned int __attribute_const__ read_cpuid_id(void) @@ -210,11 +220,6 @@ static inline unsigned int __attribute_const__ xscale_cpu_arch_version(void) return read_cpuid_id() & ARM_CPU_XSCALE_ARCH_MASK; } -static inline unsigned int __attribute_const__ read_cpuid_cachetype(void) -{ - return read_cpuid(CPUID_CACHETYPE); -} - static inline unsigned int __attribute_const__ read_cpuid_tcmstatus(void) { return read_cpuid(CPUID_TCM); } diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index d7568808eb7b..34e3f3c45634 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -312,11 +312,12 @@ static void __init cacheid_init(void) { unsigned int arch = cpu_architecture(); - if (arch == CPU_ARCH_ARMv7M) { - cacheid = 0; - } else if (arch >= CPU_ARCH_ARMv6) { + if (arch >= CPU_ARCH_ARMv6) { unsigned int cachetype = read_cpuid_cachetype(); + + if ((arch == CPU_ARCH_ARMv7M) && !cachetype) { + cacheid = 0; + } else if ((cachetype & (7 << 29)) == 4 << 29) { /* ARMv7 register format */ arch = CPU_ARCH_ARMv7; cacheid = CACHEID_VIPT_NONALIASING; -- cgit v1.2.3 From b2bf482a5099264fb75936b5b552cdf3c247c93a Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Tue, 30 Aug 2016 17:28:43 +0100 Subject: ARM: 8605/1: V7M: fix notrace variant of save_and_disable_irqs
Commit 8e43a905 "ARM: 7325/1: fix v7 boot with lockdep enabled" introduced a notrace variant of save_and_disable_irqs to balance the notrace variant of restore_irqs; however, the V7M case was missed. It went unnoticed because cache-v7.S is the only place where the notrace variant is used. So fix it, since we are going to extend the V7 cache routines to handle the V7M case too. Signed-off-by: Vladimir Murzin Tested-by: Andras Szemzo Tested-by: Joachim Eastwood Tested-by: Alexandre TORGUE Signed-off-by: Russell King --- arch/arm/include/asm/assembler.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 4eaea2173bf8..68b06f9c65de 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -159,7 +159,11 @@ .endm .macro save_and_disable_irqs_notrace, oldcpsr +#ifdef CONFIG_CPU_V7M + mrs \oldcpsr, primask +#else mrs \oldcpsr, cpsr +#endif disable_irq_notrace .endm -- cgit v1.2.3 From bc0ee9d24ad21a5c2b5944f66623a02e9c8831aa Mon Sep 17 00:00:00 2001 From: Jonathan Austin Date: Tue, 30 Aug 2016 17:31:22 +0100 Subject: ARM: 8607/1: V7M: Wire up caches for V7M processors with cache support. This patch does the plumbing required to invoke the V7M cache code added in earlier patches in this series, although there are no users for it yet. In order to honour the I/D cache disable config options, this patch changes the mechanism by which the CCR is set on boot, to be more like V7A/R. Signed-off-by: Jonathan Austin Signed-off-by: Vladimir Murzin Tested-by: Andras Szemzo Tested-by: Joachim Eastwood Tested-by: Alexandre TORGUE Signed-off-by: Russell King --- arch/arm/include/asm/glue-cache.h | 4 ---- arch/arm/kernel/head-nommu.S | 16 +++++++++++++++- arch/arm/mm/Kconfig | 10 +++++++--- arch/arm/mm/Makefile | 2 ++ arch/arm/mm/proc-v7m.S | 5 ++--- 5 files changed, 26 insertions(+), 11 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/glue-cache.h b/arch/arm/include/asm/glue-cache.h index cab07f69382d..01c3d92624e5 100644 --- a/arch/arm/include/asm/glue-cache.h +++ b/arch/arm/include/asm/glue-cache.h @@ -118,11 +118,7 @@ #endif #if defined(CONFIG_CPU_V7M) -# ifdef _CACHE # define MULTI_CACHE 1 -# else -# define _CACHE nop -# endif #endif #if !defined(_CACHE) && !defined(MULTI_CACHE) diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index fb1a69eb49c1..6b4eb27b8758 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -158,7 +158,21 @@ __after_proc_init: bic r0, r0, #CR_V #endif mcr p15, 0, r0, c1, c0, 0 @ write control reg -#endif /* CONFIG_CPU_CP15 */ +#elif defined (CONFIG_CPU_V7M) + /* For V7M systems we want to modify the CCR similarly to the SCTLR */ +#ifdef CONFIG_CPU_DCACHE_DISABLE + bic r0, r0, #V7M_SCB_CCR_DC +#endif +#ifdef CONFIG_CPU_BPREDICT_DISABLE + bic r0, r0, #V7M_SCB_CCR_BP +#endif +#ifdef CONFIG_CPU_ICACHE_DISABLE + bic r0, r0, #V7M_SCB_CCR_IC +#endif + movw r3, #:lower16:(BASEADDR_V7M_SCB + V7M_SCB_CCR) + movt r3, #:upper16:(BASEADDR_V7M_SCB + V7M_SCB_CCR) + str r0, [r3] +#endif /* CONFIG_CPU_CP15 elif CONFIG_CPU_V7M */ ret lr ENDPROC(__after_proc_init) .ltorg diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index d15a7fe51618..e613122e5e1b 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -403,6 +403,7 @@ config CPU_V7M bool select CPU_32v7M select CPU_ABRT_NOMMU + select CPU_CACHE_V7M select CPU_CACHE_NOP select CPU_PABRT_LEGACY select CPU_THUMBONLY @@ -518,6 +519,9 @@ config
CPU_CACHE_VIPT config CPU_CACHE_FA bool +config CPU_CACHE_V7M + bool + if MMU # The copy-page model config CPU_COPY_V4WT @@ -750,14 +754,14 @@ config CPU_HIGH_VECTOR config CPU_ICACHE_DISABLE bool "Disable I-Cache (I-bit)" - depends on CPU_CP15 && !(CPU_ARM720T || CPU_ARM740T || CPU_XSCALE || CPU_XSC3) + depends on (CPU_CP15 && !(CPU_ARM720T || CPU_ARM740T || CPU_XSCALE || CPU_XSC3)) || CPU_V7M help Say Y here to disable the processor instruction cache. Unless you have a reason not to or are unsure, say N. config CPU_DCACHE_DISABLE bool "Disable D-Cache (C-bit)" - depends on CPU_CP15 && !SMP + depends on (CPU_CP15 && !SMP) || CPU_V7M help Say Y here to disable the processor data cache. Unless you have a reason not to or are unsure, say N. @@ -792,7 +796,7 @@ config CPU_CACHE_ROUND_ROBIN config CPU_BPREDICT_DISABLE bool "Disable branch prediction" - depends on CPU_ARM1020 || CPU_V6 || CPU_V6K || CPU_MOHAWK || CPU_XSC3 || CPU_V7 || CPU_FA526 + depends on CPU_ARM1020 || CPU_V6 || CPU_V6K || CPU_MOHAWK || CPU_XSC3 || CPU_V7 || CPU_FA526 || CPU_V7M help Say Y here to disable branch prediction. If unsure, say N. diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 7f76d96ce546..e75abaeb16db 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -43,9 +43,11 @@ obj-$(CONFIG_CPU_CACHE_V6) += cache-v6.o obj-$(CONFIG_CPU_CACHE_V7) += cache-v7.o obj-$(CONFIG_CPU_CACHE_FA) += cache-fa.o obj-$(CONFIG_CPU_CACHE_NOP) += cache-nop.o +obj-$(CONFIG_CPU_CACHE_V7M) += cache-v7m.o AFLAGS_cache-v6.o :=-Wa,-march=armv6 AFLAGS_cache-v7.o :=-Wa,-march=armv7-a +AFLAGS_cache-v7m.o :=-Wa,-march=armv7-m obj-$(CONFIG_CPU_COPY_V4WT) += copypage-v4wt.o obj-$(CONFIG_CPU_COPY_V4WB) += copypage-v4wb.o diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S index 7229d8d0be1a..11f5816e2680 100644 --- a/arch/arm/mm/proc-v7m.S +++ b/arch/arm/mm/proc-v7m.S @@ -118,9 +118,8 @@ __v7m_setup: @ Configure the System Control Register to ensure 8-byte stack alignment @ Note the STKALIGN bit is either RW or RAO. - ldr r12, [r0, V7M_SCB_CCR] @ system control register - orr r12, #V7M_SCB_CCR_STKALIGN - str r12, [r0, V7M_SCB_CCR] + ldr r0, [r0, V7M_SCB_CCR] @ system control register + orr r0, #V7M_SCB_CCR_STKALIGN ret lr ENDPROC(__v7m_setup) -- cgit v1.2.3 From b828f960215f02e5d2c88bbd27565c694254a15a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 2 Sep 2016 10:35:18 +0100 Subject: ARM: 8611/1: l2x0: add PMU support The L2C-220 (AKA L220) and L2C-310 (AKA PL310) cache controllers feature a Performance Monitoring Unit (PMU), which can be useful for tuning and/or debugging. This hardware is always present and the relevant registers are accessible to non-secure accesses. Thus, no special firmware interface is necessary. This patch adds support for the PMU, plugging into the usual perf infrastructure. The overflow interrupt is not always available (e.g. on RealView PBX A9 it is not wired up at all), and the hardware counters saturate, so the driver does not make use of this. Instead, the driver periodically polls and reset counters as required to avoid losing events due to saturation. 
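Once registered, the PMU shows up under /sys/bus/event_source/devices/ and ordinary tooling works, e.g. "perf stat -a -e l2c_310/drhit/". A hedged user-space sketch using the raw syscall follows; the type value 8 is a placeholder that must be read from the PMU's sysfs "type" file at runtime, and config 0x2 is the "drhit" event from the table in this patch:

	#include <linux/perf_event.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		struct perf_event_attr attr;
		uint64_t count;
		int fd;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = 8;		/* placeholder: contents of .../l2c_310/type */
		attr.config = 0x2;	/* drhit: data read hits */

		/* uncore-style PMU: open system-wide on the CPU named in "cpumask" */
		fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
		if (fd < 0) {
			perror("perf_event_open");
			return 1;
		}
		sleep(1);
		if (read(fd, &count, sizeof(count)) != sizeof(count))
			return 1;
		printf("drhit: %llu\n", (unsigned long long)count);
		return 0;
	}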
Signed-off-by: Mark Rutland Acked-by: Pawel Moll Tested-by: Kim Phillips Cc: Russell King Cc: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/hardware/cache-l2x0.h | 19 + arch/arm/mm/Kconfig | 7 + arch/arm/mm/Makefile | 1 + arch/arm/mm/cache-l2x0-pmu.c | 584 +++++++++++++++++++++++++++++ arch/arm/mm/cache-l2x0.c | 6 + include/linux/cpuhotplug.h | 1 + 6 files changed, 618 insertions(+) create mode 100644 arch/arm/mm/cache-l2x0-pmu.c (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h index 3a5ec1c25659..736292b42fca 100644 --- a/arch/arm/include/asm/hardware/cache-l2x0.h +++ b/arch/arm/include/asm/hardware/cache-l2x0.h @@ -87,6 +87,15 @@ #define L310_CACHE_ID_RTL_R3P2 0x08 #define L310_CACHE_ID_RTL_R3P3 0x09 +#define L2X0_EVENT_CNT_CTRL_ENABLE BIT(0) + +#define L2X0_EVENT_CNT_CFG_SRC_SHIFT 2 +#define L2X0_EVENT_CNT_CFG_SRC_MASK 0xf +#define L2X0_EVENT_CNT_CFG_SRC_DISABLED 0 +#define L2X0_EVENT_CNT_CFG_INT_DISABLED 0 +#define L2X0_EVENT_CNT_CFG_INT_INCR 1 +#define L2X0_EVENT_CNT_CFG_INT_OVERFLOW 2 + /* L2C auxiliary control register - bits common to L2C-210/220/310 */ #define L2C_AUX_CTRL_WAY_SIZE_SHIFT 17 #define L2C_AUX_CTRL_WAY_SIZE_MASK (7 << 17) @@ -157,6 +166,16 @@ static inline int l2x0_of_init(u32 aux_val, u32 aux_mask) } #endif +#ifdef CONFIG_CACHE_L2X0_PMU +void l2x0_pmu_register(void __iomem *base, u32 part); +void l2x0_pmu_suspend(void); +void l2x0_pmu_resume(void); +#else +static inline void l2x0_pmu_register(void __iomem *base, u32 part) {} +static inline void l2x0_pmu_suspend(void) {} +static inline void l2x0_pmu_resume(void) {} +#endif + struct l2x0_regs { unsigned long phy_base; unsigned long aux_ctrl; diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index e613122e5e1b..c1799dd1d0d9 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -920,6 +920,13 @@ config CACHE_L2X0 help This option enables the L2x0 PrimeCell. +config CACHE_L2X0_PMU + bool "L2x0 performance monitor support" if CACHE_L2X0 + depends on PERF_EVENTS + help + This option enables support for the performance monitoring features + of the L220 and PL310 outer cache controllers. + if CACHE_L2X0 config PL310_ERRATA_588369 diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index e75abaeb16db..e8698241ece9 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -103,6 +103,7 @@ AFLAGS_proc-v7.o :=-Wa,-march=armv7-a obj-$(CONFIG_OUTER_CACHE) += l2c-common.o obj-$(CONFIG_CACHE_FEROCEON_L2) += cache-feroceon-l2.o obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o l2c-l2x0-resume.o +obj-$(CONFIG_CACHE_L2X0_PMU) += cache-l2x0-pmu.o obj-$(CONFIG_CACHE_XSC3L2) += cache-xsc3l2.o obj-$(CONFIG_CACHE_TAUROS2) += cache-tauros2.o obj-$(CONFIG_CACHE_UNIPHIER) += cache-uniphier.o diff --git a/arch/arm/mm/cache-l2x0-pmu.c b/arch/arm/mm/cache-l2x0-pmu.c new file mode 100644 index 000000000000..976d3057272e --- /dev/null +++ b/arch/arm/mm/cache-l2x0-pmu.c @@ -0,0 +1,584 @@ +/* + * L220/L310 cache controller support + * + * Copyright (C) 2016 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define PMU_NR_COUNTERS 2 + +static void __iomem *l2x0_base; +static struct pmu *l2x0_pmu; +static cpumask_t pmu_cpu; + +static const char *l2x0_name; + +static ktime_t l2x0_pmu_poll_period; +static struct hrtimer l2x0_pmu_hrtimer; + +/* + * The L220/PL310 PMU has two equivalent counters, Counter1 and Counter0. + * Registers controlling these are laid out in pairs, in descending order, i.e. + * the register for Counter1 comes first, followed by the register for + * Counter0. + * We ensure that idx 0 -> Counter0, and idx1 -> Counter1. + */ +static struct perf_event *events[PMU_NR_COUNTERS]; + +/* Find an unused counter */ +static int l2x0_pmu_find_idx(void) +{ + int i; + + for (i = 0; i < PMU_NR_COUNTERS; i++) { + if (!events[i]) + return i; + } + + return -1; +} + +/* How many counters are allocated? */ +static int l2x0_pmu_num_active_counters(void) +{ + int i, cnt = 0; + + for (i = 0; i < PMU_NR_COUNTERS; i++) { + if (events[i]) + cnt++; + } + + return cnt; +} + +static void l2x0_pmu_counter_config_write(int idx, u32 val) +{ + writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT0_CFG - 4 * idx); +} + +static u32 l2x0_pmu_counter_read(int idx) +{ + return readl_relaxed(l2x0_base + L2X0_EVENT_CNT0_VAL - 4 * idx); +} + +static void l2x0_pmu_counter_write(int idx, u32 val) +{ + writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT0_VAL - 4 * idx); +} + +static void __l2x0_pmu_enable(void) +{ + u32 val = readl_relaxed(l2x0_base + L2X0_EVENT_CNT_CTRL); + val |= L2X0_EVENT_CNT_CTRL_ENABLE; + writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT_CTRL); +} + +static void __l2x0_pmu_disable(void) +{ + u32 val = readl_relaxed(l2x0_base + L2X0_EVENT_CNT_CTRL); + val &= ~L2X0_EVENT_CNT_CTRL_ENABLE; + writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT_CTRL); +} + +static void l2x0_pmu_enable(struct pmu *pmu) +{ + if (l2x0_pmu_num_active_counters() == 0) + return; + + __l2x0_pmu_enable(); +} + +static void l2x0_pmu_disable(struct pmu *pmu) +{ + if (l2x0_pmu_num_active_counters() == 0) + return; + + __l2x0_pmu_disable(); +} + +static void warn_if_saturated(u32 count) +{ + if (count != 0xffffffff) + return; + + pr_warn_ratelimited("L2X0 counter saturated. Poll period too long\n"); +} + +static void l2x0_pmu_event_read(struct perf_event *event) +{ + struct hw_perf_event *hw = &event->hw; + u64 prev_count, new_count, mask; + + do { + prev_count = local64_read(&hw->prev_count); + new_count = l2x0_pmu_counter_read(hw->idx); + } while (local64_xchg(&hw->prev_count, new_count) != prev_count); + + mask = GENMASK_ULL(31, 0); + local64_add((new_count - prev_count) & mask, &event->count); + + warn_if_saturated(new_count); +} + +static void l2x0_pmu_event_configure(struct perf_event *event) +{ + struct hw_perf_event *hw = &event->hw; + + /* + * The L2X0 counters saturate at 0xffffffff rather than wrapping, so we + * will *always* lose some number of events when a counter saturates, + * and have no way of detecting how many were lost. + * + * To minimize the impact of this, we try to maximize the period by + * always starting counters at zero. To ensure that group ratios are + * representative, we poll periodically to avoid counters saturating. + * See l2x0_pmu_poll(). 
+ */ + local64_set(&hw->prev_count, 0); + l2x0_pmu_counter_write(hw->idx, 0); +} + +static enum hrtimer_restart l2x0_pmu_poll(struct hrtimer *hrtimer) +{ + unsigned long flags; + int i; + + local_irq_save(flags); + __l2x0_pmu_disable(); + + for (i = 0; i < PMU_NR_COUNTERS; i++) { + struct perf_event *event = events[i]; + + if (!event) + continue; + + l2x0_pmu_event_read(event); + l2x0_pmu_event_configure(event); + } + + __l2x0_pmu_enable(); + local_irq_restore(flags); + + hrtimer_forward_now(hrtimer, l2x0_pmu_poll_period); + return HRTIMER_RESTART; +} + + +static void __l2x0_pmu_event_enable(int idx, u32 event) +{ + u32 val; + + val = event << L2X0_EVENT_CNT_CFG_SRC_SHIFT; + val |= L2X0_EVENT_CNT_CFG_INT_DISABLED; + l2x0_pmu_counter_config_write(idx, val); +} + +static void l2x0_pmu_event_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hw = &event->hw; + + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(hw->state & PERF_HES_UPTODATE)); + l2x0_pmu_event_configure(event); + } + + hw->state = 0; + + __l2x0_pmu_event_enable(hw->idx, hw->config_base); +} + +static void __l2x0_pmu_event_disable(int idx) +{ + u32 val; + + val = L2X0_EVENT_CNT_CFG_SRC_DISABLED << L2X0_EVENT_CNT_CFG_SRC_SHIFT; + val |= L2X0_EVENT_CNT_CFG_INT_DISABLED; + l2x0_pmu_counter_config_write(idx, val); +} + +static void l2x0_pmu_event_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hw = &event->hw; + + if (WARN_ON_ONCE(event->hw.state & PERF_HES_STOPPED)) + return; + + __l2x0_pmu_event_disable(hw->idx); + + hw->state |= PERF_HES_STOPPED; + + if (flags & PERF_EF_UPDATE) { + l2x0_pmu_event_read(event); + hw->state |= PERF_HES_UPTODATE; + } +} + +static int l2x0_pmu_event_add(struct perf_event *event, int flags) +{ + struct hw_perf_event *hw = &event->hw; + int idx = l2x0_pmu_find_idx(); + + if (idx == -1) + return -EAGAIN; + + /* + * Pin the timer, so that the overflows are handled by the chosen + * event->cpu (this is the same one as presented in "cpumask" + * attribute). 
+ */ + if (l2x0_pmu_num_active_counters() == 0) + hrtimer_start(&l2x0_pmu_hrtimer, l2x0_pmu_poll_period, + HRTIMER_MODE_REL_PINNED); + + events[idx] = event; + hw->idx = idx; + + l2x0_pmu_event_configure(event); + + hw->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + + if (flags & PERF_EF_START) + l2x0_pmu_event_start(event, 0); + + return 0; +} + +static void l2x0_pmu_event_del(struct perf_event *event, int flags) +{ + struct hw_perf_event *hw = &event->hw; + + l2x0_pmu_event_stop(event, PERF_EF_UPDATE); + + events[hw->idx] = NULL; + hw->idx = -1; + + if (l2x0_pmu_num_active_counters() == 0) + hrtimer_cancel(&l2x0_pmu_hrtimer); +} + +static bool l2x0_pmu_group_is_valid(struct perf_event *event) +{ + struct pmu *pmu = event->pmu; + struct perf_event *leader = event->group_leader; + struct perf_event *sibling; + int num_hw = 0; + + if (leader->pmu == pmu) + num_hw++; + else if (!is_software_event(leader)) + return false; + + list_for_each_entry(sibling, &leader->sibling_list, group_entry) { + if (sibling->pmu == pmu) + num_hw++; + else if (!is_software_event(sibling)) + return false; + } + + return num_hw <= PMU_NR_COUNTERS; +} + +static int l2x0_pmu_event_init(struct perf_event *event) +{ + struct hw_perf_event *hw = &event->hw; + + if (event->attr.type != l2x0_pmu->type) + return -ENOENT; + + if (is_sampling_event(event) || + event->attach_state & PERF_ATTACH_TASK) + return -EINVAL; + + if (event->attr.exclude_user || + event->attr.exclude_kernel || + event->attr.exclude_hv || + event->attr.exclude_idle || + event->attr.exclude_host || + event->attr.exclude_guest) + return -EINVAL; + + if (event->cpu < 0) + return -EINVAL; + + if (event->attr.config & ~L2X0_EVENT_CNT_CFG_SRC_MASK) + return -EINVAL; + + hw->config_base = event->attr.config; + + if (!l2x0_pmu_group_is_valid(event)) + return -EINVAL; + + event->cpu = cpumask_first(&pmu_cpu); + + return 0; +} + +struct l2x0_event_attribute { + struct device_attribute attr; + unsigned int config; + bool pl310_only; +}; + +#define L2X0_EVENT_ATTR(_name, _config, _pl310_only) \ + (&((struct l2x0_event_attribute[]) {{ \ + .attr = __ATTR(_name, S_IRUGO, l2x0_pmu_event_show, NULL), \ + .config = _config, \ + .pl310_only = _pl310_only, \ + }})[0].attr.attr) + +#define L220_PLUS_EVENT_ATTR(_name, _config) \ + L2X0_EVENT_ATTR(_name, _config, false) + +#define PL310_EVENT_ATTR(_name, _config) \ + L2X0_EVENT_ATTR(_name, _config, true) + +static ssize_t l2x0_pmu_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct l2x0_event_attribute *lattr; + + lattr = container_of(attr, typeof(*lattr), attr); + return snprintf(buf, PAGE_SIZE, "config=0x%x\n", lattr->config); +} + +static umode_t l2x0_pmu_event_attr_is_visible(struct kobject *kobj, + struct attribute *attr, + int unused) +{ + struct device *dev = kobj_to_dev(kobj); + struct pmu *pmu = dev_get_drvdata(dev); + struct l2x0_event_attribute *lattr; + + lattr = container_of(attr, typeof(*lattr), attr.attr); + + if (!lattr->pl310_only || strcmp("l2c_310", pmu->name) == 0) + return attr->mode; + + return 0; +} + +static struct attribute *l2x0_pmu_event_attrs[] = { + L220_PLUS_EVENT_ATTR(co, 0x1), + L220_PLUS_EVENT_ATTR(drhit, 0x2), + L220_PLUS_EVENT_ATTR(drreq, 0x3), + L220_PLUS_EVENT_ATTR(dwhit, 0x4), + L220_PLUS_EVENT_ATTR(dwreq, 0x5), + L220_PLUS_EVENT_ATTR(dwtreq, 0x6), + L220_PLUS_EVENT_ATTR(irhit, 0x7), + L220_PLUS_EVENT_ATTR(irreq, 0x8), + L220_PLUS_EVENT_ATTR(wa, 0x9), + PL310_EVENT_ATTR(ipfalloc, 0xa), + PL310_EVENT_ATTR(epfhit, 0xb), + PL310_EVENT_ATTR(epfalloc, 
0xc), + PL310_EVENT_ATTR(srrcvd, 0xd), + PL310_EVENT_ATTR(srconf, 0xe), + PL310_EVENT_ATTR(epfrcvd, 0xf), + NULL +}; + +static struct attribute_group l2x0_pmu_event_attrs_group = { + .name = "events", + .attrs = l2x0_pmu_event_attrs, + .is_visible = l2x0_pmu_event_attr_is_visible, +}; + +static ssize_t l2x0_pmu_cpumask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return cpumap_print_to_pagebuf(true, buf, &pmu_cpu); +} + +static struct device_attribute l2x0_pmu_cpumask_attr = + __ATTR(cpumask, S_IRUGO, l2x0_pmu_cpumask_show, NULL); + +static struct attribute *l2x0_pmu_cpumask_attrs[] = { + &l2x0_pmu_cpumask_attr.attr, + NULL, +}; + +static struct attribute_group l2x0_pmu_cpumask_attr_group = { + .attrs = l2x0_pmu_cpumask_attrs, +}; + +static const struct attribute_group *l2x0_pmu_attr_groups[] = { + &l2x0_pmu_event_attrs_group, + &l2x0_pmu_cpumask_attr_group, + NULL, +}; + +static void l2x0_pmu_reset(void) +{ + int i; + + __l2x0_pmu_disable(); + + for (i = 0; i < PMU_NR_COUNTERS; i++) + __l2x0_pmu_event_disable(i); +} + +static int l2x0_pmu_offline_cpu(unsigned int cpu) +{ + unsigned int target; + + if (!cpumask_test_and_clear_cpu(cpu, &pmu_cpu)) + return 0; + + target = cpumask_any_but(cpu_online_mask, cpu); + if (target >= nr_cpu_ids) + return 0; + + perf_pmu_migrate_context(l2x0_pmu, cpu, target); + cpumask_set_cpu(target, &pmu_cpu); + + return 0; +} + +void l2x0_pmu_suspend(void) +{ + int i; + + if (!l2x0_pmu) + return; + + l2x0_pmu_disable(l2x0_pmu); + + for (i = 0; i < PMU_NR_COUNTERS; i++) { + if (events[i]) + l2x0_pmu_event_stop(events[i], PERF_EF_UPDATE); + } + +} + +void l2x0_pmu_resume(void) +{ + int i; + + if (!l2x0_pmu) + return; + + l2x0_pmu_reset(); + + for (i = 0; i < PMU_NR_COUNTERS; i++) { + if (events[i]) + l2x0_pmu_event_start(events[i], PERF_EF_RELOAD); + } + + l2x0_pmu_enable(l2x0_pmu); +} + +void __init l2x0_pmu_register(void __iomem *base, u32 part) +{ + /* + * Determine whether we support the PMU, and choose the name for sysfs. + * This is also used by l2x0_pmu_event_attr_is_visible to determine + * which events to display, as the PL310 PMU supports a superset of + * L220 events. + * + * The L210 PMU has a different programmer's interface, and is not + * supported by this driver. + * + * We must defer registering the PMU until the perf subsystem is up and + * running, so just stash the name and base, and leave that to another + * initcall. + */ + switch (part & L2X0_CACHE_ID_PART_MASK) { + case L2X0_CACHE_ID_PART_L220: + l2x0_name = "l2c_220"; + break; + case L2X0_CACHE_ID_PART_L310: + l2x0_name = "l2c_310"; + break; + default: + return; + } + + l2x0_base = base; +} + +static __init int l2x0_pmu_init(void) +{ + int ret; + + if (!l2x0_base) + return 0; + + l2x0_pmu = kzalloc(sizeof(*l2x0_pmu), GFP_KERNEL); + if (!l2x0_pmu) { + pr_warn("Unable to allocate L2x0 PMU\n"); + return -ENOMEM; + } + + *l2x0_pmu = (struct pmu) { + .task_ctx_nr = perf_invalid_context, + .pmu_enable = l2x0_pmu_enable, + .pmu_disable = l2x0_pmu_disable, + .read = l2x0_pmu_event_read, + .start = l2x0_pmu_event_start, + .stop = l2x0_pmu_event_stop, + .add = l2x0_pmu_event_add, + .del = l2x0_pmu_event_del, + .event_init = l2x0_pmu_event_init, + .attr_groups = l2x0_pmu_attr_groups, + }; + + l2x0_pmu_reset(); + + /* + * We always use a hrtimer rather than an interrupt. + * See comments in l2x0_pmu_event_configure and l2x0_pmu_poll. + * + * Polling once a second allows the counters to fill up to 1/128th on a + * quad-core test chip with cores clocked at 400MHz. 
Hopefully this + * leaves sufficient headroom to avoid overflow on production silicon + * at higher frequencies. + */ + l2x0_pmu_poll_period = ms_to_ktime(1000); + hrtimer_init(&l2x0_pmu_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + l2x0_pmu_hrtimer.function = l2x0_pmu_poll; + + cpumask_set_cpu(0, &pmu_cpu); + ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_L2X0_ONLINE, + "AP_PERF_ARM_L2X0_ONLINE", NULL, + l2x0_pmu_offline_cpu); + if (ret) + goto out_pmu; + + ret = perf_pmu_register(l2x0_pmu, l2x0_name, -1); + if (ret) + goto out_cpuhp; + + return 0; + +out_cpuhp: + cpuhp_remove_state_nocalls(CPUHP_AP_PERF_ARM_L2X0_ONLINE); +out_pmu: + kfree(l2x0_pmu); + l2x0_pmu = NULL; + return ret; +} +device_initcall(l2x0_pmu_init); diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index ca5595fa072a..d1870c777c6e 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -142,6 +142,8 @@ static void l2c_disable(void) { void __iomem *base = l2x0_base; + l2x0_pmu_suspend(); + outer_cache.flush_all(); l2c_write_sec(0, base, L2X0_CTRL); dsb(st); @@ -159,6 +161,8 @@ static void l2c_resume(void) /* Do not touch the controller if already enabled. */ if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) l2c_enable(base, l2x0_data->num_lock); + + l2x0_pmu_resume(); } /* @@ -891,6 +895,8 @@ static int __init __l2c_init(const struct l2c_init_data *data, pr_info("%s: CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n", data->type, cache_id, aux); + l2x0_pmu_register(l2x0_base, cache_id); + return 0; } diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 242bf530edfc..7e1ba14a3d78 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -86,6 +86,7 @@ enum cpuhp_state { CPUHP_AP_PERF_S390_SF_ONLINE, CPUHP_AP_PERF_ARM_CCI_ONLINE, CPUHP_AP_PERF_ARM_CCN_ONLINE, + CPUHP_AP_PERF_ARM_L2X0_ONLINE, CPUHP_AP_WORKQUEUE_ONLINE, CPUHP_AP_RCUTREE_ONLINE, CPUHP_AP_NOTIFY_ONLINE, -- cgit v1.2.3 From cf6e4ca3e53f734f8fcb253a686c1fdbb6a50e85 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 6 Sep 2016 00:45:33 +0100 Subject: ARM: sa1111: add sa1111_get_irq() Add a helper function to get the irq number for a device. Signed-off-by: Russell King --- arch/arm/common/sa1111.c | 8 ++++++++ arch/arm/include/asm/hardware/sa1111.h | 2 ++ 2 files changed, 10 insertions(+) (limited to 'arch/arm/include') diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index 7f232ce50944..3c722a4143fe 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -1357,6 +1357,14 @@ void sa1111_disable_device(struct sa1111_dev *sadev) } EXPORT_SYMBOL(sa1111_disable_device); +int sa1111_get_irq(struct sa1111_dev *sadev, unsigned num) +{ + if (num >= ARRAY_SIZE(sadev->irq)) + return -EINVAL; + return sadev->irq[num]; +} +EXPORT_SYMBOL_GPL(sa1111_get_irq); + /* * SA1111 "Register Access Bus." 
* diff --git a/arch/arm/include/asm/hardware/sa1111.h b/arch/arm/include/asm/hardware/sa1111.h index 7c2bbc7f0be1..267e43b5225f 100644 --- a/arch/arm/include/asm/hardware/sa1111.h +++ b/arch/arm/include/asm/hardware/sa1111.h @@ -446,6 +446,8 @@ struct sa1111_driver { int sa1111_enable_device(struct sa1111_dev *); void sa1111_disable_device(struct sa1111_dev *); +int sa1111_get_irq(struct sa1111_dev *, unsigned num); + unsigned int sa1111_pll_clock(struct sa1111_dev *); #define SA1111_AUDIO_ACLINK 0 -- cgit v1.2.3 From b60752f2b20c167859943e001727f0d4da419b23 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 6 Sep 2016 13:46:46 +0100 Subject: ARM: sa1111: provide to_sa1111_device() macro Provide a nicer to_sa1111_device macro to convert a struct device to a sa1111_dev. We will need this for drivers when converting them to dev_pm_ops, or removing shutdown methods. Signed-off-by: Russell King --- arch/arm/common/sa1111.c | 18 +++++++++--------- arch/arm/include/asm/hardware/sa1111.h | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index 3c722a4143fe..4ecd5120fce7 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -691,7 +691,7 @@ sa1111_configure_smc(struct sa1111 *sachip, int sdram, unsigned int drac, static void sa1111_dev_release(struct device *_dev) { - struct sa1111_dev *dev = SA1111_DEV(_dev); + struct sa1111_dev *dev = to_sa1111_device(_dev); kfree(dev); } @@ -899,7 +899,7 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq) static int sa1111_remove_one(struct device *dev, void *data) { - struct sa1111_dev *sadev = SA1111_DEV(dev); + struct sa1111_dev *sadev = to_sa1111_device(dev); if (dev->bus != &sa1111_bus_type) return 0; device_del(&sadev->dev); @@ -1373,7 +1373,7 @@ EXPORT_SYMBOL_GPL(sa1111_get_irq); */ static int sa1111_match(struct device *_dev, struct device_driver *_drv) { - struct sa1111_dev *dev = SA1111_DEV(_dev); + struct sa1111_dev *dev = to_sa1111_device(_dev); struct sa1111_driver *drv = SA1111_DRV(_drv); return !!(dev->devid & drv->devid); @@ -1381,7 +1381,7 @@ static int sa1111_match(struct device *_dev, struct device_driver *_drv) static int sa1111_bus_suspend(struct device *dev, pm_message_t state) { - struct sa1111_dev *sadev = SA1111_DEV(dev); + struct sa1111_dev *sadev = to_sa1111_device(dev); struct sa1111_driver *drv = SA1111_DRV(dev->driver); int ret = 0; @@ -1392,7 +1392,7 @@ static int sa1111_bus_suspend(struct device *dev, pm_message_t state) static int sa1111_bus_resume(struct device *dev) { - struct sa1111_dev *sadev = SA1111_DEV(dev); + struct sa1111_dev *sadev = to_sa1111_device(dev); struct sa1111_driver *drv = SA1111_DRV(dev->driver); int ret = 0; @@ -1406,12 +1406,12 @@ static void sa1111_bus_shutdown(struct device *dev) struct sa1111_driver *drv = SA1111_DRV(dev->driver); if (drv && drv->shutdown) - drv->shutdown(SA1111_DEV(dev)); + drv->shutdown(to_sa1111_device(dev)); } static int sa1111_bus_probe(struct device *dev) { - struct sa1111_dev *sadev = SA1111_DEV(dev); + struct sa1111_dev *sadev = to_sa1111_device(dev); struct sa1111_driver *drv = SA1111_DRV(dev->driver); int ret = -ENODEV; @@ -1422,7 +1422,7 @@ static int sa1111_bus_probe(struct device *dev) static int sa1111_bus_remove(struct device *dev) { - struct sa1111_dev *sadev = SA1111_DEV(dev); + struct sa1111_dev *sadev = to_sa1111_device(dev); struct sa1111_driver *drv = SA1111_DRV(dev->driver); int ret = 0; @@ -1487,7 +1487,7 @@ 
static int sa1111_needs_bounce(struct device *dev, dma_addr_t addr, size_t size) static int sa1111_notifier_call(struct notifier_block *n, unsigned long action, void *data) { - struct sa1111_dev *dev = SA1111_DEV(data); + struct sa1111_dev *dev = to_sa1111_device(data); switch (action) { case BUS_NOTIFY_ADD_DEVICE: diff --git a/arch/arm/include/asm/hardware/sa1111.h b/arch/arm/include/asm/hardware/sa1111.h index 267e43b5225f..8979fa3bbf2d 100644 --- a/arch/arm/include/asm/hardware/sa1111.h +++ b/arch/arm/include/asm/hardware/sa1111.h @@ -420,7 +420,7 @@ struct sa1111_dev { u64 dma_mask; }; -#define SA1111_DEV(_d) container_of((_d), struct sa1111_dev, dev) +#define to_sa1111_device(x) container_of(x, struct sa1111_dev, dev) #define sa1111_get_drvdata(d) dev_get_drvdata(&(d)->dev) #define sa1111_set_drvdata(d,p) dev_set_drvdata(&(d)->dev, p) -- cgit v1.2.3
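As a closing illustration of where to_sa1111_device() is headed, a hypothetical sub-device driver converted to dev_pm_ops might use it like this (driver name and callbacks invented; sa1111_enable_device()/sa1111_disable_device() are the real bus helpers):

	/* Sketch: PM callbacks recovering the sa1111_dev from a plain struct device. */
	static int sa1111_foo_suspend(struct device *dev)
	{
		sa1111_disable_device(to_sa1111_device(dev));
		return 0;
	}

	static int sa1111_foo_resume(struct device *dev)
	{
		return sa1111_enable_device(to_sa1111_device(dev));
	}

	static SIMPLE_DEV_PM_OPS(sa1111_foo_pm_ops, sa1111_foo_suspend, sa1111_foo_resume);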