From cfd9d70a855edf6adb37d0ed88be9e35274dbe49 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 13 Nov 2014 19:27:24 +0530 Subject: ARCv2: mm: TLB Miss optim: SMP builds can cache pgd pointer in mmu scratch reg ARC700 exception (and intr handling) didn't have auto stack switching thus had to rely on stashing a reg temporarily (to free it up) at a known place in memory, allowing to code up the low level stack switching. This however was not re-entrant in SMP which thus had to repurpose the per-cpu MMU SCRATCH DATA register otherwise used to "cache" the task pgd pointer (vs. reading it from mm struct) The newer HS cores do have auto-stack switching and thus even SMP builds can use the MMU SCRATCH reg as originally intended. This patch fixes the restriction to ARC700 SMP builds only Signed-off-by: Vineet Gupta --- arch/arc/include/asm/entry-compact.h | 4 ++-- arch/arc/include/asm/mmu.h | 4 ++++ arch/arc/include/asm/mmu_context.h | 2 +- arch/arc/include/asm/pgtable.h | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h index 66a292335ee6..c3aa775878dc 100644 --- a/arch/arc/include/asm/entry-compact.h +++ b/arch/arc/include/asm/entry-compact.h @@ -130,7 +130,7 @@ * to be saved again on kernel mode stack, as part of pt_regs.
*-------------------------------------------------------------*/ .macro PROLOG_FREEUP_REG reg, mem -#ifdef CONFIG_SMP +#ifndef ARC_USE_SCRATCH_REG sr \reg, [ARC_REG_SCRATCH_DATA0] #else st \reg, [\mem] @@ -138,7 +138,7 @@ .endm .macro PROLOG_RESTORE_REG reg, mem -#ifdef CONFIG_SMP +#ifndef ARC_USE_SCRATCH_REG lr \reg, [ARC_REG_SCRATCH_DATA0] #else ld \reg, [\mem] diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index 98cadf1a09ac..0abacb82a72b 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -40,6 +40,10 @@ #define ARC_REG_SCRATCH_DATA0 0x46c #endif +#if defined(CONFIG_ISA_ARCV2) || !defined(CONFIG_SMP) +#define ARC_USE_SCRATCH_REG +#endif + /* Bits in MMU PID register */ #define __TLB_ENABLE (1 << 31) #define __PROG_ENABLE (1 << 30) diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h index 035470816be5..3a5e6a5b9ed6 100644 --- a/arch/arc/include/asm/mmu_context.h +++ b/arch/arc/include/asm/mmu_context.h @@ -144,7 +144,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, */ cpumask_set_cpu(cpu, mm_cpumask(next)); -#ifndef CONFIG_SMP +#ifdef ARC_USE_SCRATCH_REG /* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */ write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd); #endif diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 7addd0301c51..ea14a8bfc691 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -351,7 +351,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, * Thus use this macro only when you are certain that "current" is current * e.g. 
when dealing with signal frame setup code etc */ -#ifndef CONFIG_SMP +#ifdef ARC_USE_SCRATCH_REG #define pgd_offset_fast(mm, addr) \ ({ \ pgd_t *pgd_base = (pgd_t *) read_aux_reg(ARC_REG_SCRATCH_DATA0); \ -- cgit v1.2.3 From ad4c40e937f6d6a08a579c4a78206039618426b7 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 17 Nov 2015 10:10:29 +0530 Subject: ARC: mm: tlb flush optim: Make TLBWriteNI fallback to TLBWrite if not available TLBWriteNI was introduced in MMUv2 (to not invalidate uTLBs in Fast Path TLB Refill Handler). To avoid #ifdef'ery make it fallback to TLBWrite available on all MMUs. This will also help with next change Signed-off-by: Vineet Gupta --- arch/arc/include/asm/mmu.h | 2 ++ arch/arc/mm/tlbex.S | 4 ---- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/arc/include') diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index 0abacb82a72b..26b731d32a2b 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -67,6 +67,8 @@ #if (CONFIG_ARC_MMU_VER >= 2) #define TLBWriteNI 0x5 /* write JTLB without inv uTLBs */ #define TLBIVUTLB 0x6 /* explicitly inv uTLBs */ +#else +#define TLBWriteNI TLBWrite /* Not present in hardware, fallback */ #endif #if (CONFIG_ARC_MMU_VER >= 4) diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index 4c88148d4cd1..2efaf6ca0c06 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -292,11 +292,7 @@ ex_saved_reg1: sr TLBGetIndex, [ARC_REG_TLBCOMMAND] /* Commit the Write */ -#if (CONFIG_ARC_MMU_VER >= 2) /* introduced in v2 */ sr TLBWriteNI, [ARC_REG_TLBCOMMAND] -#else - sr TLBWrite, [ARC_REG_TLBCOMMAND] -#endif #else sr TLBInsertEntry, [ARC_REG_TLBCOMMAND] -- cgit v1.2.3 From f091d5a426447cc427680bdd3adc7773aa2867df Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Fri, 8 Nov 2019 19:20:22 +0300 Subject: ARC: ARCv2: jump label: implement jump label patching Implement jump label patching for ARC.
Jump labels provide an interface to generate dynamic branches using self-modifying code. This allows us to implement conditional branches where changing branch direction is expensive but branch selection is basically 'free' This implementation uses 32-bit NOP and BRANCH instructions which are forced to be aligned by 4 to guarantee that they don't cross L1 cache line boundary and can be updated atomically. Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 8 ++ arch/arc/include/asm/cache.h | 2 + arch/arc/include/asm/jump_label.h | 72 ++++++++++++++++ arch/arc/kernel/Makefile | 1 + arch/arc/kernel/jump_label.c | 170 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 253 insertions(+) create mode 100644 arch/arc/include/asm/jump_label.h create mode 100644 arch/arc/kernel/jump_label.c (limited to 'arch/arc/include') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 8383155c8c82..375f9d278139 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -46,6 +46,7 @@ config ARC select OF_EARLY_FLATTREE select PCI_SYSCALL if PCI select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING + select HAVE_ARCH_JUMP_LABEL if ISA_ARCV2 && !CPU_ENDIAN_BE32 config ARCH_HAS_CACHE_LINE_SIZE def_bool y @@ -525,6 +526,13 @@ config ARC_DW2_UNWIND config ARC_DBG_TLB_PARANOIA bool "Paranoia Checks in Low Level TLB Handlers" +config ARC_DBG_JUMP_LABEL + bool "Paranoid checks in Static Keys (jump labels) code" + depends on JUMP_LABEL + default y if STATIC_KEYS_SELFTEST + help + Enable paranoid checks and self-test of both ARC-specific and generic + part of static keys (jump labels) related code.
endif config ARC_BUILTIN_DTB_NAME diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index 918804c7c1a4..d8ece4292388 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -25,6 +25,8 @@ #ifndef __ASSEMBLY__ +#include <linux/build_bug.h> + /* Uncached access macros */ #define arc_read_uncached_32(ptr) \ ({ \ diff --git a/arch/arc/include/asm/jump_label.h b/arch/arc/include/asm/jump_label.h new file mode 100644 index 000000000000..9d9618079739 --- /dev/null +++ b/arch/arc/include/asm/jump_label.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ARC_JUMP_LABEL_H +#define _ASM_ARC_JUMP_LABEL_H + +#ifndef __ASSEMBLY__ + +#include <linux/stringify.h> +#include <linux/types.h> + +#define JUMP_LABEL_NOP_SIZE 4 + +/* + * NOTE about '.balign 4': + * + * To make atomic update of patched instruction available we need to guarantee + * that this instruction doesn't cross L1 cache line boundary. + * + * As of today we simply align instruction which can be patched by 4 byte using + * ".balign 4" directive. In that case patched instruction is aligned with one + * 16-bit NOP_S if this is required. + * However 'align by 4' directive is much stricter than is actually required. + * It's enough that our 32-bit instruction doesn't cross L1 cache line boundary / + * L1 I$ fetch block boundary which can be achieved by using + * ".bundle_align_mode" assembler directive. That will save us from adding + * useless NOP_S padding in most of the cases. + * + * TODO: switch to ".bundle_align_mode" directive when it is + * supported by ARC toolchain.
+ */ + +static __always_inline bool arch_static_branch(struct static_key *key, + bool branch) +{ + asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" + "1: \n" + "nop \n" + ".pushsection __jump_table, \"aw\" \n" + ".word 1b, %l[l_yes], %c0 \n" + ".popsection \n" + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, + bool branch) +{ + asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" + "1: \n" + "b %l[l_yes] \n" + ".pushsection __jump_table, \"aw\" \n" + ".word 1b, %l[l_yes], %c0 \n" + ".popsection \n" + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +typedef u32 jump_label_t; + +struct jump_entry { + jump_label_t code; + jump_label_t target; + jump_label_t key; +}; + +#endif /* __ASSEMBLY__ */ +#endif diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile index de6251132310..e784f5396dda 100644 --- a/arch/arc/kernel/Makefile +++ b/arch/arc/kernel/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_ARC_EMUL_UNALIGNED) += unaligned.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_ARC_METAWARE_HLINK) += arc_hostlink.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o +obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_ARC_FPU_SAVE_RESTORE) += fpu.o CFLAGS_fpu.o += -mdpfp diff --git a/arch/arc/kernel/jump_label.c b/arch/arc/kernel/jump_label.c new file mode 100644 index 000000000000..b8600dc325b5 --- /dev/null +++ b/arch/arc/kernel/jump_label.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/kernel.h> +#include <linux/jump_label.h> + +#include "asm/cacheflush.h" + +#define JUMPLABEL_ERR "ARC: jump_label: ERROR: " + +/* Halt system on fatal error to make debug easier */ +#define arc_jl_fatal(format...)
\ +({ \ + pr_err(JUMPLABEL_ERR format); \ + BUG(); \ +}) + +static inline u32 arc_gen_nop(void) +{ + /* 1x 32bit NOP in middle endian */ + return 0x7000264a; +} + +/* + * Atomic update of patched instruction is only available if this + * instruction doesn't cross L1 cache line boundary. You can read about + * the way we achieve this in arch/arc/include/asm/jump_label.h + */ +static inline void instruction_align_assert(void *addr, int len) +{ + unsigned long a = (unsigned long)addr; + + if ((a >> L1_CACHE_SHIFT) != ((a + len - 1) >> L1_CACHE_SHIFT)) + arc_jl_fatal("instruction (addr %px) cross L1 cache line border", + addr); +} + +/* + * ARCv2 'Branch unconditionally' instruction: + * 00000ssssssssss1SSSSSSSSSSNRtttt + * s S[n:0] lower bits signed immediate (number is bitfield size) + * S S[m:n+1] upper bits signed immediate (number is bitfield size) + * t S[24:21] upper bits signed immediate (branch unconditionally far) + * N N <.d> delay slot mode + * R R Reserved + */ +static inline u32 arc_gen_branch(jump_label_t pc, jump_label_t target) +{ + u32 instruction_l, instruction_r; + u32 pcl = pc & GENMASK(31, 2); + u32 u_offset = target - pcl; + u32 s, S, t; + + /* + * Offset in 32-bit branch instruction must fit into s25. + * Something is terribly broken if we get such huge offset within one + * function. + */ + if ((s32)u_offset < -16777216 || (s32)u_offset > 16777214) + arc_jl_fatal("gen branch with offset (%d) not fit in s25", + (s32)u_offset); + + /* + * All instructions are aligned by 2 bytes so we should never get offset + * here which is not 2 bytes aligned.
+ */ + if (u_offset & 0x1) + arc_jl_fatal("gen branch with offset (%d) unaligned to 2 bytes", + (s32)u_offset); + + s = (u_offset >> 1) & GENMASK(9, 0); + S = (u_offset >> 11) & GENMASK(9, 0); + t = (u_offset >> 21) & GENMASK(3, 0); + + /* 00000ssssssssss1 */ + instruction_l = (s << 1) | 0x1; + /* SSSSSSSSSSNRtttt */ + instruction_r = (S << 6) | t; + + return (instruction_r << 16) | (instruction_l & GENMASK(15, 0)); +} + +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + jump_label_t *instr_addr = (jump_label_t *)entry->code; + u32 instr; + + instruction_align_assert(instr_addr, JUMP_LABEL_NOP_SIZE); + + if (type == JUMP_LABEL_JMP) + instr = arc_gen_branch(entry->code, entry->target); + else + instr = arc_gen_nop(); + + WRITE_ONCE(*instr_addr, instr); + flush_icache_range(entry->code, entry->code + JUMP_LABEL_NOP_SIZE); +} + +void arch_jump_label_transform_static(struct jump_entry *entry, + enum jump_label_type type) +{ + /* + * We use only one NOP type (1x, 4 byte) in arch_static_branch, so + * there's no need to patch an identical NOP over the top of it here. + * The generic code calls 'arch_jump_label_transform' if the NOP needs + * to be replaced by a branch, so 'arch_jump_label_transform_static' is + * never called with type other than JUMP_LABEL_NOP. 
+ */ + BUG_ON(type != JUMP_LABEL_NOP); +} + +#ifdef CONFIG_ARC_DBG_JUMP_LABEL +#define SELFTEST_MSG "ARC: instruction generation self-test: " + +struct arc_gen_branch_testdata { + jump_label_t pc; + jump_label_t target_address; + u32 expected_instr; +}; + +static __init int branch_gen_test(const struct arc_gen_branch_testdata *test) +{ + u32 instr_got; + + instr_got = arc_gen_branch(test->pc, test->target_address); + if (instr_got == test->expected_instr) + return 0; + + pr_err(SELFTEST_MSG "FAIL:\n arc_gen_branch(0x%08x, 0x%08x) != 0x%08x, got 0x%08x\n", + test->pc, test->target_address, + test->expected_instr, instr_got); + + return -EFAULT; +} + +/* + * Offset field in branch instruction is not continuous. Test all + * available offset field and sign combinations. Test data is generated + * from real working code. + */ +static const struct arc_gen_branch_testdata arcgenbr_test_data[] __initconst = { + {0x90007548, 0x90007514, 0xffcf07cd}, /* tiny (-52) offs */ + {0x9000c9c0, 0x9000c782, 0xffcf05c3}, /* tiny (-574) offs */ + {0x9000cc1c, 0x9000c782, 0xffcf0367}, /* tiny (-1178) offs */ + {0x9009dce0, 0x9009d106, 0xff8f0427}, /* small (-3034) offs */ + {0x9000f5de, 0x90007d30, 0xfc0f0755}, /* big (-30892) offs */ + {0x900a2444, 0x90035f64, 0xc9cf0321}, /* huge (-443616) offs */ + {0x90007514, 0x9000752c, 0x00000019}, /* tiny (+24) offs */ + {0x9001a578, 0x9001a77a, 0x00000203}, /* tiny (+514) offs */ + {0x90031ed8, 0x90032634, 0x0000075d}, /* tiny (+1884) offs */ + {0x9008c7f2, 0x9008d3f0, 0x00400401}, /* small (+3072) offs */ + {0x9000bb38, 0x9003b340, 0x17c00009}, /* big (+194568) offs */ + {0x90008f44, 0x90578d80, 0xb7c2063d} /* huge (+5701180) offs */ +}; + +static __init int instr_gen_test(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(arcgenbr_test_data); i++) + if (branch_gen_test(&arcgenbr_test_data[i])) + return -EFAULT; + + pr_info(SELFTEST_MSG "OK\n"); + + return 0; +} +early_initcall(instr_gen_test); + +#endif /* CONFIG_ARC_DBG_JUMP_LABEL */ -- cgit 
v1.2.3