From 7d3d162bbd515070dfa4f422778276aa28f114d4 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 23 Jan 2017 19:32:23 -0800 Subject: ARC: mm: Move full_page computation into cache version agnostic wrapper This reduces code duplication in each of cache version specific handlers Signed-off-by: Vineet Gupta --- arch/arc/mm/cache.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) (limited to 'arch/arc/mm') diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 928562967f3c..18132eb56150 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -28,7 +28,7 @@ unsigned long perip_base = ARC_UNCACHED_ADDR_SPACE; /* legacy value for boot */ unsigned long perip_end = 0xFFFFFFFF; /* legacy value */ void (*_cache_line_loop_ic_fn)(phys_addr_t paddr, unsigned long vaddr, - unsigned long sz, const int cacheop); + unsigned long sz, const int op, const int full_page); void (*__dma_cache_wback_inv)(phys_addr_t start, unsigned long sz); void (*__dma_cache_inv)(phys_addr_t start, unsigned long sz); @@ -233,11 +233,10 @@ slc_chk: static inline void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr, - unsigned long sz, const int op) + unsigned long sz, const int op, const int full_page) { unsigned int aux_cmd; int num_lines; - const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE; if (op == OP_INV_IC) { aux_cmd = ARC_REG_IC_IVIL; @@ -279,11 +278,10 @@ void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr, */ static inline void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr, - unsigned long sz, const int op) + unsigned long sz, const int op, const int full_page) { unsigned int aux_cmd, aux_tag; int num_lines; - const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE; if (op == OP_INV_IC) { aux_cmd = ARC_REG_IC_IVIL; @@ -349,17 +347,16 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr, */ static inline void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr, - unsigned long sz, const int cacheop) + unsigned long sz, const int op, const int full_page) { unsigned int aux_cmd; int num_lines; - const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE; - if (cacheop == OP_INV_IC) { + if (op == OP_INV_IC) { aux_cmd = ARC_REG_IC_IVIL; } else { /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ - aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; + aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; } /* Ensure we properly floor/ceil the non-line aligned/sized requests @@ -368,7 +365,7 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr, * -@paddr will be cache-line aligned already (being page aligned) * -@sz will be integral multiple of line size (being page sized). */ - if (!full_page_op) { + if (!full_page) { sz += paddr & ~CACHE_LINE_MASK; paddr &= CACHE_LINE_MASK; } @@ -381,7 +378,7 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr, * - (and needs to be written before the lower 32 bits) */ if (is_pae40_enabled()) { - if (cacheop == OP_INV_IC) + if (op == OP_INV_IC) /* * Non aliasing I-cache in HS38, * aliasing I-cache handled in __cache_line_loop_v3() @@ -486,13 +483,14 @@ static void __dc_enable(void) static inline void __dc_line_op(phys_addr_t paddr, unsigned long vaddr, unsigned long sz, const int op) { + const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE; unsigned long flags; local_irq_save(flags); __before_dc_op(op); - __cache_line_loop(paddr, vaddr, sz, op); + __cache_line_loop(paddr, vaddr, sz, op, full_page); __after_dc_op(op); @@ -521,10 +519,11 @@ static inline void __ic_line_inv_vaddr_local(phys_addr_t paddr, unsigned long vaddr, unsigned long sz) { + const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE; unsigned long flags; local_irq_save(flags); - (*_cache_line_loop_ic_fn)(paddr, vaddr, sz, OP_INV_IC); + (*_cache_line_loop_ic_fn)(paddr, vaddr, sz, OP_INV_IC, full_page); local_irq_restore(flags); } -- cgit v1.2.3 From 0d77117fc5c0333d024a183d6790167bb90c3b62 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 29 Aug 2014 10:55:15 +0530 Subject: ARCv2: mm: Implement cache region flush operations These are more efficient than the per-line ops Signed-off-by: Vineet Gupta --- arch/arc/include/asm/cache.h | 6 ++++ arch/arc/mm/cache.c | 68 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) (limited to 'arch/arc/mm') diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index 5008021fba98..16e457706129 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -62,6 +62,8 @@ extern unsigned long perip_base, perip_end; #define ARC_REG_IC_BCR 0x77 /* Build Config reg */ #define ARC_REG_IC_IVIC 0x10 #define ARC_REG_IC_CTRL 0x11 +#define ARC_REG_IC_IVIR 0x16 +#define ARC_REG_IC_ENDR 0x17 #define ARC_REG_IC_IVIL 0x19 #define ARC_REG_IC_PTAG 0x1E #define ARC_REG_IC_PTAG_HI 0x1F @@ -76,6 +78,8 @@ extern unsigned long perip_base, perip_end; #define ARC_REG_DC_IVDL 0x4A #define ARC_REG_DC_FLSH 0x4B #define ARC_REG_DC_FLDL 0x4C +#define ARC_REG_DC_STARTR 0x4D +#define ARC_REG_DC_ENDR 0x4E #define ARC_REG_DC_PTAG 0x5C #define ARC_REG_DC_PTAG_HI 0x5F @@ -83,6 +87,8 @@ extern unsigned long perip_base, perip_end; #define DC_CTRL_DIS 0x001 #define DC_CTRL_INV_MODE_FLUSH 0x040 #define DC_CTRL_FLUSH_STATUS 0x100 +#define DC_CTRL_RGN_OP_INV 0x200 +#define DC_CTRL_RGN_OP_MSK 0xE00 /*System-level cache (L2 cache) related Auxiliary registers */ #define ARC_REG_SLC_CFG 0x901 diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 18132eb56150..8401fcb75d19 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -21,6 +21,10 @@ #include #include +#ifdef CONFIG_ISA_ARCV2 +#define USE_RGN_FLSH 1 +#endif + static int l2_line_sz; static int ioc_exists; int slc_enable = 1, ioc_enable = 1; @@ -332,6 +336,8 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr, } } +#ifndef USE_RGN_FLSH + /* * In HS38x (MMU v4), I-cache is VIPT (can alias), D-cache is PIPT * Here's how cache ops are implemented @@ -394,6 +400,68 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr, } } +#else + +/* + * optimized flush operation which takes a region as opposed to iterating per line + */ +static inline +void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr, + unsigned long sz, const int op, const int full_page) +{ + const unsigned int ctl = ARC_REG_DC_CTRL; + unsigned int s, e, val; + + /* Only for Non aliasing I-cache in HS38 */ + if (op == OP_INV_IC) { + s = ARC_REG_IC_IVIR; + e = ARC_REG_IC_ENDR; + } else { + s = ARC_REG_DC_STARTR; + e = ARC_REG_DC_ENDR; + } + + if (!full_page) { + /* for any leading gap between @paddr and start of cache line */ + sz += paddr & ~CACHE_LINE_MASK; + paddr &= CACHE_LINE_MASK; + + /* + * account for any trailing gap to end of cache line + * this is equivalent to DIV_ROUND_UP() in line ops above + */ + sz += L1_CACHE_BYTES - 1; + } + + if (is_pae40_enabled()) { + /* TBD: check if crossing 4TB boundary */ + if (op == OP_INV_IC) + write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32); + else + write_aux_reg(ARC_REG_DC_PTAG_HI, (u64)paddr >> 32); + } + + /* + * Flush / Invalidate is provided by DC_CTRL.RNG_OP 0 or 1 + * Flush-n-invalidate additionally uses setting DC_CTRL.IM = 1 + * just as for line ops which is handled in __before_dc_op() + */ + val = read_aux_reg(ctl) & ~DC_CTRL_RGN_OP_MSK; + + if (op & OP_INV) + val |= DC_CTRL_RGN_OP_INV; + + write_aux_reg(ctl, val); + + /* ENDR needs to be set ahead of START */ + write_aux_reg(e, paddr + sz); /* ENDR is exclusive */ + write_aux_reg(s, paddr); + + /* caller waits on DC_CTRL.FS */ +} + +#endif + #if (CONFIG_ARC_MMU_VER < 3) #define __cache_line_loop __cache_line_loop_v2 #elif (CONFIG_ARC_MMU_VER == 3) -- cgit v1.2.3 From ee40bd1e0c3fb83d810f258952692ffdebc14726 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 2 May 2017 15:28:12 -0700 Subject: ARCv2: mm: Merge 2 updates to DC_CTRL for region flush Region Flush has a weird programming model. 1. Flush or Invalidate is selected by DC_CTRL.RGN_OP 2 Flush-n-Invalidate is done by DC_CTRL.IM Given the code structuring before, case #2 above was generating two seperate updates to DC_CTRL which was pointless. | 80a342b0 <__dma_cache_wback_inv_l1>: | 80a342b0: clri r4 | 80a342b4: lr r2,[dc_ctrl] | 80a342b8: bset_s r2,r2,0x6 | 80a342ba: sr r2,[dc_ctrl] <-- FIRST | | 80a342be: bmskn r3,r0,0x5 | | 80a342c2: lr r2,[dc_ctrl] | 80a342c6: and r2,r2,0xfffff1ff | 80a342ce: bset_s r2,r2,0x9 | 80a342d0: sr r2,[dc_ctrl] <-- SECOND | | 80a342d4: add_s r1,r1,0x3f | 80a342d6: bmsk_s r0,r0,0x5 | 80a342d8: add_s r0,r0,r1 | 80a342da: add_s r0,r0,r3 | 80a342dc: sr r0,[78] | 80a342e0: sr r3,[77] |... |... So move setting of DC_CTRL.RGN_OP into __before_dc_op() and combine with any other update. | 80b63324 <__dma_cache_wback_inv_l1>: | 80b63324: clri r3 | 80b63328: lr r2,[dc_ctrl] | 80b6332c: and r2,r2,0xfffff1ff | 80b63334: or r2,r2,576 | 80b63338: sr r2,[dc_ctrl] | | 80b6333c: add_s r1,r1,0x3f | 80b6333e: bmskn r2,r0,0x5 | 80b63342: add_s r0,r0,r1 | 80b63344: sr r0,[78] | 80b63348: sr r2,[77] Signed-off-by: Vineet Gupta --- arch/arc/mm/cache.c | 46 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 14 deletions(-) (limited to 'arch/arc/mm') diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 8401fcb75d19..a867575a758b 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -409,8 +409,7 @@ static inline void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr, unsigned long sz, const int op, const int full_page) { - const unsigned int ctl = ARC_REG_DC_CTRL; - unsigned int s, e, val; + unsigned int s, e; /* Only for Non aliasing I-cache in HS38 */ if (op == OP_INV_IC) { @@ -441,18 +440,6 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr, write_aux_reg(ARC_REG_DC_PTAG_HI, (u64)paddr >> 32); } - /* - * Flush / Invalidate is provided by DC_CTRL.RNG_OP 0 or 1 - * Flush-n-invalidate additionally uses setting DC_CTRL.IM = 1 - * just as for line ops which is handled in __before_dc_op() - */ - val = read_aux_reg(ctl) & ~DC_CTRL_RGN_OP_MSK; - - if (op & OP_INV) - val |= DC_CTRL_RGN_OP_INV; - - write_aux_reg(ctl, val); - /* ENDR needs to be set ahead of START */ write_aux_reg(e, paddr + sz); /* ENDR is exclusive */ write_aux_reg(s, paddr); @@ -476,6 +463,11 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr, * Machine specific helpers for Entire D-Cache or Per Line ops */ +#ifndef USE_RGN_FLSH +/* + * this version avoids extra read/write of DC_CTRL for flush or invalid ops + * in the non region flush regime (such as for ARCompact) + */ static inline void __before_dc_op(const int op) { if (op == OP_FLUSH_N_INV) { @@ -489,6 +481,32 @@ static inline void __before_dc_op(const int op) } } +#else + +static inline void __before_dc_op(const int op) +{ + const unsigned int ctl = ARC_REG_DC_CTRL; + unsigned int val = read_aux_reg(ctl); + + if (op == OP_FLUSH_N_INV) { + val |= DC_CTRL_INV_MODE_FLUSH; + } + + if (op != OP_INV_IC) { + /* + * Flush / Invalidate is provided by DC_CTRL.RNG_OP 0 or 1 + * combined Flush-n-invalidate uses DC_CTRL.IM = 1 set above + */ + val &= ~DC_CTRL_RGN_OP_MSK; + if (op & OP_INV) + val |= DC_CTRL_RGN_OP_INV; + } + write_aux_reg(ctl, val); +} + +#endif + + static inline void __after_dc_op(const int op) { if (op & OP_FLUSH) { -- cgit v1.2.3