From 5a364c2a1762e8a78721fafc93144509c0b6cb84 Mon Sep 17 00:00:00 2001
From: Vineet Gupta
Date: Fri, 6 Feb 2015 18:44:57 +0300
Subject: ARC: mm: PAE40 support

This is the first working implementation of 40-bit physical address
extension on ARCv2.

Signed-off-by: Alexey Brodkin
Signed-off-by: Vineet Gupta
---
 arch/arc/mm/cache.c | 44 ++++++++++++++++++++++++++++++++++++++++----
 arch/arc/mm/tlb.c   | 27 ++++++++++++++++++++++-----
 arch/arc/mm/tlbex.S | 11 ++++++++++-
 3 files changed, 72 insertions(+), 10 deletions(-)
(limited to 'arch/arc/mm')

diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 91e28d791d56..ff7ff6cbb811 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -253,6 +253,10 @@ void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr,
 	}
 }
 
+/*
+ * For ARC700 MMUv3 I-cache and D-cache flushes
+ * Also reused for HS38 aliasing I-cache configuration
+ */
 static inline
 void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
 			  unsigned long sz, const int op)
@@ -289,6 +293,16 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
 	if (full_page)
 		write_aux_reg(aux_tag, paddr);
 
+	/*
+	 * This is technically for MMU v4, using the MMU v3 programming model
+	 * Special work for HS38 aliasing I-cache configuration with PAE40
+	 *   - upper 8 bits of paddr need to be written into PTAG_HI
+	 *   - (and need to be written before the lower 32 bits)
+	 * Note that PTAG_HI is hoisted outside the line loop
+	 */
+	if (is_pae40_enabled() && op == OP_INV_IC)
+		write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32);
+
 	while (num_lines-- > 0) {
 		if (!full_page) {
 			write_aux_reg(aux_tag, paddr);
@@ -301,11 +315,17 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
 }
 
 /*
- * In HS38x (MMU v4), although icache is VIPT, only paddr is needed for cache
- * maintenance ops (in IVIL reg), as long as icache doesn't alias.
+ * In HS38x (MMU v4), I-cache is VIPT (can alias), D-cache is PIPT
+ * Here's how cache ops are implemented
+ *
+ *  - D-cache: only paddr needed (in DC_IVDL/DC_FLDL)
+ *  - I-cache Non Aliasing: Despite VIPT, only paddr needed (in IC_IVIL)
+ *  - I-cache Aliasing: Both vaddr and paddr needed (in IC_IVIL, IC_PTAG
+ *    respectively, similar to MMU v3 programming model, hence
+ *    __cache_line_loop_v3() is used)
  *
- * For Aliasing icache, vaddr is also needed (in IVIL), while paddr is
- * specified in PTAG (similar to MMU v3)
+ * If PAE40 is enabled, independent of aliasing considerations, the higher
+ * bits need to be written into PTAG_HI
  */
 static inline
 void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
@@ -335,6 +355,22 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
 
 	num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
 
+	/*
+	 * For HS38 PAE40 configuration
+	 *   - upper 8 bits of paddr need to be written into PTAG_HI
+	 *   - (and need to be written before the lower 32 bits)
+	 */
+	if (is_pae40_enabled()) {
+		if (cacheop == OP_INV_IC)
+			/*
+			 * Non aliasing I-cache in HS38,
+			 * aliasing I-cache handled in __cache_line_loop_v3()
+			 */
+			write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32);
+		else
+			write_aux_reg(ARC_REG_DC_PTAG_HI, (u64)paddr >> 32);
+	}
+
 	while (num_lines-- > 0) {
 		write_aux_reg(aux_cmd, paddr);
 		paddr += L1_CACHE_BYTES;
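A minimal C sketch of the sequencing the cache.c hunks above rely on
(illustration only, not part of the patch: the helper name and the
assumption that the flushed range stays below a 4 GB boundary are mine;
write_aux_reg(), is_pae40_enabled() and the ARC_REG_DC_* names are the
kernel's own):

  /* Sketch: PAE40-aware D-cache flush-by-line, mirroring
   * __cache_line_loop_v4(). PTAG_HI carries paddr[39..32] and, per the
   * comments in the patch, must be written before the lower 32 bits;
   * it is hoisted out of the loop because the upper bits are constant
   * across the (assumed) range. */
  static void sketch_dc_flush_pae40(phys_addr_t paddr, unsigned long sz)
  {
          int num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);

          if (is_pae40_enabled())
                  write_aux_reg(ARC_REG_DC_PTAG_HI, (u64)paddr >> 32);

          while (num_lines-- > 0) {
                  write_aux_reg(ARC_REG_DC_FLDL, paddr);  /* paddr[31..0] */
                  paddr += L1_CACHE_BYTES;
          }
  }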
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index b5f28dc0f924..0ee739846847 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -109,6 +109,10 @@ DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE;
 static inline void __tlb_entry_erase(void)
 {
 	write_aux_reg(ARC_REG_TLBPD1, 0);
+
+	if (is_pae40_enabled())
+		write_aux_reg(ARC_REG_TLBPD1HI, 0);
+
 	write_aux_reg(ARC_REG_TLBPD0, 0);
 	write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
 }
@@ -182,7 +186,7 @@ static void utlb_invalidate(void)
 
 }
 
-static void tlb_entry_insert(unsigned int pd0, unsigned int pd1)
+static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
 {
 	unsigned int idx;
 
@@ -225,10 +229,14 @@ static void tlb_entry_erase(unsigned int vaddr_n_asid)
 	write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry);
 }
 
-static void tlb_entry_insert(unsigned int pd0, unsigned int pd1)
+static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
 {
 	write_aux_reg(ARC_REG_TLBPD0, pd0);
 	write_aux_reg(ARC_REG_TLBPD1, pd1);
+
+	if (is_pae40_enabled())
+		write_aux_reg(ARC_REG_TLBPD1HI, (u64)pd1 >> 32);
+
 	write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry);
 }
 
@@ -249,6 +257,10 @@ noinline void local_flush_tlb_all(void)
 
 	/* Load PD0 and PD1 with template for a Blank Entry */
 	write_aux_reg(ARC_REG_TLBPD1, 0);
+
+	if (is_pae40_enabled())
+		write_aux_reg(ARC_REG_TLBPD1HI, 0);
+
 	write_aux_reg(ARC_REG_TLBPD0, 0);
 
 	for (entry = 0; entry < num_tlb; entry++) {
@@ -503,7 +515,8 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
 {
 	unsigned long flags;
 	unsigned int asid_or_sasid, rwx;
-	unsigned long pd0, pd1;
+	unsigned long pd0;
+	pte_t pd1;
 
 	/*
 	 * create_tlb() assumes that current->mm == vma->mm, since
@@ -785,10 +798,11 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
 		      IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE));
 
 	n += scnprintf(buf + n, len - n,
-		      "MMU [v%x]\t: %dK PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d\n",
+		      "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s%s\n",
 		      p_mmu->ver, p_mmu->pg_sz_k, super_pg,
 		      p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways,
-		      p_mmu->u_dtlb, p_mmu->u_itlb);
+		      p_mmu->u_dtlb, p_mmu->u_itlb,
+		      IS_AVAIL2(p_mmu->pae, "PAE40 ", CONFIG_ARC_HAS_PAE40));
 
 	return buf;
 }
@@ -821,6 +835,9 @@ void arc_mmu_init(void)
 			panic("MMU Super pg size != Linux HPAGE_PMD_SIZE (%luM)\n",
 			      (unsigned long)TO_MB(HPAGE_PMD_SIZE));
 
+	if (IS_ENABLED(CONFIG_ARC_HAS_PAE40) && !mmu->pae)
+		panic("Hardware doesn't support PAE40\n");
+
 	/* Enable the MMU */
 	write_aux_reg(ARC_REG_PID, MMU_ENABLE);
 
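Why tlb_entry_insert() now takes pte_t for pd1: with CONFIG_ARC_HAS_PAE40
the PTE is 64 bits wide and carries paddr[39..32], which the old
unsigned int parameter would silently truncate. A standalone hosted-C
illustration of the PD1/PD1HI split (the address value is made up):

  #include <stdint.h>
  #include <stdio.h>

  int main(void)
  {
          /* made-up 40-bit physical address plus protection bits */
          uint64_t pd1 = (0x23ULL << 32) | 0xa5002000u;

          uint32_t lo = (uint32_t)pd1;          /* -> ARC_REG_TLBPD1   */
          uint32_t hi = (uint32_t)(pd1 >> 32);  /* -> ARC_REG_TLBPD1HI */

          /* prints TLBPD1=0xa5002000 TLBPD1HI=0x23; a 32-bit pd1 would
           * have dropped the 0x23 */
          printf("TLBPD1=0x%08x TLBPD1HI=0x%02x\n", (unsigned)lo, (unsigned)hi);
          return 0;
  }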
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index 8d1b81990bc1..63860adc4814 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -223,12 +223,16 @@ ex_saved_reg1:
 ; (2) y = x & (PTRS_PER_PTE - 1) -> to get index
 ; (3) z = (pgtbl + y * 4)
 
+#ifdef CONFIG_ARC_HAS_PAE40
+#define PTE_SIZE_LOG	3	/* 8 == 2 ^ 3 */
+#else
 #define PTE_SIZE_LOG	2	/* 4 == 2 ^ 2 */
+#endif
 
 ; multiply in step (3) above avoided by shifting lesser in step (1)
 	lsr	r0, r2, ( PAGE_SHIFT - PTE_SIZE_LOG )
 	and	r0, r0, ( (PTRS_PER_PTE - 1) << PTE_SIZE_LOG )
-	ld.aw	r0, [r1, r0]		; r0: PTE
+	ld.aw	r0, [r1, r0]		; r0: PTE (lower word only for PAE40)
 					; r1: PTE ptr
 
 2:
@@ -247,6 +251,7 @@ ex_saved_reg1:
 ;-----------------------------------------------------------------
 ; Convert Linux PTE entry into TLB entry
 ; A one-word PTE entry is programmed as two-word TLB Entry [PD0:PD1] in mmu
+; (for PAE40, a two-word PTE is programmed as a three-word TLB Entry [PD0:PD1:PD1HI])
 ; IN: r0 = PTE, r1 = ptr to PTE
 
 .macro CONV_PTE_TO_TLB
@@ -259,6 +264,10 @@ ex_saved_reg1:
 	or	r3, r3, r2
 
 	sr	r3, [ARC_REG_TLBPD1]	; paddr[31..13] | Kr Kw Kx Ur Uw Ux | C
+#ifdef CONFIG_ARC_HAS_PAE40
+	ld	r3, [r1, 4]		; paddr[39..32]
+	sr	r3, [ARC_REG_TLBPD1HI]
+#endif
 
 	and r2, r0, PTE_BITS_IN_PD0	; Extract other PTE flags: (V)alid, (G)lb
 
-- 
cgit v1.2.3
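The tlbex.S change widens each PTE slot from 4 to 8 bytes, so the
TLB-miss fast path scales the index by 8 (PTE_SIZE_LOG = 3) instead
of 4. A C rendering of steps (1)-(3) from the comment in that file
(illustration only; the PAGE_SHIFT/PTRS_PER_PTE values are assumed
typical defaults, not taken from the patch):

  #include <stdint.h>

  #define PAGE_SHIFT      13      /* assumed: 8K pages */
  #define PTRS_PER_PTE    256     /* assumed */
  #define PTE_SIZE_LOG    3       /* 8-byte PTEs with PAE40 (2 without) */

  static uintptr_t pte_slot(uintptr_t pgtbl, uint32_t vaddr)
  {
          /* (1) shift right by less, leaving the index pre-scaled by 8 */
          uint32_t x = vaddr >> (PAGE_SHIFT - PTE_SIZE_LOG);

          /* (2) mask to the scaled index and (3) add it to the table
           * base, avoiding the multiply the asm comment refers to */
          return pgtbl + (x & ((PTRS_PER_PTE - 1) << PTE_SIZE_LOG));
  }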