diff options
author | Michael Ellerman <mpe@ellerman.id.au> | 2018-10-02 23:56:39 +1000 |
---|---|---|
committer | Michael Ellerman <mpe@ellerman.id.au> | 2018-10-03 15:32:49 +1000 |
commit | 54be0b9c7c9888ebe63b89a31a17ee3df6a68d61 (patch) | |
tree | 12870e5fff70fc0676b5a63719beca3b1d1896a5 /arch/powerpc/mm | |
parent | 0823c68b054bca9dc321adea829af5cf36afb30b (diff) | |
download | linux-54be0b9c7c9888ebe63b89a31a17ee3df6a68d61.tar.bz2 |
Revert "convert SLB miss handlers to C" and subsequent commits
This reverts commits:
5e46e29e6a97 ("powerpc/64s/hash: convert SLB miss handlers to C")
8fed04d0f6ae ("powerpc/64s/hash: remove user SLB data from the paca")
655deecf67b2 ("powerpc/64s/hash: SLB allocation status bitmaps")
2e1626744e8d ("powerpc/64s/hash: provide arch_setup_exec hooks for hash slice setup")
89ca4e126a3f ("powerpc/64s/hash: Add a SLB preload cache")
This series had a few bugs, and the fixes are not all trivial. So
revert most of it for now.
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/Makefile | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/hash_utils_64.c | 46 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_context.c | 3 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_context_book3s64.c | 9 | ||||
-rw-r--r-- | arch/powerpc/mm/slb.c | 485 | ||||
-rw-r--r-- | arch/powerpc/mm/slb_low.S | 335 | ||||
-rw-r--r-- | arch/powerpc/mm/slice.c | 43 |
7 files changed, 550 insertions, 373 deletions
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 892d4e061d62..cdf6a9960046 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -15,7 +15,7 @@ obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o obj-$(CONFIG_PPC_BOOK3E_64) += pgtable-book3e.o -obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o +obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o obj-$(CONFIG_PPC_RADIX_MMU) += pgtable-radix.o tlb-radix.o obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o obj-$(CONFIG_PPC_STD_MMU) += tlb_hash$(BITS).o diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 88c95dc8b141..f23a89d8e4ce 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1088,16 +1088,16 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) } #ifdef CONFIG_PPC_MM_SLICES -static unsigned int get_psize(struct mm_struct *mm, unsigned long addr) +static unsigned int get_paca_psize(unsigned long addr) { unsigned char *psizes; unsigned long index, mask_index; if (addr < SLICE_LOW_TOP) { - psizes = mm->context.low_slices_psize; + psizes = get_paca()->mm_ctx_low_slices_psize; index = GET_LOW_SLICE_INDEX(addr); } else { - psizes = mm->context.high_slices_psize; + psizes = get_paca()->mm_ctx_high_slices_psize; index = GET_HIGH_SLICE_INDEX(addr); } mask_index = index & 0x1; @@ -1105,9 +1105,9 @@ static unsigned int get_psize(struct mm_struct *mm, unsigned long addr) } #else -unsigned int get_psize(struct mm_struct *mm, unsigned long addr) +unsigned int get_paca_psize(unsigned long addr) { - return mm->context.user_psize; + return get_paca()->mm_ctx_user_psize; } #endif @@ -1118,11 +1118,15 @@ 
unsigned int get_psize(struct mm_struct *mm, unsigned long addr) #ifdef CONFIG_PPC_64K_PAGES void demote_segment_4k(struct mm_struct *mm, unsigned long addr) { - if (get_psize(mm, addr) == MMU_PAGE_4K) + if (get_slice_psize(mm, addr) == MMU_PAGE_4K) return; slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K); copro_flush_all_slbs(mm); - core_flush_all_slbs(mm); + if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) { + + copy_mm_to_paca(mm); + slb_flush_and_rebolt(); + } } #endif /* CONFIG_PPC_64K_PAGES */ @@ -1187,6 +1191,22 @@ void hash_failure_debug(unsigned long ea, unsigned long access, trap, vsid, ssize, psize, lpsize, pte); } +static void check_paca_psize(unsigned long ea, struct mm_struct *mm, + int psize, bool user_region) +{ + if (user_region) { + if (psize != get_paca_psize(ea)) { + copy_mm_to_paca(mm); + slb_flush_and_rebolt(); + } + } else if (get_paca()->vmalloc_sllp != + mmu_psize_defs[mmu_vmalloc_psize].sllp) { + get_paca()->vmalloc_sllp = + mmu_psize_defs[mmu_vmalloc_psize].sllp; + slb_vmalloc_update(); + } +} + /* Result code is: * 0 - handled * 1 - normal page fault @@ -1219,7 +1239,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, rc = 1; goto bail; } - psize = get_psize(mm, ea); + psize = get_slice_psize(mm, ea); ssize = user_segment_size(ea); vsid = get_user_vsid(&mm->context, ea, ssize); break; @@ -1307,6 +1327,9 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, WARN_ON(1); } #endif + if (current->mm == mm) + check_paca_psize(ea, mm, psize, user_region); + goto bail; } @@ -1341,14 +1364,15 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, "to 4kB pages because of " "non-cacheable mapping\n"); psize = mmu_vmalloc_psize = MMU_PAGE_4K; - slb_vmalloc_update(); copro_flush_all_slbs(mm); - core_flush_all_slbs(mm); } } #endif /* CONFIG_PPC_64K_PAGES */ + if (current->mm == mm) + check_paca_psize(ea, mm, psize, user_region); + #ifdef CONFIG_PPC_64K_PAGES if (psize == MMU_PAGE_64K) rc = __hash_page_64K(ea, 
access, vsid, ptep, trap, @@ -1436,7 +1460,7 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap, #ifdef CONFIG_PPC_MM_SLICES static bool should_hash_preload(struct mm_struct *mm, unsigned long ea) { - int psize = get_psize(mm, ea); + int psize = get_slice_psize(mm, ea); /* We only prefault standard pages for now */ if (unlikely(psize != mm->context.user_psize)) diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c index 28ae2835db3d..f84e14f23e50 100644 --- a/arch/powerpc/mm/mmu_context.c +++ b/arch/powerpc/mm/mmu_context.c @@ -54,7 +54,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * MMU context id, which is then moved to SPRN_PID. * * For the hash MMU it is either the first load from slb_cache - * in switch_slb(), and/or load of MMU context id. + * in switch_slb(), and/or the store of paca->mm_ctx_id in + * copy_mm_to_paca(). * * On the other side, the barrier is in mm/tlb-radix.c for * radix which orders earlier stores to clear the PTEs vs diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 510f103d7813..dbd8f762140b 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -53,8 +53,6 @@ int hash__alloc_context_id(void) } EXPORT_SYMBOL_GPL(hash__alloc_context_id); -void slb_setup_new_exec(void); - static int hash__init_new_context(struct mm_struct *mm) { int index; @@ -86,13 +84,6 @@ static int hash__init_new_context(struct mm_struct *mm) return index; } -void hash__setup_new_exec(void) -{ - slice_setup_new_exec(); - - slb_setup_new_exec(); -} - static int radix__init_new_context(struct mm_struct *mm) { unsigned long rts_field; diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index b438220c4336..513c6596140d 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -14,7 +14,6 @@ * 2 of the License, or (at your option) any later version. 
*/ -#include <asm/asm-prototypes.h> #include <asm/pgtable.h> #include <asm/mmu.h> #include <asm/mmu_context.h> @@ -34,7 +33,7 @@ enum slb_index { KSTACK_INDEX = 1, /* Kernel stack map */ }; -static long slb_allocate_user(struct mm_struct *mm, unsigned long ea); +extern void slb_allocate(unsigned long ea); #define slb_esid_mask(ssize) \ (((ssize) == MMU_SEGSIZE_256M)? ESID_MASK: ESID_MASK_1T) @@ -45,17 +44,11 @@ static inline unsigned long mk_esid_data(unsigned long ea, int ssize, return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | index; } -static inline unsigned long __mk_vsid_data(unsigned long vsid, int ssize, - unsigned long flags) -{ - return (vsid << slb_vsid_shift(ssize)) | flags | - ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT); -} - static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, unsigned long flags) { - return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags); + return (get_kernel_vsid(ea, ssize) << slb_vsid_shift(ssize)) | flags | + ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT); } static inline void slb_shadow_update(unsigned long ea, int ssize, @@ -122,9 +115,6 @@ void slb_restore_bolted_realmode(void) { __slb_restore_bolted_realmode(); get_paca()->slb_cache_ptr = 0; - - get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1; - get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap; } /* @@ -132,6 +122,9 @@ void slb_restore_bolted_realmode(void) */ void slb_flush_all_realmode(void) { + /* + * This flushes all SLB entries including 0, so it must be realmode. 
+ */ asm volatile("slbmte %0,%0; slbia" : : "r" (0)); } @@ -177,9 +170,6 @@ void slb_flush_and_rebolt(void) : "memory"); get_paca()->slb_cache_ptr = 0; - - get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1; - get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap; } void slb_save_contents(struct slb_entry *slb_ptr) @@ -212,7 +202,7 @@ void slb_dump_contents(struct slb_entry *slb_ptr) return; pr_err("SLB contents of cpu 0x%x\n", smp_processor_id()); - pr_err("Last SLB entry inserted at slot %u\n", get_paca()->stab_rr); + pr_err("Last SLB entry inserted at slot %lld\n", get_paca()->stab_rr); for (i = 0; i < mmu_slb_size; i++) { e = slb_ptr->esid; @@ -257,119 +247,41 @@ void slb_vmalloc_update(void) slb_flush_and_rebolt(); } -static bool preload_hit(struct thread_info *ti, unsigned long esid) -{ - u8 i; - - for (i = 0; i < ti->slb_preload_nr; i++) { - u8 idx; - - idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR; - if (esid == ti->slb_preload_esid[idx]) - return true; - } - return false; -} - -static bool preload_add(struct thread_info *ti, unsigned long ea) -{ - unsigned long esid; - u8 idx; - - if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) { - /* EAs are stored >> 28 so 256MB segments don't need clearing */ - if (ea & ESID_MASK_1T) - ea &= ESID_MASK_1T; - } - - esid = ea >> SID_SHIFT; - - if (preload_hit(ti, esid)) - return false; - - idx = (ti->slb_preload_tail + ti->slb_preload_nr) % SLB_PRELOAD_NR; - ti->slb_preload_esid[idx] = esid; - if (ti->slb_preload_nr == SLB_PRELOAD_NR) - ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR; - else - ti->slb_preload_nr++; - - return true; -} - -static void preload_age(struct thread_info *ti) -{ - if (!ti->slb_preload_nr) - return; - ti->slb_preload_nr--; - ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR; -} - -void slb_setup_new_exec(void) +/* Helper function to compare esids. There are four cases to handle. + * 1. The system is not 1T segment size capable. Use the GET_ESID compare. 
+ * 2. The system is 1T capable, both addresses are < 1T, use the GET_ESID compare. + * 3. The system is 1T capable, only one of the two addresses is > 1T. This is not a match. + * 4. The system is 1T capable, both addresses are > 1T, use the GET_ESID_1T macro to compare. + */ +static inline int esids_match(unsigned long addr1, unsigned long addr2) { - struct thread_info *ti = current_thread_info(); - struct mm_struct *mm = current->mm; - unsigned long exec = 0x10000000; + int esid_1t_count; - /* - * We have no good place to clear the slb preload cache on exec, - * flush_thread is about the earliest arch hook but that happens - * after we switch to the mm and have aleady preloaded the SLBEs. - * - * For the most part that's probably okay to use entries from the - * previous exec, they will age out if unused. It may turn out to - * be an advantage to clear the cache before switching to it, - * however. - */ - - /* - * preload some userspace segments into the SLB. - * Almost all 32 and 64bit PowerPC executables are linked at - * 0x10000000 so it makes sense to preload this segment. - */ - if (!is_kernel_addr(exec)) { - if (preload_add(ti, exec)) - slb_allocate_user(mm, exec); - } - - /* Libraries and mmaps. */ - if (!is_kernel_addr(mm->mmap_base)) { - if (preload_add(ti, mm->mmap_base)) - slb_allocate_user(mm, mm->mmap_base); - } -} + /* System is not 1T segment size capable. */ + if (!mmu_has_feature(MMU_FTR_1T_SEGMENT)) + return (GET_ESID(addr1) == GET_ESID(addr2)); -void preload_new_slb_context(unsigned long start, unsigned long sp) -{ - struct thread_info *ti = current_thread_info(); - struct mm_struct *mm = current->mm; - unsigned long heap = mm->start_brk; + esid_1t_count = (((addr1 >> SID_SHIFT_1T) != 0) + + ((addr2 >> SID_SHIFT_1T) != 0)); - /* Userspace entry address. 
*/ - if (!is_kernel_addr(start)) { - if (preload_add(ti, start)) - slb_allocate_user(mm, start); - } + /* both addresses are < 1T */ + if (esid_1t_count == 0) + return (GET_ESID(addr1) == GET_ESID(addr2)); - /* Top of stack, grows down. */ - if (!is_kernel_addr(sp)) { - if (preload_add(ti, sp)) - slb_allocate_user(mm, sp); - } + /* One address < 1T, the other > 1T. Not a match */ + if (esid_1t_count == 1) + return 0; - /* Bottom of heap, grows up. */ - if (heap && !is_kernel_addr(heap)) { - if (preload_add(ti, heap)) - slb_allocate_user(mm, heap); - } + /* Both addresses are > 1T. */ + return (GET_ESID_1T(addr1) == GET_ESID_1T(addr2)); } - /* Flush all user entries from the segment table of the current processor. */ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) { - struct thread_info *ti = task_thread_info(tsk); - u8 i; + unsigned long pc = KSTK_EIP(tsk); + unsigned long stack = KSTK_ESP(tsk); + unsigned long exec_base; /* * We need interrupts hard-disabled here, not just soft-disabled, @@ -392,6 +304,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) && offset <= SLB_CACHE_ENTRIES) { unsigned long slbie_data = 0; + int i; asm volatile("isync" : : : "memory"); for (i = 0; i < offset; i++) { @@ -422,60 +335,67 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) "isync" :: "r"(ksp_vsid_data), "r"(ksp_esid_data)); - - get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1; } get_paca()->slb_cache_ptr = 0; } - get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap; + + copy_mm_to_paca(mm); /* - * We gradually age out SLBs after a number of context switches to - * reduce reload overhead of unused entries (like we do with FP/VEC - * reload). Each time we wrap 256 switches, take an entry out of the - * SLB preload cache. + * preload some userspace segments into the SLB. 
+ * Almost all 32 and 64bit PowerPC executables are linked at + * 0x10000000 so it makes sense to preload this segment. */ - tsk->thread.load_slb++; - if (!tsk->thread.load_slb) { - unsigned long pc = KSTK_EIP(tsk); + exec_base = 0x10000000; - preload_age(ti); - preload_add(ti, pc); - } + if (is_kernel_addr(pc) || is_kernel_addr(stack) || + is_kernel_addr(exec_base)) + return; - for (i = 0; i < ti->slb_preload_nr; i++) { - unsigned long ea; - u8 idx; + slb_allocate(pc); - idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR; - ea = (unsigned long)ti->slb_preload_esid[idx] << SID_SHIFT; + if (!esids_match(pc, stack)) + slb_allocate(stack); - slb_allocate_user(mm, ea); - } + if (!esids_match(pc, exec_base) && + !esids_match(stack, exec_base)) + slb_allocate(exec_base); } -void slb_set_size(u16 size) +static inline void patch_slb_encoding(unsigned int *insn_addr, + unsigned int immed) { - mmu_slb_size = size; + + /* + * This function patches either an li or a cmpldi instruction with + * a new immediate value. This relies on the fact that both li + * (which is actually addi) and cmpldi both take a 16-bit immediate + * value, and it is situated in the same location in the instruction, + * ie. bits 16-31 (Big endian bit order) or the lower 16 bits. + * The signedness of the immediate operand differs between the two + * instructions however this code is only ever patching a small value, + * much less than 1 << 15, so we can get away with it. + * To patch the value we read the existing instruction, clear the + * immediate value, and or in our new value, then write the instruction + * back. 
+ */ + unsigned int insn = (*insn_addr & 0xffff0000) | immed; + patch_instruction(insn_addr, insn); } -static void cpu_flush_slb(void *parm) -{ - struct mm_struct *mm = parm; - unsigned long flags; +extern u32 slb_miss_kernel_load_linear[]; +extern u32 slb_miss_kernel_load_io[]; +extern u32 slb_compare_rr_to_size[]; +extern u32 slb_miss_kernel_load_vmemmap[]; - if (mm != current->active_mm) +void slb_set_size(u16 size) +{ + if (mmu_slb_size == size) return; - local_irq_save(flags); - slb_flush_and_rebolt(); - local_irq_restore(flags); -} - -void core_flush_all_slbs(struct mm_struct *mm) -{ - on_each_cpu(cpu_flush_slb, mm, 1); + mmu_slb_size = size; + patch_slb_encoding(slb_compare_rr_to_size, mmu_slb_size); } void slb_initialize(void) @@ -497,16 +417,24 @@ void slb_initialize(void) #endif if (!slb_encoding_inited) { slb_encoding_inited = 1; + patch_slb_encoding(slb_miss_kernel_load_linear, + SLB_VSID_KERNEL | linear_llp); + patch_slb_encoding(slb_miss_kernel_load_io, + SLB_VSID_KERNEL | io_llp); + patch_slb_encoding(slb_compare_rr_to_size, + mmu_slb_size); + pr_devel("SLB: linear LLP = %04lx\n", linear_llp); pr_devel("SLB: io LLP = %04lx\n", io_llp); + #ifdef CONFIG_SPARSEMEM_VMEMMAP + patch_slb_encoding(slb_miss_kernel_load_vmemmap, + SLB_VSID_KERNEL | vmemmap_llp); pr_devel("SLB: vmemmap LLP = %04lx\n", vmemmap_llp); #endif } get_paca()->stab_rr = SLB_NUM_BOLTED - 1; - get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1; - get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap; lflags = SLB_VSID_KERNEL | linear_llp; @@ -530,14 +458,53 @@ void slb_initialize(void) asm volatile("isync":::"memory"); } -static void slb_cache_update(unsigned long esid_data) +static void insert_slb_entry(unsigned long vsid, unsigned long ea, + int bpsize, int ssize) { + unsigned long flags, vsid_data, esid_data; + enum slb_index index; int slb_cache_index; if (cpu_has_feature(CPU_FTR_ARCH_300)) return; /* ISAv3.0B and later does not use slb_cache */ /* + * We are irq disabled, 
hence should be safe to access PACA. + */ + VM_WARN_ON(!irqs_disabled()); + + /* + * We can't take a PMU exception in the following code, so hard + * disable interrupts. + */ + hard_irq_disable(); + + index = get_paca()->stab_rr; + + /* + * simple round-robin replacement of slb starting at SLB_NUM_BOLTED. + */ + if (index < (mmu_slb_size - 1)) + index++; + else + index = SLB_NUM_BOLTED; + + get_paca()->stab_rr = index; + + flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp; + vsid_data = (vsid << slb_vsid_shift(ssize)) | flags | + ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT); + esid_data = mk_esid_data(ea, ssize, index); + + /* + * No need for an isync before or after this slbmte. The exception + * we enter with and the rfid we exit with are context synchronizing. + * Also we only handle user segments here. + */ + asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data) + : "memory"); + + /* * Now update slb cache entries */ slb_cache_index = get_paca()->slb_cache_ptr; @@ -558,196 +525,58 @@ static void slb_cache_update(unsigned long esid_data) } } -static enum slb_index alloc_slb_index(bool kernel) +static void handle_multi_context_slb_miss(int context_id, unsigned long ea) { - enum slb_index index; - - /* - * The allocation bitmaps can become out of synch with the SLB - * when the _switch code does slbie when bolting a new stack - * segment and it must not be anywhere else in the SLB. This leaves - * a kernel allocated entry that is unused in the SLB. With very - * large systems or small segment sizes, the bitmaps could slowly - * fill with these entries. They will eventually be cleared out - * by the round robin allocator in that case, so it's probably not - * worth accounting for. - */ + struct mm_struct *mm = current->mm; + unsigned long vsid; + int bpsize; /* - * SLBs beyond 32 entries are allocated with stab_rr only - * POWER7/8/9 have 32 SLB entries, this could be expanded if a - * future CPU has more. 
+ * We are always above 1TB, hence use high user segment size. */ - if (get_paca()->slb_used_bitmap != U32_MAX) { - index = ffz(get_paca()->slb_used_bitmap); - get_paca()->slb_used_bitmap |= 1U << index; - if (kernel) - get_paca()->slb_kern_bitmap |= 1U << index; - } else { - /* round-robin replacement of slb starting at SLB_NUM_BOLTED. */ - index = get_paca()->stab_rr; - if (index < (mmu_slb_size - 1)) - index++; - else - index = SLB_NUM_BOLTED; - get_paca()->stab_rr = index; - if (index < 32) { - if (kernel) - get_paca()->slb_kern_bitmap |= 1U << index; - else - get_paca()->slb_kern_bitmap &= ~(1U << index); - } - } - BUG_ON(index < SLB_NUM_BOLTED); - - return index; + vsid = get_vsid(context_id, ea, mmu_highuser_ssize); + bpsize = get_slice_psize(mm, ea); + insert_slb_entry(vsid, ea, bpsize, mmu_highuser_ssize); } -static long slb_insert_entry(unsigned long ea, unsigned long context, - unsigned long flags, int ssize, bool kernel) +void slb_miss_large_addr(struct pt_regs *regs) { - unsigned long vsid; - unsigned long vsid_data, esid_data; - enum slb_index index; - - vsid = get_vsid(context, ea, ssize); - if (!vsid) - return -EFAULT; + enum ctx_state prev_state = exception_enter(); + unsigned long ea = regs->dar; + int context; - index = alloc_slb_index(kernel); - - vsid_data = __mk_vsid_data(vsid, ssize, flags); - esid_data = mk_esid_data(ea, ssize, index); + if (REGION_ID(ea) != USER_REGION_ID) + goto slb_bad_addr; /* - * No need for an isync before or after this slbmte. The exception - * we enter with and the rfid we exit with are context synchronizing. - * Also we only handle user segments here. + * Are we beyound what the page table layout supports ? 
*/ - asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data)); + if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE) + goto slb_bad_addr; - if (!kernel) - slb_cache_update(esid_data); - - return 0; -} - -static long slb_allocate_kernel(unsigned long ea, unsigned long id) -{ - unsigned long context; - unsigned long flags; - int ssize; - - if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT)) - return -EFAULT; - - if (id == KERNEL_REGION_ID) { - flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp; -#ifdef CONFIG_SPARSEMEM_VMEMMAP - } else if (id == VMEMMAP_REGION_ID) { - flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp; -#endif - } else if (id == VMALLOC_REGION_ID) { - if (ea < H_VMALLOC_END) - flags = get_paca()->vmalloc_sllp; - else - flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp; - } else { - return -EFAULT; - } - - ssize = MMU_SEGSIZE_1T; - if (!mmu_has_feature(MMU_FTR_1T_SEGMENT)) - ssize = MMU_SEGSIZE_256M; - - context = id - KERNEL_REGION_CONTEXT_OFFSET; - - return slb_insert_entry(ea, context, flags, ssize, true); -} - -static long slb_allocate_user(struct mm_struct *mm, unsigned long ea) -{ - unsigned long context; - unsigned long flags; - int bpsize; - int ssize; + /* Lower address should have been handled by asm code */ + if (ea < (1UL << MAX_EA_BITS_PER_CONTEXT)) + goto slb_bad_addr; /* * consider this as bad access if we take a SLB miss * on an address above addr limit. 
*/ - if (ea >= mm->context.slb_addr_limit) - return -EFAULT; + if (ea >= current->mm->context.slb_addr_limit) + goto slb_bad_addr; - context = get_ea_context(&mm->context, ea); + context = get_ea_context(&current->mm->context, ea); if (!context) - return -EFAULT; - - if (unlikely(ea >= H_PGTABLE_RANGE)) { - WARN_ON(1); - return -EFAULT; - } - - ssize = user_segment_size(ea); - - bpsize = get_slice_psize(mm, ea); - flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp; - - return slb_insert_entry(ea, context, flags, ssize, false); -} - -long do_slb_fault(struct pt_regs *regs, unsigned long ea) -{ - unsigned long id = REGION_ID(ea); - - /* IRQs are not reconciled here, so can't check irqs_disabled */ - VM_WARN_ON(mfmsr() & MSR_EE); - - if (unlikely(!(regs->msr & MSR_RI))) - return -EINVAL; - - /* - * SLB kernel faults must be very careful not to touch anything - * that is not bolted. E.g., PACA and global variables are okay, - * mm->context stuff is not. - * - * SLB user faults can access all of kernel memory, but must be - * careful not to touch things like IRQ state because it is not - * "reconciled" here. The difficulty is that we must use - * fast_exception_return to return from kernel SLB faults without - * looking at possible non-bolted memory. We could test user vs - * kernel faults in the interrupt handler asm and do a full fault, - * reconcile, ret_from_except for user faults which would make them - * first class kernel code. But for performance it's probably nicer - * if they go via fast_exception_return too.
- */ - if (id >= KERNEL_REGION_ID) { - return slb_allocate_kernel(ea, id); - } else { - struct mm_struct *mm = current->mm; - long err; - - if (unlikely(!mm)) - return -EFAULT; + goto slb_bad_addr; - err = slb_allocate_user(mm, ea); - if (!err) - preload_add(current_thread_info(), ea); - - return err; - } -} + handle_multi_context_slb_miss(context, ea); + exception_exit(prev_state); + return; -void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err) -{ - if (err == -EFAULT) { - if (user_mode(regs)) - _exception(SIGSEGV, regs, SEGV_BNDERR, ea); - else - bad_page_fault(regs, ea, SIGSEGV); - } else if (err == -EINVAL) { - unrecoverable_exception(regs); - } else { - BUG(); - } +slb_bad_addr: + if (user_mode(regs)) + _exception(SIGSEGV, regs, SEGV_BNDERR, ea); + else + bad_page_fault(regs, ea, SIGSEGV); + exception_exit(prev_state); } diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S new file mode 100644 index 000000000000..4d2e921d696e --- /dev/null +++ b/arch/powerpc/mm/slb_low.S @@ -0,0 +1,335 @@ +/* + * Low-level SLB routines + * + * Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM + * + * Based on earlier C version: + * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com + * Copyright (c) 2001 Dave Engebretsen + * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <asm/processor.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/cputable.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/firmware.h> +#include <asm/feature-fixups.h> + +/* + * This macro generates asm code to compute the VSID scramble + * function. Used in slb_allocate() and do_stab_bolted. 
The function + * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS + * + * rt = register containing the proto-VSID and into which the + * VSID will be stored + * rx = scratch register (clobbered) + * rf = flags + * + * - rt and rx must be different registers + * - The answer will end up in the low VSID_BITS bits of rt. The higher + * bits may contain other garbage, so you may need to mask the + * result. + */ +#define ASM_VSID_SCRAMBLE(rt, rx, rf, size) \ + lis rx,VSID_MULTIPLIER_##size@h; \ + ori rx,rx,VSID_MULTIPLIER_##size@l; \ + mulld rt,rt,rx; /* rt = rt * MULTIPLIER */ \ +/* \ + * powermac get slb fault before feature fixup, so make 65 bit part \ + * the default part of feature fixup \ + */ \ +BEGIN_MMU_FTR_SECTION \ + srdi rx,rt,VSID_BITS_65_##size; \ + clrldi rt,rt,(64-VSID_BITS_65_##size); \ + add rt,rt,rx; \ + addi rx,rt,1; \ + srdi rx,rx,VSID_BITS_65_##size; \ + add rt,rt,rx; \ + rldimi rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_65_##size)); \ +MMU_FTR_SECTION_ELSE \ + srdi rx,rt,VSID_BITS_##size; \ + clrldi rt,rt,(64-VSID_BITS_##size); \ + add rt,rt,rx; /* add high and low bits */ \ + addi rx,rt,1; \ + srdi rx,rx,VSID_BITS_##size; /* extract 2^VSID_BITS bit */ \ + add rt,rt,rx; \ + rldimi rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_##size)); \ +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA) + + +/* void slb_allocate(unsigned long ea); + * + * Create an SLB entry for the given EA (user or kernel). + * r3 = faulting address, r13 = PACA + * r9, r10, r11 are clobbered by this function + * r3 is preserved. + * No other registers are examined or changed. + */ +_GLOBAL(slb_allocate) + /* + * Check if the address falls within the range of the first context, or + * if we may need to handle multi context. For the first context we + * allocate the slb entry via the fast path below. For large address we + * branch out to C-code and see if additional contexts have been + * allocated. 
+ * The test here is: + * (ea & ~REGION_MASK) >= (1ull << MAX_EA_BITS_PER_CONTEXT) + */ + rldicr. r9,r3,4,(63 - MAX_EA_BITS_PER_CONTEXT - 4) + bne- 8f + + srdi r9,r3,60 /* get region */ + srdi r10,r3,SID_SHIFT /* get esid */ + cmpldi cr7,r9,0xc /* cmp PAGE_OFFSET for later use */ + + /* r3 = address, r10 = esid, cr7 = <> PAGE_OFFSET */ + blt cr7,0f /* user or kernel? */ + + /* Check if hitting the linear mapping or some other kernel space + */ + bne cr7,1f + + /* Linear mapping encoding bits, the "li" instruction below will + * be patched by the kernel at boot + */ +.globl slb_miss_kernel_load_linear +slb_miss_kernel_load_linear: + li r11,0 + /* + * context = (ea >> 60) - (0xc - 1) + * r9 = region id. + */ + subi r9,r9,KERNEL_REGION_CONTEXT_OFFSET + +BEGIN_FTR_SECTION + b .Lslb_finish_load +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) + b .Lslb_finish_load_1T + +1: +#ifdef CONFIG_SPARSEMEM_VMEMMAP + cmpldi cr0,r9,0xf + bne 1f +/* Check virtual memmap region. To be patched at kernel boot */ +.globl slb_miss_kernel_load_vmemmap +slb_miss_kernel_load_vmemmap: + li r11,0 + b 6f +1: +#endif /* CONFIG_SPARSEMEM_VMEMMAP */ + + /* + * r10 contains the ESID, which is the original faulting EA shifted + * right by 28 bits. We need to compare that with (H_VMALLOC_END >> 28) + * which is 0xd00038000. That can't be used as an immediate, even if we + * ignored the 0xd, so we have to load it into a register, and we only + * have one register free. So we must load all of (H_VMALLOC_END >> 28) + * into a register and compare ESID against that. + */ + lis r11,(H_VMALLOC_END >> 32)@h // r11 = 0xffffffffd0000000 + ori r11,r11,(H_VMALLOC_END >> 32)@l // r11 = 0xffffffffd0003800 + // Rotate left 4, then mask with 0xffffffff0 + rldic r11,r11,4,28 // r11 = 0xd00038000 + cmpld r10,r11 // if r10 >= r11 + bge 5f // goto io_mapping + + /* + * vmalloc mapping gets the encoding from the PACA as the mapping + * can be demoted from 64K -> 4K dynamically on some machines. 
+ */ + lhz r11,PACAVMALLOCSLLP(r13) + b 6f +5: + /* IO mapping */ +.globl slb_miss_kernel_load_io +slb_miss_kernel_load_io: + li r11,0 +6: + /* + * context = (ea >> 60) - (0xc - 1) + * r9 = region id. + */ + subi r9,r9,KERNEL_REGION_CONTEXT_OFFSET + +BEGIN_FTR_SECTION + b .Lslb_finish_load +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) + b .Lslb_finish_load_1T + +0: /* + * For userspace addresses, make sure this is region 0. + */ + cmpdi r9, 0 + bne- 8f + /* + * user space make sure we are within the allowed limit + */ + ld r11,PACA_SLB_ADDR_LIMIT(r13) + cmpld r3,r11 + bge- 8f + + /* when using slices, we extract the psize off the slice bitmaps + * and then we need to get the sllp encoding off the mmu_psize_defs + * array. + * + * XXX This is a bit inefficient especially for the normal case, + * so we should try to implement a fast path for the standard page + * size using the old sllp value so we avoid the array. We cannot + * really do dynamic patching unfortunately as processes might flip + * between 4k and 64k standard page size + */ +#ifdef CONFIG_PPC_MM_SLICES + /* r10 have esid */ + cmpldi r10,16 + /* below SLICE_LOW_TOP */ + blt 5f + /* + * Handle hpsizes, + * r9 is get_paca()->context.high_slices_psize[index], r11 is mask_index + */ + srdi r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT + 1) /* index */ + addi r9,r11,PACAHIGHSLICEPSIZE + lbzx r9,r13,r9 /* r9 is hpsizes[r11] */ + /* r11 = (r10 >> (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)) & 0x1 */ + rldicl r11,r10,(64 - (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)),63 + b 6f + +5: + /* + * Handle lpsizes + * r9 is get_paca()->context.low_slices_psize[index], r11 is mask_index + */ + srdi r11,r10,1 /* index */ + addi r9,r11,PACALOWSLICESPSIZE + lbzx r9,r13,r9 /* r9 is lpsizes[r11] */ + rldicl r11,r10,0,63 /* r11 = r10 & 0x1 */ +6: + sldi r11,r11,2 /* index * 4 */ + /* Extract the psize and multiply to get an array offset */ + srd r9,r9,r11 + andi. 
r9,r9,0xf + mulli r9,r9,MMUPSIZEDEFSIZE + + /* Now get to the array and obtain the sllp + */ + ld r11,PACATOC(r13) + ld r11,mmu_psize_defs@got(r11) + add r11,r11,r9 + ld r11,MMUPSIZESLLP(r11) + ori r11,r11,SLB_VSID_USER +#else + /* paca context sllp already contains the SLB_VSID_USER bits */ + lhz r11,PACACONTEXTSLLP(r13) +#endif /* CONFIG_PPC_MM_SLICES */ + + ld r9,PACACONTEXTID(r13) +BEGIN_FTR_SECTION + cmpldi r10,0x1000 + bge .Lslb_finish_load_1T +END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) + b .Lslb_finish_load + +8: /* invalid EA - return an error indication */ + crset 4*cr0+eq /* indicate failure */ + blr + +/* + * Finish loading of an SLB entry and return + * + * r3 = EA, r9 = context, r10 = ESID, r11 = flags, clobbers r9, cr7 = <> PAGE_OFFSET + */ +.Lslb_finish_load: + rldimi r10,r9,ESID_BITS,0 + ASM_VSID_SCRAMBLE(r10,r9,r11,256M) + /* r3 = EA, r11 = VSID data */ + /* + * Find a slot, round robin. Previously we tried to find a + * free slot first but that took too long. Unfortunately we + * dont have any LRU information to help us choose a slot. + */ + + mr r9,r3 + + /* slb_finish_load_1T continues here. r9=EA with non-ESID bits clear */ +7: ld r10,PACASTABRR(r13) + addi r10,r10,1 + /* This gets soft patched on boot. */ +.globl slb_compare_rr_to_size +slb_compare_rr_to_size: + cmpldi r10,0 + + blt+ 4f + li r10,SLB_NUM_BOLTED + +4: + std r10,PACASTABRR(r13) + +3: + rldimi r9,r10,0,36 /* r9 = EA[0:35] | entry */ + oris r10,r9,SLB_ESID_V@h /* r10 = r9 | SLB_ESID_V */ + + /* r9 = ESID data, r11 = VSID data */ + + /* + * No need for an isync before or after this slbmte. The exception + * we enter with and the rfid we exit with are context synchronizing. 
+ */ + slbmte r11,r10 + + /* we're done for kernel addresses */ + crclr 4*cr0+eq /* set result to "success" */ + bgelr cr7 + + /* Update the slb cache */ + lhz r9,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */ + cmpldi r9,SLB_CACHE_ENTRIES + bge 1f + + /* still room in the slb cache */ + sldi r11,r9,2 /* r11 = offset * sizeof(u32) */ + srdi r10,r10,28 /* get the 36 bits of the ESID */ + add r11,r11,r13 /* r11 = (u32 *)paca + offset */ + stw r10,PACASLBCACHE(r11) /* paca->slb_cache[offset] = esid */ + addi r9,r9,1 /* offset++ */ + b 2f +1: /* offset >= SLB_CACHE_ENTRIES */ + li r9,SLB_CACHE_ENTRIES+1 +2: + sth r9,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */ + crclr 4*cr0+eq /* set result to "success" */ + blr + +/* + * Finish loading of a 1T SLB entry (for the kernel linear mapping) and return. + * + * r3 = EA, r9 = context, r10 = ESID(256MB), r11 = flags, clobbers r9 + */ +.Lslb_finish_load_1T: + srdi r10,r10,(SID_SHIFT_1T - SID_SHIFT) /* get 1T ESID */ + rldimi r10,r9,ESID_BITS_1T,0 + ASM_VSID_SCRAMBLE(r10,r9,r11,1T) + + li r10,MMU_SEGSIZE_1T + rldimi r11,r10,SLB_VSID_SSIZE_SHIFT,0 /* insert segment size */ + + /* r3 = EA, r11 = VSID data */ + clrrdi r9,r3,SID_SHIFT_1T /* clear out non-ESID bits */ + b 7b + + +_ASM_NOKPROBE_SYMBOL(slb_allocate) +_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_linear) +_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_io) +_ASM_NOKPROBE_SYMBOL(slb_compare_rr_to_size) +#ifdef CONFIG_SPARSEMEM_VMEMMAP +_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_vmemmap) +#endif diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index fc5b3a1ec666..205fe557ca10 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -207,6 +207,23 @@ static bool slice_check_range_fits(struct mm_struct *mm, return true; } +static void slice_flush_segments(void *parm) +{ +#ifdef CONFIG_PPC64 + struct mm_struct *mm = parm; + unsigned long flags; + + if (mm != current->active_mm) + return; + + copy_mm_to_paca(current->active_mm); + + 
local_irq_save(flags); + slb_flush_and_rebolt(); + local_irq_restore(flags); +#endif +} + static void slice_convert(struct mm_struct *mm, const struct slice_mask *mask, int psize) { @@ -272,9 +289,6 @@ static void slice_convert(struct mm_struct *mm, spin_unlock_irqrestore(&slice_convert_lock, flags); copro_flush_all_slbs(mm); -#ifdef CONFIG_PPC64 - core_flush_all_slbs(mm); -#endif } /* @@ -488,9 +502,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, * be already initialised beyond the old address limit. */ mm->context.slb_addr_limit = high_limit; -#ifdef CONFIG_PPC64 - core_flush_all_slbs(mm); -#endif + + on_each_cpu(slice_flush_segments, mm, 1); } /* Sanity checks */ @@ -652,10 +665,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, (SLICE_NUM_HIGH && !bitmap_empty(potential_mask.high_slices, SLICE_NUM_HIGH))) { slice_convert(mm, &potential_mask, psize); -#ifdef CONFIG_PPC64 if (psize > MMU_PAGE_BASE) - core_flush_all_slbs(mm); -#endif + on_each_cpu(slice_flush_segments, mm, 1); } return newaddr; @@ -746,20 +757,6 @@ void slice_init_new_context_exec(struct mm_struct *mm) bitmap_fill(mask->high_slices, SLICE_NUM_HIGH); } -#ifdef CONFIG_PPC_BOOK3S_64 -void slice_setup_new_exec(void) -{ - struct mm_struct *mm = current->mm; - - slice_dbg("slice_setup_new_exec(mm=%p)\n", mm); - - if (!is_32bit_task()) - return; - - mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW; -} -#endif - void slice_set_range_psize(struct mm_struct *mm, unsigned long start, unsigned long len, unsigned int psize) { |