Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--   arch/powerpc/mm/8xx_mmu.c                  2
-rw-r--r--   arch/powerpc/mm/copro_fault.c              2
-rw-r--r--   arch/powerpc/mm/fault.c                   28
-rw-r--r--   arch/powerpc/mm/hash_native_64.c          15
-rw-r--r--   arch/powerpc/mm/hash_utils_64.c           34
-rw-r--r--   arch/powerpc/mm/hugetlbpage.c             26
-rw-r--r--   arch/powerpc/mm/init_32.c                  7
-rw-r--r--   arch/powerpc/mm/init_64.c                  8
-rw-r--r--   arch/powerpc/mm/mem.c                     25
-rw-r--r--   arch/powerpc/mm/mmu_context_book3s64.c    24
-rw-r--r--   arch/powerpc/mm/mmu_context_nohash.c      15
-rw-r--r--   arch/powerpc/mm/mmu_decl.h                 1
-rw-r--r--   arch/powerpc/mm/numa.c                    36
-rw-r--r--   arch/powerpc/mm/pgtable-book3s64.c         8
-rw-r--r--   arch/powerpc/mm/pgtable-hash64.c           6
-rw-r--r--   arch/powerpc/mm/pgtable-radix.c          218
-rw-r--r--   arch/powerpc/mm/pgtable_32.c               2
-rw-r--r--   arch/powerpc/mm/pgtable_64.c               5
-rw-r--r--   arch/powerpc/mm/pkeys.c                   17
-rw-r--r--   arch/powerpc/mm/slb.c                    108
-rw-r--r--   arch/powerpc/mm/slb_low.S                 19
-rw-r--r--   arch/powerpc/mm/slice.c                  485
-rw-r--r--   arch/powerpc/mm/tlb-radix.c               14
-rw-r--r--   arch/powerpc/mm/tlb_hash64.c               2
24 files changed, 662 insertions(+), 445 deletions(-)
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
index 849f50cd62f2..cf77d755246d 100644
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/8xx_mmu.c
@@ -192,7 +192,7 @@ void set_context(unsigned long id, pgd_t *pgd)
mtspr(SPRN_M_TW, __pa(pgd) - offset);
/* Update context */
- mtspr(SPRN_M_CASID, id);
+ mtspr(SPRN_M_CASID, id - 1);
/* sync */
mb();
}
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index 697b70ad1195..7d0945bd3a61 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -112,7 +112,7 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
return 1;
psize = get_slice_psize(mm, ea);
ssize = user_segment_size(ea);
- vsid = get_vsid(mm->context.id, ea, ssize);
+ vsid = get_user_vsid(&mm->context, ea, ssize);
vsidkey = SLB_VSID_USER;
break;
case VMALLOC_REGION_ID:
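The copro fault path switches from get_vsid(mm->context.id, ...) to get_user_vsid(&mm->context, ...), so the VSID is derived from whichever context id covers the effective address now that an mm can own several contexts. A minimal sketch of such a wrapper, assuming a get_user_context() helper that maps an EA to its context id:

static inline unsigned long get_user_vsid_sketch(mm_context_t *ctx,
						 unsigned long ea, int ssize)
{
	/* Pick the context id covering this EA (0 if none allocated) */
	unsigned long context = get_user_context(ctx, ea);

	if (!context)
		return 0;
	return get_vsid(context, ea, ssize);
}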
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 866446cf2d9a..c01d627e687a 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -297,7 +297,12 @@ static bool access_error(bool is_write, bool is_exec,
if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
return true;
-
+ /*
+ * We should ideally do the vma pkey access check here. But in the
+ * fault path, handle_mm_fault() also does the same check. To avoid
+ * these multiple checks, we skip it here and handle access errors due
+ * to pkeys later.
+ */
return false;
}
@@ -518,25 +523,16 @@ good_area:
#ifdef CONFIG_PPC_MEM_KEYS
/*
- * if the HPTE is not hashed, hardware will not detect
- * a key fault. Lets check if we failed because of a
- * software detected key fault.
+ * We skipped checking for access errors due to keys earlier.
+ * Check for them now, using the handle_mm_fault() error return.
*/
if (unlikely(fault & VM_FAULT_SIGSEGV) &&
- !arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
- is_exec, 0)) {
- /*
- * The PGD-PDT...PMD-PTE tree may not have been fully setup.
- * Hence we cannot walk the tree to locate the PTE, to locate
- * the key. Hence let's use vma_pkey() to get the key; instead
- * of get_mm_addr_key().
- */
+ !arch_vma_access_permitted(vma, is_write, is_exec, 0)) {
+
int pkey = vma_pkey(vma);
- if (likely(pkey)) {
- up_read(&mm->mmap_sem);
- return bad_key_fault_exception(regs, address, pkey);
- }
+ up_read(&mm->mmap_sem);
+ return bad_key_fault_exception(regs, address, pkey);
}
#endif /* CONFIG_PPC_MEM_KEYS */
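With the pkey check dropped from access_error(), key violations are now caught only after handle_mm_fault() returns. A condensed sketch of the resulting flow (not the literal hunk above); note that the old likely(pkey) guard is gone, so faults on key 0 are reported as well:

	fault = handle_mm_fault(vma, address, flags);
	if (unlikely(fault & VM_FAULT_SIGSEGV) &&
	    !arch_vma_access_permitted(vma, is_write, is_exec, 0)) {
		int pkey = vma_pkey(vma);	/* key 0 included now */

		up_read(&mm->mmap_sem);
		return bad_key_fault_exception(regs, address, pkey);
	}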
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 656933c85925..1d049c78c82a 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -866,18 +866,6 @@ static void native_flush_hash_range(unsigned long number, int local)
local_irq_restore(flags);
}
-static int native_register_proc_table(unsigned long base, unsigned long page_size,
- unsigned long table_size)
-{
- unsigned long patb1 = base << 25; /* VSID */
-
- patb1 |= (page_size << 5); /* sllp */
- patb1 |= table_size;
-
- partition_tb->patb1 = cpu_to_be64(patb1);
- return 0;
-}
-
void __init hpte_init_native(void)
{
mmu_hash_ops.hpte_invalidate = native_hpte_invalidate;
@@ -889,7 +877,4 @@ void __init hpte_init_native(void)
mmu_hash_ops.hpte_clear_all = native_hpte_clear;
mmu_hash_ops.flush_hash_range = native_flush_hash_range;
mmu_hash_ops.hugepage_invalidate = native_hugepage_invalidate;
-
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- register_process_table = native_register_proc_table;
}
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index cf290d415dcd..0bd3790d35df 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -132,9 +132,10 @@ EXPORT_SYMBOL(mmu_hash_ops);
* is provided by the firmware.
*/
-/* Pre-POWER4 CPUs (4k pages only)
+/*
+ * Fallback (4k pages only)
*/
-static struct mmu_psize_def mmu_psize_defaults_old[] = {
+static struct mmu_psize_def mmu_psize_defaults[] = {
[MMU_PAGE_4K] = {
.shift = 12,
.sllp = 0,
@@ -554,8 +555,8 @@ static void __init htab_scan_page_sizes(void)
mmu_psize_set_default_penc();
/* Default to 4K pages only */
- memcpy(mmu_psize_defs, mmu_psize_defaults_old,
- sizeof(mmu_psize_defaults_old));
+ memcpy(mmu_psize_defs, mmu_psize_defaults,
+ sizeof(mmu_psize_defaults));
/*
* Try to find the available page sizes in the device-tree
@@ -781,7 +782,7 @@ void resize_hpt_for_hotplug(unsigned long new_mem_size)
}
}
-int hash__create_section_mapping(unsigned long start, unsigned long end)
+int hash__create_section_mapping(unsigned long start, unsigned long end, int nid)
{
int rc = htab_bolt_mapping(start, end, __pa(start),
pgprot_val(PAGE_KERNEL), mmu_linear_psize,
@@ -875,6 +876,12 @@ static void __init htab_initialize(void)
/* Using a hypervisor which owns the htab */
htab_address = NULL;
_SDR1 = 0;
+ /*
+ * On POWER9, we need to do a H_REGISTER_PROC_TBL hcall
+ * to inform the hypervisor that we wish to use the HPT.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ register_process_table(0, 0, 0);
#ifdef CONFIG_FA_DUMP
/*
* If firmware assisted dump is active firmware preserves
@@ -1110,19 +1117,18 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
#ifdef CONFIG_PPC_MM_SLICES
static unsigned int get_paca_psize(unsigned long addr)
{
- u64 lpsizes;
- unsigned char *hpsizes;
+ unsigned char *psizes;
unsigned long index, mask_index;
if (addr < SLICE_LOW_TOP) {
- lpsizes = get_paca()->mm_ctx_low_slices_psize;
+ psizes = get_paca()->mm_ctx_low_slices_psize;
index = GET_LOW_SLICE_INDEX(addr);
- return (lpsizes >> (index * 4)) & 0xF;
+ } else {
+ psizes = get_paca()->mm_ctx_high_slices_psize;
+ index = GET_HIGH_SLICE_INDEX(addr);
}
- hpsizes = get_paca()->mm_ctx_high_slices_psize;
- index = GET_HIGH_SLICE_INDEX(addr);
mask_index = index & 0x1;
- return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF;
+ return (psizes[index >> 1] >> (mask_index * 4)) & 0xF;
}
#else
@@ -1262,7 +1268,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
}
psize = get_slice_psize(mm, ea);
ssize = user_segment_size(ea);
- vsid = get_vsid(mm->context.id, ea, ssize);
+ vsid = get_user_vsid(&mm->context, ea, ssize);
break;
case VMALLOC_REGION_ID:
vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
@@ -1527,7 +1533,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
/* Get VSID */
ssize = user_segment_size(ea);
- vsid = get_vsid(mm->context.id, ea, ssize);
+ vsid = get_user_vsid(&mm->context, ea, ssize);
if (!vsid)
return;
/*
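get_paca_psize() now decodes low and high slices through the same byte-array scheme: each byte packs two 4-bit page-size codes, the low nibble for even slice indices and the high nibble for odd ones. A self-contained toy of that decode (array contents are made up):

#include <stdio.h>

static unsigned int read_psize(const unsigned char *psizes, unsigned long index)
{
	unsigned long mask_index = index & 0x1;	/* which nibble */

	return (psizes[index >> 1] >> (mask_index * 4)) & 0xF;
}

int main(void)
{
	unsigned char psizes[2] = { 0x54, 0x33 };	/* slices 0..3 */

	/* slice 0 -> 0x4, slice 1 -> 0x5, slice 2 -> 0x3 */
	printf("%u %u %u\n", read_psize(psizes, 0),
	       read_psize(psizes, 1), read_psize(psizes, 2));
	return 0;
}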
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 3a08d211d2ee..f1153f8254e3 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -122,9 +122,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
#define HUGEPD_PGD_SHIFT PGDIR_SHIFT
#define HUGEPD_PUD_SHIFT PUD_SHIFT
-#else
-#define HUGEPD_PGD_SHIFT PUD_SHIFT
-#define HUGEPD_PUD_SHIFT PMD_SHIFT
#endif
/*
@@ -553,9 +550,11 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
struct hstate *hstate = hstate_file(file);
int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
+#ifdef CONFIG_PPC_RADIX_MMU
if (radix_enabled())
return radix__hugetlb_get_unmapped_area(file, addr, len,
pgoff, flags);
+#endif
return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1);
}
#endif
@@ -563,10 +562,12 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
{
#ifdef CONFIG_PPC_MM_SLICES
- unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
/* With radix we don't use slices, so derive the page size from the vma */
- if (!radix_enabled())
+ if (!radix_enabled()) {
+ unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
+
return 1UL << mmu_psize_to_shift(psize);
+ }
#endif
return vma_kernel_pagesize(vma);
}
@@ -663,15 +664,26 @@ static int __init hugetlbpage_init(void)
shift = mmu_psize_to_shift(psize);
- if (add_huge_page_size(1ULL << shift) < 0)
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (shift > PGDIR_SHIFT)
continue;
-
+ else if (shift > PUD_SHIFT)
+ pdshift = PGDIR_SHIFT;
+ else if (shift > PMD_SHIFT)
+ pdshift = PUD_SHIFT;
+ else
+ pdshift = PMD_SHIFT;
+#else
if (shift < HUGEPD_PUD_SHIFT)
pdshift = PMD_SHIFT;
else if (shift < HUGEPD_PGD_SHIFT)
pdshift = PUD_SHIFT;
else
pdshift = PGDIR_SHIFT;
+#endif
+
+ if (add_huge_page_size(1ULL << shift) < 0)
+ continue;
/*
* If pdshift and shift have the same value, we don't
* use the pgtable cache for hugepd.
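On Book3S-64 the pdshift for a huge page size is now derived directly from its shift rather than from the HUGEPD_* constants. A standalone illustration of the selection (the *_DEMO shifts are stand-ins; real values depend on the configured base page size):

#include <stdio.h>

#define PMD_SHIFT_DEMO		24
#define PUD_SHIFT_DEMO		34
#define PGDIR_SHIFT_DEMO	44

static int pick_pdshift(int shift)
{
	if (shift > PGDIR_SHIFT_DEMO)
		return -1;			/* too big: skip this size */
	else if (shift > PUD_SHIFT_DEMO)
		return PGDIR_SHIFT_DEMO;
	else if (shift > PMD_SHIFT_DEMO)
		return PUD_SHIFT_DEMO;
	return PMD_SHIFT_DEMO;
}

int main(void)
{
	/* e.g. a 16M page (shift 24) sits at PMD level, 16G at PUD level */
	printf("shift 24 -> pdshift %d\n", pick_pdshift(24));
	printf("shift 34 -> pdshift %d\n", pick_pdshift(34));
	return 0;
}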
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 6419b33ca309..3e59e5d64b01 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -88,18 +88,13 @@ void MMU_init(void);
int __map_without_bats;
int __map_without_ltlbs;
-/*
- * This tells the system to allow ioremapping memory marked as reserved.
- */
-int __allow_ioremap_reserved;
-
/* max amount of low RAM to map in */
unsigned long __max_low_memory = MAX_LOW_MEM;
/*
* Check for command-line options that affect what MMU_init will do.
*/
-void __init MMU_setup(void)
+static void __init MMU_setup(void)
{
/* Check for nobats option (used in mapin_ram). */
if (strstr(boot_command_line, "nobats")) {
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index fdb424a29f03..51ce091914f9 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -68,12 +68,6 @@
#include "mmu_decl.h"
-#ifdef CONFIG_PPC_BOOK3S_64
-#if H_PGTABLE_RANGE > USER_VSID_RANGE
-#warning Limited user VSID range means pagetable space is wasted
-#endif
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
phys_addr_t memstart_addr = ~0;
EXPORT_SYMBOL_GPL(memstart_addr);
phys_addr_t kernstart_addr;
@@ -372,7 +366,7 @@ static int __init parse_disable_radix(char *p)
{
bool val;
- if (strlen(p) == 0)
+ if (!p)
val = true;
else if (kstrtobool(p, &val))
return -EINVAL;
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index fe8c61149fb8..737f8a4632cc 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -82,17 +82,7 @@ static inline pte_t *virt_to_kpte(unsigned long vaddr)
int page_is_ram(unsigned long pfn)
{
-#ifndef CONFIG_PPC64 /* XXX for now */
- return pfn < max_pfn;
-#else
- unsigned long paddr = (pfn << PAGE_SHIFT);
- struct memblock_region *reg;
-
- for_each_memblock(memory, reg)
- if (paddr >= reg->base && paddr < (reg->base + reg->size))
- return 1;
- return 0;
-#endif
+ return memblock_is_memory(__pfn_to_phys(pfn));
}
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
@@ -117,7 +107,7 @@ int memory_add_physaddr_to_nid(u64 start)
}
#endif
-int __weak create_section_mapping(unsigned long start, unsigned long end)
+int __weak create_section_mapping(unsigned long start, unsigned long end, int nid)
{
return -ENODEV;
}
@@ -127,7 +117,7 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
return -ENODEV;
}
-int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
bool want_memblock)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
@@ -137,7 +127,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
resize_hpt_for_hotplug(memblock_phys_mem_size());
start = (unsigned long)__va(start);
- rc = create_section_mapping(start, start + size);
+ rc = create_section_mapping(start, start + size, nid);
if (rc) {
pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n",
start, start + size, rc);
@@ -148,7 +138,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
}
#ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
+int __meminit arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -212,7 +202,7 @@ walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
EXPORT_SYMBOL_GPL(walk_system_ram_range);
#ifndef CONFIG_NEED_MULTIPLE_NODES
-void __init initmem_init(void)
+void __init mem_topology_setup(void)
{
max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
min_low_pfn = MEMORY_START >> PAGE_SHIFT;
@@ -224,7 +214,10 @@ void __init initmem_init(void)
* memblock_regions
*/
memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
+}
+void __init initmem_init(void)
+{
/* XXX need to clip this if using highmem? */
sparse_memory_present_with_active_regions(0);
sparse_init();
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 3f980baade4c..b75194dff64c 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -94,13 +94,6 @@ static int hash__init_new_context(struct mm_struct *mm)
return index;
/*
- * In the case of exec, use the default limit,
- * otherwise inherit it from the mm we are duplicating.
- */
- if (!mm->context.slb_addr_limit)
- mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
-
- /*
* The old code would re-promote on fork, we don't do that when using
* slices as it could cause problem promoting slices that have been
* forced down to 4K.
@@ -115,7 +108,7 @@ static int hash__init_new_context(struct mm_struct *mm)
* check against 0 is OK.
*/
if (mm->context.id == 0)
- slice_set_user_psize(mm, mmu_virtual_psize);
+ slice_init_new_context_exec(mm);
subpage_prot_init_new_context(mm);
@@ -186,6 +179,19 @@ void __destroy_context(int context_id)
}
EXPORT_SYMBOL_GPL(__destroy_context);
+static void destroy_contexts(mm_context_t *ctx)
+{
+ int index, context_id;
+
+ spin_lock(&mmu_context_lock);
+ for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) {
+ context_id = ctx->extended_id[index];
+ if (context_id)
+ ida_remove(&mmu_context_ida, context_id);
+ }
+ spin_unlock(&mmu_context_lock);
+}
+
#ifdef CONFIG_PPC_64K_PAGES
static void destroy_pagetable_page(struct mm_struct *mm)
{
@@ -224,7 +230,7 @@ void destroy_context(struct mm_struct *mm)
else
subpage_prot_free(mm);
destroy_pagetable_page(mm);
- __destroy_context(mm->context.id);
+ destroy_contexts(&mm->context);
mm->context.id = MMU_NO_CONTEXT;
}
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 4554d6527682..be8f5c9d4d08 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -331,6 +331,17 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm)
{
pr_hard("initing context for mm @%p\n", mm);
+#ifdef CONFIG_PPC_MM_SLICES
+ /*
+ * We have MMU_NO_CONTEXT set to be ~0. Hence check
+ * explicitly against context.id == 0. This ensures that we properly
+ * initialize context slice details for newly allocated mm's (which will
+ * have id == 0) and don't alter context slice inherited via fork (which
+ * will have id != 0).
+ */
+ if (mm->context.id == 0)
+ slice_init_new_context_exec(mm);
+#endif
mm->context.id = MMU_NO_CONTEXT;
mm->context.active = 0;
return 0;
@@ -428,8 +439,8 @@ void __init mmu_context_init(void)
* -- BenH
*/
if (mmu_has_feature(MMU_FTR_TYPE_8xx)) {
- first_context = 0;
- last_context = 15;
+ first_context = 1;
+ last_context = 16;
no_selective_tlbil = true;
} else if (mmu_has_feature(MMU_FTR_TYPE_47x)) {
first_context = 1;
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 57fbc554c785..c4c0a09a7775 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -98,7 +98,6 @@ extern void setbat(int index, unsigned long virt, phys_addr_t phys,
unsigned int size, pgprot_t prot);
extern int __map_without_bats;
-extern int __allow_ioremap_reserved;
extern unsigned int rtas_data, rtas_size;
struct hash_pte;
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index edd8d0bc9364..57a5029b4521 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -831,18 +831,13 @@ out:
of_node_put(rtas);
}
-void __init initmem_init(void)
+void __init mem_topology_setup(void)
{
- int nid, cpu;
-
- max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
- max_pfn = max_low_pfn;
+ int cpu;
if (parse_numa_properties())
setup_nonnuma();
- memblock_dump_all();
-
/*
* Modify the set of possible NUMA nodes to reflect information
* available about the set of online nodes, and the set of nodes
@@ -853,6 +848,23 @@ void __init initmem_init(void)
find_possible_nodes();
+ setup_node_to_cpumask_map();
+
+ reset_numa_cpu_lookup_table();
+
+ for_each_present_cpu(cpu)
+ numa_setup_cpu(cpu);
+}
+
+void __init initmem_init(void)
+{
+ int nid;
+
+ max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
+ max_pfn = max_low_pfn;
+
+ memblock_dump_all();
+
for_each_online_node(nid) {
unsigned long start_pfn, end_pfn;
@@ -863,10 +875,6 @@ void __init initmem_init(void)
sparse_init();
- setup_node_to_cpumask_map();
-
- reset_numa_cpu_lookup_table();
-
/*
* We need the numa_cpu_lookup_table to be accurate for all CPUs,
* even before we online them, so that we can use cpu_to_{node,mem}
@@ -876,8 +884,6 @@ void __init initmem_init(void)
*/
cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, "powerpc/numa:prepare",
ppc_numa_cpu_prepare, ppc_numa_cpu_dead);
- for_each_present_cpu(cpu)
- numa_setup_cpu(cpu);
}
static int __init early_numa(char *p)
@@ -1105,7 +1111,7 @@ static void setup_cpu_associativity_change_counters(void)
for_each_possible_cpu(cpu) {
int i;
u8 *counts = vphn_cpu_change_counts[cpu];
- volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
+ volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
for (i = 0; i < distance_ref_points_depth; i++)
counts[i] = hypervisor_counts[i];
@@ -1131,7 +1137,7 @@ static int update_cpu_associativity_changes_mask(void)
for_each_possible_cpu(cpu) {
int i, changed = 0;
u8 *counts = vphn_cpu_change_counts[cpu];
- volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
+ volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
for (i = 0; i < distance_ref_points_depth; i++) {
if (hypervisor_counts[i] != counts[i]) {
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 422e80253a33..518518fb7c45 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -155,15 +155,15 @@ void mmu_cleanup_all(void)
}
#ifdef CONFIG_MEMORY_HOTPLUG
-int create_section_mapping(unsigned long start, unsigned long end)
+int __meminit create_section_mapping(unsigned long start, unsigned long end, int nid)
{
if (radix_enabled())
- return radix__create_section_mapping(start, end);
+ return radix__create_section_mapping(start, end, nid);
- return hash__create_section_mapping(start, end);
+ return hash__create_section_mapping(start, end, nid);
}
-int remove_section_mapping(unsigned long start, unsigned long end)
+int __meminit remove_section_mapping(unsigned long start, unsigned long end)
{
if (radix_enabled())
return radix__remove_section_mapping(start, end);
diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c
index 469808e77e58..199bfda5f0d9 100644
--- a/arch/powerpc/mm/pgtable-hash64.c
+++ b/arch/powerpc/mm/pgtable-hash64.c
@@ -24,6 +24,10 @@
#define CREATE_TRACE_POINTS
#include <trace/events/thp.h>
+#if H_PGTABLE_RANGE > (USER_VSID_RANGE * (TASK_SIZE_USER64 / TASK_CONTEXT_SIZE))
+#warning Limited user VSID range means pagetable space is wasted
+#endif
+
#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
* vmemmap is the starting address of the virtual address space where
@@ -320,7 +324,7 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
if (!is_kernel_addr(addr)) {
ssize = user_segment_size(addr);
- vsid = get_vsid(mm->context.id, addr, ssize);
+ vsid = get_user_vsid(&mm->context, addr, ssize);
WARN_ON(vsid == 0);
} else {
vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 2e10a964e290..f1891e215e39 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -48,20 +48,88 @@ static int native_register_process_table(unsigned long base, unsigned long pg_sz
return 0;
}
-static __ref void *early_alloc_pgtable(unsigned long size)
+static __ref void *early_alloc_pgtable(unsigned long size, int nid,
+ unsigned long region_start, unsigned long region_end)
{
+ unsigned long pa = 0;
void *pt;
- pt = __va(memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE));
+ if (region_start || region_end) /* has region hint */
+ pa = memblock_alloc_range(size, size, region_start, region_end,
+ MEMBLOCK_NONE);
+ else if (nid != -1) /* has node hint */
+ pa = memblock_alloc_base_nid(size, size,
+ MEMBLOCK_ALLOC_ANYWHERE,
+ nid, MEMBLOCK_NONE);
+
+ if (!pa)
+ pa = memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE);
+
+ BUG_ON(!pa);
+
+ pt = __va(pa);
memset(pt, 0, size);
return pt;
}
-int radix__map_kernel_page(unsigned long ea, unsigned long pa,
+static int early_map_kernel_page(unsigned long ea, unsigned long pa,
pgprot_t flags,
- unsigned int map_page_size)
+ unsigned int map_page_size,
+ int nid,
+ unsigned long region_start, unsigned long region_end)
{
+ unsigned long pfn = pa >> PAGE_SHIFT;
+ pgd_t *pgdp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ pgdp = pgd_offset_k(ea);
+ if (pgd_none(*pgdp)) {
+ pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid,
+ region_start, region_end);
+ pgd_populate(&init_mm, pgdp, pudp);
+ }
+ pudp = pud_offset(pgdp, ea);
+ if (map_page_size == PUD_SIZE) {
+ ptep = (pte_t *)pudp;
+ goto set_the_pte;
+ }
+ if (pud_none(*pudp)) {
+ pmdp = early_alloc_pgtable(PMD_TABLE_SIZE, nid,
+ region_start, region_end);
+ pud_populate(&init_mm, pudp, pmdp);
+ }
+ pmdp = pmd_offset(pudp, ea);
+ if (map_page_size == PMD_SIZE) {
+ ptep = pmdp_ptep(pmdp);
+ goto set_the_pte;
+ }
+ if (!pmd_present(*pmdp)) {
+ ptep = early_alloc_pgtable(PAGE_SIZE, nid,
+ region_start, region_end);
+ pmd_populate_kernel(&init_mm, pmdp, ptep);
+ }
+ ptep = pte_offset_kernel(pmdp, ea);
+
+set_the_pte:
+ set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
+ smp_wmb();
+ return 0;
+}
+
+/*
+ * nid, region_start, and region_end are hints to try to place the page
+ * table memory in the same node or region.
+ */
+static int __map_kernel_page(unsigned long ea, unsigned long pa,
+ pgprot_t flags,
+ unsigned int map_page_size,
+ int nid,
+ unsigned long region_start, unsigned long region_end)
+{
+ unsigned long pfn = pa >> PAGE_SHIFT;
pgd_t *pgdp;
pud_t *pudp;
pmd_t *pmdp;
@@ -70,61 +138,48 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa,
* Make sure task size is correct as per the max address
*/
BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);
- if (slab_is_available()) {
- pgdp = pgd_offset_k(ea);
- pudp = pud_alloc(&init_mm, pgdp, ea);
- if (!pudp)
- return -ENOMEM;
- if (map_page_size == PUD_SIZE) {
- ptep = (pte_t *)pudp;
- goto set_the_pte;
- }
- pmdp = pmd_alloc(&init_mm, pudp, ea);
- if (!pmdp)
- return -ENOMEM;
- if (map_page_size == PMD_SIZE) {
- ptep = pmdp_ptep(pmdp);
- goto set_the_pte;
- }
- ptep = pte_alloc_kernel(pmdp, ea);
- if (!ptep)
- return -ENOMEM;
- } else {
- pgdp = pgd_offset_k(ea);
- if (pgd_none(*pgdp)) {
- pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
- BUG_ON(pudp == NULL);
- pgd_populate(&init_mm, pgdp, pudp);
- }
- pudp = pud_offset(pgdp, ea);
- if (map_page_size == PUD_SIZE) {
- ptep = (pte_t *)pudp;
- goto set_the_pte;
- }
- if (pud_none(*pudp)) {
- pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
- BUG_ON(pmdp == NULL);
- pud_populate(&init_mm, pudp, pmdp);
- }
- pmdp = pmd_offset(pudp, ea);
- if (map_page_size == PMD_SIZE) {
- ptep = pmdp_ptep(pmdp);
- goto set_the_pte;
- }
- if (!pmd_present(*pmdp)) {
- ptep = early_alloc_pgtable(PAGE_SIZE);
- BUG_ON(ptep == NULL);
- pmd_populate_kernel(&init_mm, pmdp, ptep);
- }
- ptep = pte_offset_kernel(pmdp, ea);
+
+ if (unlikely(!slab_is_available()))
+ return early_map_kernel_page(ea, pa, flags, map_page_size,
+ nid, region_start, region_end);
+
+ /*
+ * We should make the page table allocation functions able to take a
+ * node, so we can place kernel page tables on the right nodes after
+ * boot.
+ */
+ pgdp = pgd_offset_k(ea);
+ pudp = pud_alloc(&init_mm, pgdp, ea);
+ if (!pudp)
+ return -ENOMEM;
+ if (map_page_size == PUD_SIZE) {
+ ptep = (pte_t *)pudp;
+ goto set_the_pte;
+ }
+ pmdp = pmd_alloc(&init_mm, pudp, ea);
+ if (!pmdp)
+ return -ENOMEM;
+ if (map_page_size == PMD_SIZE) {
+ ptep = pmdp_ptep(pmdp);
+ goto set_the_pte;
}
+ ptep = pte_alloc_kernel(pmdp, ea);
+ if (!ptep)
+ return -ENOMEM;
set_the_pte:
- set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, flags));
+ set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
smp_wmb();
return 0;
}
+int radix__map_kernel_page(unsigned long ea, unsigned long pa,
+ pgprot_t flags,
+ unsigned int map_page_size)
+{
+ return __map_kernel_page(ea, pa, flags, map_page_size, -1, 0, 0);
+}
+
#ifdef CONFIG_STRICT_KERNEL_RWX
void radix__change_memory_range(unsigned long start, unsigned long end,
unsigned long clear)
@@ -211,7 +266,8 @@ static inline void __meminit print_mapping(unsigned long start,
}
static int __meminit create_physical_mapping(unsigned long start,
- unsigned long end)
+ unsigned long end,
+ int nid)
{
unsigned long vaddr, addr, mapping_size = 0;
pgprot_t prot;
@@ -267,7 +323,7 @@ retry:
else
prot = PAGE_KERNEL;
- rc = radix__map_kernel_page(vaddr, addr, prot, mapping_size);
+ rc = __map_kernel_page(vaddr, addr, prot, mapping_size, nid, start, end);
if (rc)
return rc;
}
@@ -276,7 +332,7 @@ retry:
return 0;
}
-static void __init radix_init_pgtable(void)
+void __init radix_init_pgtable(void)
{
unsigned long rts_field;
struct memblock_region *reg;
@@ -286,9 +342,16 @@ static void __init radix_init_pgtable(void)
/*
* Create the linear mapping, using standard page size for now
*/
- for_each_memblock(memory, reg)
+ for_each_memblock(memory, reg) {
+ /*
+ * The memblock allocator is up at this point, so the
+ * page tables will be allocated within the range. No
+ * need for a node (which we don't have yet).
+ */
WARN_ON(create_physical_mapping(reg->base,
- reg->base + reg->size));
+ reg->base + reg->size,
+ -1));
+ }
/* Find out how many PID bits are supported */
if (cpu_has_feature(CPU_FTR_HVMODE)) {
@@ -317,7 +380,7 @@ static void __init radix_init_pgtable(void)
* host.
*/
BUG_ON(PRTB_SIZE_SHIFT > 36);
- process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT);
+ process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT, -1, 0, 0);
/*
* Fill in the process table.
*/
@@ -575,12 +638,8 @@ void __init radix__early_init_mmu(void)
#ifdef CONFIG_PCI
pci_io_base = ISA_IO_BASE;
#endif
-
- /*
- * For now radix also use the same frag size
- */
- __pte_frag_nr = H_PTE_FRAG_NR;
- __pte_frag_size_shift = H_PTE_FRAG_SIZE_SHIFT;
+ __pte_frag_nr = RADIX_PTE_FRAG_NR;
+ __pte_frag_size_shift = RADIX_PTE_FRAG_SIZE_SHIFT;
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
radix_init_native();
@@ -695,7 +754,7 @@ struct change_mapping_params {
unsigned long aligned_end;
};
-static int stop_machine_change_mapping(void *data)
+static int __meminit stop_machine_change_mapping(void *data)
{
struct change_mapping_params *params =
(struct change_mapping_params *)data;
@@ -705,8 +764,8 @@ static int stop_machine_change_mapping(void *data)
spin_unlock(&init_mm.page_table_lock);
pte_clear(&init_mm, params->aligned_start, params->pte);
- create_physical_mapping(params->aligned_start, params->start);
- create_physical_mapping(params->end, params->aligned_end);
+ create_physical_mapping(params->aligned_start, params->start, -1);
+ create_physical_mapping(params->end, params->aligned_end, -1);
spin_lock(&init_mm.page_table_lock);
return 0;
}
@@ -742,7 +801,7 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr,
/*
* clear the pte and potentially split the mapping helper
*/
-static void split_kernel_mapping(unsigned long addr, unsigned long end,
+static void __meminit split_kernel_mapping(unsigned long addr, unsigned long end,
unsigned long size, pte_t *pte)
{
unsigned long mask = ~(size - 1);
@@ -835,7 +894,7 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr,
}
}
-static void remove_pagetable(unsigned long start, unsigned long end)
+static void __meminit remove_pagetable(unsigned long start, unsigned long end)
{
unsigned long addr, next;
pud_t *pud_base;
@@ -863,12 +922,12 @@ static void remove_pagetable(unsigned long start, unsigned long end)
radix__flush_tlb_kernel_range(start, end);
}
-int __ref radix__create_section_mapping(unsigned long start, unsigned long end)
+int __meminit radix__create_section_mapping(unsigned long start, unsigned long end, int nid)
{
- return create_physical_mapping(start, end);
+ return create_physical_mapping(start, end, nid);
}
-int radix__remove_section_mapping(unsigned long start, unsigned long end)
+int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
{
remove_pagetable(start, end);
return 0;
@@ -876,19 +935,30 @@ int radix__remove_section_mapping(unsigned long start, unsigned long end)
#endif /* CONFIG_MEMORY_HOTPLUG */
#ifdef CONFIG_SPARSEMEM_VMEMMAP
+static int __map_kernel_page_nid(unsigned long ea, unsigned long pa,
+ pgprot_t flags, unsigned int map_page_size,
+ int nid)
+{
+ return __map_kernel_page(ea, pa, flags, map_page_size, nid, 0, 0);
+}
+
int __meminit radix__vmemmap_create_mapping(unsigned long start,
unsigned long page_size,
unsigned long phys)
{
/* Create a PTE encoding */
unsigned long flags = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_KERNEL_RW;
+ int nid = early_pfn_to_nid(phys >> PAGE_SHIFT);
+ int ret;
+
+ ret = __map_kernel_page_nid(start, phys, __pgprot(flags), page_size, nid);
+ BUG_ON(ret);
- BUG_ON(radix__map_kernel_page(start, phys, __pgprot(flags), page_size));
return 0;
}
#ifdef CONFIG_MEMORY_HOTPLUG
-void radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
+void __meminit radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
{
remove_pagetable(start, start + page_size);
}
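early_alloc_pgtable() now takes placement hints and degrades gracefully: it tries the requested physical region first, otherwise the requested node, then falls back to anywhere, and only BUG()s if everything fails. A standalone mock of that fallback ordering (the alloc_* stubs are invented for illustration):

#include <stdio.h>

/* Stubs standing in for the memblock allocators; 0 means failure. */
static unsigned long alloc_in_range(unsigned long size,
				    unsigned long start, unsigned long end)
{
	(void)size; (void)start; (void)end;
	return 0;			/* simulate: no room in the region */
}

static unsigned long alloc_on_node(unsigned long size, int nid)
{
	(void)size; (void)nid;
	return 0;			/* simulate: node is full */
}

static unsigned long alloc_anywhere(unsigned long size)
{
	(void)size;
	return 0x10000;			/* last resort succeeds here */
}

static unsigned long early_alloc_demo(unsigned long size, int nid,
				      unsigned long rstart, unsigned long rend)
{
	unsigned long pa = 0;

	if (rstart || rend)		/* strongest hint: a physical range */
		pa = alloc_in_range(size, rstart, rend);
	else if (nid != -1)		/* weaker hint: a NUMA node */
		pa = alloc_on_node(size, nid);

	if (!pa)			/* fall back to anywhere */
		pa = alloc_anywhere(size);
	return pa;
}

int main(void)
{
	printf("pa = 0x%lx\n", early_alloc_demo(4096, 1, 0, 0));
	return 0;
}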
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index d35d9ad3c1cd..120a49bfb9c6 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -148,7 +148,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
* mem_init() sets high_memory so only do the check after that.
*/
if (slab_is_available() && (p < virt_to_phys(high_memory)) &&
- !(__allow_ioremap_reserved && memblock_is_region_reserved(p, size))) {
+ page_is_ram(__phys_to_pfn(p))) {
printk("__ioremap(): phys addr 0x%llx is RAM lr %ps\n",
(unsigned long long)p, __builtin_return_address(0));
return NULL;
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index adf469f312f2..9bf659d5078c 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -57,11 +57,6 @@
#include "mmu_decl.h"
-#ifdef CONFIG_PPC_BOOK3S_64
-#if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT))
-#error TASK_SIZE_USER64 exceeds user VSID range
-#endif
-#endif
#ifdef CONFIG_PPC_BOOK3S_64
/*
diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c
index ba71c5481f42..0eafdf01edc7 100644
--- a/arch/powerpc/mm/pkeys.c
+++ b/arch/powerpc/mm/pkeys.c
@@ -119,18 +119,15 @@ int pkey_initialize(void)
#else
os_reserved = 0;
#endif
+ initial_allocation_mask = ~0x0;
+ pkey_amr_uamor_mask = ~0x0ul;
+ pkey_iamr_mask = ~0x0ul;
/*
- * Bits are in LE format. NOTE: 1, 0 are reserved.
+ * key 0, 1 are reserved.
* key 0 is the default key, which allows read/write/execute.
* key 1 is recommended not to be used. PowerISA(3.0) page 1015,
* programming note.
*/
- initial_allocation_mask = ~0x0;
-
- /* register mask is in BE format */
- pkey_amr_uamor_mask = ~0x0ul;
- pkey_iamr_mask = ~0x0ul;
-
for (i = 2; i < (pkeys_total - os_reserved); i++) {
initial_allocation_mask &= ~(0x1 << i);
pkey_amr_uamor_mask &= ~(0x3ul << pkeyshift(i));
@@ -308,9 +305,9 @@ void thread_pkey_regs_init(struct thread_struct *thread)
if (static_branch_likely(&pkey_disabled))
return;
- write_amr(read_amr() & pkey_amr_uamor_mask);
- write_iamr(read_iamr() & pkey_iamr_mask);
- write_uamor(read_uamor() & pkey_amr_uamor_mask);
+ thread->amr = read_amr() & pkey_amr_uamor_mask;
+ thread->iamr = read_iamr() & pkey_iamr_mask;
+ thread->uamor = read_uamor() & pkey_amr_uamor_mask;
}
static inline bool pkey_allows_readwrite(int pkey)
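thread_pkey_regs_init() no longer writes AMR/IAMR/UAMOR directly; it records the default values in the thread struct, and the actual SPR writes presumably happen when the thread's key registers are restored (e.g. on context switch). A toy model of that split, with plain variables standing in for the SPRs:

#include <stdint.h>
#include <stdio.h>

static uint64_t amr_spr = ~0ull, iamr_spr = ~0ull;	/* fake SPRs */

struct thread_demo { uint64_t amr, iamr; };

static void pkey_regs_init(struct thread_demo *t,
			   uint64_t amr_mask, uint64_t iamr_mask)
{
	/* Compute and stash the defaults; do not touch the "SPRs" */
	t->amr = amr_spr & amr_mask;
	t->iamr = iamr_spr & iamr_mask;
}

static void pkey_regs_restore(const struct thread_demo *t)
{
	amr_spr = t->amr;		/* the writes happen here instead */
	iamr_spr = t->iamr;
}

int main(void)
{
	struct thread_demo t;

	pkey_regs_init(&t, 0x0fff, 0x00ff);
	pkey_regs_restore(&t);
	printf("amr=%#llx iamr=%#llx\n",
	       (unsigned long long)amr_spr, (unsigned long long)iamr_spr);
	return 0;
}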
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 13cfe413b40d..66577cc66dc9 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -22,6 +22,7 @@
#include <asm/cacheflush.h>
#include <asm/smp.h>
#include <linux/compiler.h>
+#include <linux/context_tracking.h>
#include <linux/mm_types.h>
#include <asm/udbg.h>
@@ -340,3 +341,110 @@ void slb_initialize(void)
asm volatile("isync":::"memory");
}
+
+static void insert_slb_entry(unsigned long vsid, unsigned long ea,
+ int bpsize, int ssize)
+{
+ unsigned long flags, vsid_data, esid_data;
+ enum slb_index index;
+ int slb_cache_index;
+
+ /*
+ * IRQs are disabled, hence it is safe to access the PACA.
+ */
+ index = get_paca()->stab_rr;
+
+ /*
+ * Simple round-robin replacement of the SLB, starting at SLB_NUM_BOLTED.
+ */
+ if (index < (mmu_slb_size - 1))
+ index++;
+ else
+ index = SLB_NUM_BOLTED;
+
+ get_paca()->stab_rr = index;
+
+ flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;
+ vsid_data = (vsid << slb_vsid_shift(ssize)) | flags |
+ ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT);
+ esid_data = mk_esid_data(ea, ssize, index);
+
+ asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data)
+ : "memory");
+
+ /*
+ * Now update slb cache entries
+ */
+ slb_cache_index = get_paca()->slb_cache_ptr;
+ if (slb_cache_index < SLB_CACHE_ENTRIES) {
+ /*
+ * We have space in the SLB cache for the optimized switch_slb().
+ * Store the top 36 bits of esid_data, as per the ISA.
+ */
+ get_paca()->slb_cache[slb_cache_index++] = esid_data >> 28;
+ get_paca()->slb_cache_ptr++;
+ } else {
+ /*
+ * Our cache is full and the current cache content no longer
+ * reliably indicates the active SLB contents. Bump the pointer
+ * so that switch_slb() will ignore the cache.
+ */
+ get_paca()->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
+ }
+}
+
+static void handle_multi_context_slb_miss(int context_id, unsigned long ea)
+{
+ struct mm_struct *mm = current->mm;
+ unsigned long vsid;
+ int bpsize;
+
+ /*
+ * We are always above 1TB, hence use the high user segment size.
+ */
+ vsid = get_vsid(context_id, ea, mmu_highuser_ssize);
+ bpsize = get_slice_psize(mm, ea);
+ insert_slb_entry(vsid, ea, bpsize, mmu_highuser_ssize);
+}
+
+void slb_miss_large_addr(struct pt_regs *regs)
+{
+ enum ctx_state prev_state = exception_enter();
+ unsigned long ea = regs->dar;
+ int context;
+
+ if (REGION_ID(ea) != USER_REGION_ID)
+ goto slb_bad_addr;
+
+ /*
+ * Are we beyond what the page table layout supports?
+ */
+ if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE)
+ goto slb_bad_addr;
+
+ /* Lower addresses should have been handled by the asm code */
+ if (ea < (1UL << MAX_EA_BITS_PER_CONTEXT))
+ goto slb_bad_addr;
+
+ /*
+ * Consider this a bad access if we take an SLB miss
+ * on an address above the address limit.
+ */
+ if (ea >= current->mm->context.slb_addr_limit)
+ goto slb_bad_addr;
+
+ context = get_ea_context(&current->mm->context, ea);
+ if (!context)
+ goto slb_bad_addr;
+
+ handle_multi_context_slb_miss(context, ea);
+ exception_exit(prev_state);
+ return;
+
+slb_bad_addr:
+ if (user_mode(regs))
+ _exception(SIGSEGV, regs, SEGV_BNDERR, ea);
+ else
+ bad_page_fault(regs, ea, SIGSEGV);
+ exception_exit(prev_state);
+}
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 2cf5ef3fc50d..a83fbd2a4a24 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -75,10 +75,15 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA)
*/
_GLOBAL(slb_allocate)
/*
- * check for bad kernel/user address
- * (ea & ~REGION_MASK) >= PGTABLE_RANGE
+ * Check if the address falls within the range of the first context, or
+ * if we may need to handle multiple contexts. For the first context we
+ * allocate the SLB entry via the fast path below. For large addresses we
+ * branch out to C code and see if additional contexts have been
+ * allocated.
+ * The test here is:
+ * (ea & ~REGION_MASK) >= (1ull << MAX_EA_BITS_PER_CONTEXT)
*/
- rldicr. r9,r3,4,(63 - H_PGTABLE_EADDR_SIZE - 4)
+ rldicr. r9,r3,4,(63 - MAX_EA_BITS_PER_CONTEXT - 4)
bne- 8f
srdi r9,r3,60 /* get region */
@@ -200,10 +205,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
5:
/*
* Handle lpsizes
- * r9 is get_paca()->context.low_slices_psize, r11 is index
+ * r9 is get_paca()->context.low_slices_psize[index], r11 is mask_index
*/
- ld r9,PACALOWSLICESPSIZE(r13)
- mr r11,r10
+ srdi r11,r10,1 /* index */
+ addi r9,r11,PACALOWSLICESPSIZE
+ lbzx r9,r13,r9 /* r9 is lpsizes[r11] */
+ rldicl r11,r10,0,63 /* r11 = r10 & 0x1 */
6:
sldi r11,r11,2 /* index * 4 */
/* Extract the psize and multiply to get an array offset */
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 23ec2c5e3b78..9cd87d11fe4e 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -37,32 +37,25 @@
#include <asm/hugetlb.h>
static DEFINE_SPINLOCK(slice_convert_lock);
-/*
- * One bit per slice. We have lower slices which cover 256MB segments
- * upto 4G range. That gets us 16 low slices. For the rest we track slices
- * in 1TB size.
- */
-struct slice_mask {
- u64 low_slices;
- DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
-};
#ifdef DEBUG
int _slice_debug = 1;
-static void slice_print_mask(const char *label, struct slice_mask mask)
+static void slice_print_mask(const char *label, const struct slice_mask *mask)
{
if (!_slice_debug)
return;
- pr_devel("%s low_slice: %*pbl\n", label, (int)SLICE_NUM_LOW, &mask.low_slices);
- pr_devel("%s high_slice: %*pbl\n", label, (int)SLICE_NUM_HIGH, mask.high_slices);
+ pr_devel("%s low_slice: %*pbl\n", label,
+ (int)SLICE_NUM_LOW, &mask->low_slices);
+ pr_devel("%s high_slice: %*pbl\n", label,
+ (int)SLICE_NUM_HIGH, mask->high_slices);
}
#define slice_dbg(fmt...) do { if (_slice_debug) pr_devel(fmt); } while (0)
#else
-static void slice_print_mask(const char *label, struct slice_mask mask) {}
+static void slice_print_mask(const char *label, const struct slice_mask *mask) {}
#define slice_dbg(fmt...)
#endif
@@ -73,10 +66,12 @@ static void slice_range_to_mask(unsigned long start, unsigned long len,
unsigned long end = start + len - 1;
ret->low_slices = 0;
- bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
+ if (SLICE_NUM_HIGH)
+ bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
if (start < SLICE_LOW_TOP) {
- unsigned long mend = min(end, (SLICE_LOW_TOP - 1));
+ unsigned long mend = min(end,
+ (unsigned long)(SLICE_LOW_TOP - 1));
ret->low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
- (1u << GET_LOW_SLICE_INDEX(start));
@@ -113,11 +108,13 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
unsigned long start = slice << SLICE_HIGH_SHIFT;
unsigned long end = start + (1ul << SLICE_HIGH_SHIFT);
+#ifdef CONFIG_PPC64
/* Hack, so that each address is controlled by exactly one
* of the high or low area bitmaps; the first high area starts
* at 4GB, not 0 */
if (start == 0)
start = SLICE_LOW_TOP;
+#endif
return !slice_area_is_free(mm, start, end - start);
}
@@ -128,7 +125,8 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
unsigned long i;
ret->low_slices = 0;
- bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
+ if (SLICE_NUM_HIGH)
+ bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
for (i = 0; i < SLICE_NUM_LOW; i++)
if (!slice_low_has_vma(mm, i))
@@ -142,53 +140,75 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
__set_bit(i, ret->high_slices);
}
-static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret,
- unsigned long high_limit)
+#ifdef CONFIG_PPC_BOOK3S_64
+static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize)
{
- unsigned char *hpsizes;
- int index, mask_index;
- unsigned long i;
- u64 lpsizes;
-
- ret->low_slices = 0;
- bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
+#ifdef CONFIG_PPC_64K_PAGES
+ if (psize == MMU_PAGE_64K)
+ return &mm->context.mask_64k;
+#endif
+ if (psize == MMU_PAGE_4K)
+ return &mm->context.mask_4k;
+#ifdef CONFIG_HUGETLB_PAGE
+ if (psize == MMU_PAGE_16M)
+ return &mm->context.mask_16m;
+ if (psize == MMU_PAGE_16G)
+ return &mm->context.mask_16g;
+#endif
+ BUG();
+}
+#elif defined(CONFIG_PPC_8xx)
+static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize)
+{
+ if (psize == mmu_virtual_psize)
+ return &mm->context.mask_base_psize;
+#ifdef CONFIG_HUGETLB_PAGE
+ if (psize == MMU_PAGE_512K)
+ return &mm->context.mask_512k;
+ if (psize == MMU_PAGE_8M)
+ return &mm->context.mask_8m;
+#endif
+ BUG();
+}
+#else
+#error "Must define the slice masks for page sizes supported by the platform"
+#endif
- lpsizes = mm->context.low_slices_psize;
- for (i = 0; i < SLICE_NUM_LOW; i++)
- if (((lpsizes >> (i * 4)) & 0xf) == psize)
- ret->low_slices |= 1u << i;
+static bool slice_check_range_fits(struct mm_struct *mm,
+ const struct slice_mask *available,
+ unsigned long start, unsigned long len)
+{
+ unsigned long end = start + len - 1;
+ u64 low_slices = 0;
- if (high_limit <= SLICE_LOW_TOP)
- return;
+ if (start < SLICE_LOW_TOP) {
+ unsigned long mend = min(end,
+ (unsigned long)(SLICE_LOW_TOP - 1));
- hpsizes = mm->context.high_slices_psize;
- for (i = 0; i < GET_HIGH_SLICE_INDEX(high_limit); i++) {
- mask_index = i & 0x1;
- index = i >> 1;
- if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
- __set_bit(i, ret->high_slices);
+ low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
+ - (1u << GET_LOW_SLICE_INDEX(start));
}
-}
+ if ((low_slices & available->low_slices) != low_slices)
+ return false;
-static int slice_check_fit(struct mm_struct *mm,
- struct slice_mask mask, struct slice_mask available)
-{
- DECLARE_BITMAP(result, SLICE_NUM_HIGH);
- /*
- * Make sure we just do bit compare only to the max
- * addr limit and not the full bit map size.
- */
- unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit);
+ if (SLICE_NUM_HIGH && ((start + len) > SLICE_LOW_TOP)) {
+ unsigned long start_index = GET_HIGH_SLICE_INDEX(start);
+ unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT));
+ unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index;
+ unsigned long i;
- bitmap_and(result, mask.high_slices,
- available.high_slices, slice_count);
+ for (i = start_index; i < start_index + count; i++) {
+ if (!test_bit(i, available->high_slices))
+ return false;
+ }
+ }
- return (mask.low_slices & available.low_slices) == mask.low_slices &&
- bitmap_equal(result, mask.high_slices, slice_count);
+ return true;
}
static void slice_flush_segments(void *parm)
{
+#ifdef CONFIG_PPC64
struct mm_struct *mm = parm;
unsigned long flags;
@@ -200,40 +220,64 @@ static void slice_flush_segments(void *parm)
local_irq_save(flags);
slb_flush_and_rebolt();
local_irq_restore(flags);
+#endif
}
-static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize)
+static void slice_convert(struct mm_struct *mm,
+ const struct slice_mask *mask, int psize)
{
int index, mask_index;
/* Write the new slice psize bits */
- unsigned char *hpsizes;
- u64 lpsizes;
+ unsigned char *hpsizes, *lpsizes;
+ struct slice_mask *psize_mask, *old_mask;
unsigned long i, flags;
+ int old_psize;
slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
slice_print_mask(" mask", mask);
+ psize_mask = slice_mask_for_size(mm, psize);
+
/* We need to use a spinlock here to protect against
* concurrent 64k -> 4k demotion ...
*/
spin_lock_irqsave(&slice_convert_lock, flags);
lpsizes = mm->context.low_slices_psize;
- for (i = 0; i < SLICE_NUM_LOW; i++)
- if (mask.low_slices & (1u << i))
- lpsizes = (lpsizes & ~(0xful << (i * 4))) |
- (((unsigned long)psize) << (i * 4));
+ for (i = 0; i < SLICE_NUM_LOW; i++) {
+ if (!(mask->low_slices & (1u << i)))
+ continue;
+
+ mask_index = i & 0x1;
+ index = i >> 1;
- /* Assign the value back */
- mm->context.low_slices_psize = lpsizes;
+ /* Update the slice_mask */
+ old_psize = (lpsizes[index] >> (mask_index * 4)) & 0xf;
+ old_mask = slice_mask_for_size(mm, old_psize);
+ old_mask->low_slices &= ~(1u << i);
+ psize_mask->low_slices |= 1u << i;
+
+ /* Update the sizes array */
+ lpsizes[index] = (lpsizes[index] & ~(0xf << (mask_index * 4))) |
+ (((unsigned long)psize) << (mask_index * 4));
+ }
hpsizes = mm->context.high_slices_psize;
for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) {
+ if (!test_bit(i, mask->high_slices))
+ continue;
+
mask_index = i & 0x1;
index = i >> 1;
- if (test_bit(i, mask.high_slices))
- hpsizes[index] = (hpsizes[index] &
- ~(0xf << (mask_index * 4))) |
+
+ /* Update the slice_mask */
+ old_psize = (hpsizes[index] >> (mask_index * 4)) & 0xf;
+ old_mask = slice_mask_for_size(mm, old_psize);
+ __clear_bit(i, old_mask->high_slices);
+ __set_bit(i, psize_mask->high_slices);
+
+ /* Update the sizes array */
+ hpsizes[index] = (hpsizes[index] & ~(0xf << (mask_index * 4))) |
(((unsigned long)psize) << (mask_index * 4));
}
@@ -254,26 +298,25 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
* 'available' slice_mark.
*/
static bool slice_scan_available(unsigned long addr,
- struct slice_mask available,
- int end,
- unsigned long *boundary_addr)
+ const struct slice_mask *available,
+ int end, unsigned long *boundary_addr)
{
unsigned long slice;
if (addr < SLICE_LOW_TOP) {
slice = GET_LOW_SLICE_INDEX(addr);
*boundary_addr = (slice + end) << SLICE_LOW_SHIFT;
- return !!(available.low_slices & (1u << slice));
+ return !!(available->low_slices & (1u << slice));
} else {
slice = GET_HIGH_SLICE_INDEX(addr);
*boundary_addr = (slice + end) ?
((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP;
- return !!test_bit(slice, available.high_slices);
+ return !!test_bit(slice, available->high_slices);
}
}
static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
unsigned long len,
- struct slice_mask available,
+ const struct slice_mask *available,
int psize, unsigned long high_limit)
{
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
@@ -319,7 +362,7 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
static unsigned long slice_find_area_topdown(struct mm_struct *mm,
unsigned long len,
- struct slice_mask available,
+ const struct slice_mask *available,
int psize, unsigned long high_limit)
{
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
@@ -377,7 +420,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
- struct slice_mask mask, int psize,
+ const struct slice_mask *mask, int psize,
int topdown, unsigned long high_limit)
{
if (topdown)
@@ -386,23 +429,33 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
return slice_find_area_bottomup(mm, len, mask, psize, high_limit);
}
-static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src)
+static inline void slice_copy_mask(struct slice_mask *dst,
+ const struct slice_mask *src)
{
- DECLARE_BITMAP(result, SLICE_NUM_HIGH);
-
- dst->low_slices |= src->low_slices;
- bitmap_or(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
- bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
+ dst->low_slices = src->low_slices;
+ if (!SLICE_NUM_HIGH)
+ return;
+ bitmap_copy(dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
}
-static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask *src)
+static inline void slice_or_mask(struct slice_mask *dst,
+ const struct slice_mask *src1,
+ const struct slice_mask *src2)
{
- DECLARE_BITMAP(result, SLICE_NUM_HIGH);
-
- dst->low_slices &= ~src->low_slices;
+ dst->low_slices = src1->low_slices | src2->low_slices;
+ if (!SLICE_NUM_HIGH)
+ return;
+ bitmap_or(dst->high_slices, src1->high_slices, src2->high_slices, SLICE_NUM_HIGH);
+}
- bitmap_andnot(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
- bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
+static inline void slice_andnot_mask(struct slice_mask *dst,
+ const struct slice_mask *src1,
+ const struct slice_mask *src2)
+{
+ dst->low_slices = src1->low_slices & ~src2->low_slices;
+ if (!SLICE_NUM_HIGH)
+ return;
+ bitmap_andnot(dst->high_slices, src1->high_slices, src2->high_slices, SLICE_NUM_HIGH);
}
#ifdef CONFIG_PPC_64K_PAGES
@@ -415,10 +468,10 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
unsigned long flags, unsigned int psize,
int topdown)
{
- struct slice_mask mask;
struct slice_mask good_mask;
struct slice_mask potential_mask;
- struct slice_mask compat_mask;
+ const struct slice_mask *maskp;
+ const struct slice_mask *compat_maskp = NULL;
int fixed = (flags & MAP_FIXED);
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
unsigned long page_size = 1UL << pshift;
@@ -442,23 +495,16 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
}
if (high_limit > mm->context.slb_addr_limit) {
+ /*
+ * Increasing the slb_addr_limit does not require the
+ * slice mask cache to be recalculated, because it should
+ * already be initialised beyond the old address limit.
+ */
mm->context.slb_addr_limit = high_limit;
+
on_each_cpu(slice_flush_segments, mm, 1);
}
- /*
- * init different masks
- */
- mask.low_slices = 0;
- bitmap_zero(mask.high_slices, SLICE_NUM_HIGH);
-
- /* silence stupid warning */;
- potential_mask.low_slices = 0;
- bitmap_zero(potential_mask.high_slices, SLICE_NUM_HIGH);
-
- compat_mask.low_slices = 0;
- bitmap_zero(compat_mask.high_slices, SLICE_NUM_HIGH);
-
/* Sanity checks */
BUG_ON(mm->task_size == 0);
BUG_ON(mm->context.slb_addr_limit == 0);
@@ -481,8 +527,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
/* First make up a "good" mask of slices that have the right size
* already
*/
- slice_mask_for_size(mm, psize, &good_mask, high_limit);
- slice_print_mask(" good_mask", good_mask);
+ maskp = slice_mask_for_size(mm, psize);
/*
* Here "good" means slices that are already the right page size,
@@ -503,40 +548,47 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
* search in good | compat | free, found => convert free.
*/
-#ifdef CONFIG_PPC_64K_PAGES
- /* If we support combo pages, we can allow 64k pages in 4k slices */
- if (psize == MMU_PAGE_64K) {
- slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit);
+ /*
+ * If we support combo pages, we can allow 64k pages in 4k slices.
+ * The mask copies could be avoided in most cases here if we had
+ * a pointer to the good mask for the following code to use.
+ */
+ if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && psize == MMU_PAGE_64K) {
+ compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K);
if (fixed)
- slice_or_mask(&good_mask, &compat_mask);
+ slice_or_mask(&good_mask, maskp, compat_maskp);
+ else
+ slice_copy_mask(&good_mask, maskp);
+ } else {
+ slice_copy_mask(&good_mask, maskp);
}
-#endif
+
+ slice_print_mask(" good_mask", &good_mask);
+ if (compat_maskp)
+ slice_print_mask(" compat_mask", compat_maskp);
/* First check hint if it's valid or if we have MAP_FIXED */
if (addr != 0 || fixed) {
- /* Build a mask for the requested range */
- slice_range_to_mask(addr, len, &mask);
- slice_print_mask(" mask", mask);
-
/* Check if we fit in the good mask. If we do, we just return,
* nothing else to do
*/
- if (slice_check_fit(mm, mask, good_mask)) {
+ if (slice_check_range_fits(mm, &good_mask, addr, len)) {
slice_dbg(" fits good !\n");
- return addr;
+ newaddr = addr;
+ goto return_addr;
}
} else {
/* Now let's see if we can find something in the existing
* slices for that size
*/
- newaddr = slice_find_area(mm, len, good_mask,
+ newaddr = slice_find_area(mm, len, &good_mask,
psize, topdown, high_limit);
if (newaddr != -ENOMEM) {
/* Found within the good mask, we don't have to setup,
* we thus return directly
*/
slice_dbg(" found area at 0x%lx\n", newaddr);
- return newaddr;
+ goto return_addr;
}
}
/*
@@ -544,12 +596,15 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
* empty and thus can be converted
*/
slice_mask_for_free(mm, &potential_mask, high_limit);
- slice_or_mask(&potential_mask, &good_mask);
- slice_print_mask(" potential", potential_mask);
+ slice_or_mask(&potential_mask, &potential_mask, &good_mask);
+ slice_print_mask(" potential", &potential_mask);
- if ((addr != 0 || fixed) && slice_check_fit(mm, mask, potential_mask)) {
- slice_dbg(" fits potential !\n");
- goto convert;
+ if (addr != 0 || fixed) {
+ if (slice_check_range_fits(mm, &potential_mask, addr, len)) {
+ slice_dbg(" fits potential !\n");
+ newaddr = addr;
+ goto convert;
+ }
}
/* If we have MAP_FIXED and failed the above steps, then error out */
@@ -562,46 +617,64 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
* anywhere in the good area.
*/
if (addr) {
- addr = slice_find_area(mm, len, good_mask,
- psize, topdown, high_limit);
- if (addr != -ENOMEM) {
- slice_dbg(" found area at 0x%lx\n", addr);
- return addr;
+ newaddr = slice_find_area(mm, len, &good_mask,
+ psize, topdown, high_limit);
+ if (newaddr != -ENOMEM) {
+ slice_dbg(" found area at 0x%lx\n", newaddr);
+ goto return_addr;
}
}
/* Now let's see if we can find something in the existing slices
* for that size plus free slices
*/
- addr = slice_find_area(mm, len, potential_mask,
- psize, topdown, high_limit);
+ newaddr = slice_find_area(mm, len, &potential_mask,
+ psize, topdown, high_limit);
#ifdef CONFIG_PPC_64K_PAGES
- if (addr == -ENOMEM && psize == MMU_PAGE_64K) {
+ if (newaddr == -ENOMEM && psize == MMU_PAGE_64K) {
/* retry the search with 4k-page slices included */
- slice_or_mask(&potential_mask, &compat_mask);
- addr = slice_find_area(mm, len, potential_mask,
- psize, topdown, high_limit);
+ slice_or_mask(&potential_mask, &potential_mask, compat_maskp);
+ newaddr = slice_find_area(mm, len, &potential_mask,
+ psize, topdown, high_limit);
}
#endif
- if (addr == -ENOMEM)
+ if (newaddr == -ENOMEM)
return -ENOMEM;
- slice_range_to_mask(addr, len, &mask);
- slice_dbg(" found potential area at 0x%lx\n", addr);
- slice_print_mask(" mask", mask);
+ slice_range_to_mask(newaddr, len, &potential_mask);
+ slice_dbg(" found potential area at 0x%lx\n", newaddr);
+ slice_print_mask(" mask", &potential_mask);
convert:
- slice_andnot_mask(&mask, &good_mask);
- slice_andnot_mask(&mask, &compat_mask);
- if (mask.low_slices || !bitmap_empty(mask.high_slices, SLICE_NUM_HIGH)) {
- slice_convert(mm, mask, psize);
+ /*
+ * Try to allocate the context before we do the slice conversion,
+ * so that a context allocation failure is handled gracefully.
+ */
+ if (need_extra_context(mm, newaddr)) {
+ if (alloc_extended_context(mm, newaddr) < 0)
+ return -ENOMEM;
+ }
+
+ slice_andnot_mask(&potential_mask, &potential_mask, &good_mask);
+ if (compat_maskp && !fixed)
+ slice_andnot_mask(&potential_mask, &potential_mask, compat_maskp);
+ if (potential_mask.low_slices ||
+ (SLICE_NUM_HIGH &&
+ !bitmap_empty(potential_mask.high_slices, SLICE_NUM_HIGH))) {
+ slice_convert(mm, &potential_mask, psize);
if (psize > MMU_PAGE_BASE)
on_each_cpu(slice_flush_segments, mm, 1);
}
- return addr;
+ return newaddr;
+return_addr:
+ if (need_extra_context(mm, newaddr)) {
+ if (alloc_extended_context(mm, newaddr) < 0)
+ return -ENOMEM;
+ }
+ return newaddr;
}
EXPORT_SYMBOL_GPL(slice_get_unmapped_area);
@@ -627,94 +700,60 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
{
- unsigned char *hpsizes;
+ unsigned char *psizes;
int index, mask_index;
- /*
- * Radix doesn't use slice, but can get enabled along with MMU_SLICE
- */
- if (radix_enabled()) {
-#ifdef CONFIG_PPC_64K_PAGES
- return MMU_PAGE_64K;
-#else
- return MMU_PAGE_4K;
-#endif
- }
+ VM_BUG_ON(radix_enabled());
+
if (addr < SLICE_LOW_TOP) {
- u64 lpsizes;
- lpsizes = mm->context.low_slices_psize;
+ psizes = mm->context.low_slices_psize;
index = GET_LOW_SLICE_INDEX(addr);
- return (lpsizes >> (index * 4)) & 0xf;
+ } else {
+ psizes = mm->context.high_slices_psize;
+ index = GET_HIGH_SLICE_INDEX(addr);
}
- hpsizes = mm->context.high_slices_psize;
- index = GET_HIGH_SLICE_INDEX(addr);
mask_index = index & 0x1;
- return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xf;
+ return (psizes[index >> 1] >> (mask_index * 4)) & 0xf;
}
EXPORT_SYMBOL_GPL(get_slice_psize);
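
[Editor's note: the unified low/high lookup above relies on the psize arrays packing two 4-bit page-size indices per byte — index >> 1 selects the byte, (index & 1) * 4 selects the nibble. A runnable illustration of that encoding; the array size here is arbitrary:]

#include <assert.h>
#include <string.h>

static unsigned char psizes[8];	/* stand-in for the context psize arrays */

static unsigned int get_packed(int index)
{
	int mask_index = index & 0x1;

	return (psizes[index >> 1] >> (mask_index * 4)) & 0xf;
}

static void set_packed(int index, unsigned int psize)
{
	int mask_index = index & 0x1;
	unsigned char *p = &psizes[index >> 1];

	*p = (*p & ~(0xf << (mask_index * 4))) | (psize << (mask_index * 4));
}

int main(void)
{
	memset(psizes, 0, sizeof(psizes));
	set_packed(5, 0x4);		/* slice 5 lives in the high nibble of byte 2 */
	assert(get_packed(5) == 0x4);
	assert(get_packed(4) == 0);	/* neighbouring slice untouched */
	return 0;
}
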
-/*
- * This is called by hash_page when it needs to do a lazy conversion of
- * an address space from real 64K pages to combo 4K pages (typically
- * when hitting a non cacheable mapping on a processor or hypervisor
- * that won't allow them for 64K pages).
- *
- * This is also called in init_new_context() to change back the user
- * psize from whatever the parent context had it set to
- * N.B. This may be called before mm->context.id has been set.
- *
- * This function will only change the content of the {low,high)_slice_psize
- * masks, it will not flush SLBs as this shall be handled lazily by the
- * caller.
- */
-void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
+void slice_init_new_context_exec(struct mm_struct *mm)
{
- int index, mask_index;
- unsigned char *hpsizes;
- unsigned long flags, lpsizes;
- unsigned int old_psize;
- int i;
+ unsigned char *hpsizes, *lpsizes;
+ struct slice_mask *mask;
+ unsigned int psize = mmu_virtual_psize;
- slice_dbg("slice_set_user_psize(mm=%p, psize=%d)\n", mm, psize);
+ slice_dbg("slice_init_new_context_exec(mm=%p)\n", mm);
- VM_BUG_ON(radix_enabled());
- spin_lock_irqsave(&slice_convert_lock, flags);
-
- old_psize = mm->context.user_psize;
- slice_dbg(" old_psize=%d\n", old_psize);
- if (old_psize == psize)
- goto bail;
+ /*
+ * In the case of exec, use the default limit. In the
+ * case of fork it is just inherited from the mm being
+ * duplicated.
+ */
+#ifdef CONFIG_PPC64
+ mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
+#else
+ mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW;
+#endif
mm->context.user_psize = psize;
- wmb();
+ /*
+ * Set all slice psizes to the default.
+ */
lpsizes = mm->context.low_slices_psize;
- for (i = 0; i < SLICE_NUM_LOW; i++)
- if (((lpsizes >> (i * 4)) & 0xf) == old_psize)
- lpsizes = (lpsizes & ~(0xful << (i * 4))) |
- (((unsigned long)psize) << (i * 4));
- /* Assign the value back */
- mm->context.low_slices_psize = lpsizes;
+ memset(lpsizes, (psize << 4) | psize, SLICE_NUM_LOW >> 1);
hpsizes = mm->context.high_slices_psize;
- for (i = 0; i < SLICE_NUM_HIGH; i++) {
- mask_index = i & 0x1;
- index = i >> 1;
- if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == old_psize)
- hpsizes[index] = (hpsizes[index] &
- ~(0xf << (mask_index * 4))) |
- (((unsigned long)psize) << (mask_index * 4));
- }
-
-
-
-
- slice_dbg(" lsps=%lx, hsps=%lx\n",
- (unsigned long)mm->context.low_slices_psize,
- (unsigned long)mm->context.high_slices_psize);
+ memset(hpsizes, (psize << 4) | psize, SLICE_NUM_HIGH >> 1);
- bail:
- spin_unlock_irqrestore(&slice_convert_lock, flags);
+ /*
+ * Slice mask cache starts zeroed, fill the default size cache.
+ */
+ mask = slice_mask_for_size(mm, psize);
+ mask->low_slices = ~0UL;
+ if (SLICE_NUM_HIGH)
+ bitmap_fill(mask->high_slices, SLICE_NUM_HIGH);
}
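
[Editor's note: the memset above works because replicating psize into both nibbles of the fill byte initializes two slice entries per byte, which is why the length is SLICE_NUM_{LOW,HIGH} >> 1. A quick self-check of that equivalence; sizes here are arbitrary:]

#include <assert.h>
#include <string.h>

int main(void)
{
	unsigned char psizes[32];
	unsigned int psize = 5;		/* any 4-bit page-size index */
	unsigned int i;

	/* Fill both nibbles of every byte in one pass, as above. */
	memset(psizes, (psize << 4) | psize, sizeof(psizes));

	/* Every packed entry now reads back as psize. */
	for (i = 0; i < 2 * sizeof(psizes); i++)
		assert(((psizes[i >> 1] >> ((i & 1) * 4)) & 0xf) == psize);
	return 0;
}
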
void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
@@ -725,7 +764,7 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
VM_BUG_ON(radix_enabled());
slice_range_to_mask(start, len, &mask);
- slice_convert(mm, mask, psize);
+ slice_convert(mm, &mask, psize);
}
#ifdef CONFIG_HUGETLB_PAGE
@@ -748,33 +787,27 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
* for now as we only use slices with hugetlbfs enabled. This should
* be fixed as the generic code gets fixed.
*/
-int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
+int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
unsigned long len)
{
- struct slice_mask mask, available;
+ const struct slice_mask *maskp;
unsigned int psize = mm->context.user_psize;
- unsigned long high_limit = mm->context.slb_addr_limit;
- if (radix_enabled())
- return 0;
+ VM_BUG_ON(radix_enabled());
- slice_range_to_mask(addr, len, &mask);
- slice_mask_for_size(mm, psize, &available, high_limit);
+ maskp = slice_mask_for_size(mm, psize);
#ifdef CONFIG_PPC_64K_PAGES
/* We need to account for 4k slices too */
if (psize == MMU_PAGE_64K) {
- struct slice_mask compat_mask;
- slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit);
- slice_or_mask(&available, &compat_mask);
+ const struct slice_mask *compat_maskp;
+ struct slice_mask available;
+
+ compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K);
+ slice_or_mask(&available, maskp, compat_maskp);
+ return !slice_check_range_fits(mm, &available, addr, len);
}
#endif
-#if 0 /* too verbose */
- slice_dbg("is_hugepage_only_range(mm=%p, addr=%lx, len=%lx)\n",
- mm, addr, len);
- slice_print_mask(" mask", mask);
- slice_print_mask(" available", available);
-#endif
- return !slice_check_fit(mm, mask, available);
+ return !slice_check_range_fits(mm, maskp, addr, len);
}
#endif
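
[Editor's note: the return value above inverts a fit test — the range is hugepage-only iff it does not fit entirely inside slices of the current psize (plus the 4K compat slices on 64K configs). A minimal user-space model of what slice_check_range_fits() is assumed to verify, with each slice modelled as one bit and 256MB low slices as on the hash MMU:]

#include <stdbool.h>
#include <stdio.h>

#define SLICE_SHIFT 28	/* illustrative: 256MB low slices */

/* Assumed semantics: every slice touched by [addr, addr+len) must be set. */
static bool range_fits(unsigned long mask, unsigned long addr, unsigned long len)
{
	unsigned long start = addr >> SLICE_SHIFT;
	unsigned long end = (addr + len - 1) >> SLICE_SHIFT;
	unsigned long i;

	for (i = start; i <= end; i++)
		if (!(mask & (1UL << i)))
			return false;
	return true;
}

int main(void)
{
	/* Slices 0 and 1 available: a range crossing into slice 2 must fail. */
	printf("%d\n", range_fits(0x3, 0x0, 2UL << SLICE_SHIFT));	/* 1 */
	printf("%d\n", range_fits(0x3, 0x0, 3UL << SLICE_SHIFT));	/* 0 */
	return 0;
}
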
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index a07f5372a4bf..2fba6170ab3f 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -98,7 +98,7 @@ static inline void __tlbiel_pid(unsigned long pid, int set,
rb |= set << PPC_BITLSHIFT(51);
rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
prs = 1; /* process scoped */
- r = 1; /* raidx format */
+ r = 1; /* radix format */
asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
@@ -112,7 +112,7 @@ static inline void __tlbie_pid(unsigned long pid, unsigned long ric)
rb = PPC_BIT(53); /* IS = 1 */
rs = pid << PPC_BITLSHIFT(31);
prs = 1; /* process scoped */
- r = 1; /* raidx format */
+ r = 1; /* radix format */
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
@@ -128,7 +128,7 @@ static inline void __tlbiel_va(unsigned long va, unsigned long pid,
rb |= ap << PPC_BITLSHIFT(58);
rs = pid << PPC_BITLSHIFT(31);
prs = 1; /* process scoped */
- r = 1; /* raidx format */
+ r = 1; /* radix format */
asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
@@ -144,7 +144,7 @@ static inline void __tlbie_va(unsigned long va, unsigned long pid,
rb |= ap << PPC_BITLSHIFT(58);
rs = pid << PPC_BITLSHIFT(31);
prs = 1; /* process scoped */
- r = 1; /* raidx format */
+ r = 1; /* radix format */
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
@@ -668,7 +668,7 @@ void radix__flush_tlb_all(void)
rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
prs = 0; /* partition scoped */
- r = 1; /* raidx format */
+ r = 1; /* radix format */
rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */
asm volatile("ptesync": : :"memory");
@@ -706,7 +706,7 @@ void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
{
- unsigned int pid = mm->context.id;
+ unsigned long pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
return;
@@ -734,7 +734,7 @@ extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
if (sib == cpu)
continue;
- if (paca[sib].kvm_hstate.kvm_vcpu)
+ if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
flush = true;
}
if (flush)
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 9b23f12e863c..87d71dd25441 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -89,7 +89,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
/* Build full vaddr */
if (!is_kernel_addr(addr)) {
ssize = user_segment_size(addr);
- vsid = get_vsid(mm->context.id, addr, ssize);
+ vsid = get_user_vsid(&mm->context, addr, ssize);
} else {
vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
ssize = mmu_kernel_ssize;