diff options
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/hugetlbpage-hash64.c | 25 | ||||
-rw-r--r-- | arch/powerpc/mm/hugetlbpage-radix.c | 17 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_context_iommu.c | 145 | ||||
-rw-r--r-- | arch/powerpc/mm/numa.c | 16 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable-book3s64.c | 25 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable-radix.c | 18 |
6 files changed, 136 insertions, 110 deletions
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index 2e6a8f9345d3..367ce3a4a503 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -121,3 +121,28 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; } + +pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + unsigned long pte_val; + /* + * Clear the _PAGE_PRESENT so that no hardware parallel update is + * possible. Also keep the pte_present true so that we don't take + * wrong fault. + */ + pte_val = pte_update(vma->vm_mm, addr, ptep, + _PAGE_PRESENT, _PAGE_INVALID, 1); + + return __pte(pte_val); +} + +void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep, pte_t old_pte, pte_t pte) +{ + + if (radix_enabled()) + return radix__huge_ptep_modify_prot_commit(vma, addr, ptep, + old_pte, pte); + set_huge_pte_at(vma->vm_mm, addr, ptep, pte); +} diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/hugetlbpage-radix.c index 2486bee0f93e..11d9ea28a816 100644 --- a/arch/powerpc/mm/hugetlbpage-radix.c +++ b/arch/powerpc/mm/hugetlbpage-radix.c @@ -90,3 +90,20 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr, return vm_unmapped_area(&info); } + +void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t old_pte, pte_t pte) +{ + struct mm_struct *mm = vma->vm_mm; + + /* + * To avoid NMMU hang while relaxing access we need to flush the tlb before + * we set the new value. + */ + if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) && + (atomic_read(&mm->context.copros) > 0)) + radix__flush_hugetlb_page(vma, addr); + + set_huge_pte_at(vma->vm_mm, addr, ptep, pte); +} diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index a712a650a8b6..e7a9c4f6bfca 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -21,6 +21,7 @@ #include <linux/sizes.h> #include <asm/mmu_context.h> #include <asm/pte-walk.h> +#include <linux/mm_inline.h> static DEFINE_MUTEX(mem_list_mutex); @@ -34,8 +35,18 @@ struct mm_iommu_table_group_mem_t { atomic64_t mapped; unsigned int pageshift; u64 ua; /* userspace address */ - u64 entries; /* number of entries in hpas[] */ - u64 *hpas; /* vmalloc'ed */ + u64 entries; /* number of entries in hpas/hpages[] */ + /* + * in mm_iommu_get we temporarily use this to store + * struct page address. + * + * We need to convert ua to hpa in real mode. Make it + * simpler by storing physical address. + */ + union { + struct page **hpages; /* vmalloc'ed */ + phys_addr_t *hpas; + }; #define MM_IOMMU_TABLE_INVALID_HPA ((uint64_t)-1) u64 dev_hpa; /* Device memory base address */ }; @@ -80,64 +91,13 @@ bool mm_iommu_preregistered(struct mm_struct *mm) } EXPORT_SYMBOL_GPL(mm_iommu_preregistered); -/* - * Taken from alloc_migrate_target with changes to remove CMA allocations - */ -struct page *new_iommu_non_cma_page(struct page *page, unsigned long private) -{ - gfp_t gfp_mask = GFP_USER; - struct page *new_page; - - if (PageCompound(page)) - return NULL; - - if (PageHighMem(page)) - gfp_mask |= __GFP_HIGHMEM; - - /* - * We don't want the allocation to force an OOM if possibe - */ - new_page = alloc_page(gfp_mask | __GFP_NORETRY | __GFP_NOWARN); - return new_page; -} - -static int mm_iommu_move_page_from_cma(struct page *page) -{ - int ret = 0; - LIST_HEAD(cma_migrate_pages); - - /* Ignore huge pages for now */ - if (PageCompound(page)) - return -EBUSY; - - lru_add_drain(); - ret = isolate_lru_page(page); - if (ret) - return ret; - - list_add(&page->lru, &cma_migrate_pages); - put_page(page); /* Drop the gup reference */ - - ret = migrate_pages(&cma_migrate_pages, new_iommu_non_cma_page, - NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE); - if (ret) { - if (!list_empty(&cma_migrate_pages)) - putback_movable_pages(&cma_migrate_pages); - } - - return 0; -} - static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, - unsigned long entries, unsigned long dev_hpa, - struct mm_iommu_table_group_mem_t **pmem) + unsigned long entries, unsigned long dev_hpa, + struct mm_iommu_table_group_mem_t **pmem) { struct mm_iommu_table_group_mem_t *mem; - long i, j, ret = 0, locked_entries = 0; + long i, ret, locked_entries = 0; unsigned int pageshift; - unsigned long flags; - unsigned long cur_ua; - struct page *page = NULL; mutex_lock(&mem_list_mutex); @@ -187,62 +147,43 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, goto unlock_exit; } + down_read(&mm->mmap_sem); + ret = get_user_pages_longterm(ua, entries, FOLL_WRITE, mem->hpages, NULL); + up_read(&mm->mmap_sem); + if (ret != entries) { + /* free the reference taken */ + for (i = 0; i < ret; i++) + put_page(mem->hpages[i]); + + vfree(mem->hpas); + kfree(mem); + ret = -EFAULT; + goto unlock_exit; + } + + pageshift = PAGE_SHIFT; for (i = 0; i < entries; ++i) { - cur_ua = ua + (i << PAGE_SHIFT); - if (1 != get_user_pages_fast(cur_ua, - 1/* pages */, 1/* iswrite */, &page)) { - ret = -EFAULT; - for (j = 0; j < i; ++j) - put_page(pfn_to_page(mem->hpas[j] >> - PAGE_SHIFT)); - vfree(mem->hpas); - kfree(mem); - goto unlock_exit; - } + struct page *page = mem->hpages[i]; + /* - * If we get a page from the CMA zone, since we are going to - * be pinning these entries, we might as well move them out - * of the CMA zone if possible. NOTE: faulting in + migration - * can be expensive. Batching can be considered later + * Allow to use larger than 64k IOMMU pages. Only do that + * if we are backed by hugetlb. */ - if (is_migrate_cma_page(page)) { - if (mm_iommu_move_page_from_cma(page)) - goto populate; - if (1 != get_user_pages_fast(cur_ua, - 1/* pages */, 1/* iswrite */, - &page)) { - ret = -EFAULT; - for (j = 0; j < i; ++j) - put_page(pfn_to_page(mem->hpas[j] >> - PAGE_SHIFT)); - vfree(mem->hpas); - kfree(mem); - goto unlock_exit; - } - } -populate: - pageshift = PAGE_SHIFT; - if (mem->pageshift > PAGE_SHIFT && PageCompound(page)) { - pte_t *pte; + if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page)) { struct page *head = compound_head(page); - unsigned int compshift = compound_order(head); - unsigned int pteshift; - - local_irq_save(flags); /* disables as well */ - pte = find_linux_pte(mm->pgd, cur_ua, NULL, &pteshift); - - /* Double check it is still the same pinned page */ - if (pte && pte_page(*pte) == head && - pteshift == compshift + PAGE_SHIFT) - pageshift = max_t(unsigned int, pteshift, - PAGE_SHIFT); - local_irq_restore(flags); + + pageshift = compound_order(head) + PAGE_SHIFT; } mem->pageshift = min(mem->pageshift, pageshift); + /* + * We don't need struct page reference any more, switch + * to physical address. + */ mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT; } good_exit: + ret = 0; atomic64_set(&mem->mapped, 1); mem->used = 1; mem->ua = ua; diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 87f0dd004295..df1e11ebbabb 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -84,7 +84,7 @@ static void __init setup_node_to_cpumask_map(void) alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]); /* cpumask_of_node() will now work */ - dbg("Node to cpumask map for %d nodes\n", nr_node_ids); + dbg("Node to cpumask map for %u nodes\n", nr_node_ids); } static int __init fake_numa_create_new_node(unsigned long end_pfn, @@ -215,7 +215,7 @@ static void initialize_distance_lookup_table(int nid, */ static int associativity_to_nid(const __be32 *associativity) { - int nid = -1; + int nid = NUMA_NO_NODE; if (min_common_depth == -1) goto out; @@ -225,7 +225,7 @@ static int associativity_to_nid(const __be32 *associativity) /* POWER4 LPAR uses 0xffff as invalid node */ if (nid == 0xffff || nid >= MAX_NUMNODES) - nid = -1; + nid = NUMA_NO_NODE; if (nid > 0 && of_read_number(associativity, 1) >= distance_ref_points_depth) { @@ -244,7 +244,7 @@ out: */ static int of_node_to_nid_single(struct device_node *device) { - int nid = -1; + int nid = NUMA_NO_NODE; const __be32 *tmp; tmp = of_get_associativity(device); @@ -256,7 +256,7 @@ static int of_node_to_nid_single(struct device_node *device) /* Walk the device tree upwards, looking for an associativity id */ int of_node_to_nid(struct device_node *device) { - int nid = -1; + int nid = NUMA_NO_NODE; of_node_get(device); while (device) { @@ -454,7 +454,7 @@ static int of_drconf_to_nid_single(struct drmem_lmb *lmb) */ static int numa_setup_cpu(unsigned long lcpu) { - int nid = -1; + int nid = NUMA_NO_NODE; struct device_node *cpu; /* @@ -930,7 +930,7 @@ static int hot_add_drconf_scn_to_nid(unsigned long scn_addr) { struct drmem_lmb *lmb; unsigned long lmb_size; - int nid = -1; + int nid = NUMA_NO_NODE; lmb_size = drmem_lmb_size(); @@ -960,7 +960,7 @@ static int hot_add_drconf_scn_to_nid(unsigned long scn_addr) static int hot_add_node_scn_to_nid(unsigned long scn_addr) { struct device_node *memory; - int nid = -1; + int nid = NUMA_NO_NODE; for_each_node_by_type(memory, "memory") { unsigned long start, size; diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index ecd31569a120..e7da590c7a78 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -401,6 +401,31 @@ void arch_report_meminfo(struct seq_file *m) } #endif /* CONFIG_PROC_FS */ +pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep) +{ + unsigned long pte_val; + + /* + * Clear the _PAGE_PRESENT so that no hardware parallel update is + * possible. Also keep the pte_present true so that we don't take + * wrong fault. + */ + pte_val = pte_update(vma->vm_mm, addr, ptep, _PAGE_PRESENT, _PAGE_INVALID, 0); + + return __pte(pte_val); + +} + +void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep, pte_t old_pte, pte_t pte) +{ + if (radix_enabled()) + return radix__ptep_modify_prot_commit(vma, addr, + ptep, old_pte, pte); + set_pte_at(vma->vm_mm, addr, ptep, pte); +} + /* * For hash translation mode, we use the deposited table to store hash slot * information and they are stored at PTRS_PER_PMD offset from related pmd diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 931156069a81..dced3cd241c2 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -1063,3 +1063,21 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, } /* See ptesync comment in radix__set_pte_at */ } + +void radix__ptep_modify_prot_commit(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t old_pte, pte_t pte) +{ + struct mm_struct *mm = vma->vm_mm; + + /* + * To avoid NMMU hang while relaxing access we need to flush the tlb before + * we set the new value. We need to do this only for radix, because hash + * translation does flush when updating the linux pte. + */ + if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) && + (atomic_read(&mm->context.copros) > 0)) + radix__flush_tlb_page(vma, addr); + + set_pte_at(mm, addr, ptep, pte); +} |