diff options
author | Nitin Gupta <nitin.m.gupta@oracle.com> | 2017-08-11 16:46:50 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-08-15 21:48:07 -0700 |
commit | df7b2155bbe75525531e47c8a67ae4f1dbbae976 (patch) | |
tree | 8a41a2304dc99e7fa024a5debef0ac9197231842 /arch/sparc/mm | |
parent | 44382b0195e608732b0bed14d51b5e5f79f46471 (diff) | |
download | linux-df7b2155bbe75525531e47c8a67ae4f1dbbae976.tar.bz2 |
sparc64: Add 16GB hugepage support
Adds support for 16GB hugepage size. To use this page size
use kernel parameters as:
default_hugepagesz=16G hugepagesz=16G hugepages=10
Testing:
Tested with the stream benchmark which allocates 48G of
arrays backed by 16G hugepages and does RW operation on
them in parallel.
Orabug: 25362942
Cc: Anthony Yznaga <anthony.yznaga@oracle.com>
Reviewed-by: Bob Picco <bob.picco@oracle.com>
Signed-off-by: Nitin Gupta <nitin.m.gupta@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc/mm')
-rw-r--r-- | arch/sparc/mm/hugetlbpage.c | 74 | ||||
-rw-r--r-- | arch/sparc/mm/init_64.c | 54 |
2 files changed, 100 insertions, 28 deletions
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index 28ee8d8ffa07..7acb84db952c 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -143,6 +143,10 @@ static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift) pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V; switch (shift) { + case HPAGE_16GB_SHIFT: + hugepage_size = _PAGE_SZ16GB_4V; + pte_val(entry) |= _PAGE_PUD_HUGE; + break; case HPAGE_2GB_SHIFT: hugepage_size = _PAGE_SZ2GB_4V; pte_val(entry) |= _PAGE_PMD_HUGE; @@ -187,6 +191,9 @@ static unsigned int sun4v_huge_tte_to_shift(pte_t entry) unsigned int shift; switch (tte_szbits) { + case _PAGE_SZ16GB_4V: + shift = HPAGE_16GB_SHIFT; + break; case _PAGE_SZ2GB_4V: shift = HPAGE_2GB_SHIFT; break; @@ -263,7 +270,12 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, pgd = pgd_offset(mm, addr); pud = pud_alloc(mm, pgd, addr); - if (pud) { + if (!pud) + return NULL; + + if (sz >= PUD_SIZE) + pte = (pte_t *)pud; + else { pmd = pmd_alloc(mm, pud, addr); if (!pmd) return NULL; @@ -289,12 +301,16 @@ pte_t *huge_pte_offset(struct mm_struct *mm, if (!pgd_none(*pgd)) { pud = pud_offset(pgd, addr); if (!pud_none(*pud)) { - pmd = pmd_offset(pud, addr); - if (!pmd_none(*pmd)) { - if (is_hugetlb_pmd(*pmd)) - pte = (pte_t *)pmd; - else - pte = pte_offset_map(pmd, addr); + if (is_hugetlb_pud(*pud)) + pte = (pte_t *)pud; + else { + pmd = pmd_offset(pud, addr); + if (!pmd_none(*pmd)) { + if (is_hugetlb_pmd(*pmd)) + pte = (pte_t *)pmd; + else + pte = pte_offset_map(pmd, addr); + } } } } @@ -305,12 +321,20 @@ pte_t *huge_pte_offset(struct mm_struct *mm, void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { - unsigned int i, nptes, orig_shift, shift; - unsigned long size; + unsigned int nptes, orig_shift, shift; + unsigned long i, size; pte_t orig; size = huge_tte_to_size(entry); - shift = size >= HPAGE_SIZE ? PMD_SHIFT : PAGE_SHIFT; + + shift = PAGE_SHIFT; + if (size >= PUD_SIZE) + shift = PUD_SHIFT; + else if (size >= PMD_SIZE) + shift = PMD_SHIFT; + else + shift = PAGE_SHIFT; + nptes = size >> shift; if (!pte_present(*ptep) && pte_present(entry)) @@ -333,19 +357,23 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - unsigned int i, nptes, hugepage_shift; + unsigned int i, nptes, orig_shift, shift; unsigned long size; pte_t entry; entry = *ptep; size = huge_tte_to_size(entry); - if (size >= HPAGE_SIZE) - nptes = size >> PMD_SHIFT; + + shift = PAGE_SHIFT; + if (size >= PUD_SIZE) + shift = PUD_SHIFT; + else if (size >= PMD_SIZE) + shift = PMD_SHIFT; else - nptes = size >> PAGE_SHIFT; + shift = PAGE_SHIFT; - hugepage_shift = pte_none(entry) ? PAGE_SHIFT : - huge_tte_to_shift(entry); + nptes = size >> shift; + orig_shift = pte_none(entry) ? PAGE_SHIFT : huge_tte_to_shift(entry); if (pte_present(entry)) mm->context.hugetlb_pte_count -= nptes; @@ -354,11 +382,11 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, for (i = 0; i < nptes; i++) ptep[i] = __pte(0UL); - maybe_tlb_batch_add(mm, addr, ptep, entry, 0, hugepage_shift); + maybe_tlb_batch_add(mm, addr, ptep, entry, 0, orig_shift); /* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */ if (size == HPAGE_SIZE) maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0, - hugepage_shift); + orig_shift); return entry; } @@ -371,7 +399,8 @@ int pmd_huge(pmd_t pmd) int pud_huge(pud_t pud) { - return 0; + return !pud_none(pud) && + (pud_val(pud) & (_PAGE_VALID|_PAGE_PUD_HUGE)) != _PAGE_VALID; } static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, @@ -435,8 +464,11 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) continue; - hugetlb_free_pmd_range(tlb, pud, addr, next, floor, - ceiling); + if (is_hugetlb_pud(*pud)) + pud_clear(pud); + else + hugetlb_free_pmd_range(tlb, pud, addr, next, floor, + ceiling); } while (pud++, addr = next, addr != end); start &= PGDIR_MASK; diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index afa0099f3748..b2ba410b26f4 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -348,6 +348,18 @@ static int __init hugetlbpage_init(void) arch_initcall(hugetlbpage_init); +static void __init pud_huge_patch(void) +{ + struct pud_huge_patch_entry *p; + unsigned long addr; + + p = &__pud_huge_patch; + addr = p->addr; + *(unsigned int *)addr = p->insn; + + __asm__ __volatile__("flush %0" : : "r" (addr)); +} + static int __init setup_hugepagesz(char *string) { unsigned long long hugepage_size; @@ -360,6 +372,11 @@ static int __init setup_hugepagesz(char *string) hugepage_shift = ilog2(hugepage_size); switch (hugepage_shift) { + case HPAGE_16GB_SHIFT: + hv_pgsz_mask = HV_PGSZ_MASK_16GB; + hv_pgsz_idx = HV_PGSZ_IDX_16GB; + pud_huge_patch(); + break; case HPAGE_2GB_SHIFT: hv_pgsz_mask = HV_PGSZ_MASK_2GB; hv_pgsz_idx = HV_PGSZ_IDX_2GB; @@ -400,6 +417,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t * { struct mm_struct *mm; unsigned long flags; + bool is_huge_tsb; pte_t pte = *ptep; if (tlb_type != hypervisor) { @@ -417,15 +435,37 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t * spin_lock_irqsave(&mm->context.lock, flags); + is_huge_tsb = false; #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - if ((mm->context.hugetlb_pte_count || mm->context.thp_pte_count) && - is_hugetlb_pmd(__pmd(pte_val(pte)))) { - /* We are fabricating 8MB pages using 4MB real hw pages. */ - pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT)); - __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT, - address, pte_val(pte)); - } else + if (mm->context.hugetlb_pte_count || mm->context.thp_pte_count) { + unsigned long hugepage_size = PAGE_SIZE; + + if (is_vm_hugetlb_page(vma)) + hugepage_size = huge_page_size(hstate_vma(vma)); + + if (hugepage_size >= PUD_SIZE) { + unsigned long mask = 0x1ffc00000UL; + + /* Transfer bits [32:22] from address to resolve + * at 4M granularity. + */ + pte_val(pte) &= ~mask; + pte_val(pte) |= (address & mask); + } else if (hugepage_size >= PMD_SIZE) { + /* We are fabricating 8MB pages using 4MB + * real hw pages. + */ + pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT)); + } + + if (hugepage_size >= PMD_SIZE) { + __update_mmu_tsb_insert(mm, MM_TSB_HUGE, + REAL_HPAGE_SHIFT, address, pte_val(pte)); + is_huge_tsb = true; + } + } #endif + if (!is_huge_tsb) __update_mmu_tsb_insert(mm, MM_TSB_BASE, PAGE_SHIFT, address, pte_val(pte)); |