From 79c1c594f49a88fba9744cb5c85978c6b1b365ec Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Wed, 30 Jun 2021 18:48:00 -0700
Subject: mm/hugetlb: change parameters of arch_make_huge_pte()

Patch series "Implement huge VMAP and VMALLOC on powerpc 8xx", v2.

This series implements huge VMAP and VMALLOC on powerpc 8xx.

Powerpc 8xx has 4 page sizes:
- 4k
- 16k
- 512k
- 8M

For now, vmalloc and vmap only support huge pages that are leaf entries
at the PMD level.  On the 8xx the PMD level covers 4M, which doesn't
correspond to any supported page size.

This series therefore implements the use of 16k and 512k pages, which is
done at the PTE level.  Support for 8M pages will be implemented later;
it requires the use of hugepd tables.

To allow this, the architecture provides two functions:
- arch_vmap_pte_range_map_size(), which tells vmap_pte_range() what page
  size to use.  A stub returning PAGE_SIZE is provided when the
  architecture doesn't provide this function.
- arch_vmap_pte_supported_shift(), which tells __vmalloc_node_range()
  what page shift to use for a given area size.  A stub returning
  PAGE_SHIFT is provided when the architecture doesn't provide this
  function.

This patch (of 5):

Currently, arch_make_huge_pte() has the following prototype:

	pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
				 struct page *page, int writable);

vma is used to get the page shift or size.  vma is also used on sparc to
get vm_flags.  page is not used.  writable is not used.

In order to use this function without a vma, replace vma with shift and
flags.  Also remove the unused parameters.

Link: https://lkml.kernel.org/r/cover.1620795204.git.christophe.leroy@csgroup.eu
Link: https://lkml.kernel.org/r/f4633ac6a7da2f22f31a04a89e0a7026bb78b15b.1620795204.git.christophe.leroy@csgroup.eu
Signed-off-by: Christophe Leroy
Acked-by: Mike Kravetz
Cc: Nicholas Piggin
Cc: Mike Kravetz
Cc: Mike Rapoport
Cc: Michael Ellerman
Cc: Benjamin Herrenschmidt
Cc: Paul Mackerras
Cc: Uladzislau Rezki
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/arm64/mm/hugetlbpage.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 58987a98e179..23505fc35324 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -339,10 +339,9 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
 	return NULL;
 }
 
-pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
-			 struct page *page, int writable)
+pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
 {
-	size_t pagesize = huge_page_size(hstate_vma(vma));
+	size_t pagesize = 1UL << shift;
 
 	if (pagesize == CONT_PTE_SIZE) {
 		entry = pte_mkcont(entry);
--
cgit v1.2.3

From c742199a014de23ee92055c2473d91fe5561ffdf Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Wed, 30 Jun 2021 18:48:03 -0700
Subject: mm/pgtable: add stubs for {pmd/pud}_{set/clear}_huge

For architectures with no PMD and/or no PUD, add stubs similar to what
we have for architectures without P4D.
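For illustration, generic code that wants a huge mapping can then call
these helpers unconditionally and fall back to base pages when the stub
returns 0.  A minimal sketch follows; map_huge_or_ptes() and
map_with_ptes() are hypothetical names, not functions from this patch:

	/*
	 * Sketch: on an architecture where the PMD is folded, the
	 * pmd_set_huge() stub returns 0, so we simply fall back to
	 * mapping the range with base pages.
	 */
	static int map_huge_or_ptes(pmd_t *pmdp, phys_addr_t phys,
				    pgprot_t prot)
	{
		if (pmd_set_huge(pmdp, phys, prot))
			return 0;	/* mapped by one PMD leaf entry */

		/* hypothetical PTE-level fallback */
		return map_with_ptes(pmdp, phys, prot);
	}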
[christophe.leroy@csgroup.eu: arm64: define only {pud/pmd}_{set/clear}_huge when useful]
  Link: https://lkml.kernel.org/r/73ec95f40cafbbb69bdfb43a7f53876fd845b0ce.1620990479.git.christophe.leroy@csgroup.eu
[christophe.leroy@csgroup.eu: x86: define only {pud/pmd}_{set/clear}_huge when useful]
  Link: https://lkml.kernel.org/r/7fbf1b6bc3e15c07c24fa45278d57064f14c896b.1620930415.git.christophe.leroy@csgroup.eu
Link: https://lkml.kernel.org/r/5ac5976419350e8e048d463a64cae449eb3ba4b0.1620795204.git.christophe.leroy@csgroup.eu
Signed-off-by: Christophe Leroy
Cc: Benjamin Herrenschmidt
Cc: Michael Ellerman
Cc: Mike Kravetz
Cc: Mike Rapoport
Cc: Nicholas Piggin
Cc: Paul Mackerras
Cc: Uladzislau Rezki
Cc: Naresh Kamboju
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/arm64/mm/mmu.c     | 20 ++++++++++++--------
 arch/x86/mm/pgtable.c   | 34 +++++++++++++++++++---------------
 include/linux/pgtable.h | 26 +++++++++++++++++++++++++-
 3 files changed, 56 insertions(+), 24 deletions(-)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 89b66ef43a0f..add67deb4910 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1338,6 +1338,7 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
 	return dt_virt;
 }
 
+#if CONFIG_PGTABLE_LEVELS > 3
 int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
 {
 	pud_t new_pud = pfn_pud(__phys_to_pfn(phys), mk_pud_sect_prot(prot));
@@ -1352,6 +1353,16 @@ int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
 	return 1;
 }
 
+int pud_clear_huge(pud_t *pudp)
+{
+	if (!pud_sect(READ_ONCE(*pudp)))
+		return 0;
+	pud_clear(pudp);
+	return 1;
+}
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 2
 int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
 {
 	pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), mk_pmd_sect_prot(prot));
@@ -1366,14 +1377,6 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
 	return 1;
 }
 
-int pud_clear_huge(pud_t *pudp)
-{
-	if (!pud_sect(READ_ONCE(*pudp)))
-		return 0;
-	pud_clear(pudp);
-	return 1;
-}
-
 int pmd_clear_huge(pmd_t *pmdp)
 {
 	if (!pmd_sect(READ_ONCE(*pmdp)))
@@ -1381,6 +1384,7 @@ int pmd_clear_huge(pmd_t *pmdp)
 	pmd_clear(pmdp);
 	return 1;
 }
+#endif
 
 int pmd_free_pte_page(pmd_t *pmdp, unsigned long addr)
 {
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index d27cf69e811d..1303ff6ef7be 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -682,6 +682,7 @@ int p4d_clear_huge(p4d_t *p4d)
 }
 #endif
 
+#if CONFIG_PGTABLE_LEVELS > 3
 /**
  * pud_set_huge - setup kernel PUD mapping
  *
@@ -720,6 +721,23 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
 	return 1;
 }
 
+/**
+ * pud_clear_huge - clear kernel PUD mapping when it is set
+ *
+ * Returns 1 on success and 0 on failure (no PUD map is found).
+ */
+int pud_clear_huge(pud_t *pud)
+{
+	if (pud_large(*pud)) {
+		pud_clear(pud);
+		return 1;
+	}
+
+	return 0;
+}
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 2
 /**
  * pmd_set_huge - setup kernel PMD mapping
  *
@@ -750,21 +768,6 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
 	return 1;
 }
 
-/**
- * pud_clear_huge - clear kernel PUD mapping when it is set
- *
- * Returns 1 on success and 0 on failure (no PUD map is found).
- */
-int pud_clear_huge(pud_t *pud)
-{
-	if (pud_large(*pud)) {
-		pud_clear(pud);
-		return 1;
-	}
-
-	return 0;
-}
-
 /**
  * pmd_clear_huge - clear kernel PMD mapping when it is set
  *
@@ -779,6 +782,7 @@ int pmd_clear_huge(pmd_t *pmd)
 
 	return 0;
 }
+#endif
 
 #ifdef CONFIG_X86_64
 /**
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index c32600c9e1ad..2b0d02291178 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1379,10 +1379,34 @@ static inline int p4d_clear_huge(p4d_t *p4d)
 }
 #endif /* !__PAGETABLE_P4D_FOLDED */
 
+#ifndef __PAGETABLE_PUD_FOLDED
 int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
-int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
 int pud_clear_huge(pud_t *pud);
+#else
+static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
+{
+	return 0;
+}
+static inline int pud_clear_huge(pud_t *pud)
+{
+	return 0;
+}
+#endif /* !__PAGETABLE_PUD_FOLDED */
+
+#ifndef __PAGETABLE_PMD_FOLDED
+int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
 int pmd_clear_huge(pmd_t *pmd);
+#else
+static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
+{
+	return 0;
+}
+static inline int pmd_clear_huge(pmd_t *pmd)
+{
+	return 0;
+}
+#endif /* !__PAGETABLE_PMD_FOLDED */
+
 int p4d_free_pud_page(p4d_t *p4d, unsigned long addr);
 int pud_free_pmd_page(pud_t *pud, unsigned long addr);
 int pmd_free_pte_page(pmd_t *pmd, unsigned long addr);
--
cgit v1.2.3

From 873ba463914cf484371cba06959d320f9d3121ca Mon Sep 17 00:00:00 2001
From: Mike Rapoport
Date: Wed, 30 Jun 2021 18:51:19 -0700
Subject: arm64: decouple check whether pfn is in linear map from pfn_valid()

The intended semantics of pfn_valid() is to verify whether there is a
struct page for the pfn in question and nothing else.

Yet, on arm64 it is used to distinguish memory areas that are mapped in
the linear map from those that require ioremap() to access them.

Introduce a dedicated pfn_is_map_memory() wrapper for
memblock_is_map_memory() to perform such a check and use it where
appropriate.  Using a wrapper makes it possible to avoid cyclic include
dependencies.

While here, also update the style of the pfn_valid() declaration so that
it is consistent with the pfn_is_map_memory() declaration.
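For illustration, the intended split between the two predicates can be
sketched as follows; map_normal_or_io() is a hypothetical example, not
code from this patch:

	/*
	 * Sketch: pfn_valid() only says whether a struct page exists;
	 * pfn_is_map_memory() says whether the pfn can be accessed
	 * through the kernel linear map.  Anything else needs ioremap().
	 */
	static void __iomem *map_normal_or_io(phys_addr_t addr, size_t size)
	{
		if (pfn_is_map_memory(__phys_to_pfn(addr)))
			return (void __iomem *)__phys_to_virt(addr);

		return ioremap(addr, size);
	}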
Link: https://lkml.kernel.org/r/20210511100550.28178-4-rppt@kernel.org
Signed-off-by: Mike Rapoport
Acked-by: David Hildenbrand
Acked-by: Ard Biesheuvel
Reviewed-by: Kefeng Wang
Cc: Anshuman Khandual
Cc: Catalin Marinas
Cc: Marc Zyngier
Cc: Mark Rutland
Cc: Will Deacon
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/arm64/include/asm/memory.h |  2 +-
 arch/arm64/include/asm/page.h   |  3 ++-
 arch/arm64/kvm/mmu.c            |  2 +-
 arch/arm64/mm/init.c            | 12 ++++++++++++
 arch/arm64/mm/ioremap.c         |  4 ++--
 arch/arm64/mm/mmu.c             |  2 +-
 6 files changed, 19 insertions(+), 6 deletions(-)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 87b90dc27a43..9027b7e16c4c 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -369,7 +369,7 @@ static inline void *phys_to_virt(phys_addr_t x)
 
 #define virt_addr_valid(addr)	({					\
 	__typeof__(addr) __addr = __tag_reset(addr);			\
-	__is_lm_address(__addr) && pfn_valid(virt_to_pfn(__addr));	\
+	__is_lm_address(__addr) && pfn_is_map_memory(virt_to_pfn(__addr));	\
 })
 
 void dump_mem_limit(void);
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 012cffc574e8..75ddfe671393 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -37,7 +37,8 @@ void copy_highpage(struct page *to, struct page *from);
 
 typedef struct page *pgtable_t;
 
-extern int pfn_valid(unsigned long);
+int pfn_valid(unsigned long pfn);
+int pfn_is_map_memory(unsigned long pfn);
 
 #include <asm/memory.h>
 
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 74b3c1a3ff5a..5e0d40f9fb86 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -85,7 +85,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 
 static bool kvm_is_device_pfn(unsigned long pfn)
 {
-	return !pfn_valid(pfn);
+	return !pfn_is_map_memory(pfn);
 }
 
 static void *stage2_memcache_zalloc_page(void *arg)
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index e55409caaee3..148e752a70f7 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -256,6 +256,18 @@ int pfn_valid(unsigned long pfn)
 }
 EXPORT_SYMBOL(pfn_valid);
 
+int pfn_is_map_memory(unsigned long pfn)
+{
+	phys_addr_t addr = PFN_PHYS(pfn);
+
+	/* avoid false positives for bogus PFNs, see comment in pfn_valid() */
+	if (PHYS_PFN(addr) != pfn)
+		return 0;
+
+	return memblock_is_map_memory(addr);
+}
+EXPORT_SYMBOL(pfn_is_map_memory);
+
 static phys_addr_t memory_limit = PHYS_ADDR_MAX;
 
 /*
diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
index b5e83c46b23e..b7c81dacabf0 100644
--- a/arch/arm64/mm/ioremap.c
+++ b/arch/arm64/mm/ioremap.c
@@ -43,7 +43,7 @@ static void __iomem *__ioremap_caller(phys_addr_t phys_addr, size_t size,
 	/*
 	 * Don't allow RAM to be mapped.
 	 */
-	if (WARN_ON(pfn_valid(__phys_to_pfn(phys_addr))))
+	if (WARN_ON(pfn_is_map_memory(__phys_to_pfn(phys_addr))))
 		return NULL;
 
 	area = get_vm_area_caller(size, VM_IOREMAP, caller);
@@ -84,7 +84,7 @@ EXPORT_SYMBOL(iounmap);
 void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size)
 {
 	/* For normal memory we already have a cacheable mapping. */
-	if (pfn_valid(__phys_to_pfn(phys_addr)))
+	if (pfn_is_map_memory(__phys_to_pfn(phys_addr)))
 		return (void __iomem *)__phys_to_virt(phys_addr);
 
 	return __ioremap_caller(phys_addr, size, __pgprot(PROT_NORMAL),
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index add67deb4910..7553a7eab3db 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -82,7 +82,7 @@ void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 			      unsigned long size, pgprot_t vma_prot)
 {
-	if (!pfn_valid(pfn))
+	if (!pfn_is_map_memory(pfn))
 		return pgprot_noncached(vma_prot);
 	else if (file->f_flags & O_SYNC)
 		return pgprot_writecombine(vma_prot);
--
cgit v1.2.3

From a7d9f306ba7052056edf9ccae596aeb400226af8 Mon Sep 17 00:00:00 2001
From: Mike Rapoport
Date: Wed, 30 Jun 2021 18:51:22 -0700
Subject: arm64: drop pfn_valid_within() and simplify pfn_valid()

The arm64 version of pfn_valid() differs from the generic one for two
reasons:

* Parts of the memory map are freed during boot.  This makes it
  necessary to verify that there is actual physical memory that
  corresponds to a pfn, which is done by querying memblock.

* There are NOMAP memory regions.  These regions are not mapped in the
  linear map, and until the previous commit the struct pages
  representing these areas had default values.

As a consequence of the absence of special treatment of NOMAP regions in
the memory map, it was necessary to use memblock_is_map_memory() in
pfn_valid() and to have pfn_valid_within() aliased to pfn_valid() so
that generic mm functionality would not treat a NOMAP page as a normal
page.

Since the NOMAP regions are now marked as PageReserved(), pfn walkers
and the rest of core mm will treat them as unusable memory, and thus
pfn_valid_within() is no longer required at all and can be disabled on
arm64.

pfn_valid() can be slightly simplified by replacing
memblock_is_map_memory() with memblock_is_memory().
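For illustration, a pfn walker after this change can rely on the
reserved struct pages instead of pfn_valid_within(); a simplified
sketch, not code from this patch:

	/* Sketch: NOMAP regions are skipped via PageReserved(). */
	for (pfn = start; pfn < end; pfn++) {
		struct page *page;

		if (!pfn_valid(pfn))	/* the memmap may have holes */
			continue;
		page = pfn_to_page(pfn);
		if (PageReserved(page))	/* includes NOMAP regions */
			continue;
		/* ... operate on an ordinary page ... */
	}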
[rppt@kernel.org: fix merge fix]
  Link: https://lkml.kernel.org/r/YJtoQhidtIJOhYsV@kernel.org
Link: https://lkml.kernel.org/r/20210511100550.28178-5-rppt@kernel.org
Signed-off-by: Mike Rapoport
Acked-by: David Hildenbrand
Acked-by: Ard Biesheuvel
Reviewed-by: Kefeng Wang
Cc: Anshuman Khandual
Cc: Catalin Marinas
Cc: Marc Zyngier
Cc: Mark Rutland
Cc: Will Deacon
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/arm64/Kconfig   | 1 -
 arch/arm64/mm/init.c | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 937bdf0c3859..59c9255ab9d0 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -201,7 +201,6 @@ config ARM64
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
 	select HAVE_GENERIC_VDSO
-	select HOLES_IN_ZONE
 	select IOMMU_DMA if IOMMU_SUPPORT
 	select IRQ_DOMAIN
 	select IRQ_FORCED_THREADING
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 148e752a70f7..725aa84f2faa 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -252,7 +252,7 @@ int pfn_valid(unsigned long pfn)
 	if (!early_section(ms))
 		return pfn_section_valid(ms, pfn);
 
-	return memblock_is_map_memory(addr);
+	return memblock_is_memory(addr);
 }
 EXPORT_SYMBOL(pfn_valid);
--
cgit v1.2.3

From 16c9afc776608324ca71c0bc354987bab532f51d Mon Sep 17 00:00:00 2001
From: Anshuman Khandual
Date: Wed, 30 Jun 2021 18:51:26 -0700
Subject: arm64/mm: drop HAVE_ARCH_PFN_VALID

CONFIG_SPARSEMEM_VMEMMAP is now the only available memory model on
arm64 platforms, and free_unused_memmap() would just return without
creating any holes in the memmap mapping.  There is no need for any
special handling in pfn_valid(), so HAVE_ARCH_PFN_VALID can just be
dropped.  This also moves the pfn upper bits sanity check into the
generic pfn_valid().
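For illustration, the sanity check that moves into the generic
pfn_valid() works by round-tripping the pfn through a physical address;
a simplified explanation, assuming phys_addr_t and unsigned long have
the same width:

	/*
	 * PFN_PHYS() shifts the pfn left by PAGE_SHIFT.  If any of the
	 * upper PAGE_SHIFT bits of the pfn are set, they are shifted
	 * out of the physical address, so converting back with
	 * PHYS_PFN() yields a different pfn and the check rejects it.
	 */
	if (PHYS_PFN(PFN_PHYS(pfn)) != pfn)
		return 0;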
Link: https://lkml.kernel.org/r/1621947349-25421-1-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Anshuman Khandual
Acked-by: David Hildenbrand
Acked-by: Mike Rapoport
Cc: Catalin Marinas
Cc: Will Deacon
Cc: David Hildenbrand
Cc: Mike Rapoport
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/arm64/Kconfig            |  1 -
 arch/arm64/include/asm/page.h |  1 -
 arch/arm64/mm/init.c          | 37 -------------------------------------
 include/linux/mmzone.h        |  9 +++++++++
 4 files changed, 9 insertions(+), 39 deletions(-)

(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 59c9255ab9d0..84f0216d3c5c 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -154,7 +154,6 @@ config ARM64
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_MMAP_RND_BITS
 	select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
-	select HAVE_ARCH_PFN_VALID
 	select HAVE_ARCH_PREL32_RELOCATIONS
 	select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
 	select HAVE_ARCH_SECCOMP_FILTER
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 75ddfe671393..fcbef3eec4b2 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -37,7 +37,6 @@ void copy_highpage(struct page *to, struct page *from);
 
 typedef struct page *pgtable_t;
 
-int pfn_valid(unsigned long pfn);
 int pfn_is_map_memory(unsigned long pfn);
 
 #include <asm/memory.h>
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 725aa84f2faa..49019ea0c8a8 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -219,43 +219,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
 	free_area_init(max_zone_pfns);
 }
 
-int pfn_valid(unsigned long pfn)
-{
-	phys_addr_t addr = PFN_PHYS(pfn);
-	struct mem_section *ms;
-
-	/*
-	 * Ensure the upper PAGE_SHIFT bits are clear in the
-	 * pfn. Else it might lead to false positives when
-	 * some of the upper bits are set, but the lower bits
-	 * match a valid pfn.
-	 */
-	if (PHYS_PFN(addr) != pfn)
-		return 0;
-
-	if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
-		return 0;
-
-	ms = __pfn_to_section(pfn);
-	if (!valid_section(ms))
-		return 0;
-
-	/*
-	 * ZONE_DEVICE memory does not have the memblock entries.
-	 * memblock_is_map_memory() check for ZONE_DEVICE based
-	 * addresses will always fail. Even the normal hotplugged
-	 * memory will never have MEMBLOCK_NOMAP flag set in their
-	 * memblock entries. Skip memblock search for all non early
-	 * memory sections covering all of hotplug memory including
-	 * both normal and ZONE_DEVICE based.
-	 */
-	if (!early_section(ms))
-		return pfn_section_valid(ms, pfn);
-
-	return memblock_is_memory(addr);
-}
-EXPORT_SYMBOL(pfn_valid);
-
 int pfn_is_map_memory(unsigned long pfn)
 {
 	phys_addr_t addr = PFN_PHYS(pfn);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 7da43337ad23..7bc7e41b6c31 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1460,6 +1460,15 @@ static inline int pfn_valid(unsigned long pfn)
 {
 	struct mem_section *ms;
 
+	/*
+	 * Ensure the upper PAGE_SHIFT bits are clear in the
+	 * pfn. Else it might lead to false positives when
+	 * some of the upper bits are set, but the lower bits
+	 * match a valid pfn.
+	 */
+	if (PHYS_PFN(PFN_PHYS(pfn)) != pfn)
+		return 0;
+
 	if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
 		return 0;
 	ms = __nr_to_section(pfn_to_section_nr(pfn));
--
cgit v1.2.3