From 453431a54934d917153c65211b2dabf45562ca88 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 6 Aug 2020 23:18:13 -0700 Subject: mm, treewide: rename kzfree() to kfree_sensitive() As said by Linus: A symmetric naming is only helpful if it implies symmetries in use. Otherwise it's actively misleading. In "kzalloc()", the z is meaningful and an important part of what the caller wants. In "kzfree()", the z is actively detrimental, because maybe in the future we really _might_ want to use that "memfill(0xdeadbeef)" or something. The "zero" part of the interface isn't even _relevant_. The main reason that kzfree() exists is to clear sensitive information that should not be leaked to other future users of the same memory objects. Rename kzfree() to kfree_sensitive() to follow the example of the recently added kvfree_sensitive() and make the intention of the API more explicit. In addition, memzero_explicit() is used to clear the memory to make sure that it won't get optimized away by the compiler. The renaming is done by using the command sequence: git grep -w --name-only kzfree |\ xargs sed -i 's/kzfree/kfree_sensitive/' followed by some editing of the kfree_sensitive() kerneldoc and adding a kzfree backward compatibility macro in slab.h. [akpm@linux-foundation.org: fs/crypto/inline_crypt.c needs linux/slab.h] [akpm@linux-foundation.org: fix fs/crypto/inline_crypt.c some more] Suggested-by: Joe Perches Signed-off-by: Waiman Long Signed-off-by: Andrew Morton Acked-by: David Howells Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: Jarkko Sakkinen Cc: James Morris Cc: "Serge E. Hallyn" Cc: Joe Perches Cc: Matthew Wilcox Cc: David Rientjes Cc: Dan Carpenter Cc: "Jason A . Donenfeld" Link: http://lkml.kernel.org/r/20200616154311.12314-3-longman@redhat.com Signed-off-by: Linus Torvalds --- lib/mpi/mpiutil.c | 6 +++--- lib/test_kasan.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c index 20ed0f766787..4cd2b335cb7f 100644 --- a/lib/mpi/mpiutil.c +++ b/lib/mpi/mpiutil.c @@ -69,7 +69,7 @@ void mpi_free_limb_space(mpi_ptr_t a) if (!a) return; - kzfree(a); + kfree_sensitive(a); } void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs) @@ -95,7 +95,7 @@ int mpi_resize(MPI a, unsigned nlimbs) if (!p) return -ENOMEM; memcpy(p, a->d, a->alloced * sizeof(mpi_limb_t)); - kzfree(a->d); + kfree_sensitive(a->d); a->d = p; } else { a->d = kcalloc(nlimbs, sizeof(mpi_limb_t), GFP_KERNEL); @@ -112,7 +112,7 @@ void mpi_free(MPI a) return; if (a->flags & 4) - kzfree(a->d); + kfree_sensitive(a->d); else mpi_free_limb_space(a->d); diff --git a/lib/test_kasan.c b/lib/test_kasan.c index dc2c6a51d11a..e4d9a86b174b 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -766,15 +766,15 @@ static noinline void __init kmalloc_double_kzfree(void) char *ptr; size_t size = 16; - pr_info("double-free (kzfree)\n"); + pr_info("double-free (kfree_sensitive)\n"); ptr = kmalloc(size, GFP_KERNEL); if (!ptr) { pr_err("Allocation failed\n"); return; } - kzfree(ptr); - kzfree(ptr); + kfree_sensitive(ptr); + kfree_sensitive(ptr); } #ifdef CONFIG_KASAN_VMALLOC -- cgit v1.2.3 From ab05eabfa18a6a537a73408642177e4a803317dc Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 6 Aug 2020 23:22:51 -0700 Subject: mm: move lib/ioremap.c to mm/ The functionality in lib/ioremap.c deals with pagetables, vmalloc and caches, so it naturally belongs to mm/ Moving it there will also allow declaring p?d_alloc_track functions in an header file inside mm/ rather than having those declarations in include/linux/mm.h Suggested-by: Andrew Morton Signed-off-by: Mike Rapoport Signed-off-by: Andrew Morton Reviewed-by: Pekka Enberg Cc: Abdul Haleem Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Christophe Leroy Cc: Joerg Roedel Cc: Joerg Roedel Cc: Max Filippov Cc: Peter Zijlstra (Intel) Cc: Satheesh Rajendran Cc: Stafford Horne Cc: Stephen Rothwell Cc: Steven Rostedt Cc: Geert Uytterhoeven Cc: Matthew Wilcox Link: http://lkml.kernel.org/r/20200627143453.31835-8-rppt@kernel.org Signed-off-by: Linus Torvalds --- lib/Makefile | 1 - lib/ioremap.c | 287 ---------------------------------------------------------- mm/Makefile | 2 +- mm/ioremap.c | 287 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 288 insertions(+), 289 deletions(-) delete mode 100644 lib/ioremap.c create mode 100644 mm/ioremap.c (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile index 435f7f13b8aa..f39962104036 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -37,7 +37,6 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ nmi_backtrace.o nodemask.o win_minmax.o memcat_p.o lib-$(CONFIG_PRINTK) += dump_stack.o -lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o lib-y += kobject.o klist.o diff --git a/lib/ioremap.c b/lib/ioremap.c deleted file mode 100644 index 5ee3526f71b8..000000000000 --- a/lib/ioremap.c +++ /dev/null @@ -1,287 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Re-map IO memory to kernel address space so that we can access it. - * This is needed for high PCI addresses that aren't mapped in the - * 640k-1MB IO memory area on PC's - * - * (C) Copyright 1995 1996 Linus Torvalds - */ -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP -static int __read_mostly ioremap_p4d_capable; -static int __read_mostly ioremap_pud_capable; -static int __read_mostly ioremap_pmd_capable; -static int __read_mostly ioremap_huge_disabled; - -static int __init set_nohugeiomap(char *str) -{ - ioremap_huge_disabled = 1; - return 0; -} -early_param("nohugeiomap", set_nohugeiomap); - -void __init ioremap_huge_init(void) -{ - if (!ioremap_huge_disabled) { - if (arch_ioremap_p4d_supported()) - ioremap_p4d_capable = 1; - if (arch_ioremap_pud_supported()) - ioremap_pud_capable = 1; - if (arch_ioremap_pmd_supported()) - ioremap_pmd_capable = 1; - } -} - -static inline int ioremap_p4d_enabled(void) -{ - return ioremap_p4d_capable; -} - -static inline int ioremap_pud_enabled(void) -{ - return ioremap_pud_capable; -} - -static inline int ioremap_pmd_enabled(void) -{ - return ioremap_pmd_capable; -} - -#else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ -static inline int ioremap_p4d_enabled(void) { return 0; } -static inline int ioremap_pud_enabled(void) { return 0; } -static inline int ioremap_pmd_enabled(void) { return 0; } -#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ - -static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, - unsigned long end, phys_addr_t phys_addr, pgprot_t prot, - pgtbl_mod_mask *mask) -{ - pte_t *pte; - u64 pfn; - - pfn = phys_addr >> PAGE_SHIFT; - pte = pte_alloc_kernel_track(pmd, addr, mask); - if (!pte) - return -ENOMEM; - do { - BUG_ON(!pte_none(*pte)); - set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot)); - pfn++; - } while (pte++, addr += PAGE_SIZE, addr != end); - *mask |= PGTBL_PTE_MODIFIED; - return 0; -} - -static int ioremap_try_huge_pmd(pmd_t *pmd, unsigned long addr, - unsigned long end, phys_addr_t phys_addr, - pgprot_t prot) -{ - if (!ioremap_pmd_enabled()) - return 0; - - if ((end - addr) != PMD_SIZE) - return 0; - - if (!IS_ALIGNED(addr, PMD_SIZE)) - return 0; - - if (!IS_ALIGNED(phys_addr, PMD_SIZE)) - return 0; - - if (pmd_present(*pmd) && !pmd_free_pte_page(pmd, addr)) - return 0; - - return pmd_set_huge(pmd, phys_addr, prot); -} - -static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, - unsigned long end, phys_addr_t phys_addr, pgprot_t prot, - pgtbl_mod_mask *mask) -{ - pmd_t *pmd; - unsigned long next; - - pmd = pmd_alloc_track(&init_mm, pud, addr, mask); - if (!pmd) - return -ENOMEM; - do { - next = pmd_addr_end(addr, end); - - if (ioremap_try_huge_pmd(pmd, addr, next, phys_addr, prot)) { - *mask |= PGTBL_PMD_MODIFIED; - continue; - } - - if (ioremap_pte_range(pmd, addr, next, phys_addr, prot, mask)) - return -ENOMEM; - } while (pmd++, phys_addr += (next - addr), addr = next, addr != end); - return 0; -} - -static int ioremap_try_huge_pud(pud_t *pud, unsigned long addr, - unsigned long end, phys_addr_t phys_addr, - pgprot_t prot) -{ - if (!ioremap_pud_enabled()) - return 0; - - if ((end - addr) != PUD_SIZE) - return 0; - - if (!IS_ALIGNED(addr, PUD_SIZE)) - return 0; - - if (!IS_ALIGNED(phys_addr, PUD_SIZE)) - return 0; - - if (pud_present(*pud) && !pud_free_pmd_page(pud, addr)) - return 0; - - return pud_set_huge(pud, phys_addr, prot); -} - -static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr, - unsigned long end, phys_addr_t phys_addr, pgprot_t prot, - pgtbl_mod_mask *mask) -{ - pud_t *pud; - unsigned long next; - - pud = pud_alloc_track(&init_mm, p4d, addr, mask); - if (!pud) - return -ENOMEM; - do { - next = pud_addr_end(addr, end); - - if (ioremap_try_huge_pud(pud, addr, next, phys_addr, prot)) { - *mask |= PGTBL_PUD_MODIFIED; - continue; - } - - if (ioremap_pmd_range(pud, addr, next, phys_addr, prot, mask)) - return -ENOMEM; - } while (pud++, phys_addr += (next - addr), addr = next, addr != end); - return 0; -} - -static int ioremap_try_huge_p4d(p4d_t *p4d, unsigned long addr, - unsigned long end, phys_addr_t phys_addr, - pgprot_t prot) -{ - if (!ioremap_p4d_enabled()) - return 0; - - if ((end - addr) != P4D_SIZE) - return 0; - - if (!IS_ALIGNED(addr, P4D_SIZE)) - return 0; - - if (!IS_ALIGNED(phys_addr, P4D_SIZE)) - return 0; - - if (p4d_present(*p4d) && !p4d_free_pud_page(p4d, addr)) - return 0; - - return p4d_set_huge(p4d, phys_addr, prot); -} - -static inline int ioremap_p4d_range(pgd_t *pgd, unsigned long addr, - unsigned long end, phys_addr_t phys_addr, pgprot_t prot, - pgtbl_mod_mask *mask) -{ - p4d_t *p4d; - unsigned long next; - - p4d = p4d_alloc_track(&init_mm, pgd, addr, mask); - if (!p4d) - return -ENOMEM; - do { - next = p4d_addr_end(addr, end); - - if (ioremap_try_huge_p4d(p4d, addr, next, phys_addr, prot)) { - *mask |= PGTBL_P4D_MODIFIED; - continue; - } - - if (ioremap_pud_range(p4d, addr, next, phys_addr, prot, mask)) - return -ENOMEM; - } while (p4d++, phys_addr += (next - addr), addr = next, addr != end); - return 0; -} - -int ioremap_page_range(unsigned long addr, - unsigned long end, phys_addr_t phys_addr, pgprot_t prot) -{ - pgd_t *pgd; - unsigned long start; - unsigned long next; - int err; - pgtbl_mod_mask mask = 0; - - might_sleep(); - BUG_ON(addr >= end); - - start = addr; - pgd = pgd_offset_k(addr); - do { - next = pgd_addr_end(addr, end); - err = ioremap_p4d_range(pgd, addr, next, phys_addr, prot, - &mask); - if (err) - break; - } while (pgd++, phys_addr += (next - addr), addr = next, addr != end); - - flush_cache_vmap(start, end); - - if (mask & ARCH_PAGE_TABLE_SYNC_MASK) - arch_sync_kernel_mappings(start, end); - - return err; -} - -#ifdef CONFIG_GENERIC_IOREMAP -void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot) -{ - unsigned long offset, vaddr; - phys_addr_t last_addr; - struct vm_struct *area; - - /* Disallow wrap-around or zero size */ - last_addr = addr + size - 1; - if (!size || last_addr < addr) - return NULL; - - /* Page-align mappings */ - offset = addr & (~PAGE_MASK); - addr -= offset; - size = PAGE_ALIGN(size + offset); - - area = get_vm_area_caller(size, VM_IOREMAP, - __builtin_return_address(0)); - if (!area) - return NULL; - vaddr = (unsigned long)area->addr; - - if (ioremap_page_range(vaddr, vaddr + size, addr, __pgprot(prot))) { - free_vm_area(area); - return NULL; - } - - return (void __iomem *)(vaddr + offset); -} -EXPORT_SYMBOL(ioremap_prot); - -void iounmap(volatile void __iomem *addr) -{ - vunmap((void *)((unsigned long)addr & PAGE_MASK)); -} -EXPORT_SYMBOL(iounmap); -#endif /* CONFIG_GENERIC_IOREMAP */ diff --git a/mm/Makefile b/mm/Makefile index 6e9d46b2efc9..d5649f1c12c0 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -38,7 +38,7 @@ mmu-y := nommu.o mmu-$(CONFIG_MMU) := highmem.o memory.o mincore.o \ mlock.o mmap.o mmu_gather.o mprotect.o mremap.o \ msync.o page_vma_mapped.o pagewalk.o \ - pgtable-generic.o rmap.o vmalloc.o + pgtable-generic.o rmap.o vmalloc.o ioremap.o ifdef CONFIG_CROSS_MEMORY_ATTACH diff --git a/mm/ioremap.c b/mm/ioremap.c new file mode 100644 index 000000000000..5ee3526f71b8 --- /dev/null +++ b/mm/ioremap.c @@ -0,0 +1,287 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Re-map IO memory to kernel address space so that we can access it. + * This is needed for high PCI addresses that aren't mapped in the + * 640k-1MB IO memory area on PC's + * + * (C) Copyright 1995 1996 Linus Torvalds + */ +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP +static int __read_mostly ioremap_p4d_capable; +static int __read_mostly ioremap_pud_capable; +static int __read_mostly ioremap_pmd_capable; +static int __read_mostly ioremap_huge_disabled; + +static int __init set_nohugeiomap(char *str) +{ + ioremap_huge_disabled = 1; + return 0; +} +early_param("nohugeiomap", set_nohugeiomap); + +void __init ioremap_huge_init(void) +{ + if (!ioremap_huge_disabled) { + if (arch_ioremap_p4d_supported()) + ioremap_p4d_capable = 1; + if (arch_ioremap_pud_supported()) + ioremap_pud_capable = 1; + if (arch_ioremap_pmd_supported()) + ioremap_pmd_capable = 1; + } +} + +static inline int ioremap_p4d_enabled(void) +{ + return ioremap_p4d_capable; +} + +static inline int ioremap_pud_enabled(void) +{ + return ioremap_pud_capable; +} + +static inline int ioremap_pmd_enabled(void) +{ + return ioremap_pmd_capable; +} + +#else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ +static inline int ioremap_p4d_enabled(void) { return 0; } +static inline int ioremap_pud_enabled(void) { return 0; } +static inline int ioremap_pmd_enabled(void) { return 0; } +#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ + +static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, + unsigned long end, phys_addr_t phys_addr, pgprot_t prot, + pgtbl_mod_mask *mask) +{ + pte_t *pte; + u64 pfn; + + pfn = phys_addr >> PAGE_SHIFT; + pte = pte_alloc_kernel_track(pmd, addr, mask); + if (!pte) + return -ENOMEM; + do { + BUG_ON(!pte_none(*pte)); + set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot)); + pfn++; + } while (pte++, addr += PAGE_SIZE, addr != end); + *mask |= PGTBL_PTE_MODIFIED; + return 0; +} + +static int ioremap_try_huge_pmd(pmd_t *pmd, unsigned long addr, + unsigned long end, phys_addr_t phys_addr, + pgprot_t prot) +{ + if (!ioremap_pmd_enabled()) + return 0; + + if ((end - addr) != PMD_SIZE) + return 0; + + if (!IS_ALIGNED(addr, PMD_SIZE)) + return 0; + + if (!IS_ALIGNED(phys_addr, PMD_SIZE)) + return 0; + + if (pmd_present(*pmd) && !pmd_free_pte_page(pmd, addr)) + return 0; + + return pmd_set_huge(pmd, phys_addr, prot); +} + +static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, + unsigned long end, phys_addr_t phys_addr, pgprot_t prot, + pgtbl_mod_mask *mask) +{ + pmd_t *pmd; + unsigned long next; + + pmd = pmd_alloc_track(&init_mm, pud, addr, mask); + if (!pmd) + return -ENOMEM; + do { + next = pmd_addr_end(addr, end); + + if (ioremap_try_huge_pmd(pmd, addr, next, phys_addr, prot)) { + *mask |= PGTBL_PMD_MODIFIED; + continue; + } + + if (ioremap_pte_range(pmd, addr, next, phys_addr, prot, mask)) + return -ENOMEM; + } while (pmd++, phys_addr += (next - addr), addr = next, addr != end); + return 0; +} + +static int ioremap_try_huge_pud(pud_t *pud, unsigned long addr, + unsigned long end, phys_addr_t phys_addr, + pgprot_t prot) +{ + if (!ioremap_pud_enabled()) + return 0; + + if ((end - addr) != PUD_SIZE) + return 0; + + if (!IS_ALIGNED(addr, PUD_SIZE)) + return 0; + + if (!IS_ALIGNED(phys_addr, PUD_SIZE)) + return 0; + + if (pud_present(*pud) && !pud_free_pmd_page(pud, addr)) + return 0; + + return pud_set_huge(pud, phys_addr, prot); +} + +static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr, + unsigned long end, phys_addr_t phys_addr, pgprot_t prot, + pgtbl_mod_mask *mask) +{ + pud_t *pud; + unsigned long next; + + pud = pud_alloc_track(&init_mm, p4d, addr, mask); + if (!pud) + return -ENOMEM; + do { + next = pud_addr_end(addr, end); + + if (ioremap_try_huge_pud(pud, addr, next, phys_addr, prot)) { + *mask |= PGTBL_PUD_MODIFIED; + continue; + } + + if (ioremap_pmd_range(pud, addr, next, phys_addr, prot, mask)) + return -ENOMEM; + } while (pud++, phys_addr += (next - addr), addr = next, addr != end); + return 0; +} + +static int ioremap_try_huge_p4d(p4d_t *p4d, unsigned long addr, + unsigned long end, phys_addr_t phys_addr, + pgprot_t prot) +{ + if (!ioremap_p4d_enabled()) + return 0; + + if ((end - addr) != P4D_SIZE) + return 0; + + if (!IS_ALIGNED(addr, P4D_SIZE)) + return 0; + + if (!IS_ALIGNED(phys_addr, P4D_SIZE)) + return 0; + + if (p4d_present(*p4d) && !p4d_free_pud_page(p4d, addr)) + return 0; + + return p4d_set_huge(p4d, phys_addr, prot); +} + +static inline int ioremap_p4d_range(pgd_t *pgd, unsigned long addr, + unsigned long end, phys_addr_t phys_addr, pgprot_t prot, + pgtbl_mod_mask *mask) +{ + p4d_t *p4d; + unsigned long next; + + p4d = p4d_alloc_track(&init_mm, pgd, addr, mask); + if (!p4d) + return -ENOMEM; + do { + next = p4d_addr_end(addr, end); + + if (ioremap_try_huge_p4d(p4d, addr, next, phys_addr, prot)) { + *mask |= PGTBL_P4D_MODIFIED; + continue; + } + + if (ioremap_pud_range(p4d, addr, next, phys_addr, prot, mask)) + return -ENOMEM; + } while (p4d++, phys_addr += (next - addr), addr = next, addr != end); + return 0; +} + +int ioremap_page_range(unsigned long addr, + unsigned long end, phys_addr_t phys_addr, pgprot_t prot) +{ + pgd_t *pgd; + unsigned long start; + unsigned long next; + int err; + pgtbl_mod_mask mask = 0; + + might_sleep(); + BUG_ON(addr >= end); + + start = addr; + pgd = pgd_offset_k(addr); + do { + next = pgd_addr_end(addr, end); + err = ioremap_p4d_range(pgd, addr, next, phys_addr, prot, + &mask); + if (err) + break; + } while (pgd++, phys_addr += (next - addr), addr = next, addr != end); + + flush_cache_vmap(start, end); + + if (mask & ARCH_PAGE_TABLE_SYNC_MASK) + arch_sync_kernel_mappings(start, end); + + return err; +} + +#ifdef CONFIG_GENERIC_IOREMAP +void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot) +{ + unsigned long offset, vaddr; + phys_addr_t last_addr; + struct vm_struct *area; + + /* Disallow wrap-around or zero size */ + last_addr = addr + size - 1; + if (!size || last_addr < addr) + return NULL; + + /* Page-align mappings */ + offset = addr & (~PAGE_MASK); + addr -= offset; + size = PAGE_ALIGN(size + offset); + + area = get_vm_area_caller(size, VM_IOREMAP, + __builtin_return_address(0)); + if (!area) + return NULL; + vaddr = (unsigned long)area->addr; + + if (ioremap_page_range(vaddr, vaddr + size, addr, __pgprot(prot))) { + free_vm_area(area); + return NULL; + } + + return (void __iomem *)(vaddr + offset); +} +EXPORT_SYMBOL(ioremap_prot); + +void iounmap(volatile void __iomem *addr) +{ + vunmap((void *)((unsigned long)addr & PAGE_MASK)); +} +EXPORT_SYMBOL(iounmap); +#endif /* CONFIG_GENERIC_IOREMAP */ -- cgit v1.2.3 From 0a4954a850b0c4d0a5d18b1a55d6e5a653e362b5 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 6 Aug 2020 23:23:11 -0700 Subject: percpu_counter: add percpu_counter_sync() percpu_counter's accuracy is related to its batch size. For a percpu_counter with a big batch, its deviation could be big, so when the counter's batch is runtime changed to a smaller value for better accuracy, there could also be requirment to reduce the big deviation. So add a percpu-counter sync function to be run on each CPU. Reported-by: kernel test robot Signed-off-by: Feng Tang Signed-off-by: Andrew Morton Cc: Dennis Zhou Cc: Tejun Heo Cc: Christoph Lameter Cc: Michal Hocko Cc: Qian Cai Cc: Andi Kleen Cc: Huang Ying Cc: Dave Hansen Cc: Haiyang Zhang Cc: Johannes Weiner Cc: Kees Cook Cc: "K. Y. Srinivasan" Cc: Matthew Wilcox (Oracle) Cc: Mel Gorman Cc: Tim Chen Link: http://lkml.kernel.org/r/1594389708-60781-4-git-send-email-feng.tang@intel.com Signed-off-by: Linus Torvalds --- include/linux/percpu_counter.h | 4 ++++ lib/percpu_counter.c | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'lib') diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 0a4f54dd4737..01861eebed79 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -44,6 +44,7 @@ void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch); s64 __percpu_counter_sum(struct percpu_counter *fbc); int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch); +void percpu_counter_sync(struct percpu_counter *fbc); static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) { @@ -172,6 +173,9 @@ static inline bool percpu_counter_initialized(struct percpu_counter *fbc) return true; } +static inline void percpu_counter_sync(struct percpu_counter *fbc) +{ +} #endif /* CONFIG_SMP */ static inline void percpu_counter_inc(struct percpu_counter *fbc) diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index a66595ba5543..a2345de90e93 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -98,6 +98,25 @@ void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) } EXPORT_SYMBOL(percpu_counter_add_batch); +/* + * For percpu_counter with a big batch, the devication of its count could + * be big, and there is requirement to reduce the deviation, like when the + * counter's batch could be runtime decreased to get a better accuracy, + * which can be achieved by running this sync function on each CPU. + */ +void percpu_counter_sync(struct percpu_counter *fbc) +{ + unsigned long flags; + s64 count; + + raw_spin_lock_irqsave(&fbc->lock, flags); + count = __this_cpu_read(*fbc->counters); + fbc->count += count; + __this_cpu_sub(*fbc->counters, count); + raw_spin_unlock_irqrestore(&fbc->lock, flags); +} +EXPORT_SYMBOL(percpu_counter_sync); + /* * Add up all the per-cpu counts, return the result. This is a more accurate * but much slower version of percpu_counter_read_positive() -- cgit v1.2.3 From 7a3767f83f173f9df9af9eb04959abe2966d4cc8 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 6 Aug 2020 23:24:28 -0700 Subject: kasan: improve and simplify Kconfig.kasan Turn 'KASAN' into a menuconfig, to avoid cluttering its parent menu with the suboptions if enabled. Use 'if KASAN ... endif' instead of having to 'depend on KASAN' for each entry. Signed-off-by: Marco Elver Signed-off-by: Andrew Morton Reviewed-by: Andrey Konovalov Cc: Andrey Ryabinin Cc: Arnd Bergmann Cc: Daniel Axtens Cc: Dmitry Vyukov Cc: Nick Desaulniers Cc: Walter Wu Link: http://lkml.kernel.org/r/20200629104157.3242503-1-elver@google.com Signed-off-by: Linus Torvalds --- lib/Kconfig.kasan | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 34b84bcbd3d9..89053defc0d9 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -18,7 +18,7 @@ config CC_HAS_KASAN_SW_TAGS config CC_HAS_WORKING_NOSANITIZE_ADDRESS def_bool !CC_IS_GCC || GCC_VERSION >= 80300 -config KASAN +menuconfig KASAN bool "KASAN: runtime memory debugger" depends on (HAVE_ARCH_KASAN && CC_HAS_KASAN_GENERIC) || \ (HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS) @@ -29,9 +29,10 @@ config KASAN designed to find out-of-bounds accesses and use-after-free bugs. See Documentation/dev-tools/kasan.rst for details. +if KASAN + choice prompt "KASAN mode" - depends on KASAN default KASAN_GENERIC help KASAN has two modes: generic KASAN (similar to userspace ASan, @@ -88,7 +89,6 @@ endchoice choice prompt "Instrumentation type" - depends on KASAN default KASAN_OUTLINE config KASAN_OUTLINE @@ -113,7 +113,6 @@ endchoice config KASAN_STACK_ENABLE bool "Enable stack instrumentation (unsafe)" if CC_IS_CLANG && !COMPILE_TEST - depends on KASAN help The LLVM stack address sanitizer has a know problem that causes excessive stack usage in a lot of functions, see @@ -134,7 +133,7 @@ config KASAN_STACK config KASAN_S390_4_LEVEL_PAGING bool "KASan: use 4-level paging" - depends on KASAN && S390 + depends on S390 help Compiling the kernel with KASan disables automatic 3-level vs 4-level paging selection. 3-level paging is used by default (up @@ -151,7 +150,7 @@ config KASAN_SW_TAGS_IDENTIFY config KASAN_VMALLOC bool "Back mappings in vmalloc space with real shadow memory" - depends on KASAN && HAVE_ARCH_KASAN_VMALLOC + depends on HAVE_ARCH_KASAN_VMALLOC help By default, the shadow region for vmalloc space is the read-only zero page. This means that KASAN cannot detect errors involving @@ -164,8 +163,10 @@ config KASAN_VMALLOC config TEST_KASAN tristate "Module for testing KASAN for bug detection" - depends on m && KASAN + depends on m help This is a test module doing various nasty things like out of bounds accesses, use after free. It is useful for testing kernel debugging features like KASAN. + +endif # KASAN -- cgit v1.2.3 From ac4766be5e2d103ebf5d3f2ff676f866f6a56d44 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 6 Aug 2020 23:24:31 -0700 Subject: kasan: update required compiler versions in documentation Updates the recently changed compiler requirements for KASAN. In particular, we require GCC >= 8.3.0, and add a note that Clang 11 supports OOB detection of globals. Fixes: 7b861a53e46b ("kasan: Bump required compiler version") Fixes: acf7b0bf7dcf ("kasan: Fix required compiler version") Signed-off-by: Marco Elver Signed-off-by: Andrew Morton Reviewed-by: Andrey Konovalov Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Nick Desaulniers Cc: Walter Wu Cc: Arnd Bergmann Cc: Daniel Axtens Link: http://lkml.kernel.org/r/20200629104157.3242503-2-elver@google.com Signed-off-by: Linus Torvalds --- Documentation/dev-tools/kasan.rst | 7 ++----- lib/Kconfig.kasan | 24 +++++++++++++++--------- 2 files changed, 17 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst index c652d740735d..15a2a53e77b0 100644 --- a/Documentation/dev-tools/kasan.rst +++ b/Documentation/dev-tools/kasan.rst @@ -13,11 +13,8 @@ KASAN uses compile-time instrumentation to insert validity checks before every memory access, and therefore requires a compiler version that supports that. Generic KASAN is supported in both GCC and Clang. With GCC it requires version -4.9.2 or later for basic support and version 5.0 or later for detection of -out-of-bounds accesses for stack and global variables and for inline -instrumentation mode (see the Usage section). With Clang it requires version -7.0.0 or later and it doesn't support detection of out-of-bounds accesses for -global variables yet. +8.3.0 or later. With Clang it requires version 7.0.0 or later, but detection of +out-of-bounds accesses for global variables is only supported since Clang 11. Tag-based KASAN is only supported in Clang and requires version 7.0.0 or later. diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 89053defc0d9..047b53dbfd58 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -40,6 +40,7 @@ choice software tag-based KASAN (a version based on software memory tagging, arm64 only, similar to userspace HWASan, enabled with CONFIG_KASAN_SW_TAGS). + Both generic and tag-based KASAN are strictly debugging features. config KASAN_GENERIC @@ -51,16 +52,18 @@ config KASAN_GENERIC select STACKDEPOT help Enables generic KASAN mode. - Supported in both GCC and Clang. With GCC it requires version 4.9.2 - or later for basic support and version 5.0 or later for detection of - out-of-bounds accesses for stack and global variables and for inline - instrumentation mode (CONFIG_KASAN_INLINE). With Clang it requires - version 3.7.0 or later and it doesn't support detection of - out-of-bounds accesses for global variables yet. + + This mode is supported in both GCC and Clang. With GCC it requires + version 8.3.0 or later. With Clang it requires version 7.0.0 or + later, but detection of out-of-bounds accesses for global variables + is supported only since Clang 11. + This mode consumes about 1/8th of available memory at kernel start and introduces an overhead of ~x1.5 for the rest of the allocations. The performance slowdown is ~x3. + For better error detection enable CONFIG_STACKTRACE. + Currently CONFIG_KASAN_GENERIC doesn't work with CONFIG_DEBUG_SLAB (the resulting kernel does not boot). @@ -73,15 +76,19 @@ config KASAN_SW_TAGS select STACKDEPOT help Enables software tag-based KASAN mode. + This mode requires Top Byte Ignore support by the CPU and therefore - is only supported for arm64. - This mode requires Clang version 7.0.0 or later. + is only supported for arm64. This mode requires Clang version 7.0.0 + or later. + This mode consumes about 1/16th of available memory at kernel start and introduces an overhead of ~20% for the rest of the allocations. This mode may potentially introduce problems relating to pointer casting and comparison, as it embeds tags into the top byte of each pointer. + For better error detection enable CONFIG_STACKTRACE. + Currently CONFIG_KASAN_SW_TAGS doesn't work with CONFIG_DEBUG_SLAB (the resulting kernel does not boot). @@ -107,7 +114,6 @@ config KASAN_INLINE memory accesses. This is faster than outline (in some workloads it gives about x2 boost over outline instrumentation), but make kernel's .text size much bigger. - For CONFIG_KASAN_GENERIC this requires GCC 5.0 or later. endchoice -- cgit v1.2.3 From 387d6e46681b0d5b64f5d13639f90e3df15e020d Mon Sep 17 00:00:00 2001 From: Walter Wu Date: Thu, 6 Aug 2020 23:24:42 -0700 Subject: kasan: add tests for call_rcu stack recording Test call_rcu() call stack recording and verify whether it correctly is printed in KASAN report. Signed-off-by: Walter Wu Signed-off-by: Andrew Morton Tested-by: Dmitry Vyukov Reviewed-by: Dmitry Vyukov Reviewed-by: Andrey Konovalov Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Matthias Brugger Cc: Joel Fernandes Cc: Jonathan Corbet Cc: Josh Triplett Cc: Lai Jiangshan Cc: Mathieu Desnoyers Cc: "Paul E . McKenney" Link: http://lkml.kernel.org/r/20200601051045.1294-1-walter-zh.wu@mediatek.com Signed-off-by: Linus Torvalds --- lib/test_kasan.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'lib') diff --git a/lib/test_kasan.c b/lib/test_kasan.c index e4d9a86b174b..7f95f85421b2 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -801,6 +801,35 @@ static noinline void __init vmalloc_oob(void) static void __init vmalloc_oob(void) {} #endif +static struct kasan_rcu_info { + int i; + struct rcu_head rcu; +} *global_rcu_ptr; + +static noinline void __init kasan_rcu_reclaim(struct rcu_head *rp) +{ + struct kasan_rcu_info *fp = container_of(rp, + struct kasan_rcu_info, rcu); + + kfree(fp); + fp->i = 1; +} + +static noinline void __init kasan_rcu_uaf(void) +{ + struct kasan_rcu_info *ptr; + + pr_info("use-after-free in kasan_rcu_reclaim\n"); + ptr = kmalloc(sizeof(struct kasan_rcu_info), GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + global_rcu_ptr = rcu_dereference_protected(ptr, NULL); + call_rcu(&global_rcu_ptr->rcu, kasan_rcu_reclaim); +} + static int __init kmalloc_tests_init(void) { /* @@ -848,6 +877,7 @@ static int __init kmalloc_tests_init(void) kasan_bitops(); kmalloc_double_kzfree(); vmalloc_oob(); + kasan_rcu_uaf(); kasan_restore_multi_shot(multishot); -- cgit v1.2.3 From f33a01492a24a276e0bc1c932bcefdb8c1125159 Mon Sep 17 00:00:00 2001 From: Walter Wu Date: Thu, 6 Aug 2020 23:24:54 -0700 Subject: lib/test_kasan.c: fix KASAN unit tests for tag-based KASAN We use tag-based KASAN, then KASAN unit tests don't detect out-of-bounds memory access. They need to be fixed. With tag-based KASAN, the state of each 16 aligned bytes of memory is encoded in one shadow byte and the shadow value is tag of pointer, so we need to read next shadow byte, the shadow value is not equal to tag value of pointer, so that tag-based KASAN will detect out-of-bounds memory access. [walter-zh.wu@mediatek.com: use KASAN_SHADOW_SCALE_SIZE instead of 13] Link: http://lkml.kernel.org/r/20200708132524.11688-1-walter-zh.wu@mediatek.com Suggested-by: Dmitry Vyukov Signed-off-by: Walter Wu Signed-off-by: Andrew Morton Reviewed-by: Andrey Konovalov Acked-by: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Matthias Brugger Link: http://lkml.kernel.org/r/20200706115039.16750-1-walter-zh.wu@mediatek.com Signed-off-by: Linus Torvalds --- lib/test_kasan.c | 49 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 7f95f85421b2..f362f2662938 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -23,6 +23,10 @@ #include +#include "../mm/kasan/kasan.h" + +#define OOB_TAG_OFF (IS_ENABLED(CONFIG_KASAN_GENERIC) ? 0 : KASAN_SHADOW_SCALE_SIZE) + /* * We assign some test results to these globals to make sure the tests * are not eliminated as dead code. @@ -48,7 +52,8 @@ static noinline void __init kmalloc_oob_right(void) return; } - ptr[size] = 'x'; + ptr[size + OOB_TAG_OFF] = 'x'; + kfree(ptr); } @@ -100,7 +105,8 @@ static noinline void __init kmalloc_pagealloc_oob_right(void) return; } - ptr[size] = 0; + ptr[size + OOB_TAG_OFF] = 0; + kfree(ptr); } @@ -170,7 +176,8 @@ static noinline void __init kmalloc_oob_krealloc_more(void) return; } - ptr2[size2] = 'x'; + ptr2[size2 + OOB_TAG_OFF] = 'x'; + kfree(ptr2); } @@ -188,7 +195,9 @@ static noinline void __init kmalloc_oob_krealloc_less(void) kfree(ptr1); return; } - ptr2[size2] = 'x'; + + ptr2[size2 + OOB_TAG_OFF] = 'x'; + kfree(ptr2); } @@ -224,7 +233,8 @@ static noinline void __init kmalloc_oob_memset_2(void) return; } - memset(ptr+7, 0, 2); + memset(ptr + 7 + OOB_TAG_OFF, 0, 2); + kfree(ptr); } @@ -240,7 +250,8 @@ static noinline void __init kmalloc_oob_memset_4(void) return; } - memset(ptr+5, 0, 4); + memset(ptr + 5 + OOB_TAG_OFF, 0, 4); + kfree(ptr); } @@ -257,7 +268,8 @@ static noinline void __init kmalloc_oob_memset_8(void) return; } - memset(ptr+1, 0, 8); + memset(ptr + 1 + OOB_TAG_OFF, 0, 8); + kfree(ptr); } @@ -273,7 +285,8 @@ static noinline void __init kmalloc_oob_memset_16(void) return; } - memset(ptr+1, 0, 16); + memset(ptr + 1 + OOB_TAG_OFF, 0, 16); + kfree(ptr); } @@ -289,7 +302,8 @@ static noinline void __init kmalloc_oob_in_memset(void) return; } - memset(ptr, 0, size+5); + memset(ptr, 0, size + 5 + OOB_TAG_OFF); + kfree(ptr); } @@ -423,7 +437,8 @@ static noinline void __init kmem_cache_oob(void) return; } - *p = p[size]; + *p = p[size + OOB_TAG_OFF]; + kmem_cache_free(cache, p); kmem_cache_destroy(cache); } @@ -520,25 +535,25 @@ static noinline void __init copy_user_test(void) } pr_info("out-of-bounds in copy_from_user()\n"); - unused = copy_from_user(kmem, usermem, size + 1); + unused = copy_from_user(kmem, usermem, size + 1 + OOB_TAG_OFF); pr_info("out-of-bounds in copy_to_user()\n"); - unused = copy_to_user(usermem, kmem, size + 1); + unused = copy_to_user(usermem, kmem, size + 1 + OOB_TAG_OFF); pr_info("out-of-bounds in __copy_from_user()\n"); - unused = __copy_from_user(kmem, usermem, size + 1); + unused = __copy_from_user(kmem, usermem, size + 1 + OOB_TAG_OFF); pr_info("out-of-bounds in __copy_to_user()\n"); - unused = __copy_to_user(usermem, kmem, size + 1); + unused = __copy_to_user(usermem, kmem, size + 1 + OOB_TAG_OFF); pr_info("out-of-bounds in __copy_from_user_inatomic()\n"); - unused = __copy_from_user_inatomic(kmem, usermem, size + 1); + unused = __copy_from_user_inatomic(kmem, usermem, size + 1 + OOB_TAG_OFF); pr_info("out-of-bounds in __copy_to_user_inatomic()\n"); - unused = __copy_to_user_inatomic(usermem, kmem, size + 1); + unused = __copy_to_user_inatomic(usermem, kmem, size + 1 + OOB_TAG_OFF); pr_info("out-of-bounds in strncpy_from_user()\n"); - unused = strncpy_from_user(kmem, usermem, size + 1); + unused = strncpy_from_user(kmem, usermem, size + 1 + OOB_TAG_OFF); vm_munmap((unsigned long)usermem, PAGE_SIZE); kfree(kmem); -- cgit v1.2.3 From 51dcc81c282dc401dfd8460a7e59546bc1b30e32 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 6 Aug 2020 23:25:12 -0700 Subject: kasan: adjust kasan_stack_oob for tag-based mode Use OOB_TAG_OFF as access offset to land the access into the next granule. Suggested-by: Walter Wu Signed-off-by: Andrey Konovalov Signed-off-by: Andrew Morton Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Elena Petrova Cc: Marco Elver Cc: Vincenzo Frascino Cc: Ard Biesheuvel Link: http://lkml.kernel.org/r/403b259f1de49a7a3694531c851ac28326a586a8.1596199677.git.andreyknvl@google.com Link: http://lkml.kernel.org/r/3063ab1411e92bce36061a96e25b651212e70ba6.1596544734.git.andreyknvl@google.com Signed-off-by: Linus Torvalds --- lib/test_kasan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/test_kasan.c b/lib/test_kasan.c index f362f2662938..53e953bb1d1d 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -488,7 +488,7 @@ static noinline void __init kasan_global_oob(void) static noinline void __init kasan_stack_oob(void) { char stack_array[10]; - volatile int i = 0; + volatile int i = OOB_TAG_OFF; char *p = &stack_array[ARRAY_SIZE(stack_array) + i]; pr_info("out-of-bounds on stack\n"); -- cgit v1.2.3