summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2019-02-28 07:50:39 +0100
committerIngo Molnar <mingo@kernel.org>2019-02-28 07:50:39 +0100
commit0614621d89c43ea5b28456c2baf6b0c0e00ca81e (patch)
tree2dff833e972bdb65d30cb317985a0d41d2c40f0e /mm
parent0cf264b3133dce56a60ca8b4335d1f76fe26870a (diff)
parent7d762d69145a54d169f58e56d6dac57a5508debc (diff)
downloadlinux-0614621d89c43ea5b28456c2baf6b0c0e00ca81e.tar.bz2
Merge branch 'linus' into locking/core, to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/debug.c4
-rw-r--r--mm/gup.c3
-rw-r--r--mm/kasan/Makefile2
-rw-r--r--mm/kasan/common.c29
-rw-r--r--mm/kasan/tags.c2
-rw-r--r--mm/kmemleak.c10
-rw-r--r--mm/maccess.c6
-rw-r--r--mm/memblock.c11
-rw-r--r--mm/memory_hotplug.c27
-rw-r--r--mm/mempolicy.c6
-rw-r--r--mm/page_alloc.c20
-rw-r--r--mm/page_ext.c4
-rw-r--r--mm/shmem.c12
-rw-r--r--mm/slab.c15
-rw-r--r--mm/slab.h7
-rw-r--r--mm/slab_common.c3
-rw-r--r--mm/slub.c59
-rw-r--r--mm/swap.c17
-rw-r--r--mm/util.c2
-rw-r--r--mm/vmscan.c10
20 files changed, 150 insertions, 99 deletions
diff --git a/mm/debug.c b/mm/debug.c
index 0abb987dad9b..1611cf00a137 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -44,7 +44,7 @@ const struct trace_print_flags vmaflag_names[] = {
void __dump_page(struct page *page, const char *reason)
{
- struct address_space *mapping = page_mapping(page);
+ struct address_space *mapping;
bool page_poisoned = PagePoisoned(page);
int mapcount;
@@ -58,6 +58,8 @@ void __dump_page(struct page *page, const char *reason)
goto hex_only;
}
+ mapping = page_mapping(page);
+
/*
* Avoid VM_BUG_ON() in page_mapcount().
* page->_mapcount space in struct page is used by sl[aou]b pages to
diff --git a/mm/gup.c b/mm/gup.c
index 05acd7e2eb22..75029649baca 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1674,7 +1674,8 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
if (!pmd_present(pmd))
return 0;
- if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd))) {
+ if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) ||
+ pmd_devmap(pmd))) {
/*
* NUMA hinting faults need to be handled in the GUP
* slowpath for accounting purposes and so that they
diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile
index e2bb06c1b45e..5d1065efbd47 100644
--- a/mm/kasan/Makefile
+++ b/mm/kasan/Makefile
@@ -7,6 +7,8 @@ KCOV_INSTRUMENT := n
CFLAGS_REMOVE_common.o = -pg
CFLAGS_REMOVE_generic.o = -pg
+CFLAGS_REMOVE_tags.o = -pg
+
# Function splitter causes unnecessary splits in __asan_load1/__asan_store1
# see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63533
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index 73c9cbfdedf4..09b534fbba17 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -361,10 +361,15 @@ void kasan_poison_object_data(struct kmem_cache *cache, void *object)
* get different tags.
*/
static u8 assign_tag(struct kmem_cache *cache, const void *object,
- bool init, bool krealloc)
+ bool init, bool keep_tag)
{
- /* Reuse the same tag for krealloc'ed objects. */
- if (krealloc)
+ /*
+ * 1. When an object is kmalloc()'ed, two hooks are called:
+ * kasan_slab_alloc() and kasan_kmalloc(). We assign the
+ * tag only in the first one.
+ * 2. We reuse the same tag for krealloc'ed objects.
+ */
+ if (keep_tag)
return get_tag(object);
/*
@@ -405,12 +410,6 @@ void * __must_check kasan_init_slab_obj(struct kmem_cache *cache,
return (void *)object;
}
-void * __must_check kasan_slab_alloc(struct kmem_cache *cache, void *object,
- gfp_t flags)
-{
- return kasan_kmalloc(cache, object, cache->object_size, flags);
-}
-
static inline bool shadow_invalid(u8 tag, s8 shadow_byte)
{
if (IS_ENABLED(CONFIG_KASAN_GENERIC))
@@ -467,7 +466,7 @@ bool kasan_slab_free(struct kmem_cache *cache, void *object, unsigned long ip)
}
static void *__kasan_kmalloc(struct kmem_cache *cache, const void *object,
- size_t size, gfp_t flags, bool krealloc)
+ size_t size, gfp_t flags, bool keep_tag)
{
unsigned long redzone_start;
unsigned long redzone_end;
@@ -485,7 +484,7 @@ static void *__kasan_kmalloc(struct kmem_cache *cache, const void *object,
KASAN_SHADOW_SCALE_SIZE);
if (IS_ENABLED(CONFIG_KASAN_SW_TAGS))
- tag = assign_tag(cache, object, false, krealloc);
+ tag = assign_tag(cache, object, false, keep_tag);
/* Tag is ignored in set_tag without CONFIG_KASAN_SW_TAGS */
kasan_unpoison_shadow(set_tag(object, tag), size);
@@ -498,10 +497,16 @@ static void *__kasan_kmalloc(struct kmem_cache *cache, const void *object,
return set_tag(object, tag);
}
+void * __must_check kasan_slab_alloc(struct kmem_cache *cache, void *object,
+ gfp_t flags)
+{
+ return __kasan_kmalloc(cache, object, cache->object_size, flags, false);
+}
+
void * __must_check kasan_kmalloc(struct kmem_cache *cache, const void *object,
size_t size, gfp_t flags)
{
- return __kasan_kmalloc(cache, object, size, flags, false);
+ return __kasan_kmalloc(cache, object, size, flags, true);
}
EXPORT_SYMBOL(kasan_kmalloc);
diff --git a/mm/kasan/tags.c b/mm/kasan/tags.c
index 0777649e07c4..63fca3172659 100644
--- a/mm/kasan/tags.c
+++ b/mm/kasan/tags.c
@@ -46,7 +46,7 @@ void kasan_init_tags(void)
int cpu;
for_each_possible_cpu(cpu)
- per_cpu(prng_state, cpu) = get_random_u32();
+ per_cpu(prng_state, cpu) = (u32)get_cycles();
}
/*
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index f9d9dc250428..707fa5579f66 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -574,6 +574,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
unsigned long flags;
struct kmemleak_object *object, *parent;
struct rb_node **link, *rb_parent;
+ unsigned long untagged_ptr;
object = kmem_cache_alloc(object_cache, gfp_kmemleak_mask(gfp));
if (!object) {
@@ -619,8 +620,9 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
write_lock_irqsave(&kmemleak_lock, flags);
- min_addr = min(min_addr, ptr);
- max_addr = max(max_addr, ptr + size);
+ untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr);
+ min_addr = min(min_addr, untagged_ptr);
+ max_addr = max(max_addr, untagged_ptr + size);
link = &object_tree_root.rb_node;
rb_parent = NULL;
while (*link) {
@@ -1333,6 +1335,7 @@ static void scan_block(void *_start, void *_end,
unsigned long *start = PTR_ALIGN(_start, BYTES_PER_POINTER);
unsigned long *end = _end - (BYTES_PER_POINTER - 1);
unsigned long flags;
+ unsigned long untagged_ptr;
read_lock_irqsave(&kmemleak_lock, flags);
for (ptr = start; ptr < end; ptr++) {
@@ -1347,7 +1350,8 @@ static void scan_block(void *_start, void *_end,
pointer = *ptr;
kasan_enable_current();
- if (pointer < min_addr || pointer >= max_addr)
+ untagged_ptr = (unsigned long)kasan_reset_tag((void *)pointer);
+ if (untagged_ptr < min_addr || untagged_ptr >= max_addr)
continue;
/*
diff --git a/mm/maccess.c b/mm/maccess.c
index f3416632e5a4..ec00be51a24f 100644
--- a/mm/maccess.c
+++ b/mm/maccess.c
@@ -30,10 +30,8 @@ long __probe_kernel_read(void *dst, const void *src, size_t size)
set_fs(KERNEL_DS);
pagefault_disable();
- current->kernel_uaccess_faults_ok++;
ret = __copy_from_user_inatomic(dst,
(__force const void __user *)src, size);
- current->kernel_uaccess_faults_ok--;
pagefault_enable();
set_fs(old_fs);
@@ -60,9 +58,7 @@ long __probe_kernel_write(void *dst, const void *src, size_t size)
set_fs(KERNEL_DS);
pagefault_disable();
- current->kernel_uaccess_faults_ok++;
ret = __copy_to_user_inatomic((__force void __user *)dst, src, size);
- current->kernel_uaccess_faults_ok--;
pagefault_enable();
set_fs(old_fs);
@@ -98,13 +94,11 @@ long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
set_fs(KERNEL_DS);
pagefault_disable();
- current->kernel_uaccess_faults_ok++;
do {
ret = __get_user(*dst++, (const char __user __force *)src++);
} while (dst[-1] && ret == 0 && src - unsafe_addr < count);
- current->kernel_uaccess_faults_ok--;
dst[-1] = '\0';
pagefault_enable();
set_fs(old_fs);
diff --git a/mm/memblock.c b/mm/memblock.c
index 022d4cbb3618..ea31045ba704 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -26,6 +26,13 @@
#include "internal.h"
+#define INIT_MEMBLOCK_REGIONS 128
+#define INIT_PHYSMEM_REGIONS 4
+
+#ifndef INIT_MEMBLOCK_RESERVED_REGIONS
+# define INIT_MEMBLOCK_RESERVED_REGIONS INIT_MEMBLOCK_REGIONS
+#endif
+
/**
* DOC: memblock overview
*
@@ -92,7 +99,7 @@ unsigned long max_pfn;
unsigned long long max_possible_pfn;
static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
-static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
+static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_RESERVED_REGIONS] __initdata_memblock;
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS] __initdata_memblock;
#endif
@@ -105,7 +112,7 @@ struct memblock memblock __initdata_memblock = {
.reserved.regions = memblock_reserved_init_regions,
.reserved.cnt = 1, /* empty dummy entry */
- .reserved.max = INIT_MEMBLOCK_REGIONS,
+ .reserved.max = INIT_MEMBLOCK_RESERVED_REGIONS,
.reserved.name = "reserved",
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 124e794867c5..1ad28323fb9f 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1188,11 +1188,13 @@ static inline int pageblock_free(struct page *page)
return PageBuddy(page) && page_order(page) >= pageblock_order;
}
-/* Return the start of the next active pageblock after a given page */
-static struct page *next_active_pageblock(struct page *page)
+/* Return the pfn of the start of the next active pageblock after a given pfn */
+static unsigned long next_active_pageblock(unsigned long pfn)
{
+ struct page *page = pfn_to_page(pfn);
+
/* Ensure the starting page is pageblock-aligned */
- BUG_ON(page_to_pfn(page) & (pageblock_nr_pages - 1));
+ BUG_ON(pfn & (pageblock_nr_pages - 1));
/* If the entire pageblock is free, move to the end of free page */
if (pageblock_free(page)) {
@@ -1200,16 +1202,16 @@ static struct page *next_active_pageblock(struct page *page)
/* be careful. we don't have locks, page_order can be changed.*/
order = page_order(page);
if ((order < MAX_ORDER) && (order >= pageblock_order))
- return page + (1 << order);
+ return pfn + (1 << order);
}
- return page + pageblock_nr_pages;
+ return pfn + pageblock_nr_pages;
}
-static bool is_pageblock_removable_nolock(struct page *page)
+static bool is_pageblock_removable_nolock(unsigned long pfn)
{
+ struct page *page = pfn_to_page(pfn);
struct zone *zone;
- unsigned long pfn;
/*
* We have to be careful here because we are iterating over memory
@@ -1232,13 +1234,14 @@ static bool is_pageblock_removable_nolock(struct page *page)
/* Checks if this range of memory is likely to be hot-removable. */
bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
{
- struct page *page = pfn_to_page(start_pfn);
- unsigned long end_pfn = min(start_pfn + nr_pages, zone_end_pfn(page_zone(page)));
- struct page *end_page = pfn_to_page(end_pfn);
+ unsigned long end_pfn, pfn;
+
+ end_pfn = min(start_pfn + nr_pages,
+ zone_end_pfn(page_zone(pfn_to_page(start_pfn))));
/* Check the starting page of each pageblock within the range */
- for (; page < end_page; page = next_active_pageblock(page)) {
- if (!is_pageblock_removable_nolock(page))
+ for (pfn = start_pfn; pfn < end_pfn; pfn = next_active_pageblock(pfn)) {
+ if (!is_pageblock_removable_nolock(pfn))
return false;
cond_resched();
}
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d4496d9d34f5..ee2bce59d2bf 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1314,7 +1314,7 @@ static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode,
nodemask_t *nodes)
{
unsigned long copy = ALIGN(maxnode-1, 64) / 8;
- const int nbytes = BITS_TO_LONGS(MAX_NUMNODES) * sizeof(long);
+ unsigned int nbytes = BITS_TO_LONGS(nr_node_ids) * sizeof(long);
if (copy > nbytes) {
if (copy > PAGE_SIZE)
@@ -1491,7 +1491,7 @@ static int kernel_get_mempolicy(int __user *policy,
int uninitialized_var(pval);
nodemask_t nodes;
- if (nmask != NULL && maxnode < MAX_NUMNODES)
+ if (nmask != NULL && maxnode < nr_node_ids)
return -EINVAL;
err = do_get_mempolicy(&pval, &nodes, addr, flags);
@@ -1527,7 +1527,7 @@ COMPAT_SYSCALL_DEFINE5(get_mempolicy, int __user *, policy,
unsigned long nr_bits, alloc_size;
DECLARE_BITMAP(bm, MAX_NUMNODES);
- nr_bits = min_t(unsigned long, maxnode-1, MAX_NUMNODES);
+ nr_bits = min_t(unsigned long, maxnode-1, nr_node_ids);
alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
if (nmask)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 35fdde041f5c..0b9f577b1a2a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2170,6 +2170,18 @@ static inline void boost_watermark(struct zone *zone)
max_boost = mult_frac(zone->_watermark[WMARK_HIGH],
watermark_boost_factor, 10000);
+
+ /*
+ * high watermark may be uninitialised if fragmentation occurs
+ * very early in boot so do not boost. We do not fall
+ * through and boost by pageblock_nr_pages as failing
+ * allocations that early means that reclaim is not going
+ * to help and it may even be impossible to reclaim the
+ * boosted watermark resulting in a hang.
+ */
+ if (!max_boost)
+ return;
+
max_boost = max(pageblock_nr_pages, max_boost);
zone->watermark_boost = min(zone->watermark_boost + pageblock_nr_pages,
@@ -4675,11 +4687,11 @@ refill:
/* Even if we own the page, we do not use atomic_set().
* This would break get_page_unless_zero() users.
*/
- page_ref_add(page, size - 1);
+ page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
/* reset page count bias and offset to start of new frag */
nc->pfmemalloc = page_is_pfmemalloc(page);
- nc->pagecnt_bias = size;
+ nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
nc->offset = size;
}
@@ -4695,10 +4707,10 @@ refill:
size = nc->size;
#endif
/* OK, page count is 0, we can safely set it */
- set_page_count(page, size);
+ set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
/* reset page count bias and offset to start of new frag */
- nc->pagecnt_bias = size;
+ nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
offset = size - fragsz;
}
diff --git a/mm/page_ext.c b/mm/page_ext.c
index ae44f7adbe07..8c78b8d45117 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -398,10 +398,8 @@ void __init page_ext_init(void)
* We know some arch can have a nodes layout such as
* -------------pfn-------------->
* N0 | N1 | N2 | N0 | N1 | N2|....
- *
- * Take into account DEFERRED_STRUCT_PAGE_INIT.
*/
- if (early_pfn_to_nid(pfn) != nid)
+ if (pfn_to_nid(pfn) != nid)
continue;
if (init_section_page_ext(pfn, nid))
goto oom;
diff --git a/mm/shmem.c b/mm/shmem.c
index 6ece1e2fe76e..2c012eee133d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2848,16 +2848,20 @@ static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode,
static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
{
struct inode *inode = d_inode(old_dentry);
- int ret;
+ int ret = 0;
/*
* No ordinary (disk based) filesystem counts links as inodes;
* but each new link needs a new dentry, pinning lowmem, and
* tmpfs dentries cannot be pruned until they are unlinked.
+ * But if an O_TMPFILE file is linked into the tmpfs, the
+ * first link must skip that, to get the accounting right.
*/
- ret = shmem_reserve_inode(inode->i_sb);
- if (ret)
- goto out;
+ if (inode->i_nlink) {
+ ret = shmem_reserve_inode(inode->i_sb);
+ if (ret)
+ goto out;
+ }
dir->i_size += BOGO_DIRENT_SIZE;
inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
diff --git a/mm/slab.c b/mm/slab.c
index 78eb8c5bf4e4..91c1863df93d 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2359,7 +2359,7 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep,
void *freelist;
void *addr = page_address(page);
- page->s_mem = kasan_reset_tag(addr) + colour_off;
+ page->s_mem = addr + colour_off;
page->active = 0;
if (OBJFREELIST_SLAB(cachep))
@@ -2368,6 +2368,7 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep,
/* Slab management obj is off-slab. */
freelist = kmem_cache_alloc_node(cachep->freelist_cache,
local_flags, nodeid);
+ freelist = kasan_reset_tag(freelist);
if (!freelist)
return NULL;
} else {
@@ -2681,6 +2682,13 @@ static struct page *cache_grow_begin(struct kmem_cache *cachep,
offset *= cachep->colour_off;
+ /*
+ * Call kasan_poison_slab() before calling alloc_slabmgmt(), so
+ * page_address() in the latter returns a non-tagged pointer,
+ * as it should be for slab pages.
+ */
+ kasan_poison_slab(page);
+
/* Get slab management. */
freelist = alloc_slabmgmt(cachep, page, offset,
local_flags & ~GFP_CONSTRAINT_MASK, page_node);
@@ -2689,7 +2697,6 @@ static struct page *cache_grow_begin(struct kmem_cache *cachep,
slab_map_pages(cachep, page, freelist);
- kasan_poison_slab(page);
cache_init_objs(cachep, page);
if (gfpflags_allow_blocking(local_flags))
@@ -3540,7 +3547,6 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
void *ret = slab_alloc(cachep, flags, _RET_IP_);
- ret = kasan_slab_alloc(cachep, ret, flags);
trace_kmem_cache_alloc(_RET_IP_, ret,
cachep->object_size, cachep->size, flags);
@@ -3630,7 +3636,6 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
void *ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);
- ret = kasan_slab_alloc(cachep, ret, flags);
trace_kmem_cache_alloc_node(_RET_IP_, ret,
cachep->object_size, cachep->size,
flags, nodeid);
@@ -4408,6 +4413,8 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
unsigned int objnr;
unsigned long offset;
+ ptr = kasan_reset_tag(ptr);
+
/* Find and validate object. */
cachep = page->slab_cache;
objnr = obj_to_index(cachep, page, (void *)ptr);
diff --git a/mm/slab.h b/mm/slab.h
index 4190c24ef0e9..384105318779 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -437,11 +437,10 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
flags &= gfp_allowed_mask;
for (i = 0; i < size; i++) {
- void *object = p[i];
-
- kmemleak_alloc_recursive(object, s->object_size, 1,
+ p[i] = kasan_slab_alloc(s, p[i], flags);
+ /* As p[i] might get tagged, call kmemleak hook after KASAN. */
+ kmemleak_alloc_recursive(p[i], s->object_size, 1,
s->flags, flags);
- p[i] = kasan_slab_alloc(s, object, flags);
}
if (memcg_kmem_enabled())
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 81732d05e74a..f9d89c1b5977 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1228,8 +1228,9 @@ void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
flags |= __GFP_COMP;
page = alloc_pages(flags, order);
ret = page ? page_address(page) : NULL;
- kmemleak_alloc(ret, size, 1, flags);
ret = kasan_kmalloc_large(ret, size, flags);
+ /* As ret might get tagged, call kmemleak hook after KASAN. */
+ kmemleak_alloc(ret, size, 1, flags);
return ret;
}
EXPORT_SYMBOL(kmalloc_order);
diff --git a/mm/slub.c b/mm/slub.c
index 1e3d0ec4e200..dc777761b6b7 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -249,7 +249,18 @@ static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr,
unsigned long ptr_addr)
{
#ifdef CONFIG_SLAB_FREELIST_HARDENED
- return (void *)((unsigned long)ptr ^ s->random ^ ptr_addr);
+ /*
+ * When CONFIG_KASAN_SW_TAGS is enabled, ptr_addr might be tagged.
+ * Normally, this doesn't cause any issues, as both set_freepointer()
+ * and get_freepointer() are called with a pointer with the same tag.
+ * However, there are some issues with CONFIG_SLUB_DEBUG code. For
+ * example, when __free_slub() iterates over objects in a cache, it
+ * passes untagged pointers to check_object(). check_object() in turns
+ * calls get_freepointer() with an untagged pointer, which causes the
+ * freepointer to be restored incorrectly.
+ */
+ return (void *)((unsigned long)ptr ^ s->random ^
+ (unsigned long)kasan_reset_tag((void *)ptr_addr));
#else
return ptr;
#endif
@@ -303,15 +314,10 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
__p < (__addr) + (__objects) * (__s)->size; \
__p += (__s)->size)
-#define for_each_object_idx(__p, __idx, __s, __addr, __objects) \
- for (__p = fixup_red_left(__s, __addr), __idx = 1; \
- __idx <= __objects; \
- __p += (__s)->size, __idx++)
-
/* Determine object index from a given position */
static inline unsigned int slab_index(void *p, struct kmem_cache *s, void *addr)
{
- return (p - addr) / s->size;
+ return (kasan_reset_tag(p) - addr) / s->size;
}
static inline unsigned int order_objects(unsigned int order, unsigned int size)
@@ -507,6 +513,7 @@ static inline int check_valid_pointer(struct kmem_cache *s,
return 1;
base = page_address(page);
+ object = kasan_reset_tag(object);
object = restore_red_left(s, object);
if (object < base || object >= base + page->objects * s->size ||
(object - base) % s->size) {
@@ -1075,6 +1082,16 @@ static void setup_object_debug(struct kmem_cache *s, struct page *page,
init_tracking(s, object);
}
+static void setup_page_debug(struct kmem_cache *s, void *addr, int order)
+{
+ if (!(s->flags & SLAB_POISON))
+ return;
+
+ metadata_access_enable();
+ memset(addr, POISON_INUSE, PAGE_SIZE << order);
+ metadata_access_disable();
+}
+
static inline int alloc_consistency_checks(struct kmem_cache *s,
struct page *page,
void *object, unsigned long addr)
@@ -1330,6 +1347,8 @@ slab_flags_t kmem_cache_flags(unsigned int object_size,
#else /* !CONFIG_SLUB_DEBUG */
static inline void setup_object_debug(struct kmem_cache *s,
struct page *page, void *object) {}
+static inline void setup_page_debug(struct kmem_cache *s,
+ void *addr, int order) {}
static inline int alloc_debug_processing(struct kmem_cache *s,
struct page *page, void *object, unsigned long addr) { return 0; }
@@ -1374,8 +1393,10 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node,
*/
static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
{
+ ptr = kasan_kmalloc_large(ptr, size, flags);
+ /* As ptr might get tagged, call kmemleak hook after KASAN. */
kmemleak_alloc(ptr, size, 1, flags);
- return kasan_kmalloc_large(ptr, size, flags);
+ return ptr;
}
static __always_inline void kfree_hook(void *x)
@@ -1641,27 +1662,25 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
if (page_is_pfmemalloc(page))
SetPageSlabPfmemalloc(page);
+ kasan_poison_slab(page);
+
start = page_address(page);
- if (unlikely(s->flags & SLAB_POISON))
- memset(start, POISON_INUSE, PAGE_SIZE << order);
-
- kasan_poison_slab(page);
+ setup_page_debug(s, start, order);
shuffle = shuffle_freelist(s, page);
if (!shuffle) {
- for_each_object_idx(p, idx, s, start, page->objects) {
- if (likely(idx < page->objects)) {
- next = p + s->size;
- next = setup_object(s, page, next);
- set_freepointer(s, p, next);
- } else
- set_freepointer(s, p, NULL);
- }
start = fixup_red_left(s, start);
start = setup_object(s, page, start);
page->freelist = start;
+ for (idx = 0, p = start; idx < page->objects - 1; idx++) {
+ next = p + s->size;
+ next = setup_object(s, page, next);
+ set_freepointer(s, p, next);
+ p = next;
+ }
+ set_freepointer(s, p, NULL);
}
page->inuse = page->objects;
diff --git a/mm/swap.c b/mm/swap.c
index 4929bc1be60e..4d7d37eb3c40 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -320,11 +320,6 @@ static inline void activate_page_drain(int cpu)
{
}
-static bool need_activate_page_drain(int cpu)
-{
- return false;
-}
-
void activate_page(struct page *page)
{
struct zone *zone = page_zone(page);
@@ -653,13 +648,15 @@ void lru_add_drain(void)
put_cpu();
}
+#ifdef CONFIG_SMP
+
+static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
+
static void lru_add_drain_per_cpu(struct work_struct *dummy)
{
lru_add_drain();
}
-static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
-
/*
* Doesn't need any cpu hotplug locking because we do rely on per-cpu
* kworkers being shut down before our page_alloc_cpu_dead callback is
@@ -702,6 +699,12 @@ void lru_add_drain_all(void)
mutex_unlock(&lock);
}
+#else
+void lru_add_drain_all(void)
+{
+ lru_add_drain();
+}
+#endif
/**
* release_pages - batched put_page()
diff --git a/mm/util.c b/mm/util.c
index 1ea055138043..379319b1bcfd 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -150,7 +150,7 @@ void *memdup_user(const void __user *src, size_t len)
{
void *p;
- p = kmalloc_track_caller(len, GFP_USER);
+ p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN);
if (!p)
return ERR_PTR(-ENOMEM);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a714c4f800e9..e979705bbf32 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -491,16 +491,6 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
delta = freeable / 2;
}
- /*
- * Make sure we apply some minimal pressure on default priority
- * even on small cgroups. Stale objects are not only consuming memory
- * by themselves, but can also hold a reference to a dying cgroup,
- * preventing it from being reclaimed. A dying cgroup with all
- * corresponding structures like per-cpu stats and kmem caches
- * can be really big, so it may lead to a significant waste of memory.
- */
- delta = max_t(unsigned long long, delta, min(freeable, batch_size));
-
total_scan += delta;
if (total_scan < 0) {
pr_err("shrink_slab: %pF negative objects to delete nr=%ld\n",