summaryrefslogtreecommitdiffstats
path: root/mm/slub.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/slub.c')
-rw-r--r--mm/slub.c169
1 files changed, 82 insertions, 87 deletions
diff --git a/mm/slub.c b/mm/slub.c
index f5baf429654f..b2833ce85c92 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -235,6 +235,14 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si)
#endif
}
+/*
+ * Tracks for which NUMA nodes we have kmem_cache_nodes allocated.
+ * Corresponds to node_state[N_NORMAL_MEMORY], but can temporarily
+ * differ during memory hotplug/hotremove operations.
+ * Protected by slab_mutex.
+ */
+static nodemask_t slab_nodes;
+
/********************************************************************
* Core slab cache functions
*******************************************************************/
@@ -1400,7 +1408,6 @@ __setup("slub_debug", setup_slub_debug);
* @object_size: the size of an object without meta data
* @flags: flags to set
* @name: name of the cache
- * @ctor: constructor function
*
* Debug option(s) are applied to @flags. In addition to the debug
* option(s), if a slab name (or multiple) is specified i.e.
@@ -1408,13 +1415,21 @@ __setup("slub_debug", setup_slub_debug);
* then only the select slabs will receive the debug option(s).
*/
slab_flags_t kmem_cache_flags(unsigned int object_size,
- slab_flags_t flags, const char *name,
- void (*ctor)(void *))
+ slab_flags_t flags, const char *name)
{
char *iter;
size_t len;
char *next_block;
slab_flags_t block_flags;
+ slab_flags_t slub_debug_local = slub_debug;
+
+ /*
+ * If the slab cache is for debugging (e.g. kmemleak) then
+ * don't store user (stack trace) information by default,
+ * but let the user enable it via the command line below.
+ */
+ if (flags & SLAB_NOLEAKTRACE)
+ slub_debug_local &= ~SLAB_STORE_USER;
len = strlen(name);
next_block = slub_debug_string;
@@ -1449,7 +1464,7 @@ slab_flags_t kmem_cache_flags(unsigned int object_size,
}
}
- return flags | slub_debug;
+ return flags | slub_debug_local;
}
#else /* !CONFIG_SLUB_DEBUG */
static inline void setup_object_debug(struct kmem_cache *s,
@@ -1474,8 +1489,7 @@ static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
struct page *page) {}
slab_flags_t kmem_cache_flags(unsigned int object_size,
- slab_flags_t flags, const char *name,
- void (*ctor)(void *))
+ slab_flags_t flags, const char *name)
{
return flags;
}
@@ -1514,7 +1528,7 @@ static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
static __always_inline void kfree_hook(void *x)
{
kmemleak_free(x);
- kasan_kfree_large(x, _RET_IP_);
+ kasan_kfree_large(x);
}
static __always_inline bool slab_free_hook(struct kmem_cache *s, void *x)
@@ -1544,7 +1558,7 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s, void *x)
KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT);
/* KASAN might put x into memory quarantine, delaying its reuse */
- return kasan_slab_free(s, x, _RET_IP_);
+ return kasan_slab_free(s, x);
}
static inline bool slab_free_freelist_hook(struct kmem_cache *s,
@@ -1771,7 +1785,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
page->objects = oo_objects(oo);
- account_slab_page(page, oo_order(oo), s);
+ account_slab_page(page, oo_order(oo), s, flags);
page->slab_cache = s;
__SetPageSlab(page);
@@ -2153,9 +2167,9 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
{
enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
struct kmem_cache_node *n = get_node(s, page_to_nid(page));
- int lock = 0;
+ int lock = 0, free_delta = 0;
enum slab_modes l = M_NONE, m = M_NONE;
- void *nextfree;
+ void *nextfree, *freelist_iter, *freelist_tail;
int tail = DEACTIVATE_TO_HEAD;
struct page new;
struct page old;
@@ -2166,45 +2180,34 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
}
/*
- * Stage one: Free all available per cpu objects back
- * to the page freelist while it is still frozen. Leave the
- * last one.
- *
- * There is no need to take the list->lock because the page
- * is still frozen.
+ * Stage one: Count the objects on cpu's freelist as free_delta and
+ * remember the last object in freelist_tail for later splicing.
*/
- while (freelist && (nextfree = get_freepointer(s, freelist))) {
- void *prior;
- unsigned long counters;
+ freelist_tail = NULL;
+ freelist_iter = freelist;
+ while (freelist_iter) {
+ nextfree = get_freepointer(s, freelist_iter);
/*
* If 'nextfree' is invalid, it is possible that the object at
- * 'freelist' is already corrupted. So isolate all objects
- * starting at 'freelist'.
+ * 'freelist_iter' is already corrupted. So isolate all objects
+ * starting at 'freelist_iter' by skipping them.
*/
- if (freelist_corrupted(s, page, &freelist, nextfree))
+ if (freelist_corrupted(s, page, &freelist_iter, nextfree))
break;
- do {
- prior = page->freelist;
- counters = page->counters;
- set_freepointer(s, freelist, prior);
- new.counters = counters;
- new.inuse--;
- VM_BUG_ON(!new.frozen);
-
- } while (!__cmpxchg_double_slab(s, page,
- prior, counters,
- freelist, new.counters,
- "drain percpu freelist"));
+ freelist_tail = freelist_iter;
+ free_delta++;
- freelist = nextfree;
+ freelist_iter = nextfree;
}
/*
- * Stage two: Ensure that the page is unfrozen while the
- * list presence reflects the actual number of objects
- * during unfreeze.
+ * Stage two: Unfreeze the page while splicing the per-cpu
+ * freelist to the head of page's freelist.
+ *
+ * Ensure that the page is unfrozen while the list presence
+ * reflects the actual number of objects during unfreeze.
*
* We setup the list membership and then perform a cmpxchg
* with the count. If there is a mismatch then the page
@@ -2217,15 +2220,15 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
*/
redo:
- old.freelist = page->freelist;
- old.counters = page->counters;
+ old.freelist = READ_ONCE(page->freelist);
+ old.counters = READ_ONCE(page->counters);
VM_BUG_ON(!old.frozen);
/* Determine target state of the slab */
new.counters = old.counters;
- if (freelist) {
- new.inuse--;
- set_freepointer(s, freelist, old.freelist);
+ if (freelist_tail) {
+ new.inuse -= free_delta;
+ set_freepointer(s, freelist_tail, old.freelist);
new.freelist = freelist;
} else
new.freelist = old.freelist;
@@ -2672,7 +2675,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
* ignore the node constraint
*/
if (unlikely(node != NUMA_NO_NODE &&
- !node_state(node, N_NORMAL_MEMORY)))
+ !node_isset(node, slab_nodes)))
node = NUMA_NO_NODE;
goto new_slab;
}
@@ -2683,7 +2686,7 @@ redo:
* same as above but node_match() being false already
* implies node != NUMA_NO_NODE
*/
- if (!node_state(node, N_NORMAL_MEMORY)) {
+ if (!node_isset(node, slab_nodes)) {
node = NUMA_NO_NODE;
goto redo;
} else {
@@ -3157,7 +3160,7 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
if (!s)
return;
slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
- trace_kmem_cache_free(_RET_IP_, x);
+ trace_kmem_cache_free(_RET_IP_, x, s->name);
}
EXPORT_SYMBOL(kmem_cache_free);
@@ -3266,7 +3269,7 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
if (!df.page)
continue;
- slab_free(df.s, df.page, df.freelist, df.tail, df.cnt,_RET_IP_);
+ slab_free(df.s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
} while (likely(size));
}
EXPORT_SYMBOL(kmem_cache_free_bulk);
@@ -3586,7 +3589,7 @@ static int init_kmem_cache_nodes(struct kmem_cache *s)
{
int node;
- for_each_node_state(node, N_NORMAL_MEMORY) {
+ for_each_node_mask(node, slab_nodes) {
struct kmem_cache_node *n;
if (slab_state == DOWN) {
@@ -3797,7 +3800,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
{
- s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
+ s->flags = kmem_cache_flags(s->size, flags, s->name);
#ifdef CONFIG_SLAB_FREELIST_HARDENED
s->random = get_random_long();
#endif
@@ -4039,8 +4042,8 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
page = alloc_pages_node(node, flags, order);
if (page) {
ptr = page_address(page);
- mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B,
- PAGE_SIZE << order);
+ mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
+ PAGE_SIZE << order);
}
return kmalloc_large_node_hook(ptr, size, flags);
@@ -4171,8 +4174,8 @@ void kfree(const void *x)
BUG_ON(!PageCompound(page));
kfree_hook(object);
- mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B,
- -(PAGE_SIZE << order));
+ mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
+ -(PAGE_SIZE << order));
__free_pages(page, order);
return;
}
@@ -4267,8 +4270,6 @@ static int slab_mem_going_offline_callback(void *arg)
static void slab_mem_offline_callback(void *arg)
{
- struct kmem_cache_node *n;
- struct kmem_cache *s;
struct memory_notify *marg = arg;
int offline_node;
@@ -4282,21 +4283,12 @@ static void slab_mem_offline_callback(void *arg)
return;
mutex_lock(&slab_mutex);
- list_for_each_entry(s, &slab_caches, list) {
- n = get_node(s, offline_node);
- if (n) {
- /*
- * if n->nr_slabs > 0, slabs still exist on the node
- * that is going down. We were unable to free them,
- * and offline_pages() function shouldn't call this
- * callback. So, we must fail.
- */
- BUG_ON(slabs_node(s, offline_node));
-
- s->node[offline_node] = NULL;
- kmem_cache_free(kmem_cache_node, n);
- }
- }
+ node_clear(offline_node, slab_nodes);
+ /*
+ * We no longer free kmem_cache_node structures here, as it would be
+ * racy with all get_node() users, and infeasible to protect them with
+ * slab_mutex.
+ */
mutex_unlock(&slab_mutex);
}
@@ -4323,6 +4315,12 @@ static int slab_mem_going_online_callback(void *arg)
mutex_lock(&slab_mutex);
list_for_each_entry(s, &slab_caches, list) {
/*
+ * The structure may already exist if the node was previously
+ * onlined and offlined.
+ */
+ if (get_node(s, nid))
+ continue;
+ /*
* XXX: kmem_cache_alloc_node will fallback to other nodes
* since memory is not yet available from the node that
* is brought up.
@@ -4335,6 +4333,11 @@ static int slab_mem_going_online_callback(void *arg)
init_kmem_cache_node(n);
s->node[nid] = n;
}
+ /*
+ * Any cache created after this point will also have kmem_cache_node
+ * initialized for the new node.
+ */
+ node_set(nid, slab_nodes);
out:
mutex_unlock(&slab_mutex);
return ret;
@@ -4415,6 +4418,7 @@ void __init kmem_cache_init(void)
{
static __initdata struct kmem_cache boot_kmem_cache,
boot_kmem_cache_node;
+ int node;
if (debug_guardpage_minorder())
slub_max_order = 0;
@@ -4422,6 +4426,13 @@ void __init kmem_cache_init(void)
kmem_cache_node = &boot_kmem_cache_node;
kmem_cache = &boot_kmem_cache;
+ /*
+ * Initialize the nodemask for which we will allocate per node
+ * structures. Here we don't need taking slab_mutex yet.
+ */
+ for_each_node_state(node, N_NORMAL_MEMORY)
+ node_set(node, slab_nodes);
+
create_boot_cache(kmem_cache_node, "kmem_cache_node",
sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0);
@@ -4932,22 +4943,6 @@ enum slab_stat_type {
#define SO_OBJECTS (1 << SL_OBJECTS)
#define SO_TOTAL (1 << SL_TOTAL)
-#ifdef CONFIG_MEMCG
-static bool memcg_sysfs_enabled = IS_ENABLED(CONFIG_SLUB_MEMCG_SYSFS_ON);
-
-static int __init setup_slub_memcg_sysfs(char *str)
-{
- int v;
-
- if (get_option(&str, &v) > 0)
- memcg_sysfs_enabled = v;
-
- return 1;
-}
-
-__setup("slub_memcg_sysfs=", setup_slub_memcg_sysfs);
-#endif
-
static ssize_t show_slab_objects(struct kmem_cache *s,
char *buf, unsigned long flags)
{