diff options
Diffstat (limited to 'mm/slub.c')
-rw-r--r-- | mm/slub.c | 169 |
1 files changed, 82 insertions, 87 deletions
diff --git a/mm/slub.c b/mm/slub.c index f5baf429654f..b2833ce85c92 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -235,6 +235,14 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si) #endif } +/* + * Tracks for which NUMA nodes we have kmem_cache_nodes allocated. + * Corresponds to node_state[N_NORMAL_MEMORY], but can temporarily + * differ during memory hotplug/hotremove operations. + * Protected by slab_mutex. + */ +static nodemask_t slab_nodes; + /******************************************************************** * Core slab cache functions *******************************************************************/ @@ -1400,7 +1408,6 @@ __setup("slub_debug", setup_slub_debug); * @object_size: the size of an object without meta data * @flags: flags to set * @name: name of the cache - * @ctor: constructor function * * Debug option(s) are applied to @flags. In addition to the debug * option(s), if a slab name (or multiple) is specified i.e. @@ -1408,13 +1415,21 @@ __setup("slub_debug", setup_slub_debug); * then only the select slabs will receive the debug option(s). */ slab_flags_t kmem_cache_flags(unsigned int object_size, - slab_flags_t flags, const char *name, - void (*ctor)(void *)) + slab_flags_t flags, const char *name) { char *iter; size_t len; char *next_block; slab_flags_t block_flags; + slab_flags_t slub_debug_local = slub_debug; + + /* + * If the slab cache is for debugging (e.g. kmemleak) then + * don't store user (stack trace) information by default, + * but let the user enable it via the command line below. + */ + if (flags & SLAB_NOLEAKTRACE) + slub_debug_local &= ~SLAB_STORE_USER; len = strlen(name); next_block = slub_debug_string; @@ -1449,7 +1464,7 @@ slab_flags_t kmem_cache_flags(unsigned int object_size, } } - return flags | slub_debug; + return flags | slub_debug_local; } #else /* !CONFIG_SLUB_DEBUG */ static inline void setup_object_debug(struct kmem_cache *s, @@ -1474,8 +1489,7 @@ static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n, static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page) {} slab_flags_t kmem_cache_flags(unsigned int object_size, - slab_flags_t flags, const char *name, - void (*ctor)(void *)) + slab_flags_t flags, const char *name) { return flags; } @@ -1514,7 +1528,7 @@ static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) static __always_inline void kfree_hook(void *x) { kmemleak_free(x); - kasan_kfree_large(x, _RET_IP_); + kasan_kfree_large(x); } static __always_inline bool slab_free_hook(struct kmem_cache *s, void *x) @@ -1544,7 +1558,7 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s, void *x) KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT); /* KASAN might put x into memory quarantine, delaying its reuse */ - return kasan_slab_free(s, x, _RET_IP_); + return kasan_slab_free(s, x); } static inline bool slab_free_freelist_hook(struct kmem_cache *s, @@ -1771,7 +1785,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) page->objects = oo_objects(oo); - account_slab_page(page, oo_order(oo), s); + account_slab_page(page, oo_order(oo), s, flags); page->slab_cache = s; __SetPageSlab(page); @@ -2153,9 +2167,9 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, { enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE }; struct kmem_cache_node *n = get_node(s, page_to_nid(page)); - int lock = 0; + int lock = 0, free_delta = 0; enum slab_modes l = M_NONE, m = M_NONE; - void *nextfree; + void *nextfree, *freelist_iter, *freelist_tail; int tail = DEACTIVATE_TO_HEAD; struct page new; struct page old; @@ -2166,45 +2180,34 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, } /* - * Stage one: Free all available per cpu objects back - * to the page freelist while it is still frozen. Leave the - * last one. - * - * There is no need to take the list->lock because the page - * is still frozen. + * Stage one: Count the objects on cpu's freelist as free_delta and + * remember the last object in freelist_tail for later splicing. */ - while (freelist && (nextfree = get_freepointer(s, freelist))) { - void *prior; - unsigned long counters; + freelist_tail = NULL; + freelist_iter = freelist; + while (freelist_iter) { + nextfree = get_freepointer(s, freelist_iter); /* * If 'nextfree' is invalid, it is possible that the object at - * 'freelist' is already corrupted. So isolate all objects - * starting at 'freelist'. + * 'freelist_iter' is already corrupted. So isolate all objects + * starting at 'freelist_iter' by skipping them. */ - if (freelist_corrupted(s, page, &freelist, nextfree)) + if (freelist_corrupted(s, page, &freelist_iter, nextfree)) break; - do { - prior = page->freelist; - counters = page->counters; - set_freepointer(s, freelist, prior); - new.counters = counters; - new.inuse--; - VM_BUG_ON(!new.frozen); - - } while (!__cmpxchg_double_slab(s, page, - prior, counters, - freelist, new.counters, - "drain percpu freelist")); + freelist_tail = freelist_iter; + free_delta++; - freelist = nextfree; + freelist_iter = nextfree; } /* - * Stage two: Ensure that the page is unfrozen while the - * list presence reflects the actual number of objects - * during unfreeze. + * Stage two: Unfreeze the page while splicing the per-cpu + * freelist to the head of page's freelist. + * + * Ensure that the page is unfrozen while the list presence + * reflects the actual number of objects during unfreeze. * * We setup the list membership and then perform a cmpxchg * with the count. If there is a mismatch then the page @@ -2217,15 +2220,15 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, */ redo: - old.freelist = page->freelist; - old.counters = page->counters; + old.freelist = READ_ONCE(page->freelist); + old.counters = READ_ONCE(page->counters); VM_BUG_ON(!old.frozen); /* Determine target state of the slab */ new.counters = old.counters; - if (freelist) { - new.inuse--; - set_freepointer(s, freelist, old.freelist); + if (freelist_tail) { + new.inuse -= free_delta; + set_freepointer(s, freelist_tail, old.freelist); new.freelist = freelist; } else new.freelist = old.freelist; @@ -2672,7 +2675,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, * ignore the node constraint */ if (unlikely(node != NUMA_NO_NODE && - !node_state(node, N_NORMAL_MEMORY))) + !node_isset(node, slab_nodes))) node = NUMA_NO_NODE; goto new_slab; } @@ -2683,7 +2686,7 @@ redo: * same as above but node_match() being false already * implies node != NUMA_NO_NODE */ - if (!node_state(node, N_NORMAL_MEMORY)) { + if (!node_isset(node, slab_nodes)) { node = NUMA_NO_NODE; goto redo; } else { @@ -3157,7 +3160,7 @@ void kmem_cache_free(struct kmem_cache *s, void *x) if (!s) return; slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_); - trace_kmem_cache_free(_RET_IP_, x); + trace_kmem_cache_free(_RET_IP_, x, s->name); } EXPORT_SYMBOL(kmem_cache_free); @@ -3266,7 +3269,7 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) if (!df.page) continue; - slab_free(df.s, df.page, df.freelist, df.tail, df.cnt,_RET_IP_); + slab_free(df.s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_); } while (likely(size)); } EXPORT_SYMBOL(kmem_cache_free_bulk); @@ -3586,7 +3589,7 @@ static int init_kmem_cache_nodes(struct kmem_cache *s) { int node; - for_each_node_state(node, N_NORMAL_MEMORY) { + for_each_node_mask(node, slab_nodes) { struct kmem_cache_node *n; if (slab_state == DOWN) { @@ -3797,7 +3800,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags) { - s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor); + s->flags = kmem_cache_flags(s->size, flags, s->name); #ifdef CONFIG_SLAB_FREELIST_HARDENED s->random = get_random_long(); #endif @@ -4039,8 +4042,8 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node) page = alloc_pages_node(node, flags, order); if (page) { ptr = page_address(page); - mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B, - PAGE_SIZE << order); + mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, + PAGE_SIZE << order); } return kmalloc_large_node_hook(ptr, size, flags); @@ -4171,8 +4174,8 @@ void kfree(const void *x) BUG_ON(!PageCompound(page)); kfree_hook(object); - mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B, - -(PAGE_SIZE << order)); + mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, + -(PAGE_SIZE << order)); __free_pages(page, order); return; } @@ -4267,8 +4270,6 @@ static int slab_mem_going_offline_callback(void *arg) static void slab_mem_offline_callback(void *arg) { - struct kmem_cache_node *n; - struct kmem_cache *s; struct memory_notify *marg = arg; int offline_node; @@ -4282,21 +4283,12 @@ static void slab_mem_offline_callback(void *arg) return; mutex_lock(&slab_mutex); - list_for_each_entry(s, &slab_caches, list) { - n = get_node(s, offline_node); - if (n) { - /* - * if n->nr_slabs > 0, slabs still exist on the node - * that is going down. We were unable to free them, - * and offline_pages() function shouldn't call this - * callback. So, we must fail. - */ - BUG_ON(slabs_node(s, offline_node)); - - s->node[offline_node] = NULL; - kmem_cache_free(kmem_cache_node, n); - } - } + node_clear(offline_node, slab_nodes); + /* + * We no longer free kmem_cache_node structures here, as it would be + * racy with all get_node() users, and infeasible to protect them with + * slab_mutex. + */ mutex_unlock(&slab_mutex); } @@ -4323,6 +4315,12 @@ static int slab_mem_going_online_callback(void *arg) mutex_lock(&slab_mutex); list_for_each_entry(s, &slab_caches, list) { /* + * The structure may already exist if the node was previously + * onlined and offlined. + */ + if (get_node(s, nid)) + continue; + /* * XXX: kmem_cache_alloc_node will fallback to other nodes * since memory is not yet available from the node that * is brought up. @@ -4335,6 +4333,11 @@ static int slab_mem_going_online_callback(void *arg) init_kmem_cache_node(n); s->node[nid] = n; } + /* + * Any cache created after this point will also have kmem_cache_node + * initialized for the new node. + */ + node_set(nid, slab_nodes); out: mutex_unlock(&slab_mutex); return ret; @@ -4415,6 +4418,7 @@ void __init kmem_cache_init(void) { static __initdata struct kmem_cache boot_kmem_cache, boot_kmem_cache_node; + int node; if (debug_guardpage_minorder()) slub_max_order = 0; @@ -4422,6 +4426,13 @@ void __init kmem_cache_init(void) kmem_cache_node = &boot_kmem_cache_node; kmem_cache = &boot_kmem_cache; + /* + * Initialize the nodemask for which we will allocate per node + * structures. Here we don't need taking slab_mutex yet. + */ + for_each_node_state(node, N_NORMAL_MEMORY) + node_set(node, slab_nodes); + create_boot_cache(kmem_cache_node, "kmem_cache_node", sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0); @@ -4932,22 +4943,6 @@ enum slab_stat_type { #define SO_OBJECTS (1 << SL_OBJECTS) #define SO_TOTAL (1 << SL_TOTAL) -#ifdef CONFIG_MEMCG -static bool memcg_sysfs_enabled = IS_ENABLED(CONFIG_SLUB_MEMCG_SYSFS_ON); - -static int __init setup_slub_memcg_sysfs(char *str) -{ - int v; - - if (get_option(&str, &v) > 0) - memcg_sysfs_enabled = v; - - return 1; -} - -__setup("slub_memcg_sysfs=", setup_slub_memcg_sysfs); -#endif - static ssize_t show_slab_objects(struct kmem_cache *s, char *buf, unsigned long flags) { |