From c31a910c74ed558461dc7eecf6168ccf805775ec Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Wed, 17 Aug 2022 19:18:10 +0900 Subject: mm/slab: move NUMA-related code to __do_cache_alloc() To implement slab_alloc_node() independent of NUMA configuration, move NUMA fallback/alternate allocation code into __do_cache_alloc(). One functional change here is not to check availability of node when allocating from local node. Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Vlastimil Babka Signed-off-by: Vlastimil Babka --- mm/slab.c | 68 +++++++++++++++++++++++++++++---------------------------------- 1 file changed, 31 insertions(+), 37 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 10e96137b44f..1656393f55cb 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3180,13 +3180,14 @@ must_grow: return obj ? obj : fallback_alloc(cachep, flags); } +static void *__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags, int nodeid); + static __always_inline void * slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, size_t orig_size, unsigned long caller) { unsigned long save_flags; void *ptr; - int slab_node = numa_mem_id(); struct obj_cgroup *objcg = NULL; bool init = false; @@ -3200,30 +3201,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, size_t orig_ goto out_hooks; local_irq_save(save_flags); - - if (nodeid == NUMA_NO_NODE) - nodeid = slab_node; - - if (unlikely(!get_node(cachep, nodeid))) { - /* Node not bootstrapped yet */ - ptr = fallback_alloc(cachep, flags); - goto out; - } - - if (nodeid == slab_node) { - /* - * Use the locally cached objects if possible. - * However ____cache_alloc does not allow fallback - * to other nodes. It may fail while we still have - * objects on other nodes available. 
- */ - ptr = ____cache_alloc(cachep, flags); - if (ptr) - goto out; - } - /* ___cache_alloc_node can fall back to other nodes */ - ptr = ____cache_alloc_node(cachep, flags, nodeid); -out: + ptr = __do_cache_alloc(cachep, flags, nodeid); local_irq_restore(save_flags); ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); init = slab_want_init_on_alloc(flags, cachep); @@ -3234,31 +3212,46 @@ out_hooks: } static __always_inline void * -__do_cache_alloc(struct kmem_cache *cache, gfp_t flags) +__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags, int nodeid) { - void *objp; + void *objp = NULL; + int slab_node = numa_mem_id(); - if (current->mempolicy || cpuset_do_slab_mem_spread()) { - objp = alternate_node_alloc(cache, flags); - if (objp) - goto out; + if (nodeid == NUMA_NO_NODE) { + if (current->mempolicy || cpuset_do_slab_mem_spread()) { + objp = alternate_node_alloc(cachep, flags); + if (objp) + goto out; + } + /* + * Use the locally cached objects if possible. + * However ____cache_alloc does not allow fallback + * to other nodes. It may fail while we still have + * objects on other nodes available. + */ + objp = ____cache_alloc(cachep, flags); + nodeid = slab_node; + } else if (nodeid == slab_node) { + objp = ____cache_alloc(cachep, flags); + } else if (!get_node(cachep, nodeid)) { + /* Node not bootstrapped yet */ + objp = fallback_alloc(cachep, flags); + goto out; } - objp = ____cache_alloc(cache, flags); /* * We may just have run out of memory on the local node. 
* ____cache_alloc_node() knows how to locate memory on other nodes */ if (!objp) - objp = ____cache_alloc_node(cache, flags, numa_mem_id()); - + objp = ____cache_alloc_node(cachep, flags, nodeid); out: return objp; } #else static __always_inline void * -__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags) +__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags, int nodeid __maybe_unused) { return ____cache_alloc(cachep, flags); } @@ -3284,7 +3277,7 @@ slab_alloc(struct kmem_cache *cachep, struct list_lru *lru, gfp_t flags, goto out; local_irq_save(save_flags); - objp = __do_cache_alloc(cachep, flags); + objp = __do_cache_alloc(cachep, flags, NUMA_NO_NODE); local_irq_restore(save_flags); objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); prefetchw(objp); @@ -3521,7 +3514,8 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, local_irq_disable(); for (i = 0; i < size; i++) { - void *objp = kfence_alloc(s, s->object_size, flags) ?: __do_cache_alloc(s, flags); + void *objp = kfence_alloc(s, s->object_size, flags) ?: + __do_cache_alloc(s, flags, NUMA_NO_NODE); if (unlikely(!objp)) goto error; -- cgit v1.2.3 From 07588d726f8d320215dcf6c79a28fe6b1bab6255 Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Wed, 17 Aug 2022 19:18:11 +0900 Subject: mm/slab: cleanup slab_alloc() and slab_alloc_node() Make slab_alloc_node() available even when CONFIG_NUMA=n and make slab_alloc() wrapper of slab_alloc_node(). This is necessary for further cleanup. Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Vlastimil Babka Signed-off-by: Vlastimil Babka --- mm/slab.c | 49 +++++++++++++------------------------------------ 1 file changed, 13 insertions(+), 36 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 1656393f55cb..748dd085f38e 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3180,37 +3180,6 @@ must_grow: return obj ? 
obj : fallback_alloc(cachep, flags); } -static void *__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags, int nodeid); - -static __always_inline void * -slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, size_t orig_size, - unsigned long caller) -{ - unsigned long save_flags; - void *ptr; - struct obj_cgroup *objcg = NULL; - bool init = false; - - flags &= gfp_allowed_mask; - cachep = slab_pre_alloc_hook(cachep, NULL, &objcg, 1, flags); - if (unlikely(!cachep)) - return NULL; - - ptr = kfence_alloc(cachep, orig_size, flags); - if (unlikely(ptr)) - goto out_hooks; - - local_irq_save(save_flags); - ptr = __do_cache_alloc(cachep, flags, nodeid); - local_irq_restore(save_flags); - ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); - init = slab_want_init_on_alloc(flags, cachep); - -out_hooks: - slab_post_alloc_hook(cachep, objcg, flags, 1, &ptr, init); - return ptr; -} - static __always_inline void * __do_cache_alloc(struct kmem_cache *cachep, gfp_t flags, int nodeid) { @@ -3259,8 +3228,8 @@ __do_cache_alloc(struct kmem_cache *cachep, gfp_t flags, int nodeid __maybe_unus #endif /* CONFIG_NUMA */ static __always_inline void * -slab_alloc(struct kmem_cache *cachep, struct list_lru *lru, gfp_t flags, - size_t orig_size, unsigned long caller) +slab_alloc_node(struct kmem_cache *cachep, struct list_lru *lru, gfp_t flags, + int nodeid, size_t orig_size, unsigned long caller) { unsigned long save_flags; void *objp; @@ -3277,7 +3246,7 @@ slab_alloc(struct kmem_cache *cachep, struct list_lru *lru, gfp_t flags, goto out; local_irq_save(save_flags); - objp = __do_cache_alloc(cachep, flags, NUMA_NO_NODE); + objp = __do_cache_alloc(cachep, flags, nodeid); local_irq_restore(save_flags); objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); prefetchw(objp); @@ -3288,6 +3257,14 @@ out: return objp; } +static __always_inline void * +slab_alloc(struct kmem_cache *cachep, struct list_lru *lru, gfp_t flags, + size_t orig_size, unsigned long 
caller) +{ + return slab_alloc_node(cachep, lru, flags, NUMA_NO_NODE, orig_size, + caller); +} + /* * Caller needs to acquire correct kmem_cache_node's list_lock * @list: List of detached free slabs should be freed by caller @@ -3574,7 +3551,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_trace); */ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) { - void *ret = slab_alloc_node(cachep, flags, nodeid, cachep->object_size, _RET_IP_); + void *ret = slab_alloc_node(cachep, NULL, flags, nodeid, cachep->object_size, _RET_IP_); trace_kmem_cache_alloc_node(_RET_IP_, ret, cachep, cachep->object_size, cachep->size, @@ -3592,7 +3569,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep, { void *ret; - ret = slab_alloc_node(cachep, flags, nodeid, size, _RET_IP_); + ret = slab_alloc_node(cachep, NULL, flags, nodeid, size, _RET_IP_); ret = kasan_kmalloc(cachep, ret, size, flags); trace_kmalloc_node(_RET_IP_, ret, cachep, -- cgit v1.2.3 From f78a03f6e28be0283f73d3c18b54837b638a8ccf Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Wed, 17 Aug 2022 19:18:12 +0900 Subject: mm/slab_common: remove CONFIG_NUMA ifdefs for common kmalloc functions Now that slab_alloc_node() is available for SLAB when CONFIG_NUMA=n, remove CONFIG_NUMA ifdefs for common kmalloc functions. 
Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Vlastimil Babka Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 28 ---------------------------- mm/slab.c | 2 -- mm/slob.c | 5 +---- mm/slub.c | 6 ------ 4 files changed, 1 insertion(+), 40 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index 0fefdf528e0d..4754c834b0e3 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -456,38 +456,18 @@ static __always_inline void kfree_bulk(size_t size, void **p) kmem_cache_free_bulk(NULL, size, p); } -#ifdef CONFIG_NUMA void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment __alloc_size(1); void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment __malloc; -#else -static __always_inline __alloc_size(1) void *__kmalloc_node(size_t size, gfp_t flags, int node) -{ - return __kmalloc(size, flags); -} - -static __always_inline void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node) -{ - return kmem_cache_alloc(s, flags); -} -#endif #ifdef CONFIG_TRACING extern void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t flags, size_t size) __assume_slab_alignment __alloc_size(3); -#ifdef CONFIG_NUMA extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, int node, size_t size) __assume_slab_alignment __alloc_size(4); -#else -static __always_inline __alloc_size(4) void *kmem_cache_alloc_node_trace(struct kmem_cache *s, - gfp_t gfpflags, int node, size_t size) -{ - return kmem_cache_alloc_trace(s, gfpflags, size); -} -#endif /* CONFIG_NUMA */ #else /* CONFIG_TRACING */ static __always_inline __alloc_size(3) void *kmem_cache_alloc_trace(struct kmem_cache *s, @@ -701,20 +681,12 @@ static inline __alloc_size(1, 2) void *kcalloc_node(size_t n, size_t size, gfp_t } -#ifdef CONFIG_NUMA extern void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node, unsigned long caller) __alloc_size(1); #define 
kmalloc_node_track_caller(size, flags, node) \ __kmalloc_node_track_caller(size, flags, node, \ _RET_IP_) -#else /* CONFIG_NUMA */ - -#define kmalloc_node_track_caller(size, flags, node) \ - kmalloc_track_caller(size, flags) - -#endif /* CONFIG_NUMA */ - /* * Shortcuts */ diff --git a/mm/slab.c b/mm/slab.c index 748dd085f38e..0acd65358c83 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3535,7 +3535,6 @@ kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size) EXPORT_SYMBOL(kmem_cache_alloc_trace); #endif -#ifdef CONFIG_NUMA /** * kmem_cache_alloc_node - Allocate an object on the specified node * @cachep: The cache to allocate from. @@ -3609,7 +3608,6 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t flags, return __do_kmalloc_node(size, flags, node, caller); } EXPORT_SYMBOL(__kmalloc_node_track_caller); -#endif /* CONFIG_NUMA */ #ifdef CONFIG_PRINTK void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) diff --git a/mm/slob.c b/mm/slob.c index 2bd4f476c340..74d850967213 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -536,14 +536,12 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfp, unsigned long caller) } EXPORT_SYMBOL(__kmalloc_track_caller); -#ifdef CONFIG_NUMA void *__kmalloc_node_track_caller(size_t size, gfp_t gfp, int node, unsigned long caller) { return __do_kmalloc_node(size, gfp, node, caller); } EXPORT_SYMBOL(__kmalloc_node_track_caller); -#endif void kfree(const void *block) { @@ -647,7 +645,7 @@ void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru, gfp_ return slob_alloc_node(cachep, flags, NUMA_NO_NODE); } EXPORT_SYMBOL(kmem_cache_alloc_lru); -#ifdef CONFIG_NUMA + void *__kmalloc_node(size_t size, gfp_t gfp, int node) { return __do_kmalloc_node(size, gfp, node, _RET_IP_); @@ -659,7 +657,6 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t gfp, int node) return slob_alloc_node(cachep, gfp, node); } EXPORT_SYMBOL(kmem_cache_alloc_node); -#endif static void 
__kmem_cache_free(void *b, int size) { diff --git a/mm/slub.c b/mm/slub.c index 862dbd9af4f5..b29b3c9d3175 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3287,7 +3287,6 @@ void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) EXPORT_SYMBOL(kmem_cache_alloc_trace); #endif -#ifdef CONFIG_NUMA void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) { void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size); @@ -3314,7 +3313,6 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s, } EXPORT_SYMBOL(kmem_cache_alloc_node_trace); #endif -#endif /* CONFIG_NUMA */ /* * Slow path handling. This may still be called frequently since objects @@ -4427,7 +4425,6 @@ void *__kmalloc(size_t size, gfp_t flags) } EXPORT_SYMBOL(__kmalloc); -#ifdef CONFIG_NUMA static void *kmalloc_large_node(size_t size, gfp_t flags, int node) { struct page *page; @@ -4474,7 +4471,6 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) return ret; } EXPORT_SYMBOL(__kmalloc_node); -#endif /* CONFIG_NUMA */ #ifdef CONFIG_HARDENED_USERCOPY /* @@ -4930,7 +4926,6 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) } EXPORT_SYMBOL(__kmalloc_track_caller); -#ifdef CONFIG_NUMA void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, int node, unsigned long caller) { @@ -4960,7 +4955,6 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, return ret; } EXPORT_SYMBOL(__kmalloc_node_track_caller); -#endif #ifdef CONFIG_SYSFS static int count_inuse(struct slab *slab) -- cgit v1.2.3 From c45248db04f8e3aca4798d67a394fb9cc2168118 Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Wed, 17 Aug 2022 19:18:13 +0900 Subject: mm/slab_common: cleanup kmalloc_track_caller() Make kmalloc_track_caller() wrapper of kmalloc_node_track_caller(). 
Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Vlastimil Babka Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 17 ++++++++--------- mm/slab.c | 6 ------ mm/slob.c | 6 ------ mm/slub.c | 22 ---------------------- 4 files changed, 8 insertions(+), 43 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index 4754c834b0e3..a0e57df3d5a4 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -651,6 +651,12 @@ static inline __alloc_size(1, 2) void *kcalloc(size_t n, size_t size, gfp_t flag return kmalloc_array(n, size, flags | __GFP_ZERO); } +void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node, + unsigned long caller) __alloc_size(1); +#define kmalloc_node_track_caller(size, flags, node) \ + __kmalloc_node_track_caller(size, flags, node, \ + _RET_IP_) + /* * kmalloc_track_caller is a special version of kmalloc that records the * calling function of the routine calling it for slab leak tracking instead @@ -659,9 +665,9 @@ static inline __alloc_size(1, 2) void *kcalloc(size_t n, size_t size, gfp_t flag * allocator where we care about the real place the memory allocation * request comes from. 
*/ -extern void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller); #define kmalloc_track_caller(size, flags) \ - __kmalloc_track_caller(size, flags, _RET_IP_) + __kmalloc_node_track_caller(size, flags, \ + NUMA_NO_NODE, _RET_IP_) static inline __alloc_size(1, 2) void *kmalloc_array_node(size_t n, size_t size, gfp_t flags, int node) @@ -680,13 +686,6 @@ static inline __alloc_size(1, 2) void *kcalloc_node(size_t n, size_t size, gfp_t return kmalloc_array_node(n, size, flags | __GFP_ZERO, node); } - -extern void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node, - unsigned long caller) __alloc_size(1); -#define kmalloc_node_track_caller(size, flags, node) \ - __kmalloc_node_track_caller(size, flags, node, \ - _RET_IP_) - /* * Shortcuts */ diff --git a/mm/slab.c b/mm/slab.c index 0acd65358c83..611e630ff860 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3665,12 +3665,6 @@ void *__kmalloc(size_t size, gfp_t flags) } EXPORT_SYMBOL(__kmalloc); -void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller) -{ - return __do_kmalloc(size, flags, caller); -} -EXPORT_SYMBOL(__kmalloc_track_caller); - /** * kmem_cache_free - Deallocate an object * @cachep: The cache the allocation was from. 
diff --git a/mm/slob.c b/mm/slob.c index 74d850967213..96b08acd72ce 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -530,12 +530,6 @@ void *__kmalloc(size_t size, gfp_t gfp) } EXPORT_SYMBOL(__kmalloc); -void *__kmalloc_track_caller(size_t size, gfp_t gfp, unsigned long caller) -{ - return __do_kmalloc_node(size, gfp, NUMA_NO_NODE, caller); -} -EXPORT_SYMBOL(__kmalloc_track_caller); - void *__kmalloc_node_track_caller(size_t size, gfp_t gfp, int node, unsigned long caller) { diff --git a/mm/slub.c b/mm/slub.c index b29b3c9d3175..c82a4062f730 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4904,28 +4904,6 @@ int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags) return 0; } -void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) -{ - struct kmem_cache *s; - void *ret; - - if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) - return kmalloc_large(size, gfpflags); - - s = kmalloc_slab(size, gfpflags); - - if (unlikely(ZERO_OR_NULL_PTR(s))) - return s; - - ret = slab_alloc(s, NULL, gfpflags, caller, size); - - /* Honor the call site pointer we received. */ - trace_kmalloc(caller, ret, s, size, s->size, gfpflags); - - return ret; -} -EXPORT_SYMBOL(__kmalloc_track_caller); - void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, int node, unsigned long caller) { -- cgit v1.2.3 From 0f853b2e6dd9580103484a098e9c973a67d127ac Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Wed, 17 Aug 2022 19:18:14 +0900 Subject: mm/sl[au]b: factor out __do_kmalloc_node() __kmalloc(), __kmalloc_node(), __kmalloc_node_track_caller() mostly do same job. Factor out common code into __do_kmalloc_node(). Note that this patch also fixes missing kasan_kmalloc() in SLUB's __kmalloc_node_track_caller(). 
Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Vlastimil Babka Signed-off-by: Vlastimil Babka --- mm/slab.c | 30 +-------------------------- mm/slub.c | 71 +++++++++++++++++---------------------------------------------- 2 files changed, 20 insertions(+), 81 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 611e630ff860..8c08d7f3dead 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3631,37 +3631,9 @@ void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) } #endif -/** - * __do_kmalloc - allocate memory - * @size: how many bytes of memory are required. - * @flags: the type of memory to allocate (see kmalloc). - * @caller: function caller for debug tracking of the caller - * - * Return: pointer to the allocated memory or %NULL in case of error - */ -static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, - unsigned long caller) -{ - struct kmem_cache *cachep; - void *ret; - - if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) - return NULL; - cachep = kmalloc_slab(size, flags); - if (unlikely(ZERO_OR_NULL_PTR(cachep))) - return cachep; - ret = slab_alloc(cachep, NULL, flags, size, caller); - - ret = kasan_kmalloc(cachep, ret, size, flags); - trace_kmalloc(caller, ret, cachep, - size, cachep->size, flags); - - return ret; -} - void *__kmalloc(size_t size, gfp_t flags) { - return __do_kmalloc(size, flags, _RET_IP_); + return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_); } EXPORT_SYMBOL(__kmalloc); diff --git a/mm/slub.c b/mm/slub.c index c82a4062f730..f9929ba858ec 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4402,29 +4402,6 @@ static int __init setup_slub_min_objects(char *str) __setup("slub_min_objects=", setup_slub_min_objects); -void *__kmalloc(size_t size, gfp_t flags) -{ - struct kmem_cache *s; - void *ret; - - if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) - return kmalloc_large(size, flags); - - s = kmalloc_slab(size, flags); - - if (unlikely(ZERO_OR_NULL_PTR(s))) - return s; - - ret = slab_alloc(s, NULL, 
flags, _RET_IP_, size); - - trace_kmalloc(_RET_IP_, ret, s, size, s->size, flags); - - ret = kasan_kmalloc(s, ret, size, flags); - - return ret; -} -EXPORT_SYMBOL(__kmalloc); - static void *kmalloc_large_node(size_t size, gfp_t flags, int node) { struct page *page; @@ -4442,7 +4419,8 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node) return kmalloc_large_node_hook(ptr, size, flags); } -void *__kmalloc_node(size_t size, gfp_t flags, int node) +static __always_inline +void *__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller) { struct kmem_cache *s; void *ret; @@ -4450,7 +4428,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) { ret = kmalloc_large_node(size, flags, node); - trace_kmalloc_node(_RET_IP_, ret, NULL, + trace_kmalloc_node(caller, ret, NULL, size, PAGE_SIZE << get_order(size), flags, node); @@ -4462,16 +4440,28 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) if (unlikely(ZERO_OR_NULL_PTR(s))) return s; - ret = slab_alloc_node(s, NULL, flags, node, _RET_IP_, size); + ret = slab_alloc_node(s, NULL, flags, node, caller, size); - trace_kmalloc_node(_RET_IP_, ret, s, size, s->size, flags, node); + trace_kmalloc_node(caller, ret, s, size, s->size, flags, node); ret = kasan_kmalloc(s, ret, size, flags); return ret; } + +void *__kmalloc_node(size_t size, gfp_t flags, int node) +{ + return __do_kmalloc_node(size, flags, node, _RET_IP_); +} EXPORT_SYMBOL(__kmalloc_node); +void *__kmalloc(size_t size, gfp_t flags) +{ + return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_); +} +EXPORT_SYMBOL(__kmalloc); + + #ifdef CONFIG_HARDENED_USERCOPY /* * Rejects incorrectly sized objects and objects that are to be copied @@ -4905,32 +4895,9 @@ int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags) } void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, - int node, unsigned long caller) + int node, unsigned long caller) { - struct kmem_cache 
*s; - void *ret; - - if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) { - ret = kmalloc_large_node(size, gfpflags, node); - - trace_kmalloc_node(caller, ret, NULL, - size, PAGE_SIZE << get_order(size), - gfpflags, node); - - return ret; - } - - s = kmalloc_slab(size, gfpflags); - - if (unlikely(ZERO_OR_NULL_PTR(s))) - return s; - - ret = slab_alloc_node(s, NULL, gfpflags, node, caller, size); - - /* Honor the call site pointer we received. */ - trace_kmalloc_node(caller, ret, s, size, s->size, gfpflags, node); - - return ret; + return __do_kmalloc_node(size, gfpflags, node, caller); } EXPORT_SYMBOL(__kmalloc_node_track_caller); -- cgit v1.2.3 From e4c98d68959e51646c379e157bad36ef0d7bf467 Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Wed, 17 Aug 2022 19:18:15 +0900 Subject: mm/slab_common: fold kmalloc_order_trace() into kmalloc_large() There is no caller of kmalloc_order_trace() except kmalloc_large(). Fold it into kmalloc_large() and remove kmalloc_order{,_trace}(). Also add tracepoint in kmalloc_large() that was previously in kmalloc_order_trace(). 
Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Vlastimil Babka Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 22 ++-------------------- mm/slab_common.c | 17 ++++------------- 2 files changed, 6 insertions(+), 33 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index a0e57df3d5a4..15a4c59da59e 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -489,26 +489,8 @@ static __always_inline void *kmem_cache_alloc_node_trace(struct kmem_cache *s, g } #endif /* CONFIG_TRACING */ -extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment - __alloc_size(1); - -#ifdef CONFIG_TRACING -extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) - __assume_page_alignment __alloc_size(1); -#else -static __always_inline __alloc_size(1) void *kmalloc_order_trace(size_t size, gfp_t flags, - unsigned int order) -{ - return kmalloc_order(size, flags, order); -} -#endif - -static __always_inline __alloc_size(1) void *kmalloc_large(size_t size, gfp_t flags) -{ - unsigned int order = get_order(size); - return kmalloc_order_trace(size, flags, order); -} - +void *kmalloc_large(size_t size, gfp_t flags) __assume_page_alignment + __alloc_size(1); /** * kmalloc - allocate memory * @size: how many bytes of memory are required. diff --git a/mm/slab_common.c b/mm/slab_common.c index 17996649cfe3..8b1988544b89 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -905,16 +905,16 @@ gfp_t kmalloc_fix_flags(gfp_t flags) * directly to the page allocator. We use __GFP_COMP, because we will need to * know the allocation order to free the pages properly in kfree. 
*/ -void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) +void *kmalloc_large(size_t size, gfp_t flags) { void *ret = NULL; struct page *page; + unsigned int order = get_order(size); if (unlikely(flags & GFP_SLAB_BUG_MASK)) flags = kmalloc_fix_flags(flags); - flags |= __GFP_COMP; - page = alloc_pages(flags, order); + page = alloc_pages(flags | __GFP_COMP, order); if (likely(page)) { ret = page_address(page); mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, @@ -923,19 +923,10 @@ void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) ret = kasan_kmalloc_large(ret, size, flags); /* As ret might get tagged, call kmemleak hook after KASAN. */ kmemleak_alloc(ret, size, 1, flags); - return ret; -} -EXPORT_SYMBOL(kmalloc_order); - -#ifdef CONFIG_TRACING -void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) -{ - void *ret = kmalloc_order(size, flags, order); trace_kmalloc(_RET_IP_, ret, NULL, size, PAGE_SIZE << order, flags); return ret; } -EXPORT_SYMBOL(kmalloc_order_trace); -#endif +EXPORT_SYMBOL(kmalloc_large); #ifdef CONFIG_SLAB_FREELIST_RANDOM /* Randomize a generic freelist */ -- cgit v1.2.3 From a0c3b940023eef3fa005b2bc37d9312712331dcb Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Wed, 17 Aug 2022 19:18:16 +0900 Subject: mm/slub: move kmalloc_large_node() to slab_common.c In later patch SLAB will also pass requests larger than order-1 page to page allocator. Move kmalloc_large_node() to slab_common.c. Fold kmalloc_large_node_hook() into kmalloc_large_node() as there is no other caller. 
Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Vlastimil Babka Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 4 ++++ mm/slab_common.c | 22 ++++++++++++++++++++++ mm/slub.c | 25 ------------------------- 3 files changed, 26 insertions(+), 25 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index 15a4c59da59e..082499306098 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -491,6 +491,10 @@ static __always_inline void *kmem_cache_alloc_node_trace(struct kmem_cache *s, g void *kmalloc_large(size_t size, gfp_t flags) __assume_page_alignment __alloc_size(1); + +void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_alignment + __alloc_size(1); + /** * kmalloc - allocate memory * @size: how many bytes of memory are required. diff --git a/mm/slab_common.c b/mm/slab_common.c index 8b1988544b89..1b9101f9cb21 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -928,6 +928,28 @@ void *kmalloc_large(size_t size, gfp_t flags) } EXPORT_SYMBOL(kmalloc_large); +void *kmalloc_large_node(size_t size, gfp_t flags, int node) +{ + struct page *page; + void *ptr = NULL; + unsigned int order = get_order(size); + + flags |= __GFP_COMP; + page = alloc_pages_node(node, flags, order); + if (page) { + ptr = page_address(page); + mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, + PAGE_SIZE << order); + } + + ptr = kasan_kmalloc_large(ptr, size, flags); + /* As ptr might get tagged, call kmemleak hook after KASAN. 
*/ + kmemleak_alloc(ptr, size, 1, flags); + + return ptr; +} +EXPORT_SYMBOL(kmalloc_large_node); + #ifdef CONFIG_SLAB_FREELIST_RANDOM /* Randomize a generic freelist */ static void freelist_randomize(struct rnd_state *state, unsigned int *list, diff --git a/mm/slub.c b/mm/slub.c index f9929ba858ec..5e7819ade2c4 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1704,14 +1704,6 @@ static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab, * Hooks for other subsystems that check memory allocations. In a typical * production configuration these hooks all should produce no code at all. */ -static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) -{ - ptr = kasan_kmalloc_large(ptr, size, flags); - /* As ptr might get tagged, call kmemleak hook after KASAN. */ - kmemleak_alloc(ptr, size, 1, flags); - return ptr; -} - static __always_inline void kfree_hook(void *x) { kmemleak_free(x); @@ -4402,23 +4394,6 @@ static int __init setup_slub_min_objects(char *str) __setup("slub_min_objects=", setup_slub_min_objects); -static void *kmalloc_large_node(size_t size, gfp_t flags, int node) -{ - struct page *page; - void *ptr = NULL; - unsigned int order = get_order(size); - - flags |= __GFP_COMP; - page = alloc_pages_node(node, flags, order); - if (page) { - ptr = page_address(page); - mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, - PAGE_SIZE << order); - } - - return kmalloc_large_node_hook(ptr, size, flags); -} - static __always_inline void *__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller) { -- cgit v1.2.3 From bf37d791022ecfb1279ac88c5448a53f1ae40a59 Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Wed, 17 Aug 2022 19:18:17 +0900 Subject: mm/slab_common: kmalloc_node: pass large requests to page allocator Now that kmalloc_large_node() is in common code, pass large requests to page allocator in kmalloc_node() using kmalloc_large_node(). 
One problem is that currently there is no tracepoint in kmalloc_large_node(). Instead of simply putting tracepoint in it, use kmalloc_large_node{,_notrace} depending on its caller to show useful address for both inlined kmalloc_node() and __kmalloc_node_track_caller() when large objects are allocated. Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Vlastimil Babka Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 26 +++++++++++++++++++------- mm/slab.h | 2 ++ mm/slab_common.c | 11 ++++++++++- mm/slub.c | 2 +- 4 files changed, 32 insertions(+), 9 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index 082499306098..fd2e129fc813 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -571,23 +571,35 @@ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags) return __kmalloc(size, flags); } +#ifndef CONFIG_SLOB static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node) { -#ifndef CONFIG_SLOB - if (__builtin_constant_p(size) && - size <= KMALLOC_MAX_CACHE_SIZE) { - unsigned int i = kmalloc_index(size); + if (__builtin_constant_p(size)) { + unsigned int index; - if (!i) + if (size > KMALLOC_MAX_CACHE_SIZE) + return kmalloc_large_node(size, flags, node); + + index = kmalloc_index(size); + + if (!index) return ZERO_SIZE_PTR; return kmem_cache_alloc_node_trace( - kmalloc_caches[kmalloc_type(flags)][i], + kmalloc_caches[kmalloc_type(flags)][index], flags, node, size); } -#endif return __kmalloc_node(size, flags, node); } +#else +static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node) +{ + if (__builtin_constant_p(size) && size > KMALLOC_MAX_CACHE_SIZE) + return kmalloc_large_node(size, flags, node); + + return __kmalloc_node(size, flags, node); +} +#endif /** * kmalloc_array - allocate memory for an array. 
diff --git a/mm/slab.h b/mm/slab.h index 4ec82bec15ec..801a207a5cd7 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -275,6 +275,8 @@ void create_kmalloc_caches(slab_flags_t); struct kmem_cache *kmalloc_slab(size_t, gfp_t); #endif +void *kmalloc_large_node_notrace(size_t size, gfp_t flags, int node); + gfp_t kmalloc_fix_flags(gfp_t flags); /* Functions provided by the slab allocators */ diff --git a/mm/slab_common.c b/mm/slab_common.c index 1b9101f9cb21..7a0942d54424 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -928,7 +928,7 @@ void *kmalloc_large(size_t size, gfp_t flags) } EXPORT_SYMBOL(kmalloc_large); -void *kmalloc_large_node(size_t size, gfp_t flags, int node) +void *kmalloc_large_node_notrace(size_t size, gfp_t flags, int node) { struct page *page; void *ptr = NULL; @@ -948,6 +948,15 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node) return ptr; } + +void *kmalloc_large_node(size_t size, gfp_t flags, int node) +{ + void *ret = kmalloc_large_node_notrace(size, flags, node); + + trace_kmalloc_node(_RET_IP_, ret, NULL, size, + PAGE_SIZE << get_order(size), flags, node); + return ret; +} EXPORT_SYMBOL(kmalloc_large_node); #ifdef CONFIG_SLAB_FREELIST_RANDOM diff --git a/mm/slub.c b/mm/slub.c index 5e7819ade2c4..165fe87af204 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4401,7 +4401,7 @@ void *__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller void *ret; if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) { - ret = kmalloc_large_node(size, flags, node); + ret = kmalloc_large_node_notrace(size, flags, node); trace_kmalloc_node(caller, ret, NULL, size, PAGE_SIZE << get_order(size), -- cgit v1.2.3 From c4cab557521a73bd803e5c6f613b4e00bd3c4662 Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Wed, 17 Aug 2022 19:18:18 +0900 Subject: mm/slab_common: cleanup kmalloc_large() Now that kmalloc_large() and kmalloc_large_node() do mostly same job, make kmalloc_large() wrapper of kmalloc_large_node_notrace(). 
In the meantime, add missing flag fix code in kmalloc_large_node_notrace(). Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Vlastimil Babka Signed-off-by: Vlastimil Babka --- mm/slab_common.c | 35 +++++++++++++---------------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 7a0942d54424..51ccd0545816 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -905,28 +905,6 @@ gfp_t kmalloc_fix_flags(gfp_t flags) * directly to the page allocator. We use __GFP_COMP, because we will need to * know the allocation order to free the pages properly in kfree. */ -void *kmalloc_large(size_t size, gfp_t flags) -{ - void *ret = NULL; - struct page *page; - unsigned int order = get_order(size); - - if (unlikely(flags & GFP_SLAB_BUG_MASK)) - flags = kmalloc_fix_flags(flags); - - page = alloc_pages(flags | __GFP_COMP, order); - if (likely(page)) { - ret = page_address(page); - mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, - PAGE_SIZE << order); - } - ret = kasan_kmalloc_large(ret, size, flags); - /* As ret might get tagged, call kmemleak hook after KASAN. 
*/ - kmemleak_alloc(ret, size, 1, flags); - trace_kmalloc(_RET_IP_, ret, NULL, size, PAGE_SIZE << order, flags); - return ret; -} -EXPORT_SYMBOL(kmalloc_large); void *kmalloc_large_node_notrace(size_t size, gfp_t flags, int node) { @@ -934,6 +912,9 @@ void *kmalloc_large_node_notrace(size_t size, gfp_t flags, int node) void *ptr = NULL; unsigned int order = get_order(size); + if (unlikely(flags & GFP_SLAB_BUG_MASK)) + flags = kmalloc_fix_flags(flags); + flags |= __GFP_COMP; page = alloc_pages_node(node, flags, order); if (page) { @@ -949,6 +930,16 @@ void *kmalloc_large_node_notrace(size_t size, gfp_t flags, int node) return ptr; } +void *kmalloc_large(size_t size, gfp_t flags) +{ + void *ret = kmalloc_large_node_notrace(size, flags, NUMA_NO_NODE); + + trace_kmalloc(_RET_IP_, ret, NULL, size, + PAGE_SIZE << get_order(size), flags); + return ret; +} +EXPORT_SYMBOL(kmalloc_large); + void *kmalloc_large_node(size_t size, gfp_t flags, int node) { void *ret = kmalloc_large_node_notrace(size, flags, node); -- cgit v1.2.3 From d6a71648dbc0ca5520cba16a8fdce8d37ae74218 Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Wed, 17 Aug 2022 19:18:19 +0900 Subject: mm/slab: kmalloc: pass requests larger than order-1 page to page allocator There is not much benefit for serving large objects in kmalloc(). Let's pass large requests to page allocator like SLUB for better maintenance of common code. 
Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Vlastimil Babka Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 23 +++++--------------- mm/slab.c | 60 ++++++++++++++++++++++++++++++++++++---------------- mm/slab.h | 3 +++ mm/slab_common.c | 25 ++++++++++++++++------ mm/slub.c | 19 ----------------- 5 files changed, 68 insertions(+), 62 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index fd2e129fc813..4ee5b2fed164 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -243,27 +243,17 @@ static inline unsigned int arch_slab_minalign(void) #ifdef CONFIG_SLAB /* - * The largest kmalloc size supported by the SLAB allocators is - * 32 megabyte (2^25) or the maximum allocatable page order if that is - * less than 32 MB. - * - * WARNING: Its not easy to increase this value since the allocators have - * to do various tricks to work around compiler limitations in order to - * ensure proper constant folding. + * SLAB and SLUB directly allocates requests fitting in to an order-1 page + * (PAGE_SIZE*2). Larger requests are passed to the page allocator. */ -#define KMALLOC_SHIFT_HIGH ((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \ - (MAX_ORDER + PAGE_SHIFT - 1) : 25) -#define KMALLOC_SHIFT_MAX KMALLOC_SHIFT_HIGH +#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1) +#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1) #ifndef KMALLOC_SHIFT_LOW #define KMALLOC_SHIFT_LOW 5 #endif #endif #ifdef CONFIG_SLUB -/* - * SLUB directly allocates requests fitting in to an order-1 page - * (PAGE_SIZE*2). Larger requests are passed to the page allocator. 
- */ #define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1) #define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1) #ifndef KMALLOC_SHIFT_LOW @@ -415,10 +405,6 @@ static __always_inline unsigned int __kmalloc_index(size_t size, if (size <= 512 * 1024) return 19; if (size <= 1024 * 1024) return 20; if (size <= 2 * 1024 * 1024) return 21; - if (size <= 4 * 1024 * 1024) return 22; - if (size <= 8 * 1024 * 1024) return 23; - if (size <= 16 * 1024 * 1024) return 24; - if (size <= 32 * 1024 * 1024) return 25; if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && size_is_constant) BUILD_BUG_ON_MSG(1, "unexpected size in kmalloc_index()"); @@ -428,6 +414,7 @@ static __always_inline unsigned int __kmalloc_index(size_t size, /* Will never be reached. Needed because the compiler may complain */ return -1; } +static_assert(PAGE_SHIFT <= 20); #define kmalloc_index(s) __kmalloc_index(s, true) #endif /* !CONFIG_SLOB */ diff --git a/mm/slab.c b/mm/slab.c index 8c08d7f3dead..10c9af904410 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3585,11 +3585,19 @@ __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller) struct kmem_cache *cachep; void *ret; - if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) - return NULL; + if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) { + ret = kmalloc_large_node_notrace(size, flags, node); + + trace_kmalloc_node(caller, ret, NULL, size, + PAGE_SIZE << get_order(size), + flags, node); + return ret; + } + cachep = kmalloc_slab(size, flags); if (unlikely(ZERO_OR_NULL_PTR(cachep))) return cachep; + ret = kmem_cache_alloc_node_trace(cachep, flags, node, size); ret = kasan_kmalloc(cachep, ret, size, flags); @@ -3664,17 +3672,27 @@ EXPORT_SYMBOL(kmem_cache_free); void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p) { - struct kmem_cache *s; - size_t i; local_irq_disable(); - for (i = 0; i < size; i++) { + for (int i = 0; i < size; i++) { void *objp = p[i]; + struct kmem_cache *s; - if (!orig_s) /* called via kfree_bulk */ - s = virt_to_cache(objp); 
- else + if (!orig_s) { + struct folio *folio = virt_to_folio(objp); + + /* called via kfree_bulk */ + if (!folio_test_slab(folio)) { + local_irq_enable(); + free_large_kmalloc(folio, objp); + local_irq_disable(); + continue; + } + s = folio_slab(folio)->slab_cache; + } else { s = cache_from_obj(orig_s, objp); + } + if (!s) continue; @@ -3703,20 +3721,24 @@ void kfree(const void *objp) { struct kmem_cache *c; unsigned long flags; + struct folio *folio; trace_kfree(_RET_IP_, objp); if (unlikely(ZERO_OR_NULL_PTR(objp))) return; - local_irq_save(flags); - kfree_debugcheck(objp); - c = virt_to_cache(objp); - if (!c) { - local_irq_restore(flags); + + folio = virt_to_folio(objp); + if (!folio_test_slab(folio)) { + free_large_kmalloc(folio, (void *)objp); return; } - debug_check_no_locks_freed(objp, c->object_size); + c = folio_slab(folio)->slab_cache; + + local_irq_save(flags); + kfree_debugcheck(objp); + debug_check_no_locks_freed(objp, c->object_size); debug_check_no_obj_freed(objp, c->object_size); __cache_free(c, (void *)objp, _RET_IP_); local_irq_restore(flags); @@ -4138,15 +4160,17 @@ void __check_heap_object(const void *ptr, unsigned long n, size_t __ksize(const void *objp) { struct kmem_cache *c; - size_t size; + struct folio *folio; BUG_ON(!objp); if (unlikely(objp == ZERO_SIZE_PTR)) return 0; - c = virt_to_cache(objp); - size = c ? 
c->object_size : 0; + folio = virt_to_folio(objp); + if (!folio_test_slab(folio)) + return folio_size(folio); - return size; + c = folio_slab(folio)->slab_cache; + return c->object_size; } EXPORT_SYMBOL(__ksize); diff --git a/mm/slab.h b/mm/slab.h index 801a207a5cd7..9808d537f6ba 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -660,6 +660,9 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x) print_tracking(cachep, x); return cachep; } + +void free_large_kmalloc(struct folio *folio, void *object); + #endif /* CONFIG_SLOB */ static inline size_t slab_ksize(const struct kmem_cache *s) diff --git a/mm/slab_common.c b/mm/slab_common.c index 51ccd0545816..5a2e81f42ee9 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -744,8 +744,8 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags) /* * kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time. - * kmalloc_index() supports up to 2^25=32MB, so the final entry of the table is - * kmalloc-32M. + * kmalloc_index() supports up to 2^21=2MB, so the final entry of the table is + * kmalloc-2M. 
*/ const struct kmalloc_info_struct kmalloc_info[] __initconst = { INIT_KMALLOC_INFO(0, 0), @@ -769,11 +769,7 @@ const struct kmalloc_info_struct kmalloc_info[] __initconst = { INIT_KMALLOC_INFO(262144, 256k), INIT_KMALLOC_INFO(524288, 512k), INIT_KMALLOC_INFO(1048576, 1M), - INIT_KMALLOC_INFO(2097152, 2M), - INIT_KMALLOC_INFO(4194304, 4M), - INIT_KMALLOC_INFO(8388608, 8M), - INIT_KMALLOC_INFO(16777216, 16M), - INIT_KMALLOC_INFO(33554432, 32M) + INIT_KMALLOC_INFO(2097152, 2M) }; /* @@ -886,6 +882,21 @@ void __init create_kmalloc_caches(slab_flags_t flags) /* Kmalloc array is now usable */ slab_state = UP; } + +void free_large_kmalloc(struct folio *folio, void *object) +{ + unsigned int order = folio_order(folio); + + if (WARN_ON_ONCE(order == 0)) + pr_warn_once("object pointer: 0x%p\n", object); + + kmemleak_free(object); + kasan_kfree_large(object); + + mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B, + -(PAGE_SIZE << order)); + __free_pages(folio_page(folio, 0), order); +} #endif /* !CONFIG_SLOB */ gfp_t kmalloc_fix_flags(gfp_t flags) diff --git a/mm/slub.c b/mm/slub.c index 165fe87af204..a659874c5d44 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1704,12 +1704,6 @@ static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab, * Hooks for other subsystems that check memory allocations. In a typical * production configuration these hooks all should produce no code at all. 
*/ -static __always_inline void kfree_hook(void *x) -{ - kmemleak_free(x); - kasan_kfree_large(x); -} - static __always_inline bool slab_free_hook(struct kmem_cache *s, void *x, bool init) { @@ -3550,19 +3544,6 @@ struct detached_freelist { struct kmem_cache *s; }; -static inline void free_large_kmalloc(struct folio *folio, void *object) -{ - unsigned int order = folio_order(folio); - - if (WARN_ON_ONCE(order == 0)) - pr_warn_once("object pointer: 0x%p\n", object); - - kfree_hook(object); - mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B, - -(PAGE_SIZE << order)); - __free_pages(folio_page(folio, 0), order); -} - /* * This function progressively scans the array with free objects (with * a limited look ahead) and extract objects belonging to the same -- cgit v1.2.3 From ed4cd17eb26d7f0c6a762608a3f30870929fbcdd Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Wed, 17 Aug 2022 19:18:20 +0900 Subject: mm/sl[au]b: introduce common alloc/free functions without tracepoint To unify kmalloc functions in a later patch, introduce common alloc/free functions that do not have tracepoints.
Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Vlastimil Babka Signed-off-by: Vlastimil Babka --- mm/slab.c | 36 +++++++++++++++++++++++++++++------- mm/slab.h | 5 +++++ mm/slub.c | 13 +++++++++++++ 3 files changed, 47 insertions(+), 7 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 10c9af904410..aa61851b0a07 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3560,6 +3560,14 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) } EXPORT_SYMBOL(kmem_cache_alloc_node); +void *__kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, + int nodeid, size_t orig_size, + unsigned long caller) +{ + return slab_alloc_node(cachep, NULL, flags, nodeid, + orig_size, caller); +} + #ifdef CONFIG_TRACING void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep, gfp_t flags, @@ -3645,6 +3653,26 @@ void *__kmalloc(size_t size, gfp_t flags) } EXPORT_SYMBOL(__kmalloc); +static __always_inline +void __do_kmem_cache_free(struct kmem_cache *cachep, void *objp, + unsigned long caller) +{ + unsigned long flags; + + local_irq_save(flags); + debug_check_no_locks_freed(objp, cachep->object_size); + if (!(cachep->flags & SLAB_DEBUG_OBJECTS)) + debug_check_no_obj_freed(objp, cachep->object_size); + __cache_free(cachep, objp, caller); + local_irq_restore(flags); +} + +void __kmem_cache_free(struct kmem_cache *cachep, void *objp, + unsigned long caller) +{ + __do_kmem_cache_free(cachep, objp, caller); +} + /** * kmem_cache_free - Deallocate an object * @cachep: The cache the allocation was from. 
@@ -3655,18 +3683,12 @@ EXPORT_SYMBOL(__kmalloc); */ void kmem_cache_free(struct kmem_cache *cachep, void *objp) { - unsigned long flags; cachep = cache_from_obj(cachep, objp); if (!cachep) return; trace_kmem_cache_free(_RET_IP_, objp, cachep->name); - local_irq_save(flags); - debug_check_no_locks_freed(objp, cachep->object_size); - if (!(cachep->flags & SLAB_DEBUG_OBJECTS)) - debug_check_no_obj_freed(objp, cachep->object_size); - __cache_free(cachep, objp, _RET_IP_); - local_irq_restore(flags); + __do_kmem_cache_free(cachep, objp, _RET_IP_); } EXPORT_SYMBOL(kmem_cache_free); diff --git a/mm/slab.h b/mm/slab.h index 9808d537f6ba..8eefeed95407 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -273,6 +273,11 @@ void create_kmalloc_caches(slab_flags_t); /* Find the kmalloc slab corresponding for a certain size */ struct kmem_cache *kmalloc_slab(size_t, gfp_t); + +void *__kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, + int node, size_t orig_size, + unsigned long caller); +void __kmem_cache_free(struct kmem_cache *s, void *x, unsigned long caller); #endif void *kmalloc_large_node_notrace(size_t size, gfp_t flags, int node); diff --git a/mm/slub.c b/mm/slub.c index a659874c5d44..a11f78c2647c 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3262,6 +3262,14 @@ void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru, } EXPORT_SYMBOL(kmem_cache_alloc_lru); +void *__kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, + int node, size_t orig_size, + unsigned long caller) +{ + return slab_alloc_node(s, NULL, gfpflags, node, + caller, orig_size); +} + #ifdef CONFIG_TRACING void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) { @@ -3526,6 +3534,11 @@ void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr) } #endif +void __kmem_cache_free(struct kmem_cache *s, void *x, unsigned long caller) +{ + slab_free(s, virt_to_slab(x), x, NULL, &x, 1, caller); +} + void kmem_cache_free(struct kmem_cache *s, void *x) { s = 
cache_from_obj(s, x); -- cgit v1.2.3 From b14051352465a24b3c9ceaccac4e39b3521bb370 Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Wed, 17 Aug 2022 19:18:21 +0900 Subject: mm/sl[au]b: generalize kmalloc subsystem Now everything in kmalloc subsystem can be generalized. Let's do it! Generalize __do_kmalloc_node(), __kmalloc_node_track_caller(), kfree(), __ksize(), __kmalloc(), __kmalloc_node() and move them to slab_common.c. In the meantime, rename kmalloc_large_node_notrace() to __kmalloc_large_node() and make it static as it's now only called in slab_common.c. [ feng.tang@intel.com: adjust kfence skip list to include __kmem_cache_free so that kfence kunit tests do not fail ] Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Vlastimil Babka Signed-off-by: Vlastimil Babka --- mm/kfence/report.c | 1 + mm/slab.c | 108 ---------------------------------------------------- mm/slab.h | 2 - mm/slab_common.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++++++-- mm/slub.c | 87 ------------------------------------------ 5 files changed, 107 insertions(+), 200 deletions(-) diff --git a/mm/kfence/report.c b/mm/kfence/report.c index f5a6d8ba3e21..7e496856c2eb 100644 --- a/mm/kfence/report.c +++ b/mm/kfence/report.c @@ -86,6 +86,7 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries /* Also the *_bulk() variants by only checking prefixes. 
*/ if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfree") || str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_free") || + str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmem_cache_free") || str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmalloc") || str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_alloc")) goto found; diff --git a/mm/slab.c b/mm/slab.c index aa61851b0a07..5b234e3ab165 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3587,44 +3587,6 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep, EXPORT_SYMBOL(kmem_cache_alloc_node_trace); #endif -static __always_inline void * -__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller) -{ - struct kmem_cache *cachep; - void *ret; - - if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) { - ret = kmalloc_large_node_notrace(size, flags, node); - - trace_kmalloc_node(caller, ret, NULL, size, - PAGE_SIZE << get_order(size), - flags, node); - return ret; - } - - cachep = kmalloc_slab(size, flags); - if (unlikely(ZERO_OR_NULL_PTR(cachep))) - return cachep; - - ret = kmem_cache_alloc_node_trace(cachep, flags, node, size); - ret = kasan_kmalloc(cachep, ret, size, flags); - - return ret; -} - -void *__kmalloc_node(size_t size, gfp_t flags, int node) -{ - return __do_kmalloc_node(size, flags, node, _RET_IP_); -} -EXPORT_SYMBOL(__kmalloc_node); - -void *__kmalloc_node_track_caller(size_t size, gfp_t flags, - int node, unsigned long caller) -{ - return __do_kmalloc_node(size, flags, node, caller); -} -EXPORT_SYMBOL(__kmalloc_node_track_caller); - #ifdef CONFIG_PRINTK void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) { @@ -3647,12 +3609,6 @@ void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) } #endif -void *__kmalloc(size_t size, gfp_t flags) -{ - return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_); -} -EXPORT_SYMBOL(__kmalloc); - static __always_inline void __do_kmem_cache_free(struct kmem_cache *cachep, void *objp, unsigned long caller) @@ -3730,43 +3686,6 
@@ void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p) } EXPORT_SYMBOL(kmem_cache_free_bulk); -/** - * kfree - free previously allocated memory - * @objp: pointer returned by kmalloc. - * - * If @objp is NULL, no operation is performed. - * - * Don't free memory not originally allocated by kmalloc() - * or you will run into trouble. - */ -void kfree(const void *objp) -{ - struct kmem_cache *c; - unsigned long flags; - struct folio *folio; - - trace_kfree(_RET_IP_, objp); - - if (unlikely(ZERO_OR_NULL_PTR(objp))) - return; - - folio = virt_to_folio(objp); - if (!folio_test_slab(folio)) { - free_large_kmalloc(folio, (void *)objp); - return; - } - - c = folio_slab(folio)->slab_cache; - - local_irq_save(flags); - kfree_debugcheck(objp); - debug_check_no_locks_freed(objp, c->object_size); - debug_check_no_obj_freed(objp, c->object_size); - __cache_free(c, (void *)objp, _RET_IP_); - local_irq_restore(flags); -} -EXPORT_SYMBOL(kfree); - /* * This initializes kmem_cache_node or resizes various caches for all nodes. */ @@ -4169,30 +4088,3 @@ void __check_heap_object(const void *ptr, unsigned long n, usercopy_abort("SLAB object", cachep->name, to_user, offset, n); } #endif /* CONFIG_HARDENED_USERCOPY */ - -/** - * __ksize -- Uninstrumented ksize. - * @objp: pointer to the object - * - * Unlike ksize(), __ksize() is uninstrumented, and does not provide the same - * safety checks as ksize() with KASAN instrumentation enabled. 
- * - * Return: size of the actual memory used by @objp in bytes - */ -size_t __ksize(const void *objp) -{ - struct kmem_cache *c; - struct folio *folio; - - BUG_ON(!objp); - if (unlikely(objp == ZERO_SIZE_PTR)) - return 0; - - folio = virt_to_folio(objp); - if (!folio_test_slab(folio)) - return folio_size(folio); - - c = folio_slab(folio)->slab_cache; - return c->object_size; -} -EXPORT_SYMBOL(__ksize); diff --git a/mm/slab.h b/mm/slab.h index 8eefeed95407..4d8330d57573 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -280,8 +280,6 @@ void *__kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, void __kmem_cache_free(struct kmem_cache *s, void *x, unsigned long caller); #endif -void *kmalloc_large_node_notrace(size_t size, gfp_t flags, int node); - gfp_t kmalloc_fix_flags(gfp_t flags); /* Functions provided by the slab allocators */ diff --git a/mm/slab_common.c b/mm/slab_common.c index 5a2e81f42ee9..6a744fc3d7f2 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -897,6 +897,109 @@ void free_large_kmalloc(struct folio *folio, void *object) -(PAGE_SIZE << order)); __free_pages(folio_page(folio, 0), order); } + +static void *__kmalloc_large_node(size_t size, gfp_t flags, int node); +static __always_inline +void *__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller) +{ + struct kmem_cache *s; + void *ret; + + if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) { + ret = __kmalloc_large_node(size, flags, node); + trace_kmalloc_node(caller, ret, NULL, + size, PAGE_SIZE << get_order(size), + flags, node); + return ret; + } + + s = kmalloc_slab(size, flags); + + if (unlikely(ZERO_OR_NULL_PTR(s))) + return s; + + ret = __kmem_cache_alloc_node(s, flags, node, size, caller); + ret = kasan_kmalloc(s, ret, size, flags); + trace_kmalloc_node(caller, ret, s, size, + s->size, flags, node); + return ret; +} + +void *__kmalloc_node(size_t size, gfp_t flags, int node) +{ + return __do_kmalloc_node(size, flags, node, _RET_IP_); +} +EXPORT_SYMBOL(__kmalloc_node); 
+ +void *__kmalloc(size_t size, gfp_t flags) +{ + return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_); +} +EXPORT_SYMBOL(__kmalloc); + +void *__kmalloc_node_track_caller(size_t size, gfp_t flags, + int node, unsigned long caller) +{ + return __do_kmalloc_node(size, flags, node, caller); +} +EXPORT_SYMBOL(__kmalloc_node_track_caller); + +/** + * kfree - free previously allocated memory + * @object: pointer returned by kmalloc. + * + * If @object is NULL, no operation is performed. + * + * Don't free memory not originally allocated by kmalloc() + * or you will run into trouble. + */ +void kfree(const void *object) +{ + struct folio *folio; + struct slab *slab; + struct kmem_cache *s; + + trace_kfree(_RET_IP_, object); + + if (unlikely(ZERO_OR_NULL_PTR(object))) + return; + + folio = virt_to_folio(object); + if (unlikely(!folio_test_slab(folio))) { + free_large_kmalloc(folio, (void *)object); + return; + } + + slab = folio_slab(folio); + s = slab->slab_cache; + __kmem_cache_free(s, (void *)object, _RET_IP_); +} +EXPORT_SYMBOL(kfree); + +/** + * __ksize -- Uninstrumented ksize. + * @object: pointer to the object + * + * Unlike ksize(), __ksize() is uninstrumented, and does not provide the same + * safety checks as ksize() with KASAN instrumentation enabled. + * + * Return: size of the actual memory used by @object in bytes + */ +size_t __ksize(const void *object) +{ + struct folio *folio; + + if (unlikely(object == ZERO_SIZE_PTR)) + return 0; + + folio = virt_to_folio(object); + + if (unlikely(!folio_test_slab(folio))) + return folio_size(folio); + + return slab_ksize(folio_slab(folio)->slab_cache); +} +EXPORT_SYMBOL(__ksize); #endif /* !CONFIG_SLOB */ gfp_t kmalloc_fix_flags(gfp_t flags) @@ -917,7 +1020,7 @@ gfp_t kmalloc_fix_flags(gfp_t flags) * know the allocation order to free the pages properly in kfree. 
*/ -void *kmalloc_large_node_notrace(size_t size, gfp_t flags, int node) +static void *__kmalloc_large_node(size_t size, gfp_t flags, int node) { struct page *page; void *ptr = NULL; @@ -943,7 +1046,7 @@ void *kmalloc_large_node_notrace(size_t size, gfp_t flags, int node) void *kmalloc_large(size_t size, gfp_t flags) { - void *ret = kmalloc_large_node_notrace(size, flags, NUMA_NO_NODE); + void *ret = __kmalloc_large_node(size, flags, NUMA_NO_NODE); trace_kmalloc(_RET_IP_, ret, NULL, size, PAGE_SIZE << get_order(size), flags); @@ -953,7 +1056,7 @@ EXPORT_SYMBOL(kmalloc_large); void *kmalloc_large_node(size_t size, gfp_t flags, int node) { - void *ret = kmalloc_large_node_notrace(size, flags, node); + void *ret = __kmalloc_large_node(size, flags, node); trace_kmalloc_node(_RET_IP_, ret, NULL, size, PAGE_SIZE << get_order(size), flags, node); diff --git a/mm/slub.c b/mm/slub.c index a11f78c2647c..cd49785d59e1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4388,49 +4388,6 @@ static int __init setup_slub_min_objects(char *str) __setup("slub_min_objects=", setup_slub_min_objects); -static __always_inline -void *__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller) -{ - struct kmem_cache *s; - void *ret; - - if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) { - ret = kmalloc_large_node_notrace(size, flags, node); - - trace_kmalloc_node(caller, ret, NULL, - size, PAGE_SIZE << get_order(size), - flags, node); - - return ret; - } - - s = kmalloc_slab(size, flags); - - if (unlikely(ZERO_OR_NULL_PTR(s))) - return s; - - ret = slab_alloc_node(s, NULL, flags, node, caller, size); - - trace_kmalloc_node(caller, ret, s, size, s->size, flags, node); - - ret = kasan_kmalloc(s, ret, size, flags); - - return ret; -} - -void *__kmalloc_node(size_t size, gfp_t flags, int node) -{ - return __do_kmalloc_node(size, flags, node, _RET_IP_); -} -EXPORT_SYMBOL(__kmalloc_node); - -void *__kmalloc(size_t size, gfp_t flags) -{ - return __do_kmalloc_node(size, flags, NUMA_NO_NODE, 
_RET_IP_); -} -EXPORT_SYMBOL(__kmalloc); - - #ifdef CONFIG_HARDENED_USERCOPY /* * Rejects incorrectly sized objects and objects that are to be copied @@ -4481,43 +4438,6 @@ void __check_heap_object(const void *ptr, unsigned long n, } #endif /* CONFIG_HARDENED_USERCOPY */ -size_t __ksize(const void *object) -{ - struct folio *folio; - - if (unlikely(object == ZERO_SIZE_PTR)) - return 0; - - folio = virt_to_folio(object); - - if (unlikely(!folio_test_slab(folio))) - return folio_size(folio); - - return slab_ksize(folio_slab(folio)->slab_cache); -} -EXPORT_SYMBOL(__ksize); - -void kfree(const void *x) -{ - struct folio *folio; - struct slab *slab; - void *object = (void *)x; - - trace_kfree(_RET_IP_, x); - - if (unlikely(ZERO_OR_NULL_PTR(x))) - return; - - folio = virt_to_folio(x); - if (unlikely(!folio_test_slab(folio))) { - free_large_kmalloc(folio, object); - return; - } - slab = folio_slab(folio); - slab_free(slab->slab_cache, slab, object, NULL, &object, 1, _RET_IP_); -} -EXPORT_SYMBOL(kfree); - #define SHRINK_PROMOTE_MAX 32 /* @@ -4863,13 +4783,6 @@ int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags) return 0; } -void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, - int node, unsigned long caller) -{ - return __do_kmalloc_node(size, gfpflags, node, caller); -} -EXPORT_SYMBOL(__kmalloc_node_track_caller); - #ifdef CONFIG_SYSFS static int count_inuse(struct slab *slab) { -- cgit v1.2.3 From 26a40990ba052e6f553256f9d0f112452b992a38 Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Wed, 17 Aug 2022 19:18:22 +0900 Subject: mm/sl[au]b: cleanup kmem_cache_alloc[_node]_trace() Despite its name, kmem_cache_alloc[_node]_trace() is a hook for inlined kmalloc. So rename it to kmalloc[_node]_trace(). Move its implementation to slab_common.c by using __kmem_cache_alloc_node(), but keep CONFIG_TRACING=n variants to save a function call when CONFIG_TRACING=n.
Use __assume_kmalloc_alignment for kmalloc[_node]_trace instead of __assume_slab_alignment. Generally kmalloc has larger alignment requirements. Suggested-by: Vlastimil Babka Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Vlastimil Babka Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 27 ++++++++++++++------------- mm/slab.c | 35 ----------------------------------- mm/slab_common.c | 27 +++++++++++++++++++++++++++ mm/slub.c | 27 --------------------------- 4 files changed, 41 insertions(+), 75 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index 4ee5b2fed164..c8e485ce8815 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -449,16 +449,16 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node) __assum __malloc; #ifdef CONFIG_TRACING -extern void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t flags, size_t size) - __assume_slab_alignment __alloc_size(3); - -extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, - int node, size_t size) __assume_slab_alignment - __alloc_size(4); +void *kmalloc_trace(struct kmem_cache *s, gfp_t flags, size_t size) + __assume_kmalloc_alignment __alloc_size(3); +void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, + int node, size_t size) __assume_kmalloc_alignment + __alloc_size(4); #else /* CONFIG_TRACING */ -static __always_inline __alloc_size(3) void *kmem_cache_alloc_trace(struct kmem_cache *s, - gfp_t flags, size_t size) +/* Save a function call when CONFIG_TRACING=n */ +static __always_inline __alloc_size(3) +void *kmalloc_trace(struct kmem_cache *s, gfp_t flags, size_t size) { void *ret = kmem_cache_alloc(s, flags); @@ -466,8 +466,9 @@ static __always_inline __alloc_size(3) void *kmem_cache_alloc_trace(struct kmem_ return ret; } -static __always_inline void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, - int node, size_t size) +static __always_inline __alloc_size(4) +void
*kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, + int node, size_t size) { void *ret = kmem_cache_alloc_node(s, gfpflags, node); @@ -550,7 +551,7 @@ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags) if (!index) return ZERO_SIZE_PTR; - return kmem_cache_alloc_trace( + return kmalloc_trace( kmalloc_caches[kmalloc_type(flags)][index], flags, size); #endif @@ -572,9 +573,9 @@ static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t fla if (!index) return ZERO_SIZE_PTR; - return kmem_cache_alloc_node_trace( + return kmalloc_node_trace( kmalloc_caches[kmalloc_type(flags)][index], - flags, node, size); + flags, node, size); } return __kmalloc_node(size, flags, node); } diff --git a/mm/slab.c b/mm/slab.c index 5b234e3ab165..8d9d0fbf9792 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3519,22 +3519,6 @@ error: } EXPORT_SYMBOL(kmem_cache_alloc_bulk); -#ifdef CONFIG_TRACING -void * -kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size) -{ - void *ret; - - ret = slab_alloc(cachep, NULL, flags, size, _RET_IP_); - - ret = kasan_kmalloc(cachep, ret, size, flags); - trace_kmalloc(_RET_IP_, ret, cachep, - size, cachep->size, flags); - return ret; -} -EXPORT_SYMBOL(kmem_cache_alloc_trace); -#endif - /** * kmem_cache_alloc_node - Allocate an object on the specified node * @cachep: The cache to allocate from. 
@@ -3568,25 +3552,6 @@ void *__kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, orig_size, caller); } -#ifdef CONFIG_TRACING -void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep, - gfp_t flags, - int nodeid, - size_t size) -{ - void *ret; - - ret = slab_alloc_node(cachep, NULL, flags, nodeid, size, _RET_IP_); - - ret = kasan_kmalloc(cachep, ret, size, flags); - trace_kmalloc_node(_RET_IP_, ret, cachep, - size, cachep->size, - flags, nodeid); - return ret; -} -EXPORT_SYMBOL(kmem_cache_alloc_node_trace); -#endif - #ifdef CONFIG_PRINTK void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) { diff --git a/mm/slab_common.c b/mm/slab_common.c index 6a744fc3d7f2..2eab19043940 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -1000,6 +1000,33 @@ size_t __ksize(const void *object) return slab_ksize(folio_slab(folio)->slab_cache); } EXPORT_SYMBOL(__ksize); + +#ifdef CONFIG_TRACING +void *kmalloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) +{ + void *ret = __kmem_cache_alloc_node(s, gfpflags, NUMA_NO_NODE, + size, _RET_IP_); + + trace_kmalloc_node(_RET_IP_, ret, s, size, s->size, + gfpflags, NUMA_NO_NODE); + + ret = kasan_kmalloc(s, ret, size, gfpflags); + return ret; +} +EXPORT_SYMBOL(kmalloc_trace); + +void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, + int node, size_t size) +{ + void *ret = __kmem_cache_alloc_node(s, gfpflags, node, size, _RET_IP_); + + trace_kmalloc_node(_RET_IP_, ret, s, size, s->size, gfpflags, node); + + ret = kasan_kmalloc(s, ret, size, gfpflags); + return ret; +} +EXPORT_SYMBOL(kmalloc_node_trace); +#endif /* !CONFIG_TRACING */ #endif /* !CONFIG_SLOB */ gfp_t kmalloc_fix_flags(gfp_t flags) diff --git a/mm/slub.c b/mm/slub.c index cd49785d59e1..7d7fd9d4e8fa 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3270,17 +3270,6 @@ void *__kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, caller, orig_size); } -#ifdef CONFIG_TRACING -void *kmem_cache_alloc_trace(struct 
kmem_cache *s, gfp_t gfpflags, size_t size) -{ - void *ret = slab_alloc(s, NULL, gfpflags, _RET_IP_, size); - trace_kmalloc(_RET_IP_, ret, s, size, s->size, gfpflags); - ret = kasan_kmalloc(s, ret, size, gfpflags); - return ret; -} -EXPORT_SYMBOL(kmem_cache_alloc_trace); -#endif - void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) { void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size); @@ -3292,22 +3281,6 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) } EXPORT_SYMBOL(kmem_cache_alloc_node); -#ifdef CONFIG_TRACING -void *kmem_cache_alloc_node_trace(struct kmem_cache *s, - gfp_t gfpflags, - int node, size_t size) -{ - void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size); - - trace_kmalloc_node(_RET_IP_, ret, s, - size, s->size, gfpflags, node); - - ret = kasan_kmalloc(s, ret, size, gfpflags); - return ret; -} -EXPORT_SYMBOL(kmem_cache_alloc_node_trace); -#endif - /* * Slow path handling. This may still be called frequently since objects * have a longer lifetime than the cpu slabs in most processing loads. -- cgit v1.2.3 From 11e9734bcb6a7361943f993eba4e97f5812120d8 Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Wed, 17 Aug 2022 19:18:23 +0900 Subject: mm/slab_common: unify NUMA and UMA version of tracepoints Drop kmem_alloc event class, rename kmem_alloc_node to kmem_alloc, and remove _node postfix for NUMA version of tracepoints. This will break some tools that depend on {kmem_cache_alloc,kmalloc}_node, but at this point maintaining both kmem_alloc and kmem_alloc_node event classes does not make sense at all. 
Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Vlastimil Babka Signed-off-by: Vlastimil Babka --- include/trace/events/kmem.h | 60 ++------------------------------------------- mm/slab.c | 9 +++---- mm/slab_common.c | 21 ++++++---------- mm/slob.c | 20 +++++++-------- mm/slub.c | 6 ++--- 5 files changed, 27 insertions(+), 89 deletions(-) diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 4cb51ace600d..e078ebcdc4b1 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -11,62 +11,6 @@ DECLARE_EVENT_CLASS(kmem_alloc, - TP_PROTO(unsigned long call_site, - const void *ptr, - struct kmem_cache *s, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - - TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( unsigned long, gfp_flags ) - __field( bool, accounted ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = (__force unsigned long)gfp_flags; - __entry->accounted = IS_ENABLED(CONFIG_MEMCG_KMEM) ? - ((gfp_flags & __GFP_ACCOUNT) || - (s && s->flags & SLAB_ACCOUNT)) : false; - ), - - TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s", - (void *)__entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - show_gfp_flags(__entry->gfp_flags), - __entry->accounted ? 
"true" : "false") -); - -DEFINE_EVENT(kmem_alloc, kmalloc, - - TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s, - size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags), - - TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags) -); - -DEFINE_EVENT(kmem_alloc, kmem_cache_alloc, - - TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s, - size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags), - - TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags) -); - -DECLARE_EVENT_CLASS(kmem_alloc_node, - TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s, @@ -109,7 +53,7 @@ DECLARE_EVENT_CLASS(kmem_alloc_node, __entry->accounted ? "true" : "false") ); -DEFINE_EVENT(kmem_alloc_node, kmalloc_node, +DEFINE_EVENT(kmem_alloc, kmalloc, TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc, @@ -118,7 +62,7 @@ DEFINE_EVENT(kmem_alloc_node, kmalloc_node, TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node) ); -DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node, +DEFINE_EVENT(kmem_alloc, kmem_cache_alloc, TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc, diff --git a/mm/slab.c b/mm/slab.c index 8d9d0fbf9792..2fd400203ac2 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3440,8 +3440,8 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru, { void *ret = slab_alloc(cachep, lru, flags, cachep->object_size, _RET_IP_); - trace_kmem_cache_alloc(_RET_IP_, ret, cachep, - cachep->object_size, cachep->size, flags); + trace_kmem_cache_alloc(_RET_IP_, ret, cachep, cachep->object_size, + cachep->size, flags, NUMA_NO_NODE); return ret; } @@ -3536,9 +3536,8 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) { void *ret = slab_alloc_node(cachep, NULL, flags, nodeid, cachep->object_size, _RET_IP_); - trace_kmem_cache_alloc_node(_RET_IP_, ret, 
cachep, - cachep->object_size, cachep->size, - flags, nodeid); + trace_kmem_cache_alloc(_RET_IP_, ret, cachep, cachep->object_size, + cachep->size, flags, nodeid); return ret; } diff --git a/mm/slab_common.c b/mm/slab_common.c index 2eab19043940..3d7ad992ece1 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -907,9 +907,8 @@ void *__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) { ret = __kmalloc_large_node(size, flags, node); - trace_kmalloc_node(caller, ret, NULL, - size, PAGE_SIZE << get_order(size), - flags, node); + trace_kmalloc(_RET_IP_, ret, NULL, size, + PAGE_SIZE << get_order(size), flags, node); return ret; } @@ -920,8 +919,7 @@ void *__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller ret = __kmem_cache_alloc_node(s, flags, node, size, caller); ret = kasan_kmalloc(s, ret, size, flags); - trace_kmalloc_node(caller, ret, s, size, - s->size, flags, node); + trace_kmalloc(_RET_IP_, ret, s, size, s->size, flags, node); return ret; } @@ -1007,8 +1005,7 @@ void *kmalloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) void *ret = __kmem_cache_alloc_node(s, gfpflags, NUMA_NO_NODE, size, _RET_IP_); - trace_kmalloc_node(_RET_IP_, ret, s, size, s->size, - gfpflags, NUMA_NO_NODE); + trace_kmalloc(_RET_IP_, ret, s, size, s->size, gfpflags, NUMA_NO_NODE); ret = kasan_kmalloc(s, ret, size, gfpflags); return ret; @@ -1020,7 +1017,7 @@ void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, { void *ret = __kmem_cache_alloc_node(s, gfpflags, node, size, _RET_IP_); - trace_kmalloc_node(_RET_IP_, ret, s, size, s->size, gfpflags, node); + trace_kmalloc(_RET_IP_, ret, s, size, s->size, gfpflags, node); ret = kasan_kmalloc(s, ret, size, gfpflags); return ret; @@ -1076,7 +1073,7 @@ void *kmalloc_large(size_t size, gfp_t flags) void *ret = __kmalloc_large_node(size, flags, NUMA_NO_NODE); trace_kmalloc(_RET_IP_, ret, NULL, size, - PAGE_SIZE << get_order(size), 
flags); + PAGE_SIZE << get_order(size), flags, NUMA_NO_NODE); return ret; } EXPORT_SYMBOL(kmalloc_large); @@ -1085,8 +1082,8 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node) { void *ret = __kmalloc_large_node(size, flags, node); - trace_kmalloc_node(_RET_IP_, ret, NULL, size, - PAGE_SIZE << get_order(size), flags, node); + trace_kmalloc(_RET_IP_, ret, NULL, size, + PAGE_SIZE << get_order(size), flags, node); return ret; } EXPORT_SYMBOL(kmalloc_large_node); @@ -1421,8 +1418,6 @@ EXPORT_SYMBOL(ksize); /* Tracepoints definitions. */ EXPORT_TRACEPOINT_SYMBOL(kmalloc); EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); -EXPORT_TRACEPOINT_SYMBOL(kmalloc_node); -EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node); EXPORT_TRACEPOINT_SYMBOL(kfree); EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free); diff --git a/mm/slob.c b/mm/slob.c index 96b08acd72ce..3208c56d8f82 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -507,8 +507,8 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller) *m = size; ret = (void *)m + minalign; - trace_kmalloc_node(caller, ret, NULL, - size, size + minalign, gfp, node); + trace_kmalloc(caller, ret, NULL, size, + size + minalign, gfp, node); } else { unsigned int order = get_order(size); @@ -516,8 +516,8 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller) gfp |= __GFP_COMP; ret = slob_new_pages(gfp, order, node); - trace_kmalloc_node(caller, ret, NULL, - size, PAGE_SIZE << order, gfp, node); + trace_kmalloc(caller, ret, NULL, size, + PAGE_SIZE << order, gfp, node); } kmemleak_alloc(ret, size, 1, gfp); @@ -608,14 +608,14 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node) if (c->size < PAGE_SIZE) { b = slob_alloc(c->size, flags, c->align, node, 0); - trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size, - SLOB_UNITS(c->size) * SLOB_UNIT, - flags, node); + trace_kmem_cache_alloc(_RET_IP_, b, NULL, c->object_size, + SLOB_UNITS(c->size) * SLOB_UNIT, + flags, node); } else { b = 
slob_new_pages(flags, get_order(c->size), node); - trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size, - PAGE_SIZE << get_order(c->size), - flags, node); + trace_kmem_cache_alloc(_RET_IP_, b, NULL, c->object_size, + PAGE_SIZE << get_order(c->size), + flags, node); } if (b && c->ctor) { diff --git a/mm/slub.c b/mm/slub.c index 7d7fd9d4e8fa..22e4ccf06638 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3244,7 +3244,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru, void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, s->object_size); trace_kmem_cache_alloc(_RET_IP_, ret, s, s->object_size, - s->size, gfpflags); + s->size, gfpflags, NUMA_NO_NODE); return ret; } @@ -3274,8 +3274,8 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) { void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size); - trace_kmem_cache_alloc_node(_RET_IP_, ret, s, - s->object_size, s->size, gfpflags, node); + trace_kmem_cache_alloc(_RET_IP_, ret, s, s->object_size, + s->size, gfpflags, node); return ret; } -- cgit v1.2.3 From 2c1d697fb8ba6d2d44f914d4268ae1ccdf025f1b Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Wed, 17 Aug 2022 19:18:24 +0900 Subject: mm/slab_common: drop kmem_alloc & avoid dereferencing fields when not using Drop kmem_alloc event class, and define kmalloc and kmem_cache_alloc using TRACE_EVENT() macro. And then this patch does: - Do not pass pointer to struct kmem_cache to trace_kmalloc. gfp flag is enough to know if it's accounted or not. - Avoid dereferencing s->object_size and s->size when not using kmem_cache_alloc event. - Avoid dereferencing s->name when not using kmem_cache_free event. 
- Adjust s->size to SLOB_UNITS(s->size) * SLOB_UNIT in SLOB Cc: Vasily Averin Suggested-by: Vlastimil Babka Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Vlastimil Babka Signed-off-by: Vlastimil Babka --- include/trace/events/kmem.h | 64 +++++++++++++++++++++++++++++---------------- mm/slab.c | 8 +++--- mm/slab_common.c | 16 ++++++------ mm/slob.c | 19 ++++++-------- mm/slub.c | 8 +++--- 5 files changed, 64 insertions(+), 51 deletions(-) diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index e078ebcdc4b1..243073cfc29d 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -9,17 +9,15 @@ #include #include -DECLARE_EVENT_CLASS(kmem_alloc, +TRACE_EVENT(kmem_cache_alloc, TP_PROTO(unsigned long call_site, const void *ptr, struct kmem_cache *s, - size_t bytes_req, - size_t bytes_alloc, gfp_t gfp_flags, int node), - TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node), + TP_ARGS(call_site, ptr, s, gfp_flags, node), TP_STRUCT__entry( __field( unsigned long, call_site ) @@ -34,13 +32,13 @@ DECLARE_EVENT_CLASS(kmem_alloc, TP_fast_assign( __entry->call_site = call_site; __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; + __entry->bytes_req = s->object_size; + __entry->bytes_alloc = s->size; __entry->gfp_flags = (__force unsigned long)gfp_flags; __entry->node = node; __entry->accounted = IS_ENABLED(CONFIG_MEMCG_KMEM) ? ((gfp_flags & __GFP_ACCOUNT) || - (s && s->flags & SLAB_ACCOUNT)) : false; + (s->flags & SLAB_ACCOUNT)) : false; ), TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d accounted=%s", @@ -53,22 +51,44 @@ DECLARE_EVENT_CLASS(kmem_alloc, __entry->accounted ? 
"true" : "false") ); -DEFINE_EVENT(kmem_alloc, kmalloc, +TRACE_EVENT(kmalloc, - TP_PROTO(unsigned long call_site, const void *ptr, - struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc, - gfp_t gfp_flags, int node), + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), - TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node) -); + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), -DEFINE_EVENT(kmem_alloc, kmem_cache_alloc, + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( unsigned long, gfp_flags ) + __field( int, node ) + ), - TP_PROTO(unsigned long call_site, const void *ptr, - struct kmem_cache *s, size_t bytes_req, size_t bytes_alloc, - gfp_t gfp_flags, int node), + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = (__force unsigned long)gfp_flags; + __entry->node = node; + ), - TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags, node) + TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d accounted=%s", + (void *)__entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + show_gfp_flags(__entry->gfp_flags), + __entry->node, + (IS_ENABLED(CONFIG_MEMCG_KMEM) && + (__entry->gfp_flags & (__force unsigned long)__GFP_ACCOUNT)) ? 
"true" : "false") ); TRACE_EVENT(kfree, @@ -93,20 +113,20 @@ TRACE_EVENT(kfree, TRACE_EVENT(kmem_cache_free, - TP_PROTO(unsigned long call_site, const void *ptr, const char *name), + TP_PROTO(unsigned long call_site, const void *ptr, const struct kmem_cache *s), - TP_ARGS(call_site, ptr, name), + TP_ARGS(call_site, ptr, s), TP_STRUCT__entry( __field( unsigned long, call_site ) __field( const void *, ptr ) - __string( name, name ) + __string( name, s->name ) ), TP_fast_assign( __entry->call_site = call_site; __entry->ptr = ptr; - __assign_str(name, name); + __assign_str(name, s->name); ), TP_printk("call_site=%pS ptr=%p name=%s", diff --git a/mm/slab.c b/mm/slab.c index 2fd400203ac2..a5486ff8362a 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3440,8 +3440,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru, { void *ret = slab_alloc(cachep, lru, flags, cachep->object_size, _RET_IP_); - trace_kmem_cache_alloc(_RET_IP_, ret, cachep, cachep->object_size, - cachep->size, flags, NUMA_NO_NODE); + trace_kmem_cache_alloc(_RET_IP_, ret, cachep, flags, NUMA_NO_NODE); return ret; } @@ -3536,8 +3535,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) { void *ret = slab_alloc_node(cachep, NULL, flags, nodeid, cachep->object_size, _RET_IP_); - trace_kmem_cache_alloc(_RET_IP_, ret, cachep, cachep->object_size, - cachep->size, flags, nodeid); + trace_kmem_cache_alloc(_RET_IP_, ret, cachep, flags, nodeid); return ret; } @@ -3607,7 +3605,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp) if (!cachep) return; - trace_kmem_cache_free(_RET_IP_, objp, cachep->name); + trace_kmem_cache_free(_RET_IP_, objp, cachep); __do_kmem_cache_free(cachep, objp, _RET_IP_); } EXPORT_SYMBOL(kmem_cache_free); diff --git a/mm/slab_common.c b/mm/slab_common.c index 3d7ad992ece1..ad4c36fb697c 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -907,7 +907,7 @@ void *__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long 
caller if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) { ret = __kmalloc_large_node(size, flags, node); - trace_kmalloc(_RET_IP_, ret, NULL, size, + trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size), flags, node); return ret; } @@ -919,7 +919,7 @@ void *__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller ret = __kmem_cache_alloc_node(s, flags, node, size, caller); ret = kasan_kmalloc(s, ret, size, flags); - trace_kmalloc(_RET_IP_, ret, s, size, s->size, flags, node); + trace_kmalloc(_RET_IP_, ret, size, s->size, flags, node); return ret; } @@ -1005,7 +1005,7 @@ void *kmalloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) void *ret = __kmem_cache_alloc_node(s, gfpflags, NUMA_NO_NODE, size, _RET_IP_); - trace_kmalloc(_RET_IP_, ret, s, size, s->size, gfpflags, NUMA_NO_NODE); + trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags, NUMA_NO_NODE); ret = kasan_kmalloc(s, ret, size, gfpflags); return ret; @@ -1017,7 +1017,7 @@ void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, { void *ret = __kmem_cache_alloc_node(s, gfpflags, node, size, _RET_IP_); - trace_kmalloc(_RET_IP_, ret, s, size, s->size, gfpflags, node); + trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags, node); ret = kasan_kmalloc(s, ret, size, gfpflags); return ret; @@ -1072,8 +1072,8 @@ void *kmalloc_large(size_t size, gfp_t flags) { void *ret = __kmalloc_large_node(size, flags, NUMA_NO_NODE); - trace_kmalloc(_RET_IP_, ret, NULL, size, - PAGE_SIZE << get_order(size), flags, NUMA_NO_NODE); + trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size), + flags, NUMA_NO_NODE); return ret; } EXPORT_SYMBOL(kmalloc_large); @@ -1082,8 +1082,8 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node) { void *ret = __kmalloc_large_node(size, flags, node); - trace_kmalloc(_RET_IP_, ret, NULL, size, - PAGE_SIZE << get_order(size), flags, node); + trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size), + flags, node); return ret; } 
EXPORT_SYMBOL(kmalloc_large_node); diff --git a/mm/slob.c b/mm/slob.c index 3208c56d8f82..771af84576bf 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -507,8 +507,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller) *m = size; ret = (void *)m + minalign; - trace_kmalloc(caller, ret, NULL, size, - size + minalign, gfp, node); + trace_kmalloc(caller, ret, size, size + minalign, gfp, node); } else { unsigned int order = get_order(size); @@ -516,8 +515,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller) gfp |= __GFP_COMP; ret = slob_new_pages(gfp, order, node); - trace_kmalloc(caller, ret, NULL, size, - PAGE_SIZE << order, gfp, node); + trace_kmalloc(caller, ret, size, PAGE_SIZE << order, gfp, node); } kmemleak_alloc(ret, size, 1, gfp); @@ -594,6 +592,9 @@ int __kmem_cache_create(struct kmem_cache *c, slab_flags_t flags) /* leave room for rcu footer at the end of object */ c->size += sizeof(struct slob_rcu); } + + /* Actual size allocated */ + c->size = SLOB_UNITS(c->size) * SLOB_UNIT; c->flags = flags; return 0; } @@ -608,14 +609,10 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node) if (c->size < PAGE_SIZE) { b = slob_alloc(c->size, flags, c->align, node, 0); - trace_kmem_cache_alloc(_RET_IP_, b, NULL, c->object_size, - SLOB_UNITS(c->size) * SLOB_UNIT, - flags, node); + trace_kmem_cache_alloc(_RET_IP_, b, c, flags, node); } else { b = slob_new_pages(flags, get_order(c->size), node); - trace_kmem_cache_alloc(_RET_IP_, b, NULL, c->object_size, - PAGE_SIZE << get_order(c->size), - flags, node); + trace_kmem_cache_alloc(_RET_IP_, b, c, flags, node); } if (b && c->ctor) { @@ -671,7 +668,7 @@ static void kmem_rcu_free(struct rcu_head *head) void kmem_cache_free(struct kmem_cache *c, void *b) { kmemleak_free_recursive(b, c->flags); - trace_kmem_cache_free(_RET_IP_, b, c->name); + trace_kmem_cache_free(_RET_IP_, b, c); if (unlikely(c->flags & SLAB_TYPESAFE_BY_RCU)) { struct slob_rcu *slob_rcu; slob_rcu = 
b + (c->size - sizeof(struct slob_rcu)); diff --git a/mm/slub.c b/mm/slub.c index 22e4ccf06638..8083a6ee5f15 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3243,8 +3243,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru, { void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, s->object_size); - trace_kmem_cache_alloc(_RET_IP_, ret, s, s->object_size, - s->size, gfpflags, NUMA_NO_NODE); + trace_kmem_cache_alloc(_RET_IP_, ret, s, gfpflags, NUMA_NO_NODE); return ret; } @@ -3274,8 +3273,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) { void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size); - trace_kmem_cache_alloc(_RET_IP_, ret, s, s->object_size, - s->size, gfpflags, node); + trace_kmem_cache_alloc(_RET_IP_, ret, s, gfpflags, node); return ret; } @@ -3517,7 +3515,7 @@ void kmem_cache_free(struct kmem_cache *s, void *x) s = cache_from_obj(s, x); if (!s) return; - trace_kmem_cache_free(_RET_IP_, x, s->name); + trace_kmem_cache_free(_RET_IP_, x, s); slab_free(s, virt_to_slab(x), x, NULL, &x, 1, _RET_IP_); } EXPORT_SYMBOL(kmem_cache_free); -- cgit v1.2.3 From 8dfa9d554061873f96335730fb1d403698b2b1b4 Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Wed, 17 Aug 2022 19:18:25 +0900 Subject: mm/slab_common: move declaration of __ksize() to mm/slab.h __ksize() is only called by KASAN. Remove export symbol and move declaration to mm/slab.h as we don't want to grow its callers. 
Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Vlastimil Babka Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 1 - mm/slab.h | 2 ++ mm/slab_common.c | 11 +---------- mm/slob.c | 1 - 4 files changed, 3 insertions(+), 12 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index c8e485ce8815..9b592e611cb1 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -187,7 +187,6 @@ int kmem_cache_shrink(struct kmem_cache *s); void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags) __alloc_size(2); void kfree(const void *objp); void kfree_sensitive(const void *objp); -size_t __ksize(const void *objp); size_t ksize(const void *objp); #ifdef CONFIG_PRINTK bool kmem_valid_obj(void *object); diff --git a/mm/slab.h b/mm/slab.h index 4d8330d57573..65023f000d42 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -668,6 +668,8 @@ void free_large_kmalloc(struct folio *folio, void *object); #endif /* CONFIG_SLOB */ +size_t __ksize(const void *objp); + static inline size_t slab_ksize(const struct kmem_cache *s) { #ifndef CONFIG_SLUB diff --git a/mm/slab_common.c b/mm/slab_common.c index ad4c36fb697c..500eb777faca 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -974,15 +974,7 @@ void kfree(const void *object) } EXPORT_SYMBOL(kfree); -/** - * __ksize -- Uninstrumented ksize. - * @object: pointer to the object - * - * Unlike ksize(), __ksize() is uninstrumented, and does not provide the same - * safety checks as ksize() with KASAN instrumentation enabled. - * - * Return: size of the actual memory used by @object in bytes - */ +/* Uninstrumented ksize. Only called by KASAN. 
*/ size_t __ksize(const void *object) { struct folio *folio; @@ -997,7 +989,6 @@ size_t __ksize(const void *object) return slab_ksize(folio_slab(folio)->slab_cache); } -EXPORT_SYMBOL(__ksize); #ifdef CONFIG_TRACING void *kmalloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) diff --git a/mm/slob.c b/mm/slob.c index 771af84576bf..45a061b8ba38 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -584,7 +584,6 @@ size_t __ksize(const void *block) m = (unsigned int *)(block - align); return SLOB_UNITS(*m) * SLOB_UNIT; } -EXPORT_SYMBOL(__ksize); int __kmem_cache_create(struct kmem_cache *c, slab_flags_t flags) { -- cgit v1.2.3 From d5eff736902d5565a24f1b571b5987b3e5ee9a5b Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Wed, 17 Aug 2022 19:18:26 +0900 Subject: mm/sl[au]b: check if large object is valid in __ksize() If address of large object is not beginning of folio or size of the folio is too small, it must be invalid. WARN() and return 0 in such cases. Cc: Marco Elver Suggested-by: Vlastimil Babka Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Vlastimil Babka Signed-off-by: Vlastimil Babka --- mm/slab_common.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 500eb777faca..7972ec4b9ca4 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -984,8 +984,13 @@ size_t __ksize(const void *object) folio = virt_to_folio(object); - if (unlikely(!folio_test_slab(folio))) + if (unlikely(!folio_test_slab(folio))) { + if (WARN_ON(folio_size(folio) <= KMALLOC_MAX_CACHE_SIZE)) + return 0; + if (WARN_ON(object != folio_address(folio))) + return 0; return folio_size(folio); + } return slab_ksize(folio_slab(folio)->slab_cache); } -- cgit v1.2.3