From d50d82faa0c964e31f7a946ba8aba7c715ca7ab0 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 27 Jun 2018 23:26:09 -0700 Subject: slub: fix failure when we delete and create a slab cache In kernel 4.17 I removed some code from dm-bufio that did slab cache merging (commit 21bb13276768: "dm bufio: remove code that merges slab caches") - both slab and slub support merging caches with identical attributes, so dm-bufio now just calls kmem_cache_create and relies on implicit merging. This uncovered a bug in the slub subsystem - if we delete a cache and immediatelly create another cache with the same attributes, it fails because of duplicate filename in /sys/kernel/slab/. The slub subsystem offloads freeing the cache to a workqueue - and if we create the new cache before the workqueue runs, it complains because of duplicate filename in sysfs. This patch fixes the bug by moving the call of kobject_del from sysfs_slab_remove_workfn to shutdown_cache. kobject_del must be called while we hold slab_mutex - so that the sysfs entry is deleted before a cache with the same attributes could be created. Running device-mapper-test-suite with: dmtest run --suite thin-provisioning -n /commit_failure_causes_fallback/ triggered: Buffer I/O error on dev dm-0, logical block 1572848, async page read device-mapper: thin: 253:1: metadata operation 'dm_pool_alloc_data_block' failed: error = -5 device-mapper: thin: 253:1: aborting current metadata transaction sysfs: cannot create duplicate filename '/kernel/slab/:a-0000144' CPU: 2 PID: 1037 Comm: kworker/u48:1 Not tainted 4.17.0.snitm+ #25 Hardware name: Supermicro SYS-1029P-WTR/X11DDW-L, BIOS 2.0a 12/06/2017 Workqueue: dm-thin do_worker [dm_thin_pool] Call Trace: dump_stack+0x5a/0x73 sysfs_warn_dup+0x58/0x70 sysfs_create_dir_ns+0x77/0x80 kobject_add_internal+0xba/0x2e0 kobject_init_and_add+0x70/0xb0 sysfs_slab_add+0xb1/0x250 __kmem_cache_create+0x116/0x150 create_cache+0xd9/0x1f0 kmem_cache_create_usercopy+0x1c1/0x250 kmem_cache_create+0x18/0x20 dm_bufio_client_create+0x1ae/0x410 [dm_bufio] dm_block_manager_create+0x5e/0x90 [dm_persistent_data] __create_persistent_data_objects+0x38/0x940 [dm_thin_pool] dm_pool_abort_metadata+0x64/0x90 [dm_thin_pool] metadata_operation_failed+0x59/0x100 [dm_thin_pool] alloc_data_block.isra.53+0x86/0x180 [dm_thin_pool] process_cell+0x2a3/0x550 [dm_thin_pool] do_worker+0x28d/0x8f0 [dm_thin_pool] process_one_work+0x171/0x370 worker_thread+0x49/0x3f0 kthread+0xf8/0x130 ret_from_fork+0x35/0x40 kobject_add_internal failed for :a-0000144 with -EEXIST, don't try to register things with the same name in the same directory. kmem_cache_create(dm_bufio_buffer-16) failed with error -17 Link: http://lkml.kernel.org/r/alpine.LRH.2.02.1806151817130.6333@file01.intranet.prod.int.rdu2.redhat.com Signed-off-by: Mikulas Patocka Reported-by: Mike Snitzer Tested-by: Mike Snitzer Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 4 ++++ mm/slab_common.c | 4 ++++ mm/slub.c | 7 ++++++- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 09fa2c6f0e68..3a1a1dbc6f49 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -155,8 +155,12 @@ struct kmem_cache { #ifdef CONFIG_SYSFS #define SLAB_SUPPORTS_SYSFS +void sysfs_slab_unlink(struct kmem_cache *); void sysfs_slab_release(struct kmem_cache *); #else +static inline void sysfs_slab_unlink(struct kmem_cache *s) +{ +} static inline void sysfs_slab_release(struct kmem_cache *s) { } diff --git a/mm/slab_common.c b/mm/slab_common.c index 890b1f04a03a..2296caf87bfb 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -567,10 +567,14 @@ static int shutdown_cache(struct kmem_cache *s) list_del(&s->list); if (s->flags & SLAB_TYPESAFE_BY_RCU) { +#ifdef SLAB_SUPPORTS_SYSFS + sysfs_slab_unlink(s); +#endif list_add_tail(&s->list, &slab_caches_to_rcu_destroy); schedule_work(&slab_caches_to_rcu_destroy_work); } else { #ifdef SLAB_SUPPORTS_SYSFS + sysfs_slab_unlink(s); sysfs_slab_release(s); #else slab_kmem_cache_release(s); diff --git a/mm/slub.c b/mm/slub.c index a3b8467c14af..51258eff4178 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -5667,7 +5667,6 @@ static void sysfs_slab_remove_workfn(struct work_struct *work) kset_unregister(s->memcg_kset); #endif kobject_uevent(&s->kobj, KOBJ_REMOVE); - kobject_del(&s->kobj); out: kobject_put(&s->kobj); } @@ -5752,6 +5751,12 @@ static void sysfs_slab_remove(struct kmem_cache *s) schedule_work(&s->kobj_remove_work); } +void sysfs_slab_unlink(struct kmem_cache *s) +{ + if (slab_state >= FULL) + kobject_del(&s->kobj); +} + void sysfs_slab_release(struct kmem_cache *s) { if (slab_state >= FULL) -- cgit v1.2.3