From 6e916a7eb1bc045f4e27355632ee7692014e6e60 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Thu, 3 May 2018 18:51:32 +0800 Subject: bcache: store disk name in struct cache and struct cached_dev Current code uses bdevname() or bio_devname() to reference gendisk disk name when bcache needs to display the disk names in kernel message. It was safe before bcache device failure handling patch set merged in, because when devices are failed, there was deadlock to prevent bcache printing error messages with gendisk disk name. But after the failure handling patch set merged, the deadlock is fixed, so it is possible that the gendisk structure bdev->hd_disk is released when bdevname() is called to reference bdev->bd_disk->disk_name[]. This is why I receive bug report of NULL pointers deference panic. This patch stores gendisk disk name in a buffer inside struct cache and struct cached_dev, then print out the offline device name won't reference bdev->hd_disk anymore. And this patch also avoids extra function calls of bdevname() and bio_devnmae(). Changelog: v3, add Reviewed-by from Hannes. v2, call bdevname() earlier in register_bdev() v1, first version with segguestion from Junhui Tang. Fixes: c7b7bd07404c5 ("bcache: add io_disable to struct cached_dev") Fixes: 5138ac6748e38 ("bcache: fix misleading error message in bch_count_io_errors()") Signed-off-by: Coly Li Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/md/bcache/bcache.h | 4 ++++ drivers/md/bcache/debug.c | 3 +-- drivers/md/bcache/io.c | 8 +++----- drivers/md/bcache/request.c | 5 +---- drivers/md/bcache/super.c | 44 +++++++++++++++++++++----------------------- 5 files changed, 30 insertions(+), 34 deletions(-) (limited to 'drivers') diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index d338b7086013..3a0cfb237af9 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -392,6 +392,8 @@ struct cached_dev { #define DEFAULT_CACHED_DEV_ERROR_LIMIT 64 atomic_t io_errors; unsigned error_limit; + + char backing_dev_name[BDEVNAME_SIZE]; }; enum alloc_reserve { @@ -464,6 +466,8 @@ struct cache { atomic_long_t meta_sectors_written; atomic_long_t btree_sectors_written; atomic_long_t sectors_written; + + char cache_dev_name[BDEVNAME_SIZE]; }; struct gc_stat { diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 028f7b386e01..4e63c6f6c04d 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -106,7 +106,6 @@ void bch_btree_verify(struct btree *b) void bch_data_verify(struct cached_dev *dc, struct bio *bio) { - char name[BDEVNAME_SIZE]; struct bio *check; struct bio_vec bv, cbv; struct bvec_iter iter, citer = { 0 }; @@ -134,7 +133,7 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio) bv.bv_len), dc->disk.c, "verify failed at dev %s sector %llu", - bdevname(dc->bdev, name), + dc->backing_dev_name, (uint64_t) bio->bi_iter.bi_sector); kunmap_atomic(p1); diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index 7fac97ae036e..2ddf8515e6a5 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -52,7 +52,6 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c, /* IO errors */ void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio) { - char buf[BDEVNAME_SIZE]; unsigned errors; WARN_ONCE(!dc, "NULL pointer of struct cached_dev"); @@ -60,7 +59,7 @@ void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio) errors = atomic_add_return(1, &dc->io_errors); if (errors < dc->error_limit) pr_err("%s: IO error on backing device, unrecoverable", - bio_devname(bio, buf)); + dc->backing_dev_name); else bch_cached_dev_error(dc); } @@ -105,19 +104,18 @@ void bch_count_io_errors(struct cache *ca, } if (error) { - char buf[BDEVNAME_SIZE]; unsigned errors = atomic_add_return(1 << IO_ERROR_SHIFT, &ca->io_errors); errors >>= IO_ERROR_SHIFT; if (errors < ca->set->error_limit) pr_err("%s: IO error on %s%s", - bdevname(ca->bdev, buf), m, + ca->cache_dev_name, m, is_read ? ", recovering." : "."); else bch_cache_set_error(ca->set, "%s: too many IO errors %s", - bdevname(ca->bdev, buf), m); + ca->cache_dev_name, m); } } diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index a65e3365eeb9..8e3e8655ed63 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -649,11 +649,8 @@ static void backing_request_endio(struct bio *bio) */ if (unlikely(s->iop.writeback && bio->bi_opf & REQ_PREFLUSH)) { - char buf[BDEVNAME_SIZE]; - - bio_devname(bio, buf); pr_err("Can't flush %s: returned bi_status %i", - buf, bio->bi_status); + dc->backing_dev_name, bio->bi_status); } else { /* set to orig_bio->bi_status in bio_complete() */ s->iop.status = bio->bi_status; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index d90d9e59ca00..8196b19fada2 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -936,7 +936,6 @@ static void cancel_writeback_rate_update_dwork(struct cached_dev *dc) static void cached_dev_detach_finish(struct work_struct *w) { struct cached_dev *dc = container_of(w, struct cached_dev, detach); - char buf[BDEVNAME_SIZE]; struct closure cl; closure_init_stack(&cl); @@ -967,7 +966,7 @@ static void cached_dev_detach_finish(struct work_struct *w) mutex_unlock(&bch_register_lock); - pr_info("Caching disabled for %s", bdevname(dc->bdev, buf)); + pr_info("Caching disabled for %s", dc->backing_dev_name); /* Drop ref we took in cached_dev_detach() */ closure_put(&dc->disk.cl); @@ -999,29 +998,28 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, { uint32_t rtime = cpu_to_le32(get_seconds()); struct uuid_entry *u; - char buf[BDEVNAME_SIZE]; struct cached_dev *exist_dc, *t; - bdevname(dc->bdev, buf); - if ((set_uuid && memcmp(set_uuid, c->sb.set_uuid, 16)) || (!set_uuid && memcmp(dc->sb.set_uuid, c->sb.set_uuid, 16))) return -ENOENT; if (dc->disk.c) { - pr_err("Can't attach %s: already attached", buf); + pr_err("Can't attach %s: already attached", + dc->backing_dev_name); return -EINVAL; } if (test_bit(CACHE_SET_STOPPING, &c->flags)) { - pr_err("Can't attach %s: shutting down", buf); + pr_err("Can't attach %s: shutting down", + dc->backing_dev_name); return -EINVAL; } if (dc->sb.block_size < c->sb.block_size) { /* Will die */ pr_err("Couldn't attach %s: block size less than set's block size", - buf); + dc->backing_dev_name); return -EINVAL; } @@ -1029,7 +1027,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, list_for_each_entry_safe(exist_dc, t, &c->cached_devs, list) { if (!memcmp(dc->sb.uuid, exist_dc->sb.uuid, 16)) { pr_err("Tried to attach %s but duplicate UUID already attached", - buf); + dc->backing_dev_name); return -EINVAL; } @@ -1047,13 +1045,15 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, if (!u) { if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { - pr_err("Couldn't find uuid for %s in set", buf); + pr_err("Couldn't find uuid for %s in set", + dc->backing_dev_name); return -ENOENT; } u = uuid_find_empty(c); if (!u) { - pr_err("Not caching %s, no room for UUID", buf); + pr_err("Not caching %s, no room for UUID", + dc->backing_dev_name); return -EINVAL; } } @@ -1112,7 +1112,8 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, up_write(&dc->writeback_lock); pr_info("Caching %s as %s on set %pU", - bdevname(dc->bdev, buf), dc->disk.disk->disk_name, + dc->backing_dev_name, + dc->disk.disk->disk_name, dc->disk.c->sb.set_uuid); return 0; } @@ -1225,10 +1226,10 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page, struct block_device *bdev, struct cached_dev *dc) { - char name[BDEVNAME_SIZE]; const char *err = "cannot allocate memory"; struct cache_set *c; + bdevname(bdev, dc->backing_dev_name); memcpy(&dc->sb, sb, sizeof(struct cache_sb)); dc->bdev = bdev; dc->bdev->bd_holder = dc; @@ -1237,6 +1238,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page, bio_first_bvec_all(&dc->sb_bio)->bv_page = sb_page; get_page(sb_page); + if (cached_dev_init(dc, sb->block_size << 9)) goto err; @@ -1247,7 +1249,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page, if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj)) goto err; - pr_info("registered backing device %s", bdevname(bdev, name)); + pr_info("registered backing device %s", dc->backing_dev_name); list_add(&dc->list, &uncached_devices); list_for_each_entry(c, &bch_cache_sets, list) @@ -1259,7 +1261,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page, return; err: - pr_notice("error %s: %s", bdevname(bdev, name), err); + pr_notice("error %s: %s", dc->backing_dev_name, err); bcache_device_stop(&dc->disk); } @@ -1367,8 +1369,6 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size) bool bch_cached_dev_error(struct cached_dev *dc) { - char name[BDEVNAME_SIZE]; - if (!dc || test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags)) return false; @@ -1377,7 +1377,7 @@ bool bch_cached_dev_error(struct cached_dev *dc) smp_mb(); pr_err("stop %s: too many IO errors on backing device %s\n", - dc->disk.disk->disk_name, bdevname(dc->bdev, name)); + dc->disk.disk->disk_name, dc->backing_dev_name); bcache_device_stop(&dc->disk); return true; @@ -2003,12 +2003,10 @@ static int cache_alloc(struct cache *ca) static int register_cache(struct cache_sb *sb, struct page *sb_page, struct block_device *bdev, struct cache *ca) { - char name[BDEVNAME_SIZE]; const char *err = NULL; /* must be set for any error case */ int ret = 0; - bdevname(bdev, name); - + bdevname(bdev, ca->cache_dev_name); memcpy(&ca->sb, sb, sizeof(struct cache_sb)); ca->bdev = bdev; ca->bdev->bd_holder = ca; @@ -2045,14 +2043,14 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page, goto out; } - pr_info("registered cache device %s", name); + pr_info("registered cache device %s", ca->cache_dev_name); out: kobject_put(&ca->kobj); err: if (err) - pr_notice("error %s: %s", name, err); + pr_notice("error %s: %s", ca->cache_dev_name, err); return ret; } -- cgit v1.2.3 From 6147305c73e4511ca1a975b766b97a779d442567 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Thu, 3 May 2018 18:51:33 +0800 Subject: bcache: set CACHE_SET_IO_DISABLE in bch_cached_dev_error() Commit c7b7bd07404c5 ("bcache: add io_disable to struct cached_dev") tries to stop bcache device by calling bcache_device_stop() when too many I/O errors happened on backing device. But if there is internal I/O happening on cache device (writeback scan, garbage collection, etc), a regular I/O request triggers the internal I/Os may still holds a refcount of dc->count, and the refcount may only be dropped after the internal I/O stopped. By this patch, bch_cached_dev_error() will check if the backing device is attached to a cache set, if yes that CACHE_SET_IO_DISABLE will be set to flags of this cache set. Then internal I/Os on cache device will be rejected and stopped immediately, and the bcache device can be stopped. For people who are not familiar with the interesting refcount dependance, let me explain a bit more how the fix works. Example the writeback thread will scan cache device for dirty data writeback purpose. Before it stopps, it holds a refcount of dc->count. When CACHE_SET_IO_DISABLE bit is set, the internal I/O will stopped and the while-loop in bch_writeback_thread() quits and calls cached_dev_put() to drop dc->count. If this is the last refcount to drop, then cached_dev_detach_finish() will be called. In this call back function, in turn closure_put(dc->disk.cl) is called to drop a refcount of closure dc->disk.cl. If this is the last refcount of this closure to drop, then cached_dev_flush() will be called. Then the cached device is freed. So if CACHE_SET_IO_DISABLE is not set, the bache device can not be stopped until all inernal cache device I/O stopped. For large size cache device, and writeback thread competes locks with gc thread, there might be a quite long time to wait. Fixes: c7b7bd07404c5 ("bcache: add io_disable to struct cached_dev") Signed-off-by: Coly Li Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'drivers') diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 8196b19fada2..c017cd444c66 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1369,6 +1369,8 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size) bool bch_cached_dev_error(struct cached_dev *dc) { + struct cache_set *c; + if (!dc || test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags)) return false; @@ -1379,6 +1381,21 @@ bool bch_cached_dev_error(struct cached_dev *dc) pr_err("stop %s: too many IO errors on backing device %s\n", dc->disk.disk->disk_name, dc->backing_dev_name); + /* + * If the cached device is still attached to a cache set, + * even dc->io_disable is true and no more I/O requests + * accepted, cache device internal I/O (writeback scan or + * garbage collection) may still prevent bcache device from + * being stopped. So here CACHE_SET_IO_DISABLE should be + * set to c->flags too, to make the internal I/O to cache + * device rejected and stopped immediately. + * If c is NULL, that means the bcache device is not attached + * to any cache set, then no CACHE_SET_IO_DISABLE bit to set. + */ + c = dc->disk.c; + if (c && test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags)) + pr_info("CACHE_SET_IO_DISABLE already set"); + bcache_device_stop(&dc->disk); return true; } -- cgit v1.2.3 From bf78980fcc58bad2d61858ce342153a3dd097aa0 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Thu, 3 May 2018 18:51:34 +0800 Subject: bcache: count backing device I/O error for writeback I/O Commit c7b7bd07404c5 ("bcache: add io_disable to struct cached_dev") counts backing device I/O requets and set dc->io_disable to true if error counters exceeds dc->io_error_limit. But it only counts I/O errors for regular I/O request, neglects errors of write back I/Os when backing device is offline. This patch counts the errors of writeback I/Os, in dirty_endio() if bio->bi_status is not 0, it means error happens when writing dirty keys to backing device, then bch_count_backing_io_errors() is called. By this fix, even there is no reqular I/O request coming, if writeback I/O errors exceed dc->io_error_limit, the bcache device may still be stopped for the broken backing device. Fixes: c7b7bd07404c5 ("bcache: add io_disable to struct cached_dev") Signed-off-by: Coly Li Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/md/bcache/writeback.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 4a9547cdcdc5..ad45ebe1a74b 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -244,8 +244,10 @@ static void dirty_endio(struct bio *bio) struct keybuf_key *w = bio->bi_private; struct dirty_io *io = w->private; - if (bio->bi_status) + if (bio->bi_status) { SET_KEY_DIRTY(&w->key, false); + bch_count_backing_io_errors(io->dc, bio); + } closure_put(&io->cl); } -- cgit v1.2.3 From ecb2ba8cb83549f1bb06bc7e693ae8fed43c0e4f Mon Sep 17 00:00:00 2001 From: Coly Li Date: Thu, 3 May 2018 18:51:35 +0800 Subject: bcache: add wait_for_kthread_stop() in bch_allocator_thread() When CACHE_SET_IO_DISABLE is set on cache set flags, bcache allocator thread routine bch_allocator_thread() may stop the while-loops and exit. Then it is possible to observe the following kernel oops message, [ 631.068366] bcache: bch_btree_insert() error -5 [ 631.069115] bcache: cached_dev_detach_finish() Caching disabled for sdf [ 631.070220] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 [ 631.070250] PGD 0 P4D 0 [ 631.070261] Oops: 0002 [#1] SMP PTI [snipped] [ 631.070578] Workqueue: events cache_set_flush [bcache] [ 631.070597] RIP: 0010:exit_creds+0x1b/0x50 [ 631.070610] RSP: 0018:ffffc9000705fe08 EFLAGS: 00010246 [ 631.070626] RAX: 0000000000000001 RBX: ffff880a622ad300 RCX: 000000000000000b [ 631.070645] RDX: 0000000000000601 RSI: 000000000000000c RDI: 0000000000000000 [ 631.070663] RBP: ffff880a622ad300 R08: ffffea00190c66e0 R09: 0000000000000200 [ 631.070682] R10: ffff880a48123000 R11: ffff880000000000 R12: 0000000000000000 [ 631.070700] R13: ffff880a4b160e40 R14: ffff880a4b160000 R15: 0ffff880667e2530 [ 631.070719] FS: 0000000000000000(0000) GS:ffff880667e00000(0000) knlGS:0000000000000000 [ 631.070740] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 631.070755] CR2: 0000000000000000 CR3: 000000000200a001 CR4: 00000000003606e0 [ 631.070774] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 631.070793] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 631.070811] Call Trace: [ 631.070828] __put_task_struct+0x55/0x160 [ 631.070845] kthread_stop+0xee/0x100 [ 631.070863] cache_set_flush+0x11d/0x1a0 [bcache] [ 631.070879] process_one_work+0x146/0x340 [ 631.070892] worker_thread+0x47/0x3e0 [ 631.070906] kthread+0xf5/0x130 [ 631.070917] ? max_active_store+0x60/0x60 [ 631.070930] ? kthread_bind+0x10/0x10 [ 631.070945] ret_from_fork+0x35/0x40 [snipped] [ 631.071017] RIP: exit_creds+0x1b/0x50 RSP: ffffc9000705fe08 [ 631.071033] CR2: 0000000000000000 [ 631.071045] ---[ end trace 011c63a24b22c927 ]--- [ 631.071085] bcache: bcache_device_free() bcache0 stopped The reason is when cache_set_flush() tries to call kthread_stop() to stop allocator thread, but it exits already due to cache device I/O errors. This patch adds wait_for_kthread_stop() at tail of bch_allocator_thread(), to prevent the thread routine exiting directly. Then the allocator thread can be blocked at wait_for_kthread_stop() and wait for cache_set_flush() to stop it by calling kthread_stop(). changelog: v3: add Reviewed-by from Hannnes. v2: not directly return from allocator_wait(), move 'return 0' to tail of bch_allocator_thread(). v1: initial version. Fixes: 771f393e8ffc ("bcache: add CACHE_SET_IO_DISABLE to struct cache_set flags") Signed-off-by: Coly Li Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/md/bcache/alloc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 004cc3cc6123..7fa2631b422c 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -290,7 +290,7 @@ do { \ if (kthread_should_stop() || \ test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)) { \ set_current_state(TASK_RUNNING); \ - return 0; \ + goto out; \ } \ \ schedule(); \ @@ -378,6 +378,9 @@ retry_invalidate: bch_prio_write(ca); } } +out: + wait_for_kthread_stop(); + return 0; } /* Allocation */ -- cgit v1.2.3 From 4fd8e13843978cbba48b8c21119da60c7fd5910d Mon Sep 17 00:00:00 2001 From: Coly Li Date: Thu, 3 May 2018 18:51:36 +0800 Subject: bcache: set dc->io_disable to true in conditional_stop_bcache_device() Commit 7e027ca4b534b ("bcache: add stop_when_cache_set_failed option to backing device") adds stop_when_cache_set_failed option and stops bcache device if stop_when_cache_set_failed is auto and there is dirty data on broken cache device. There might exists a small time gap that the cache set is released and set to NULL but bcache device is not released yet (because they are released in parallel). During this time gap, dc->c is NULL so CACHE_SET_IO_DISABLE won't be checked, and dc->io_disable is still false, so new coming I/O requests will be accepted and directly go into backing device as no cache set attached to. If there is dirty data on cache device, this behavior may introduce potential inconsistent data. This patch sets dc->io_disable to true before calling bcache_device_stop() to make sure the backing device will reject new coming I/O request as well, so even in the small time gap no I/O will directly go into backing device to corrupt data consistency. Fixes: 7e027ca4b534b ("bcache: add stop_when_cache_set_failed option to backing device") Signed-off-by: Coly Li Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'drivers') diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index c017cd444c66..cedbb41533c2 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1556,6 +1556,20 @@ static void conditional_stop_bcache_device(struct cache_set *c, */ pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.", d->disk->disk_name); + /* + * There might be a small time gap that cache set is + * released but bcache device is not. Inside this time + * gap, regular I/O requests will directly go into + * backing device as no cache set attached to. This + * behavior may also introduce potential inconsistence + * data in writeback mode while cache is dirty. + * Therefore before calling bcache_device_stop() due + * to a broken cache device, dc->io_disable should be + * explicitly set to true. + */ + dc->io_disable = true; + /* make others know io_disable is true earlier */ + smp_mb(); bcache_device_stop(d); } else { /* -- cgit v1.2.3 From 09a44ca2114737e0932257619c16a2b50c7807f1 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Thu, 3 May 2018 18:51:37 +0800 Subject: bcache: use pr_info() to inform duplicated CACHE_SET_IO_DISABLE set It is possible that multiple I/O requests hits on failed cache device or backing device, therefore it is quite common that CACHE_SET_IO_DISABLE is set already when a task tries to set the bit from bch_cache_set_error(). Currently the message "CACHE_SET_IO_DISABLE already set" is printed by pr_warn(), which might mislead users to think a serious fault happens in source code. This patch uses pr_info() to print the information in such situation, avoid extra worries. This information is helpful to understand bcache behavior in cache device failures, so I still keep them in source code. Fixes: 771f393e8ffc9 ("bcache: add CACHE_SET_IO_DISABLE to struct cache_set flags") Signed-off-by: Coly Li Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index cedbb41533c2..3dea06b41d43 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1412,7 +1412,7 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...) return false; if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags)) - pr_warn("CACHE_SET_IO_DISABLE already set"); + pr_info("CACHE_SET_IO_DISABLE already set"); /* XXX: we can be called from atomic context acquire_console_sem(); -- cgit v1.2.3 From 59a2f3f00fd744dbad22593f47552037d3154ca6 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Sat, 14 Apr 2018 20:06:19 +0800 Subject: nvme: fix potential memory leak in option parsing When specifying same string type option several times, current option parsing may cause memory leak. Hence, call kfree for previous one in this case. Signed-off-by: Chengguang Xu Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/nvme/host/fabrics.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers') diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 124c458806df..7ae732a77fe8 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -668,6 +668,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, ret = -ENOMEM; goto out; } + kfree(opts->transport); opts->transport = p; break; case NVMF_OPT_NQN: @@ -676,6 +677,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, ret = -ENOMEM; goto out; } + kfree(opts->subsysnqn); opts->subsysnqn = p; nqnlen = strlen(opts->subsysnqn); if (nqnlen >= NVMF_NQN_SIZE) { @@ -698,6 +700,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, ret = -ENOMEM; goto out; } + kfree(opts->traddr); opts->traddr = p; break; case NVMF_OPT_TRSVCID: @@ -706,6 +709,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, ret = -ENOMEM; goto out; } + kfree(opts->trsvcid); opts->trsvcid = p; break; case NVMF_OPT_QUEUE_SIZE: @@ -792,6 +796,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, ret = -EINVAL; goto out; } + nvmf_host_put(opts->host); opts->host = nvmf_host_add(p); kfree(p); if (!opts->host) { @@ -817,6 +822,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, ret = -ENOMEM; goto out; } + kfree(opts->host_traddr); opts->host_traddr = p; break; case NVMF_OPT_HOST_ID: -- cgit v1.2.3 From f31a21103c03bb62846409fdc60cc9faf2398cfb Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 17 Apr 2018 14:42:44 -0600 Subject: nvme: Set integrity flag for user passthrough commands If the command a separate metadata buffer attached, the request needs to have the integrity flag set so the driver knows to map it. Signed-off-by: Keith Busch Reviewed-by: Martin K. Petersen Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 9df4f71e58ca..127a9cbf3314 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -764,6 +764,7 @@ static int nvme_submit_user_cmd(struct request_queue *q, ret = PTR_ERR(meta); goto out_unmap; } + req->cmd_flags |= REQ_INTEGRITY; } } -- cgit v1.2.3 From 5cadde8019a6a80550fdde92d5a3327565974eab Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 26 Apr 2018 14:24:29 -0600 Subject: nvme/multipath: Disable runtime writable enabling parameter We can't allow the user to change multipath settings at runtime, as this will create naming conflicts due to the different naming schemes used for each mode. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/multipath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 956e0b8e9c4d..3ded009e7631 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -15,7 +15,7 @@ #include "nvme.h" static bool multipath = true; -module_param(multipath, bool, 0644); +module_param(multipath, bool, 0444); MODULE_PARM_DESC(multipath, "turn on native support for multiple controllers per subsystem"); -- cgit v1.2.3 From a785dbccd95c37606c720580714f5a7a8b3255f1 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 26 Apr 2018 14:22:41 -0600 Subject: nvme/multipath: Fix multipath disabled naming collisions When CONFIG_NVME_MULTIPATH is set, but we're not using nvme to multipath, namespaces with multiple paths were not creating unique names due to reusing the same instance number from the namespace's head. This patch fixes this by falling back to the non-multipath naming method when the parameter disabled using multipath. Reported-by: Mike Snitzer Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 26 +------------------------- drivers/nvme/host/multipath.c | 22 ++++++++++++++++++++++ drivers/nvme/host/nvme.h | 12 ++++++++++++ 3 files changed, 35 insertions(+), 25 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 127a9cbf3314..a3771c5729f5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2998,31 +2998,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) if (nvme_init_ns_head(ns, nsid, id)) goto out_free_id; nvme_setup_streams_ns(ctrl, ns); - -#ifdef CONFIG_NVME_MULTIPATH - /* - * If multipathing is enabled we need to always use the subsystem - * instance number for numbering our devices to avoid conflicts - * between subsystems that have multiple controllers and thus use - * the multipath-aware subsystem node and those that have a single - * controller and use the controller node directly. - */ - if (ns->head->disk) { - sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance, - ctrl->cntlid, ns->head->instance); - flags = GENHD_FL_HIDDEN; - } else { - sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance, - ns->head->instance); - } -#else - /* - * But without the multipath code enabled, multiple controller per - * subsystems are visible as devices and thus we cannot use the - * subsystem instance. - */ - sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance); -#endif + nvme_set_disk_name(disk_name, ns, ctrl, &flags); if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) { if (nvme_nvm_register(ns, disk_name, node)) { diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 3ded009e7631..d7b664ae5923 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -19,6 +19,28 @@ module_param(multipath, bool, 0444); MODULE_PARM_DESC(multipath, "turn on native support for multiple controllers per subsystem"); +/* + * If multipathing is enabled we need to always use the subsystem instance + * number for numbering our devices to avoid conflicts between subsystems that + * have multiple controllers and thus use the multipath-aware subsystem node + * and those that have a single controller and use the controller node + * directly. + */ +void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, + struct nvme_ctrl *ctrl, int *flags) +{ + if (!multipath) { + sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance); + } else if (ns->head->disk) { + sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance, + ctrl->cntlid, ns->head->instance); + *flags = GENHD_FL_HIDDEN; + } else { + sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance, + ns->head->instance); + } +} + void nvme_failover_req(struct request *req) { struct nvme_ns *ns = req->q->queuedata; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 061fecfd44f5..7ded7a51c430 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -436,6 +436,8 @@ extern const struct attribute_group nvme_ns_id_attr_group; extern const struct block_device_operations nvme_ns_head_ops; #ifdef CONFIG_NVME_MULTIPATH +void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, + struct nvme_ctrl *ctrl, int *flags); void nvme_failover_req(struct request *req); bool nvme_req_needs_failover(struct request *req, blk_status_t error); void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl); @@ -461,6 +463,16 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) } #else +/* + * Without the multipath code enabled, multiple controller per subsystems are + * visible as devices and thus we cannot use the subsystem instance. + */ +static inline void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, + struct nvme_ctrl *ctrl, int *flags) +{ + sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance); +} + static inline void nvme_failover_req(struct request *req) { } -- cgit v1.2.3 From 8bfc3b4c6f9de815de4ab73784b9419348266a65 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 3 May 2018 17:00:35 +0200 Subject: nvmet: switch loopback target state to connecting when resetting After commit bb06ec31452f ("nvme: expand nvmf_check_if_ready checks") resetting of the loopback nvme target failed as we forgot to switch it's state to NVME_CTRL_CONNECTING before we reconnect the admin queues. Therefore the checks in nvmf_check_if_ready() choose to go to the reject_io case and thus we couldn't sent out an identify controller command to reconnect. Change the controller state to NVME_CTRL_CONNECTING after tearing down the old connection and before re-establishing the connection. Fixes: bb06ec31452f ("nvme: expand nvmf_check_if_ready checks") Signed-off-by: Johannes Thumshirn Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/nvme/target/loop.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers') diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 31fdfba556a8..27a8561c0cb9 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -469,6 +469,12 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) nvme_stop_ctrl(&ctrl->ctrl); nvme_loop_shutdown_ctrl(ctrl); + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { + /* state change failure should never happen */ + WARN_ON_ONCE(1); + return; + } + ret = nvme_loop_configure_admin_queue(ctrl); if (ret) goto out_disable; -- cgit v1.2.3