From b234c6d7a703661b5045c5bf569b7c99d2edbf88 Mon Sep 17 00:00:00 2001
From: Dmitry Fomichev
Date: Sat, 10 Aug 2019 14:43:09 -0700
Subject: dm zoned: improve error handling in reclaim

There are several places in the reclaim code where errors are not
propagated to the main function, dmz_reclaim(). This function is
responsible for unlocking zones that might still be locked at the end
of any failed reclaim iteration. As a result, some device zones may be
left permanently locked for reclaim, degrading the target's capability
to reclaim zones.

This patch fixes these issues as follows -

Make sure that dmz_reclaim_buf(), dmz_reclaim_seq_data() and
dmz_reclaim_rnd_data() return error codes to the caller. The
dmz_reclaim() function is renamed to dmz_do_reclaim() to avoid clashing
with "struct dmz_reclaim" and is modified to return the error to the
caller. dmz_get_zone_for_reclaim() now returns an error instead of a
NULL pointer and the reclaim code checks for that error. Error
logging/debug messages are added where necessary.

Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target")
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Fomichev
Reviewed-by: Damien Le Moal
Signed-off-by: Mike Snitzer
---
 drivers/md/dm-zoned-reclaim.c | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

(limited to 'drivers/md/dm-zoned-reclaim.c')

diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c
index edf4b95eb075..e381354dc136 100644
--- a/drivers/md/dm-zoned-reclaim.c
+++ b/drivers/md/dm-zoned-reclaim.c
@@ -215,7 +215,7 @@ static int dmz_reclaim_buf(struct dmz_reclaim *zrc, struct dm_zone *dzone)
 
 	dmz_unlock_flush(zmd);
 
-	return 0;
+	return ret;
 }
 
 /*
@@ -259,7 +259,7 @@ static int dmz_reclaim_seq_data(struct dmz_reclaim *zrc, struct dm_zone *dzone)
 
 	dmz_unlock_flush(zmd);
 
-	return 0;
+	return ret;
 }
 
 /*
@@ -312,7 +312,7 @@ static int dmz_reclaim_rnd_data(struct dmz_reclaim *zrc, struct dm_zone *dzone)
 
 	dmz_unlock_flush(zmd);
 
-	return 0;
+	return ret;
 }
 
 /*
@@ -334,7 +334,7 @@ static void dmz_reclaim_empty(struct dmz_reclaim *zrc, struct dm_zone *dzone)
 /*
  * Find a candidate zone for reclaim and process it.
  */
-static void dmz_reclaim(struct dmz_reclaim *zrc)
+static int dmz_do_reclaim(struct dmz_reclaim *zrc)
 {
 	struct dmz_metadata *zmd = zrc->metadata;
 	struct dm_zone *dzone;
@@ -344,8 +344,8 @@ static void dmz_reclaim(struct dmz_reclaim *zrc)
 
 	/* Get a data zone */
 	dzone = dmz_get_zone_for_reclaim(zmd);
-	if (!dzone)
-		return;
+	if (IS_ERR(dzone))
+		return PTR_ERR(dzone);
 
 	start = jiffies;
 
@@ -391,13 +391,20 @@ static void dmz_reclaim(struct dmz_reclaim *zrc)
 out:
 	if (ret) {
 		dmz_unlock_zone_reclaim(dzone);
-		return;
+		return ret;
 	}
 
-	(void) dmz_flush_metadata(zrc->metadata);
+	ret = dmz_flush_metadata(zrc->metadata);
+	if (ret) {
+		dmz_dev_debug(zrc->dev,
+			      "Metadata flush for zone %u failed, err %d\n",
+			      dmz_id(zmd, rzone), ret);
+		return ret;
+	}
 
 	dmz_dev_debug(zrc->dev, "Reclaimed zone %u in %u ms",
 		      dmz_id(zmd, rzone), jiffies_to_msecs(jiffies - start));
+	return 0;
 }
 
 /*
@@ -442,6 +449,7 @@ static void dmz_reclaim_work(struct work_struct *work)
 	struct dmz_metadata *zmd = zrc->metadata;
 	unsigned int nr_rnd, nr_unmap_rnd;
 	unsigned int p_unmap_rnd;
+	int ret;
 
 	if (!dmz_should_reclaim(zrc)) {
 		mod_delayed_work(zrc->wq, &zrc->work, DMZ_IDLE_PERIOD);
@@ -471,7 +479,9 @@ static void dmz_reclaim_work(struct work_struct *work)
 		      (dmz_target_idle(zrc) ? "Idle" : "Busy"),
"Idle" : "Busy"), p_unmap_rnd, nr_unmap_rnd, nr_rnd); - dmz_reclaim(zrc); + ret = dmz_do_reclaim(zrc); + if (ret) + dmz_dev_debug(zrc->dev, "Reclaim error %d\n", ret); dmz_schedule_reclaim(zrc); } -- cgit v1.2.3 From 75d66ffb48efb30f2dd42f041ba8b39c5b2bd115 Mon Sep 17 00:00:00 2001 From: Dmitry Fomichev Date: Sat, 10 Aug 2019 14:43:11 -0700 Subject: dm zoned: properly handle backing device failure dm-zoned is observed to lock up or livelock in case of hardware failure or some misconfiguration of the backing zoned device. This patch adds a new dm-zoned target function that checks the status of the backing device. If the request queue of the backing device is found to be in dying state or the SCSI backing device enters offline state, the health check code sets a dm-zoned target flag prompting all further incoming I/O to be rejected. In order to detect backing device failures timely, this new function is called in the request mapping path, at the beginning of every reclaim run and before performing any metadata I/O. The proper way out of this situation is to do dmsetup remove and recreate the target when the problem with the backing device is resolved. Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Fomichev Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 51 +++++++++++++++++++++++++++++++++--------- drivers/md/dm-zoned-reclaim.c | 18 +++++++++++++-- drivers/md/dm-zoned-target.c | 45 +++++++++++++++++++++++++++++++++++-- drivers/md/dm-zoned.h | 10 +++++++++ 4 files changed, 110 insertions(+), 14 deletions(-) (limited to 'drivers/md/dm-zoned-reclaim.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 935d9be5af39..2882897aece2 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -402,15 +402,18 @@ static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd, sector_t block = zmd->sb[zmd->mblk_primary].block + mblk_no; struct bio *bio; + if (dmz_bdev_is_dying(zmd->dev)) + return ERR_PTR(-EIO); + /* Get a new block and a BIO to read it */ mblk = dmz_alloc_mblock(zmd, mblk_no); if (!mblk) - return NULL; + return ERR_PTR(-ENOMEM); bio = bio_alloc(GFP_NOIO, 1); if (!bio) { dmz_free_mblock(zmd, mblk); - return NULL; + return ERR_PTR(-ENOMEM); } spin_lock(&zmd->mblk_lock); @@ -541,8 +544,8 @@ static struct dmz_mblock *dmz_get_mblock(struct dmz_metadata *zmd, if (!mblk) { /* Cache miss: read the block from disk */ mblk = dmz_get_mblock_slow(zmd, mblk_no); - if (!mblk) - return ERR_PTR(-ENOMEM); + if (IS_ERR(mblk)) + return mblk; } /* Wait for on-going read I/O and check for error */ @@ -570,16 +573,19 @@ static void dmz_dirty_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk) /* * Issue a metadata block write BIO. 
  */
-static void dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk,
-			     unsigned int set)
+static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk,
+			    unsigned int set)
 {
 	sector_t block = zmd->sb[set].block + mblk->no;
 	struct bio *bio;
 
+	if (dmz_bdev_is_dying(zmd->dev))
+		return -EIO;
+
 	bio = bio_alloc(GFP_NOIO, 1);
 	if (!bio) {
 		set_bit(DMZ_META_ERROR, &mblk->state);
-		return;
+		return -ENOMEM;
 	}
 
 	set_bit(DMZ_META_WRITING, &mblk->state);
@@ -591,6 +597,8 @@ static void dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk,
 	bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_META | REQ_PRIO);
 	bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0);
 	submit_bio(bio);
+
+	return 0;
 }
 
 /*
@@ -602,6 +610,9 @@ static int dmz_rdwr_block(struct dmz_metadata *zmd, int op, sector_t block,
 	struct bio *bio;
 	int ret;
 
+	if (dmz_bdev_is_dying(zmd->dev))
+		return -EIO;
+
 	bio = bio_alloc(GFP_NOIO, 1);
 	if (!bio)
 		return -ENOMEM;
@@ -659,22 +670,29 @@ static int dmz_write_dirty_mblocks(struct dmz_metadata *zmd,
 {
 	struct dmz_mblock *mblk;
 	struct blk_plug plug;
-	int ret = 0;
+	int ret = 0, nr_mblks_submitted = 0;
 
 	/* Issue writes */
 	blk_start_plug(&plug);
-	list_for_each_entry(mblk, write_list, link)
-		dmz_write_mblock(zmd, mblk, set);
+	list_for_each_entry(mblk, write_list, link) {
+		ret = dmz_write_mblock(zmd, mblk, set);
+		if (ret)
+			break;
+		nr_mblks_submitted++;
+	}
 	blk_finish_plug(&plug);
 
 	/* Wait for completion */
 	list_for_each_entry(mblk, write_list, link) {
+		if (!nr_mblks_submitted)
+			break;
 		wait_on_bit_io(&mblk->state, DMZ_META_WRITING,
			       TASK_UNINTERRUPTIBLE);
 		if (test_bit(DMZ_META_ERROR, &mblk->state)) {
 			clear_bit(DMZ_META_ERROR, &mblk->state);
 			ret = -EIO;
 		}
+		nr_mblks_submitted--;
 	}
 
 	/* Flush drive cache (this will also sync data) */
@@ -736,6 +754,11 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
 	 */
 	dmz_lock_flush(zmd);
 
+	if (dmz_bdev_is_dying(zmd->dev)) {
+		ret = -EIO;
+		goto out;
+	}
+
 	/* Get dirty blocks */
 	spin_lock(&zmd->mblk_lock);
 	list_splice_init(&zmd->mblk_dirty_list, &write_list);
@@ -1631,6 +1654,10 @@ again:
 	/* Alloate a random zone */
 	dzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND);
 	if (!dzone) {
+		if (dmz_bdev_is_dying(zmd->dev)) {
+			dzone = ERR_PTR(-EIO);
+			goto out;
+		}
 		dmz_wait_for_free_zones(zmd);
 		goto again;
 	}
@@ -1728,6 +1755,10 @@ again:
 	/* Alloate a random zone */
 	bzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND);
 	if (!bzone) {
+		if (dmz_bdev_is_dying(zmd->dev)) {
+			bzone = ERR_PTR(-EIO);
+			goto out;
+		}
 		dmz_wait_for_free_zones(zmd);
 		goto again;
 	}
diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c
index e381354dc136..9470b8f77a33 100644
--- a/drivers/md/dm-zoned-reclaim.c
+++ b/drivers/md/dm-zoned-reclaim.c
@@ -37,7 +37,7 @@ enum {
 /*
  * Number of seconds of target BIO inactivity to consider the target idle.
  */
-#define DMZ_IDLE_PERIOD	(10UL * HZ)
+#define DMZ_IDLE_PERIOD		(10UL * HZ)
 
 /*
  * Percentage of unmapped (free) random zones below which reclaim starts
@@ -134,6 +134,9 @@ static int dmz_reclaim_copy(struct dmz_reclaim *zrc,
 		set_bit(DM_KCOPYD_WRITE_SEQ, &flags);
 
 	while (block < end_block) {
+		if (dev->flags & DMZ_BDEV_DYING)
+			return -EIO;
+
 		/* Get a valid region from the source zone */
 		ret = dmz_first_valid_block(zmd, src_zone, &block);
 		if (ret <= 0)
@@ -451,6 +454,9 @@ static void dmz_reclaim_work(struct work_struct *work)
 	unsigned int p_unmap_rnd;
 	int ret;
 
+	if (dmz_bdev_is_dying(zrc->dev))
+		return;
+
 	if (!dmz_should_reclaim(zrc)) {
 		mod_delayed_work(zrc->wq, &zrc->work, DMZ_IDLE_PERIOD);
 		return;
 	}
@@ -480,8 +486,16 @@ static void dmz_reclaim_work(struct work_struct *work)
 		      p_unmap_rnd, nr_unmap_rnd, nr_rnd);
 
 	ret = dmz_do_reclaim(zrc);
-	if (ret)
+	if (ret) {
 		dmz_dev_debug(zrc->dev, "Reclaim error %d\n", ret);
+		if (ret == -EIO)
+			/*
+			 * LLD might be performing some error handling sequence
+			 * at the underlying device. To not interfere, do not
+			 * attempt to schedule the next reclaim run immediately.
+			 */
+			return;
+	}
 
 	dmz_schedule_reclaim(zrc);
 }
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index 944db71ed3d7..ff3fd011796e 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -133,6 +133,8 @@ static int dmz_submit_bio(struct dmz_target *dmz, struct dm_zone *zone,
 
 	refcount_inc(&bioctx->ref);
 	generic_make_request(clone);
+	if (clone->bi_status == BLK_STS_IOERR)
+		return -EIO;
 
 	if (bio_op(bio) == REQ_OP_WRITE && dmz_is_seq(zone))
 		zone->wp_block += nr_blocks;
@@ -277,8 +279,8 @@ static int dmz_handle_buffered_write(struct dmz_target *dmz,
 
 	/* Get the buffer zone. One will be allocated if needed */
 	bzone = dmz_get_chunk_buffer(zmd, zone);
-	if (!bzone)
-		return -ENOSPC;
+	if (IS_ERR(bzone))
+		return PTR_ERR(bzone);
 
 	if (dmz_is_readonly(bzone))
 		return -EROFS;
@@ -389,6 +391,11 @@ static void dmz_handle_bio(struct dmz_target *dmz, struct dm_chunk_work *cw,
 
 	dmz_lock_metadata(zmd);
 
+	if (dmz->dev->flags & DMZ_BDEV_DYING) {
+		ret = -EIO;
+		goto out;
+	}
+
 	/*
 	 * Get the data zone mapping the chunk. There may be no
 	 * mapping for read and discard. If a mapping is obtained,
@@ -493,6 +500,8 @@ static void dmz_flush_work(struct work_struct *work)
 
 	/* Flush dirty metadata blocks */
 	ret = dmz_flush_metadata(dmz->metadata);
+	if (ret)
+		dmz_dev_debug(dmz->dev, "Metadata flush failed, rc=%d\n", ret);
 
 	/* Process queued flush requests */
 	while (1) {
@@ -556,6 +565,32 @@ out:
 	return ret;
 }
 
+/*
+ * Check the backing device availability. If it's on the way out,
+ * start failing I/O. Reclaim and metadata components also call this
+ * function to cleanly abort operation in the event of such failure.
+ */
+bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev)
+{
+	struct gendisk *disk;
+
+	if (!(dmz_dev->flags & DMZ_BDEV_DYING)) {
+		disk = dmz_dev->bdev->bd_disk;
+		if (blk_queue_dying(bdev_get_queue(dmz_dev->bdev))) {
+			dmz_dev_warn(dmz_dev, "Backing device queue dying");
+			dmz_dev->flags |= DMZ_BDEV_DYING;
+		} else if (disk->fops->check_events) {
+			if (disk->fops->check_events(disk, 0) &
+					DISK_EVENT_MEDIA_CHANGE) {
+				dmz_dev_warn(dmz_dev, "Backing device offline");
+				dmz_dev->flags |= DMZ_BDEV_DYING;
+			}
+		}
+	}
+
+	return dmz_dev->flags & DMZ_BDEV_DYING;
+}
+
 /*
  * Process a new BIO.
  */
@@ -569,6 +604,9 @@ static int dmz_map(struct dm_target *ti, struct bio *bio)
 	sector_t chunk_sector;
 	int ret;
 
+	if (dmz_bdev_is_dying(dmz->dev))
+		return DM_MAPIO_KILL;
+
 	dmz_dev_debug(dev, "BIO op %d sector %llu + %u => chunk %llu, block %llu, %u blocks",
 		      bio_op(bio), (unsigned long long)sector, nr_sectors,
 		      (unsigned long long)dmz_bio_chunk(dmz->dev, bio),
@@ -865,6 +903,9 @@ static int dmz_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
 {
 	struct dmz_target *dmz = ti->private;
 
+	if (dmz_bdev_is_dying(dmz->dev))
+		return -ENODEV;
+
 	*bdev = dmz->dev->bdev;
 
 	return 0;
diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h
index ed8de49c9a08..93a64529f219 100644
--- a/drivers/md/dm-zoned.h
+++ b/drivers/md/dm-zoned.h
@@ -56,6 +56,8 @@ struct dmz_dev {
 
 	unsigned int		nr_zones;
 
+	unsigned int		flags;
+
 	sector_t		zone_nr_sectors;
 	unsigned int		zone_nr_sectors_shift;
@@ -67,6 +69,9 @@ struct dmz_dev {
 				 (dev)->zone_nr_sectors_shift)
 #define dmz_chunk_block(dev, b)	((b) & ((dev)->zone_nr_blocks - 1))
 
+/* Device flags. */
+#define DMZ_BDEV_DYING		(1 << 0)
+
 /*
  * Zone descriptor.
  */
@@ -245,4 +250,9 @@ void dmz_resume_reclaim(struct dmz_reclaim *zrc);
 void dmz_reclaim_bio_acc(struct dmz_reclaim *zrc);
 void dmz_schedule_reclaim(struct dmz_reclaim *zrc);
 
+/*
+ * Functions defined in dm-zoned-target.c
+ */
+bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev);
+
 #endif /* DM_ZONED_H */
--
cgit v1.2.3

From bae9a0aa331d4cc20bd73c11f91abfceda4b7b29 Mon Sep 17 00:00:00 2001
From: Dmitry Fomichev
Date: Fri, 2 Aug 2019 15:02:50 -0700
Subject: dm zoned: add SPDX license identifiers

Signed-off-by: Dmitry Fomichev
Reviewed-by: Damien Le Moal
Signed-off-by: Mike Snitzer
---
 drivers/md/dm-zoned-metadata.c | 1 +
 drivers/md/dm-zoned-reclaim.c  | 1 +
 drivers/md/dm-zoned-target.c   | 1 +
 drivers/md/dm-zoned.h          | 1 +
 4 files changed, 4 insertions(+)

(limited to 'drivers/md/dm-zoned-reclaim.c')

diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index 2882897aece2..a033b5b1d77e 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2017 Western Digital Corporation or its affiliates.
  *
diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c
index 9470b8f77a33..8297b7558154 100644
--- a/drivers/md/dm-zoned-reclaim.c
+++ b/drivers/md/dm-zoned-reclaim.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2017 Western Digital Corporation or its affiliates.
  *
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index ff3fd011796e..31478fef6032 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2017 Western Digital Corporation or its affiliates.
  *
diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h
index 93a64529f219..d8e70b0ade35 100644
--- a/drivers/md/dm-zoned.h
+++ b/drivers/md/dm-zoned.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2017 Western Digital Corporation or its affiliates.
  *
--
cgit v1.2.3

From ad1bd578bd5afdf20de0bead42d25f199601211d Mon Sep 17 00:00:00 2001
From: Dmitry Fomichev
Date: Fri, 2 Aug 2019 15:02:51 -0700
Subject: dm zoned: fix a few typos

Signed-off-by: Dmitry Fomichev
Reviewed-by: Damien Le Moal
Signed-off-by: Mike Snitzer
---
 drivers/md/dm-zoned-metadata.c | 8 ++++----
 drivers/md/dm-zoned-reclaim.c  | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'drivers/md/dm-zoned-reclaim.c')

diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index a033b5b1d77e..2a5bc51fd6d5 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -35,7 +35,7 @@
  * (1) Super block (1 block)
  * (2) Chunk mapping table (nr_map_blocks)
  * (3) Bitmap blocks (nr_bitmap_blocks)
- * All metadata blocks are stored in conventional zones, starting from the
+ * All metadata blocks are stored in conventional zones, starting from
  * the first conventional zone found on disk.
  */
 struct dmz_super {
@@ -234,7 +234,7 @@ void dmz_unlock_map(struct dmz_metadata *zmd)
  * Lock/unlock metadata access. This is a "read" lock on a semaphore
  * that prevents metadata flush from running while metadata are being
  * modified. The actual metadata write mutual exclusion is achieved with
- * the map lock and zone styate management (active and reclaim state are
+ * the map lock and zone state management (active and reclaim state are
  * mutually exclusive).
  */
 void dmz_lock_metadata(struct dmz_metadata *zmd)
@@ -1652,7 +1652,7 @@ again:
 	if (op != REQ_OP_WRITE)
 		goto out;
 
-	/* Alloate a random zone */
+	/* Allocate a random zone */
 	dzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND);
 	if (!dzone) {
 		if (dmz_bdev_is_dying(zmd->dev)) {
@@ -1753,7 +1753,7 @@ again:
 	if (bzone)
 		goto out;
 
-	/* Alloate a random zone */
+	/* Allocate a random zone */
 	bzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND);
 	if (!bzone) {
 		if (dmz_bdev_is_dying(zmd->dev)) {
diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c
index 8297b7558154..d240d7ca8a8a 100644
--- a/drivers/md/dm-zoned-reclaim.c
+++ b/drivers/md/dm-zoned-reclaim.c
@@ -438,7 +438,7 @@ static bool dmz_should_reclaim(struct dmz_reclaim *zrc)
 		return false;
 
 	/*
-	 * If the percentage of unmappped random zones is low,
+	 * If the percentage of unmapped random zones is low,
 	 * reclaim even if the target is busy.
 	 */
 	return p_unmap_rnd <= DMZ_RECLAIM_LOW_UNMAP_RND;
--
cgit v1.2.3
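
The recurring technique in the first two patches above is the same: helpers that used
to return NULL (or void) now return encoded errors via ERR_PTR(), so that callers such
as dmz_do_reclaim() can see the failure, unwind their zone locking, and decide whether
to reschedule. Below is a minimal kernel-style sketch of that pattern, for illustration
only; the foo_* names are hypothetical and are not taken from the dm-zoned code.

/*
 * Illustrative sketch only: hypothetical foo_* helpers showing the
 * ERR_PTR()/IS_ERR()/PTR_ERR() error-propagation pattern that these
 * patches apply to dm-zoned. Not actual dm-zoned code.
 */
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/types.h>

struct foo_zone {
	int id;
};

static struct foo_zone foo_zones[1];

/* Return an encoded error instead of a bare NULL on failure. */
static struct foo_zone *foo_get_zone(bool dev_dying)
{
	if (dev_dying)
		return ERR_PTR(-EIO);	/* backing device is going away */
	return &foo_zones[0];		/* a (hypothetical) zone to reclaim */
}

static int foo_reclaim(bool dev_dying)
{
	struct foo_zone *zone = foo_get_zone(dev_dying);

	if (IS_ERR(zone))
		return PTR_ERR(zone);	/* propagate -EIO etc. to the caller */

	/* ... copy the data, then unlock/free the zone on every path ... */
	return 0;
}

The dmz_bdev_is_dying() checks added in the second patch follow the same idea: once the
backing device is detected as gone, every path reports -EIO instead of retrying forever,
and the caller stops scheduling further work.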