From 611bee526b4a89d49f1b9914a770bfdc101d5fb5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 23 Aug 2020 11:10:41 +0200 Subject: block: replace bd_set_size with bd_set_nr_sectors Replace bd_set_size with a version that takes the number of sectors instead, as that fits most of the current and future callers much better. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/block/loop.c | 4 ++-- drivers/block/nbd.c | 7 ++++--- drivers/block/pktcdvd.c | 2 +- drivers/nvme/host/nvme.h | 2 +- 4 files changed, 8 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index d3394191e168..cb1191d6e945 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -253,7 +253,7 @@ static void loop_set_size(struct loop_device *lo, loff_t size) { struct block_device *bdev = lo->lo_device; - bd_set_size(bdev, size << SECTOR_SHIFT); + bd_set_nr_sectors(bdev, size); set_capacity_revalidate_and_notify(lo->lo_disk, size, false); } @@ -1251,7 +1251,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) set_capacity(lo->lo_disk, 0); loop_sysfs_exit(lo); if (bdev) { - bd_set_size(bdev, 0); + bd_set_nr_sectors(bdev, 0); /* let user-space know about this change */ kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); } diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index edf8b632e3d2..a54f2d155a31 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -300,6 +300,7 @@ static void nbd_size_update(struct nbd_device *nbd) { struct nbd_config *config = nbd->config; struct block_device *bdev = bdget_disk(nbd->disk, 0); + sector_t nr_sectors = config->bytesize >> 9; if (config->flags & NBD_FLAG_SEND_TRIM) { nbd->disk->queue->limits.discard_granularity = config->blksize; @@ -308,10 +309,10 @@ static void nbd_size_update(struct nbd_device *nbd) } blk_queue_logical_block_size(nbd->disk->queue, config->blksize); blk_queue_physical_block_size(nbd->disk->queue, config->blksize); - set_capacity(nbd->disk, config->bytesize >> 9); + set_capacity(nbd->disk, nr_sectors); if (bdev) { if (bdev->bd_disk) { - bd_set_size(bdev, config->bytesize); + bd_set_nr_sectors(bdev, nr_sectors); set_blocksize(bdev, config->blksize); } else bdev->bd_invalidated = 1; @@ -1138,7 +1139,7 @@ static void nbd_bdev_reset(struct block_device *bdev) { if (bdev->bd_openers > 1) return; - bd_set_size(bdev, 0); + bd_set_nr_sectors(bdev, 0); } static void nbd_parse_flags(struct nbd_device *nbd) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 1034e445680c..17f2e6ff1223 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2192,7 +2192,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) set_capacity(pd->disk, lba << 2); set_capacity(pd->bdev->bd_disk, lba << 2); - bd_set_size(pd->bdev, (loff_t)lba << 11); + bd_set_nr_sectors(pd->bdev, lba << 2); q = bdev_get_queue(pd->bdev); if (write) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 2910f6caab7d..aab130f31e25 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -688,7 +688,7 @@ static inline void nvme_mpath_update_disk_size(struct gendisk *disk) struct block_device *bdev = bdget_disk(disk, 0); if (bdev) { - bd_set_size(bdev, get_capacity(disk) << SECTOR_SHIFT); + bd_set_nr_sectors(bdev, get_capacity(disk)); bdput(bdev); } } -- cgit v1.2.3 From c2b4bb8cb3741c0bacf3683e4c1ecd04c977ada3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 23 Aug 2020 11:10:42 +0200 Subject: block: fix locking for struct block_device size updates Two different callers use two different mutexes for updating the block device size, which obviously doesn't help to actually protect against concurrent updates from the different callers. In addition one of the locks, bd_mutex is rather prone to deadlocks with other parts of the block stack that use it for high level synchronization. Switch to using a new spinlock protecting just the size updates, as that is all we need, and make sure everyone does the update through the proper helper. This fixes a bug reported with the nvme revalidating disks during a hot removal operation, which can currently deadlock on bd_mutex. Reported-by: Xianting Tian Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- block/partitions/core.c | 4 ++-- drivers/block/aoe/aoecmd.c | 4 +--- drivers/md/dm.c | 15 ++------------- drivers/s390/block/dasd_ioctl.c | 9 ++------- fs/block_dev.c | 25 ++++++++++++++----------- include/linux/blk_types.h | 1 + 6 files changed, 22 insertions(+), 36 deletions(-) (limited to 'drivers') diff --git a/block/partitions/core.c b/block/partitions/core.c index 5b4869c08fb3..b1c0b50ca92d 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -592,8 +592,8 @@ int bdev_resize_partition(struct block_device *bdev, int partno, if (partition_overlaps(bdev->bd_disk, start, length, partno)) goto out_unlock; - part_nr_sects_write(part, (sector_t)length); - i_size_write(bdevp->bd_inode, length << SECTOR_SHIFT); + part_nr_sects_write(part, length); + bd_set_nr_sectors(bdevp, length); ret = 0; out_unlock: diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 6dba41395155..313f0b946fe2 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -900,9 +900,7 @@ aoecmd_sleepwork(struct work_struct *work) ssize = get_capacity(d->gd); bd = bdget_disk(d->gd, 0); if (bd) { - inode_lock(bd->bd_inode); - i_size_write(bd->bd_inode, (loff_t)ssize<<9); - inode_unlock(bd->bd_inode); + bd_set_nr_sectors(bd, ssize); bdput(bd); } spin_lock_irq(&d->lock); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index fb0255d25e4b..3dedd9cc4fb6 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2097,18 +2097,6 @@ static void event_callback(void *context) dm_issue_global_event(); } -/* - * Protected by md->suspend_lock obtained by dm_swap_table(). - */ -static void __set_size(struct mapped_device *md, sector_t size) -{ - lockdep_assert_held(&md->suspend_lock); - - set_capacity(md->disk, size); - - i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); -} - /* * Returns old map, which caller must destroy. */ @@ -2131,7 +2119,8 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, if (size != dm_get_size(md)) memset(&md->geometry, 0, sizeof(md->geometry)); - __set_size(md, size); + set_capacity(md->disk, size); + bd_set_nr_sectors(md->bdev, size); dm_table_event_callback(t, event_callback, md); diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index 777734d1b4e5..faaf5596e31c 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -55,10 +55,7 @@ dasd_ioctl_enable(struct block_device *bdev) dasd_enable_device(base); /* Formatting the dasd device can change the capacity. */ - mutex_lock(&bdev->bd_mutex); - i_size_write(bdev->bd_inode, - (loff_t)get_capacity(base->block->gdp) << 9); - mutex_unlock(&bdev->bd_mutex); + bd_set_nr_sectors(bdev, get_capacity(base->block->gdp)); dasd_put_device(base); return 0; } @@ -91,9 +88,7 @@ dasd_ioctl_disable(struct block_device *bdev) * Set i_size to zero, since read, write, etc. check against this * value. */ - mutex_lock(&bdev->bd_mutex); - i_size_write(bdev->bd_inode, 0); - mutex_unlock(&bdev->bd_mutex); + bd_set_nr_sectors(bdev, 0); dasd_put_device(base); return 0; } diff --git a/fs/block_dev.c b/fs/block_dev.c index f52597172c8b..08158bb2e76c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -876,6 +876,7 @@ struct block_device *bdget(dev_t dev) bdev = &BDEV_I(inode)->bdev; if (inode->i_state & I_NEW) { + spin_lock_init(&bdev->bd_size_lock); bdev->bd_contains = NULL; bdev->bd_super = NULL; bdev->bd_inode = inode; @@ -1290,6 +1291,7 @@ static void check_disk_size_change(struct gendisk *disk, { loff_t disk_size, bdev_size; + spin_lock(&bdev->bd_size_lock); disk_size = (loff_t)get_capacity(disk) << 9; bdev_size = i_size_read(bdev->bd_inode); if (disk_size != bdev_size) { @@ -1299,11 +1301,15 @@ static void check_disk_size_change(struct gendisk *disk, disk->disk_name, bdev_size, disk_size); } i_size_write(bdev->bd_inode, disk_size); - if (bdev_size > disk_size && __invalidate_device(bdev, false)) + } + bdev->bd_invalidated = 0; + spin_unlock(&bdev->bd_size_lock); + + if (bdev_size > disk_size) { + if (__invalidate_device(bdev, false)) pr_warn("VFS: busy inodes on resized disk %s\n", disk->disk_name); } - bdev->bd_invalidated = 0; } /** @@ -1328,13 +1334,10 @@ int revalidate_disk(struct gendisk *disk) if (!(disk->flags & GENHD_FL_HIDDEN)) { struct block_device *bdev = bdget_disk(disk, 0); - if (!bdev) - return ret; - - mutex_lock(&bdev->bd_mutex); - check_disk_size_change(disk, bdev, ret == 0); - mutex_unlock(&bdev->bd_mutex); - bdput(bdev); + if (bdev) { + check_disk_size_change(disk, bdev, ret == 0); + bdput(bdev); + } } return ret; } @@ -1373,9 +1376,9 @@ EXPORT_SYMBOL(check_disk_change); void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors) { - inode_lock(bdev->bd_inode); + spin_lock(&bdev->bd_size_lock); i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); - inode_unlock(bdev->bd_inode); + spin_unlock(&bdev->bd_size_lock); } EXPORT_SYMBOL(bd_set_nr_sectors); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 4ecf4fed171f..5accc2549d22 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -38,6 +38,7 @@ struct block_device { /* number of times partitions within this device have been opened. */ unsigned bd_part_count; int bd_invalidated; + spinlock_t bd_size_lock; /* for bd_inode->i_size updates */ struct gendisk * bd_disk; struct backing_dev_info *bd_bdi; -- cgit v1.2.3 From c13f0fbc4c191aab5e95b01589ff5bbc6556e4f6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 23 Aug 2020 11:10:43 +0200 Subject: nvme: don't call revalidate_disk from nvme_set_queue_dying In nvme_set_queue_dying we really just want to ensure the disk and bdev sizes are set to zero. Going through revalidate_disk leads to a somewhat arcance and complex callchain relying on special behavior in a few places. Instead just lift the set_capacity directly to nvme_set_queue_dying, and rename and move the nvme_mpath_update_disk_size helper so that we can use it in nvme_set_queue_dying to propagate the size to the bdev without detours. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 33 +++++++++++++++++++++++---------- drivers/nvme/host/nvme.h | 13 ------------- 2 files changed, 23 insertions(+), 23 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d543bc1747fd..c7e01d9667ad 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -94,21 +94,34 @@ static void nvme_put_subsystem(struct nvme_subsystem *subsys); static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, unsigned nsid); +static void nvme_update_bdev_size(struct gendisk *disk) +{ + struct block_device *bdev = bdget_disk(disk, 0); + + if (bdev) { + bd_set_nr_sectors(bdev, get_capacity(disk)); + bdput(bdev); + } +} + +/* + * Prepare a queue for teardown. + * + * This must forcibly unquiesce queues to avoid blocking dispatch, and only set + * the capacity to 0 after that to avoid blocking dispatchers that may be + * holding bd_butex. This will end buffered writers dirtying pages that can't + * be synced. + */ static void nvme_set_queue_dying(struct nvme_ns *ns) { - /* - * Revalidating a dead namespace sets capacity to 0. This will end - * buffered writers dirtying pages that can't be synced. - */ if (test_and_set_bit(NVME_NS_DEAD, &ns->flags)) return; + blk_set_queue_dying(ns->queue); - /* Forcibly unquiesce queues to avoid blocking dispatch */ blk_mq_unquiesce_queue(ns->queue); - /* - * Revalidate after unblocking dispatchers that may be holding bd_butex - */ - revalidate_disk(ns->disk); + + set_capacity(ns->disk, 0); + nvme_update_bdev_size(ns->disk); } static void nvme_queue_scan(struct nvme_ctrl *ctrl) @@ -2134,7 +2147,7 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) nvme_update_disk_info(ns->head->disk, ns, id); blk_stack_limits(&ns->head->disk->queue->limits, &ns->queue->limits, 0); - nvme_mpath_update_disk_size(ns->head->disk); + nvme_update_bdev_size(ns->head->disk); } #endif return 0; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index aab130f31e25..87737fa32360 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -683,16 +683,6 @@ static inline void nvme_trace_bio_complete(struct request *req, trace_block_bio_complete(ns->head->disk->queue, req->bio); } -static inline void nvme_mpath_update_disk_size(struct gendisk *disk) -{ - struct block_device *bdev = bdget_disk(disk, 0); - - if (bdev) { - bd_set_nr_sectors(bdev, get_capacity(disk)); - bdput(bdev); - } -} - extern struct device_attribute dev_attr_ana_grpid; extern struct device_attribute dev_attr_ana_state; extern struct device_attribute subsys_attr_iopolicy; @@ -767,9 +757,6 @@ static inline void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys) static inline void nvme_mpath_start_freeze(struct nvme_subsystem *subsys) { } -static inline void nvme_mpath_update_disk_size(struct gendisk *disk) -{ -} #endif /* CONFIG_NVME_MULTIPATH */ #ifdef CONFIG_BLK_DEV_ZONED -- cgit v1.2.3 From c4823983538d5fdf38575b3f3ba3a0d10b7f021a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 19 Aug 2020 09:35:33 +0200 Subject: raw: deprecate the raw driver The raw driver has been replaced by O_DIRECT support on the block device in 2002. Deprecate it to prepare for removal in a few kernel releases. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/char/raw.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers') diff --git a/drivers/char/raw.c b/drivers/char/raw.c index 380bf518338e..ccf5bd528642 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c @@ -63,6 +63,11 @@ static int raw_open(struct inode *inode, struct file *filp) return 0; } + pr_warn_ratelimited( + "process %s (pid %d) is using the deprecated raw device\n" + "support will be removed in Linux 5.14.\n", + current->comm, current->pid); + mutex_lock(&raw_mutex); /* -- cgit v1.2.3 From 4ce790632803bf2ec47271895a59936c70df7e78 Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Fri, 21 Aug 2020 09:19:15 +0800 Subject: virtio-blk: Use kobj_to_dev() instead of container_of() Use kobj_to_dev() instead of container_of() Signed-off-by: Tian Tao Reviewed-by: Bart Van Assche Reviewed-by: Stefan Hajnoczi Reviewed-by: Stefano Garzarella Signed-off-by: Jens Axboe --- drivers/block/virtio_blk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index b2e48dac1ebd..ca63a41059d6 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -646,7 +646,7 @@ static struct attribute *virtblk_attrs[] = { static umode_t virtblk_attrs_are_visible(struct kobject *kobj, struct attribute *a, int n) { - struct device *dev = container_of(kobj, struct device, kobj); + struct device *dev = kobj_to_dev(kobj); struct gendisk *disk = dev_to_disk(dev); struct virtio_blk *vblk = disk->private_data; struct virtio_device *vdev = vblk->vdev; -- cgit v1.2.3 From f4ad06f2bb8476548b08f89919ee65abc4e40212 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 17:57:42 +0200 Subject: block: rename bd_invalidated Replace bd_invalidate with a new BDEV_NEED_PART_SCAN flag in a bd_flags variable to better describe the condition. Signed-off-by: Christoph Hellwig Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- block/genhd.c | 2 +- drivers/block/nbd.c | 8 ++++---- fs/block_dev.c | 10 +++++----- include/linux/blk_types.h | 4 +++- 4 files changed, 13 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/block/genhd.c b/block/genhd.c index 5fc6d82e6c68..a36be970b767 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -732,7 +732,7 @@ static void register_disk(struct device *parent, struct gendisk *disk, if (!bdev) goto exit; - bdev->bd_invalidated = 1; + set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); err = blkdev_get(bdev, FMODE_READ, NULL); if (err < 0) goto exit; diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index a54f2d155a31..15eed210feef 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -315,7 +315,7 @@ static void nbd_size_update(struct nbd_device *nbd) bd_set_nr_sectors(bdev, nr_sectors); set_blocksize(bdev, config->blksize); } else - bdev->bd_invalidated = 1; + set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); bdput(bdev); } kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); @@ -1322,7 +1322,7 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b return ret; if (max_part) - bdev->bd_invalidated = 1; + set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); mutex_unlock(&nbd->config_lock); ret = wait_event_interruptible(config->recv_wq, atomic_read(&config->recv_threads) == 0); @@ -1500,9 +1500,9 @@ static int nbd_open(struct block_device *bdev, fmode_t mode) refcount_set(&nbd->config_refs, 1); refcount_inc(&nbd->refs); mutex_unlock(&nbd->config_lock); - bdev->bd_invalidated = 1; + set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); } else if (nbd_disconnected(nbd->config)) { - bdev->bd_invalidated = 1; + set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); } out: mutex_unlock(&nbd_index_mutex); diff --git a/fs/block_dev.c b/fs/block_dev.c index 7e32398608c6..1e6441dbe840 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -881,7 +881,7 @@ struct block_device *bdget(dev_t dev) bdev->bd_super = NULL; bdev->bd_inode = inode; bdev->bd_part_count = 0; - bdev->bd_invalidated = 0; + bdev->bd_flags = 0; inode->i_mode = S_IFBLK; inode->i_rdev = dev; inode->i_bdev = bdev; @@ -1365,7 +1365,7 @@ int check_disk_change(struct block_device *bdev) if (__invalidate_device(bdev, true)) pr_warn("VFS: busy inodes on changed media %s\n", disk->disk_name); - bdev->bd_invalidated = 1; + set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); if (bdops->revalidate_disk) bdops->revalidate_disk(bdev->bd_disk); return 1; @@ -1390,7 +1390,7 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate) lockdep_assert_held(&bdev->bd_mutex); - bdev->bd_invalidated = 0; + clear_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); rescan: ret = blk_drop_partitions(bdev); @@ -1514,7 +1514,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, * The latter is necessary to prevent ghost * partitions on a removed medium. */ - if (bdev->bd_invalidated && + if (test_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags) && (!ret || ret == -ENOMEDIUM)) bdev_disk_changed(bdev, ret == -ENOMEDIUM); @@ -1544,7 +1544,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, if (bdev->bd_disk->fops->open) ret = bdev->bd_disk->fops->open(bdev, mode); /* the same as first opener case, read comment there */ - if (bdev->bd_invalidated && + if (test_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags) && (!ret || ret == -ENOMEDIUM)) bdev_disk_changed(bdev, ret == -ENOMEDIUM); if (ret) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 59d9150165c4..6ffa783e1633 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -19,6 +19,8 @@ struct cgroup_subsys_state; typedef void (bio_end_io_t) (struct bio *); struct bio_crypt_ctx; +#define BDEV_NEED_PART_SCAN 0 + struct block_device { dev_t bd_dev; int bd_openers; @@ -37,7 +39,7 @@ struct block_device { struct hd_struct * bd_part; /* number of times partitions within this device have been opened. */ unsigned bd_part_count; - int bd_invalidated; + unsigned long bd_flags; spinlock_t bd_size_lock; /* for bd_inode->i_size updates */ struct gendisk * bd_disk; struct backing_dev_info *bd_bdi; -- cgit v1.2.3 From 659e56ba864d37b7ee0a49cd432205b2a5ca815e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 17:57:43 +0200 Subject: block: add a new revalidate_disk_size helper revalidate_disk is a relative awkward helper for driver use, as it first calls an optional driver method and then updates the block device size, while most callers either don't need the method call at all, or want to keep state between the caller and the called method. Add a revalidate_disk_size helper that just performs the update of the block device size from the gendisk one, and switch all drivers that do not implement ->revalidate_disk to use the new helper instead of revalidate_disk() Signed-off-by: Christoph Hellwig Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Acked-by: Song Liu Signed-off-by: Jens Axboe --- drivers/block/rbd.c | 2 +- drivers/block/rnbd/rnbd-clt.c | 10 ++-------- drivers/block/virtio_blk.c | 2 +- drivers/block/zram/zram_drv.c | 4 ++-- drivers/md/dm-raid.c | 2 +- drivers/md/md-cluster.c | 6 +++--- drivers/md/md-linear.c | 2 +- drivers/md/md.c | 10 +++++----- fs/block_dev.c | 42 +++++++++++++++++++++++++++++------------- include/linux/genhd.h | 1 + 10 files changed, 46 insertions(+), 35 deletions(-) (limited to 'drivers') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 011539039693..5d3923c0997c 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4921,7 +4921,7 @@ static void rbd_dev_update_size(struct rbd_device *rbd_dev) size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE; dout("setting size to %llu sectors", (unsigned long long)size); set_capacity(rbd_dev->disk, size); - revalidate_disk(rbd_dev->disk); + revalidate_disk_size(rbd_dev->disk, true); } } diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index cc6a4e2587ae..157538fc8be5 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -102,18 +102,12 @@ static int rnbd_clt_set_dev_attr(struct rnbd_clt_dev *dev, static int rnbd_clt_change_capacity(struct rnbd_clt_dev *dev, size_t new_nsectors) { - int err = 0; - rnbd_clt_info(dev, "Device size changed from %zu to %zu sectors\n", dev->nsectors, new_nsectors); dev->nsectors = new_nsectors; set_capacity(dev->gd, dev->nsectors); - err = revalidate_disk(dev->gd); - if (err) - rnbd_clt_err(dev, - "Failed to change device size from %zu to %zu, err: %d\n", - dev->nsectors, new_nsectors, err); - return err; + revalidate_disk_size(dev->gd, true); + return 0; } static int process_msg_open_rsp(struct rnbd_clt_dev *dev, diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index ca63a41059d6..a314b9382442 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -598,7 +598,7 @@ static void virtblk_update_cache_mode(struct virtio_device *vdev) struct virtio_blk *vblk = vdev->priv; blk_queue_write_cache(vblk->disk->queue, writeback, false); - revalidate_disk(vblk->disk); + revalidate_disk_size(vblk->disk, true); } static const char *const virtblk_cache_types[] = { diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 9100ac36670a..a356275605b1 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1739,7 +1739,7 @@ static ssize_t disksize_store(struct device *dev, zram->disksize = disksize; set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); - revalidate_disk(zram->disk); + revalidate_disk_size(zram->disk, true); up_write(&zram->init_lock); return len; @@ -1786,7 +1786,7 @@ static ssize_t reset_store(struct device *dev, /* Make sure all the pending I/O are finished */ fsync_bdev(bdev); zram_reset_device(zram); - revalidate_disk(zram->disk); + revalidate_disk_size(zram->disk, true); bdput(bdev); mutex_lock(&bdev->bd_mutex); diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 8d2b835d7a10..56b723d012ac 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -701,7 +701,7 @@ static void rs_set_capacity(struct raid_set *rs) struct gendisk *gendisk = dm_disk(dm_table_get_md(rs->ti->table)); set_capacity(gendisk, rs->md.array_sectors); - revalidate_disk(gendisk); + revalidate_disk_size(gendisk, true); } /* diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index d50737ec4039..0580b51a156a 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -582,7 +582,7 @@ static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg) break; case CHANGE_CAPACITY: set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); + revalidate_disk_size(mddev->gendisk, true); break; case RESYNCING: set_bit(MD_RESYNCING_REMOTE, &mddev->recovery); @@ -1296,12 +1296,12 @@ static void update_size(struct mddev *mddev, sector_t old_dev_sectors) pr_err("%s:%d: failed to send CHANGE_CAPACITY msg\n", __func__, __LINE__); set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); + revalidate_disk_size(mddev->gendisk, true); } else { /* revert to previous sectors */ ret = mddev->pers->resize(mddev, old_dev_sectors); if (!ret) - revalidate_disk(mddev->gendisk); + revalidate_disk_size(mddev->gendisk, true); ret = __sendmsg(cinfo, &cmsg); if (ret) pr_err("%s:%d: failed to send METADATA_UPDATED msg\n", diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c index c2ae9125c4c3..5ab22069b5be 100644 --- a/drivers/md/md-linear.c +++ b/drivers/md/md-linear.c @@ -202,7 +202,7 @@ static int linear_add(struct mddev *mddev, struct md_rdev *rdev) md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); set_capacity(mddev->gendisk, mddev->array_sectors); mddev_resume(mddev); - revalidate_disk(mddev->gendisk); + revalidate_disk_size(mddev->gendisk, true); kfree_rcu(oldconf, rcu); return 0; } diff --git a/drivers/md/md.c b/drivers/md/md.c index 607278207023..9562ef598ae1 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5358,7 +5358,7 @@ array_size_store(struct mddev *mddev, const char *buf, size_t len) mddev->array_sectors = sectors; if (mddev->pers) { set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); + revalidate_disk_size(mddev->gendisk, true); } } mddev_unlock(mddev); @@ -6109,7 +6109,7 @@ int do_md_run(struct mddev *mddev) md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); + revalidate_disk_size(mddev->gendisk, true); clear_bit(MD_NOT_READY, &mddev->flags); mddev->changed = 1; kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); @@ -6427,7 +6427,7 @@ static int do_md_stop(struct mddev *mddev, int mode, set_capacity(disk, 0); mutex_unlock(&mddev->open_mutex); mddev->changed = 1; - revalidate_disk(disk); + revalidate_disk_size(disk, true); if (mddev->ro) mddev->ro = 0; @@ -7259,7 +7259,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors) md_cluster_ops->update_size(mddev, old_dev_sectors); else if (mddev->queue) { set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); + revalidate_disk_size(mddev->gendisk, true); } } return rv; @@ -9018,7 +9018,7 @@ void md_do_sync(struct md_thread *thread) mddev_unlock(mddev); if (!mddev_is_clustered(mddev)) { set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); + revalidate_disk_size(mddev->gendisk, true); } } diff --git a/fs/block_dev.c b/fs/block_dev.c index 1e6441dbe840..9bfe37f394bd 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1311,6 +1311,34 @@ static void check_disk_size_change(struct gendisk *disk, } } +/** + * revalidate_disk_size - checks for disk size change and adjusts bdev size. + * @disk: struct gendisk to check + * @verbose: if %true log a message about a size change if there is any + * + * This routine checks to see if the bdev size does not match the disk size + * and adjusts it if it differs. When shrinking the bdev size, its all caches + * are freed. + */ +void revalidate_disk_size(struct gendisk *disk, bool verbose) +{ + struct block_device *bdev; + + /* + * Hidden disks don't have associated bdev so there's no point in + * revalidating them. + */ + if (disk->flags & GENHD_FL_HIDDEN) + return; + + bdev = bdget_disk(disk, 0); + if (bdev) { + check_disk_size_change(disk, bdev, verbose); + bdput(bdev); + } +} +EXPORT_SYMBOL(revalidate_disk_size); + /** * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back * @disk: struct gendisk to be revalidated @@ -1325,19 +1353,7 @@ int revalidate_disk(struct gendisk *disk) if (disk->fops->revalidate_disk) ret = disk->fops->revalidate_disk(disk); - - /* - * Hidden disks don't have associated bdev so there's no point in - * revalidating it. - */ - if (!(disk->flags & GENHD_FL_HIDDEN)) { - struct block_device *bdev = bdget_disk(disk, 0); - - if (bdev) { - check_disk_size_change(disk, bdev, ret == 0); - bdput(bdev); - } - } + revalidate_disk_size(disk, ret == 0); return ret; } EXPORT_SYMBOL(revalidate_disk); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 9ea2ca31c278..f76c8baf6b7d 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -371,6 +371,7 @@ int register_blkdev(unsigned int major, const char *name); void unregister_blkdev(unsigned int major, const char *name); int revalidate_disk(struct gendisk *disk); +void revalidate_disk_size(struct gendisk *disk, bool verbose); int check_disk_change(struct block_device *bdev); int __invalidate_device(struct block_device *bdev, bool kill_dirty); void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors); -- cgit v1.2.3 From b55d3d21a05d5558440515c1a73d87bc4b1fa17c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 17:57:45 +0200 Subject: nvme: opencode revalidate_disk in nvme_validate_ns Keep control in the NVMe driver instead of going through an indirect call back into ->revalidate_disk. Also reorder the function a bit to be easier to follow with the additional code. And now that we have removed all callers of revalidate_disk() in the nvme code, ->revalidate_disk is only called from the open code when first opening the device. Which is of course totally pointless as we have a valid size since the initial scan, and will get an updated view through the asynchronous notifiation everytime the size changes. Signed-off-by: Christoph Hellwig Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c7e01d9667ad..ea1fa41fbba8 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2352,7 +2352,6 @@ static const struct block_device_operations nvme_fops = { .open = nvme_open, .release = nvme_release, .getgeo = nvme_getgeo, - .revalidate_disk= nvme_revalidate_disk, .report_zones = nvme_report_zones, .pr_ops = &nvme_pr_ops, }; @@ -4053,14 +4052,19 @@ static void nvme_ns_remove_by_nsid(struct nvme_ctrl *ctrl, u32 nsid) static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns *ns; + int ret; ns = nvme_find_get_ns(ctrl, nsid); - if (ns) { - if (revalidate_disk(ns->disk)) - nvme_ns_remove(ns); - nvme_put_ns(ns); - } else + if (!ns) { nvme_alloc_ns(ctrl, nsid); + return; + } + + ret = nvme_revalidate_disk(ns->disk); + revalidate_disk_size(ns->disk, ret == 0); + if (ret) + nvme_ns_remove(ns); + nvme_put_ns(ns); } static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, -- cgit v1.2.3 From 033a1b98b1f8a823fb0bd985d45290016eee54ce Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 17:57:46 +0200 Subject: sd: open code revalidate_disk Instead of calling revalidate_disk just do the work directly by calling sd_revalidate_disk, and revalidate_disk_size where needed. Signed-off-by: Christoph Hellwig Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- drivers/scsi/sd.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 95018e650f2d..2bec8cd52616 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -217,7 +217,7 @@ cache_type_store(struct device *dev, struct device_attribute *attr, sd_print_sense_hdr(sdkp, &sshdr); return -EINVAL; } - revalidate_disk(sdkp->disk); + sd_revalidate_disk(sdkp->disk); return count; } @@ -1706,8 +1706,10 @@ static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr) static void sd_rescan(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); + int ret; - revalidate_disk(sdkp->disk); + ret = sd_revalidate_disk(sdkp->disk); + revalidate_disk_size(sdkp->disk, ret == 0); } static int sd_ioctl(struct block_device *bdev, fmode_t mode, -- cgit v1.2.3 From 32f61d67570db036dba9e4fb2509b5343e28f627 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 17:57:47 +0200 Subject: nvdimm: simplify revalidate_disk handling The nvdimm block driver abuse revalidate_disk in a strange way, and totally unrelated to what other drivers do. Simplify this by just calling nvdimm_revalidate_disk (which seems rather misnamed) from the probe routines, as the additional bdev size revalidation is pointless at this point, and remove the revalidate_disk methods given that it can only be triggered from add_disk, which is right before the manual calls. Signed-off-by: Christoph Hellwig Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- drivers/nvdimm/blk.c | 3 +-- drivers/nvdimm/btt.c | 3 +-- drivers/nvdimm/bus.c | 9 +++------ drivers/nvdimm/nd.h | 2 +- drivers/nvdimm/pmem.c | 3 +-- 5 files changed, 7 insertions(+), 13 deletions(-) (limited to 'drivers') diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 1f718381a045..22e5617b2cea 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -226,7 +226,6 @@ static int nsblk_rw_bytes(struct nd_namespace_common *ndns, static const struct block_device_operations nd_blk_fops = { .owner = THIS_MODULE, .submit_bio = nd_blk_submit_bio, - .revalidate_disk = nvdimm_revalidate_disk, }; static void nd_blk_release_queue(void *q) @@ -284,7 +283,7 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk) set_capacity(disk, available_disk_size >> SECTOR_SHIFT); device_add_disk(dev, disk, NULL); - revalidate_disk(disk); + nvdimm_check_and_set_ro(disk); return 0; } diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 0ff610e728ff..0d710140bf93 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1513,7 +1513,6 @@ static const struct block_device_operations btt_fops = { .submit_bio = btt_submit_bio, .rw_page = btt_rw_page, .getgeo = btt_getgeo, - .revalidate_disk = nvdimm_revalidate_disk, }; static int btt_blk_init(struct btt *btt) @@ -1558,7 +1557,7 @@ static int btt_blk_init(struct btt *btt) set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9); device_add_disk(&btt->nd_btt->dev, btt->btt_disk, NULL); btt->nd_btt->size = btt->nlba * (u64)btt->sector_size; - revalidate_disk(btt->btt_disk); + nvdimm_check_and_set_ro(btt->btt_disk); return 0; } diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 955265656b96..2304c6183822 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -628,7 +628,7 @@ int __nd_driver_register(struct nd_device_driver *nd_drv, struct module *owner, } EXPORT_SYMBOL(__nd_driver_register); -int nvdimm_revalidate_disk(struct gendisk *disk) +void nvdimm_check_and_set_ro(struct gendisk *disk) { struct device *dev = disk_to_dev(disk)->parent; struct nd_region *nd_region = to_nd_region(dev->parent); @@ -639,16 +639,13 @@ int nvdimm_revalidate_disk(struct gendisk *disk) * read-only if the disk is already read-only. */ if (disk_ro || nd_region->ro == disk_ro) - return 0; + return; dev_info(dev, "%s read-only, marking %s read-only\n", dev_name(&nd_region->dev), disk->disk_name); set_disk_ro(disk, 1); - - return 0; - } -EXPORT_SYMBOL(nvdimm_revalidate_disk); +EXPORT_SYMBOL(nvdimm_check_and_set_ro); static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 85c1ae813ea3..72740108ba42 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -361,7 +361,7 @@ u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region); void nvdimm_bus_lock(struct device *dev); void nvdimm_bus_unlock(struct device *dev); bool is_nvdimm_bus_locked(struct device *dev); -int nvdimm_revalidate_disk(struct gendisk *disk); +void nvdimm_check_and_set_ro(struct gendisk *disk); void nvdimm_drvdata_release(struct kref *kref); void put_ndd(struct nvdimm_drvdata *ndd); int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index fab29b514372..140cf3b9000c 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -281,7 +281,6 @@ static const struct block_device_operations pmem_fops = { .owner = THIS_MODULE, .submit_bio = pmem_submit_bio, .rw_page = pmem_rw_page, - .revalidate_disk = nvdimm_revalidate_disk, }; static int pmem_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, @@ -501,7 +500,7 @@ static int pmem_attach_disk(struct device *dev, if (devm_add_action_or_reset(dev, pmem_release_disk, pmem)) return -ENOMEM; - revalidate_disk(disk); + nvdimm_check_and_set_ro(disk); pmem->bb_state = sysfs_get_dirent(disk_to_dev(disk)->kobj.sd, "badblocks"); -- cgit v1.2.3 From de09077c89183cbc627d9393706343662da7f5a3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 17:57:48 +0200 Subject: block: remove revalidate_disk() Remove the now unused helper. Signed-off-by: Christoph Hellwig Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Acked-by: Song Liu Signed-off-by: Jens Axboe --- drivers/md/md.h | 2 +- fs/block_dev.c | 19 ------------------- include/linux/genhd.h | 1 - 3 files changed, 1 insertion(+), 21 deletions(-) (limited to 'drivers') diff --git a/drivers/md/md.h b/drivers/md/md.h index d9c4e6b7e939..f9e2ccdd22c4 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -397,7 +397,7 @@ struct mddev { * These locks are separate due to conflicting interactions * with bdev->bd_mutex. * Lock ordering is: - * reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk + * reconfig_mutex -> bd_mutex * bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open */ struct mutex open_mutex; diff --git a/fs/block_dev.c b/fs/block_dev.c index 9bfe37f394bd..9cb205405f9d 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1339,25 +1339,6 @@ void revalidate_disk_size(struct gendisk *disk, bool verbose) } EXPORT_SYMBOL(revalidate_disk_size); -/** - * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back - * @disk: struct gendisk to be revalidated - * - * This routine is a wrapper for lower-level driver's revalidate_disk - * call-backs. It is used to do common pre and post operations needed - * for all revalidate_disk operations. - */ -int revalidate_disk(struct gendisk *disk) -{ - int ret = 0; - - if (disk->fops->revalidate_disk) - ret = disk->fops->revalidate_disk(disk); - revalidate_disk_size(disk, ret == 0); - return ret; -} -EXPORT_SYMBOL(revalidate_disk); - /* * This routine checks whether a removable media has been changed, * and invalidates all buffer-cache-entries in that case. This diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 02a73198b289..c618b27292fc 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -370,7 +370,6 @@ extern void blk_unregister_region(dev_t devt, unsigned long range); int register_blkdev(unsigned int major, const char *name); void unregister_blkdev(unsigned int major, const char *name); -int revalidate_disk(struct gendisk *disk); void revalidate_disk_size(struct gendisk *disk, bool verbose); int check_disk_change(struct block_device *bdev); int __invalidate_device(struct block_device *bdev, bool kill_dirty); -- cgit v1.2.3 From 51db1c37ee166159c5753ce8d64d6bacf113e0f0 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 19 Aug 2020 23:20:19 +0800 Subject: blk-mq: Rename BLK_MQ_F_TAG_SHARED as BLK_MQ_F_TAG_QUEUE_SHARED BLK_MQ_F_TAG_SHARED actually means that tags is shared among request queues, all of which should belong to LUNs attached to same HBA. So rename it to make the point explicitly. [jpg: rebase a few times, add rnbd-clt.c change] Suggested-by: Bart Van Assche Signed-off-by: Ming Lei Signed-off-by: John Garry Tested-by: Douglas Gilbert Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 2 +- block/blk-mq-tag.h | 6 +++--- block/blk-mq.c | 20 ++++++++++---------- drivers/block/rnbd/rnbd-clt.c | 2 +- include/linux/blk-mq.h | 2 +- 5 files changed, 16 insertions(+), 16 deletions(-) (limited to 'drivers') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 3f09bcb8a6fd..a4597febff69 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -240,7 +240,7 @@ static const char *const alloc_policy_name[] = { #define HCTX_FLAG_NAME(name) [ilog2(BLK_MQ_F_##name)] = #name static const char *const hctx_flag_name[] = { HCTX_FLAG_NAME(SHOULD_MERGE), - HCTX_FLAG_NAME(TAG_SHARED), + HCTX_FLAG_NAME(TAG_QUEUE_SHARED), HCTX_FLAG_NAME(BLOCKING), HCTX_FLAG_NAME(NO_SCHED), HCTX_FLAG_NAME(STACKING), diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index b1acac518c4e..918e2cee4f43 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -56,7 +56,7 @@ extern void __blk_mq_tag_idle(struct blk_mq_hw_ctx *); static inline bool blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) { - if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) + if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) return false; return __blk_mq_tag_busy(hctx); @@ -64,7 +64,7 @@ static inline bool blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) { - if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) + if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) return; __blk_mq_tag_idle(hctx); @@ -79,7 +79,7 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, { unsigned int depth, users; - if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_SHARED)) + if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) return true; if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) return true; diff --git a/block/blk-mq.c b/block/blk-mq.c index b3d2785eefe9..75031fa1709c 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1124,7 +1124,7 @@ static bool blk_mq_get_driver_tag(struct request *rq) if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_get_driver_tag(rq)) return false; - if ((hctx->flags & BLK_MQ_F_TAG_SHARED) && + if ((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) && !(rq->rq_flags & RQF_MQ_INFLIGHT)) { rq->rq_flags |= RQF_MQ_INFLIGHT; atomic_inc(&hctx->nr_active); @@ -1168,7 +1168,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, wait_queue_entry_t *wait; bool ret; - if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) { + if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) { blk_mq_sched_mark_restart_hctx(hctx); /* @@ -1420,7 +1420,7 @@ out: bool needs_restart; /* For non-shared tags, the RESTART check will suffice */ bool no_tag = prep == PREP_DISPATCH_NO_TAG && - (hctx->flags & BLK_MQ_F_TAG_SHARED); + (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED); bool no_budget_avail = prep == PREP_DISPATCH_NO_BUDGET; blk_mq_release_budgets(q, nr_budgets); @@ -2668,7 +2668,7 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set, spin_lock_init(&hctx->lock); INIT_LIST_HEAD(&hctx->dispatch); hctx->queue = q; - hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED; + hctx->flags = set->flags & ~BLK_MQ_F_TAG_QUEUE_SHARED; INIT_LIST_HEAD(&hctx->hctx_list); @@ -2885,9 +2885,9 @@ static void queue_set_hctx_shared(struct request_queue *q, bool shared) queue_for_each_hw_ctx(q, hctx, i) { if (shared) - hctx->flags |= BLK_MQ_F_TAG_SHARED; + hctx->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; else - hctx->flags &= ~BLK_MQ_F_TAG_SHARED; + hctx->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; } } @@ -2913,7 +2913,7 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q) list_del(&q->tag_set_list); if (list_is_singular(&set->tag_list)) { /* just transitioned to unshared */ - set->flags &= ~BLK_MQ_F_TAG_SHARED; + set->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; /* update existing queue */ blk_mq_update_tag_set_depth(set, false); } @@ -2930,12 +2930,12 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set, * Check to see if we're transitioning to shared (from 1 to 2 queues). */ if (!list_empty(&set->tag_list) && - !(set->flags & BLK_MQ_F_TAG_SHARED)) { - set->flags |= BLK_MQ_F_TAG_SHARED; + !(set->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) { + set->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; /* update existing queue */ blk_mq_update_tag_set_depth(set, true); } - if (set->flags & BLK_MQ_F_TAG_SHARED) + if (set->flags & BLK_MQ_F_TAG_QUEUE_SHARED) queue_set_hctx_shared(q, true); list_add_tail(&q->tag_set_list, &set->tag_list); diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 157538fc8be5..d7a69741c0f6 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1174,7 +1174,7 @@ static int setup_mq_tags(struct rnbd_clt_session *sess) tag_set->queue_depth = sess->queue_depth; tag_set->numa_node = NUMA_NO_NODE; tag_set->flags = BLK_MQ_F_SHOULD_MERGE | - BLK_MQ_F_TAG_SHARED; + BLK_MQ_F_TAG_QUEUE_SHARED; tag_set->cmd_size = sizeof(struct rnbd_iu); tag_set->nr_hw_queues = num_online_cpus(); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 21a02e0577dd..982c4f92b63c 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -378,7 +378,7 @@ struct blk_mq_ops { enum { BLK_MQ_F_SHOULD_MERGE = 1 << 0, - BLK_MQ_F_TAG_SHARED = 1 << 1, + BLK_MQ_F_TAG_QUEUE_SHARED = 1 << 1, /* * Set when this device requires underlying blk-mq device for * completing IO: -- cgit v1.2.3 From acdfd443b32036e9040d7413fd9c64d8da267868 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 16:53:30 +0200 Subject: amiflop: use bdev_check_media_change The Amiga floppy driver does not have a ->revalidate_disk method, so it can just use bdev_check_media_change without any additional changes. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/block/amiflop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 226219da3da6..71c2b1564558 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1670,7 +1670,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) } if (mode & (FMODE_READ|FMODE_WRITE)) { - check_disk_change(bdev); + bdev_check_media_change(bdev); if (mode & FMODE_WRITE) { int wrprot; -- cgit v1.2.3 From 22f97164ef233fe68948b1c99dc8e6077c39c2af Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 16:53:31 +0200 Subject: ataflop: use bdev_check_media_change Switch to use bdev_check_media_change instead of check_disk_change and call floppy_revalidate manually. Given that floppy_revalidate only deals with media change events, the extra call into ->revalidate_disk from bdev_disk_changed is not required either, so stop wiring up the method. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/block/ataflop.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index a50e13af0305..3e881fdb06e0 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -1732,7 +1732,8 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, /* invalidate the buffer track to force a reread */ BufferDrive = -1; set_bit(drive, &fake_change); - check_disk_change(bdev); + if (bdev_check_media_change(bdev)) + floppy_revalidate(bdev->bd_disk); return 0; default: return -EINVAL; @@ -1909,7 +1910,8 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) return 0; if (mode & (FMODE_READ|FMODE_WRITE)) { - check_disk_change(bdev); + if (bdev_check_media_change(bdev)) + floppy_revalidate(bdev->bd_disk); if (mode & FMODE_WRITE) { if (p->wpstat) { if (p->ref < 0) @@ -1953,7 +1955,6 @@ static const struct block_device_operations floppy_fops = { .release = floppy_release, .ioctl = fd_ioctl, .check_events = floppy_check_events, - .revalidate_disk= floppy_revalidate, }; static const struct blk_mq_ops ataflop_mq_ops = { -- cgit v1.2.3 From 4a6f3d480edc3570a8059f3a0fb388641b6ec73f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 16:53:32 +0200 Subject: floppy: use bdev_check_media_change Switch to use bdev_check_media_change instead of check_disk_change and call floppy_revalidate manually. Given that floppy_revalidate only deals with media change events, the extra call into ->revalidate_disk from bdev_disk_changed is not required either, so stop wiring up the method. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index a563b023458a..7df79ae6b0a1 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -561,6 +561,7 @@ static void floppy_release_irq_and_dma(void); * output_byte is automatically disabled when reset is set. */ static void reset_fdc(void); +static int floppy_revalidate(struct gendisk *disk); /* * These are global variables, as that's the easiest way to give @@ -3275,7 +3276,8 @@ static int invalidate_drive(struct block_device *bdev) /* invalidate the buffer track to force a reread */ set_bit((long)bdev->bd_disk->private_data, &fake_change); process_fd_request(); - check_disk_change(bdev); + if (bdev_check_media_change(bdev)) + floppy_revalidate(bdev->bd_disk); return 0; } @@ -4123,7 +4125,8 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) drive_state[drive].last_checked = 0; clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags); - check_disk_change(bdev); + if (bdev_check_media_change(bdev)) + floppy_revalidate(bdev->bd_disk); if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags)) goto out; if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags)) @@ -4291,7 +4294,6 @@ static const struct block_device_operations floppy_fops = { .ioctl = fd_ioctl, .getgeo = fd_getgeo, .check_events = floppy_check_events, - .revalidate_disk = floppy_revalidate, #ifdef CONFIG_COMPAT .compat_ioctl = fd_compat_ioctl, #endif -- cgit v1.2.3 From 7fe76fc04bd1b551b8c712249c921facfd98a11b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 16:53:33 +0200 Subject: swim: use bdev_check_media_change Switch to use bdev_check_media_change instead of check_disk_change and call floppy_revalidate manually. Given that floppy_revalidate only deals with media change events, the extra call into ->revalidate_disk from bdev_disk_changed is not required either, so stop wiring up the method. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/block/swim.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/swim.c b/drivers/block/swim.c index dd34504382e5..d4565c555b28 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -217,6 +217,8 @@ extern int swim_read_sector_header(struct swim __iomem *base, extern int swim_read_sector_data(struct swim __iomem *base, unsigned char *data); +static int floppy_revalidate(struct gendisk *disk); + static DEFINE_MUTEX(swim_mutex); static inline void set_swim_mode(struct swim __iomem *base, int enable) { @@ -638,7 +640,8 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) return 0; if (mode & (FMODE_READ|FMODE_WRITE)) { - check_disk_change(bdev); + if (bdev_check_media_change(bdev)) + floppy_revalidate(bdev->bd_disk); if ((mode & FMODE_WRITE) && fs->write_protected) { err = -EROFS; goto out; @@ -760,7 +763,6 @@ static const struct block_device_operations floppy_fops = { .ioctl = floppy_ioctl, .getgeo = floppy_getgeo, .check_events = floppy_check_events, - .revalidate_disk = floppy_revalidate, }; static struct kobject *floppy_find(dev_t dev, int *part, void *data) -- cgit v1.2.3 From 433d21ce313e93200cd0938aa717639c8098d45f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 16:53:34 +0200 Subject: swim: simplify media change handling floppy_revalidate mostly duplicates work already done in floppy_open despite only beeing called from floppy_open. Remove the function and just clear the ->ejected flag directly under the right condition. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/block/swim.c | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) (limited to 'drivers') diff --git a/drivers/block/swim.c b/drivers/block/swim.c index d4565c555b28..52dd1efa00f9 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -217,8 +217,6 @@ extern int swim_read_sector_header(struct swim __iomem *base, extern int swim_read_sector_data(struct swim __iomem *base, unsigned char *data); -static int floppy_revalidate(struct gendisk *disk); - static DEFINE_MUTEX(swim_mutex); static inline void set_swim_mode(struct swim __iomem *base, int enable) { @@ -640,8 +638,8 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) return 0; if (mode & (FMODE_READ|FMODE_WRITE)) { - if (bdev_check_media_change(bdev)) - floppy_revalidate(bdev->bd_disk); + if (bdev_check_media_change(bdev) && fs->disk_in) + fs->ejected = 0; if ((mode & FMODE_WRITE) && fs->write_protected) { err = -EROFS; goto out; @@ -738,24 +736,6 @@ static unsigned int floppy_check_events(struct gendisk *disk, return fs->ejected ? DISK_EVENT_MEDIA_CHANGE : 0; } -static int floppy_revalidate(struct gendisk *disk) -{ - struct floppy_state *fs = disk->private_data; - struct swim __iomem *base = fs->swd->base; - - swim_drive(base, fs->location); - - if (fs->ejected) - setup_medium(fs); - - if (!fs->disk_in) - swim_motor(base, OFF); - else - fs->ejected = 0; - - return !fs->disk_in; -} - static const struct block_device_operations floppy_fops = { .owner = THIS_MODULE, .open = floppy_unlocked_open, -- cgit v1.2.3 From 3dcbad297b9976394190ee40913b53e50e9c83f2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 16:53:35 +0200 Subject: swim3: use bdev_check_media_changed Switch to use bdev_check_media_changed instead of check_disk_change and call floppy_revalidate manually. Given that floppy_revalidate only deals with media change events, the extra call into ->revalidate_disk from bdev_disk_changed is not required either, so stop wiring up the method. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/block/swim3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index aa77eb5fb7de..c2d922d125e2 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -945,7 +945,8 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) if (err == 0 && (mode & FMODE_NDELAY) == 0 && (mode & (FMODE_READ|FMODE_WRITE))) { - check_disk_change(bdev); + if (bdev_check_media_change(bdev)) + floppy_revalidate(bdev->bd_disk); if (fs->ejected) err = -ENXIO; } @@ -1055,7 +1056,6 @@ static const struct block_device_operations floppy_fops = { .release = floppy_release, .ioctl = floppy_ioctl, .check_events = floppy_check_events, - .revalidate_disk= floppy_revalidate, }; static const struct blk_mq_ops swim3_mq_ops = { -- cgit v1.2.3 From f094225bfcd6fc5c093e128ec0adb7a522b38b69 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 16:53:36 +0200 Subject: xsysace: use bdev_check_media_change Switch to use bdev_check_media_change instead of check_disk_change and call ace_revalidate_disk manually. Given that ace_revalidate_disk only deals with media change events, the extra call into ->revalidate_disk from bdev_disk_changed is not required either, so stop wiring up the method. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/block/xsysace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index 5d8e0ab3f054..eefe542f2d9f 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -922,7 +922,8 @@ static int ace_open(struct block_device *bdev, fmode_t mode) ace->users++; spin_unlock_irqrestore(&ace->lock, flags); - check_disk_change(bdev); + if (bdev_check_media_change(bdev)) + ace_revalidate_disk(bdev->bd_disk); mutex_unlock(&xsysace_mutex); return 0; @@ -966,7 +967,6 @@ static const struct block_device_operations ace_fops = { .open = ace_open, .release = ace_release, .check_events = ace_check_events, - .revalidate_disk = ace_revalidate_disk, .getgeo = ace_getgeo, }; -- cgit v1.2.3 From 77f93bfd0cc2e659d7135b4afcd5d0a070515bbf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 16:53:37 +0200 Subject: xsysace: simplify media change handling Pass a struct ace_device to ace_revalidate_disk, move the media changed check into the one caller that needs it, and give the routine a better name. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/block/xsysace.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'drivers') diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index eefe542f2d9f..8d581c7536fb 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -888,26 +888,20 @@ static unsigned int ace_check_events(struct gendisk *gd, unsigned int clearing) return ace->media_change ? DISK_EVENT_MEDIA_CHANGE : 0; } -static int ace_revalidate_disk(struct gendisk *gd) +static void ace_media_changed(struct ace_device *ace) { - struct ace_device *ace = gd->private_data; unsigned long flags; - dev_dbg(ace->dev, "ace_revalidate_disk()\n"); - - if (ace->media_change) { - dev_dbg(ace->dev, "requesting cf id and scheduling tasklet\n"); + dev_dbg(ace->dev, "requesting cf id and scheduling tasklet\n"); - spin_lock_irqsave(&ace->lock, flags); - ace->id_req_count++; - spin_unlock_irqrestore(&ace->lock, flags); + spin_lock_irqsave(&ace->lock, flags); + ace->id_req_count++; + spin_unlock_irqrestore(&ace->lock, flags); - tasklet_schedule(&ace->fsm_tasklet); - wait_for_completion(&ace->id_completion); - } + tasklet_schedule(&ace->fsm_tasklet); + wait_for_completion(&ace->id_completion); dev_dbg(ace->dev, "revalidate complete\n"); - return ace->id_result; } static int ace_open(struct block_device *bdev, fmode_t mode) @@ -922,8 +916,8 @@ static int ace_open(struct block_device *bdev, fmode_t mode) ace->users++; spin_unlock_irqrestore(&ace->lock, flags); - if (bdev_check_media_change(bdev)) - ace_revalidate_disk(bdev->bd_disk); + if (bdev_check_media_change(bdev) && ace->media_change) + ace_media_changed(ace); mutex_unlock(&xsysace_mutex); return 0; @@ -1080,7 +1074,7 @@ static int ace_setup(struct ace_device *ace) (unsigned long long) ace->physaddr, ace->baseaddr, ace->irq); ace->media_change = 1; - ace_revalidate_disk(ace->gd); + ace_media_changed(ace); /* Make the sysace device 'live' */ add_disk(ace->gd); -- cgit v1.2.3 From 1570d14f6713d07716861dea1bb4739df37056d5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 16:53:38 +0200 Subject: paride/pcd: use bdev_check_media_change The pcd driver does not have a ->revalidate_disk method, so it can just use bdev_check_media_change without any additional changes. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/block/paride/pcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 5124eca90e83..70da8b86ce58 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -233,7 +233,7 @@ static int pcd_block_open(struct block_device *bdev, fmode_t mode) struct pcd_unit *cd = bdev->bd_disk->private_data; int ret; - check_disk_change(bdev); + bdev_check_media_change(bdev); mutex_lock(&pcd_mutex); ret = cdrom_open(&cd->info, bdev, mode); -- cgit v1.2.3 From faf041388c28e93be9644985d2da63f82b6f7233 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 16:53:39 +0200 Subject: gdrom: use bdev_check_media_change The Sega GD-ROM driver does not have a ->revalidate_disk method, so it can just use bdev_check_media_change without any additional changes. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/cdrom/gdrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 09b0cd292720..9874fc1c815b 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -479,7 +479,7 @@ static int gdrom_bdops_open(struct block_device *bdev, fmode_t mode) { int ret; - check_disk_change(bdev); + bdev_check_media_change(bdev); mutex_lock(&gdrom_mutex); ret = cdrom_open(gd.cd_info, bdev, mode); -- cgit v1.2.3 From a22be69d5354c63d379cf7ecdbf3eef257f14512 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 16:53:40 +0200 Subject: ide-cd: use bdev_check_media_changed Switch to use bdev_check_media_changed instead of check_disk_change and call idecd_revalidate_disk manually. Given that idecd_revalidate_disk only re-reads the TOC, and we already do the same at probe time, the extra call into ->revalidate_disk from bdev_disk_changed is not required either, so stop wiring up the method. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/ide/ide-cd.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 212bb2d8bf34..6a38cbc80aea 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -56,6 +56,7 @@ static DEFINE_MUTEX(ide_cd_mutex); static DEFINE_MUTEX(idecd_ref_mutex); static void ide_cd_release(struct device *); +static int idecd_revalidate_disk(struct gendisk *disk); static struct cdrom_info *ide_cd_get(struct gendisk *disk) { @@ -1611,7 +1612,8 @@ static int idecd_open(struct block_device *bdev, fmode_t mode) struct cdrom_info *info; int rc = -ENXIO; - check_disk_change(bdev); + if (bdev_check_media_change(bdev)) + idecd_revalidate_disk(bdev->bd_disk); mutex_lock(&ide_cd_mutex); info = ide_cd_get(bdev->bd_disk); @@ -1770,7 +1772,6 @@ static const struct block_device_operations idecd_ops = { .compat_ioctl = IS_ENABLED(CONFIG_COMPAT) ? idecd_compat_ioctl : NULL, .check_events = idecd_check_events, - .revalidate_disk = idecd_revalidate_disk }; /* module options */ -- cgit v1.2.3 From a367e440e15c19f5694186dda9dd8a0f9eeb0c9f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 16:53:41 +0200 Subject: ide-cd: remove idecd_revalidate_disk Just merge the trivial function into its only caller. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/ide/ide-cd.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 6a38cbc80aea..25d2d88e82ad 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -56,7 +56,6 @@ static DEFINE_MUTEX(ide_cd_mutex); static DEFINE_MUTEX(idecd_ref_mutex); static void ide_cd_release(struct device *); -static int idecd_revalidate_disk(struct gendisk *disk); static struct cdrom_info *ide_cd_get(struct gendisk *disk) { @@ -1612,8 +1611,11 @@ static int idecd_open(struct block_device *bdev, fmode_t mode) struct cdrom_info *info; int rc = -ENXIO; - if (bdev_check_media_change(bdev)) - idecd_revalidate_disk(bdev->bd_disk); + if (bdev_check_media_change(bdev)) { + info = ide_drv_g(bdev->bd_disk, cdrom_info); + + ide_cd_read_toc(info->drive); + } mutex_lock(&ide_cd_mutex); info = ide_cd_get(bdev->bd_disk); @@ -1755,15 +1757,6 @@ static unsigned int idecd_check_events(struct gendisk *disk, return cdrom_check_events(&info->devinfo, clearing); } -static int idecd_revalidate_disk(struct gendisk *disk) -{ - struct cdrom_info *info = ide_drv_g(disk, cdrom_info); - - ide_cd_read_toc(info->drive); - - return 0; -} - static const struct block_device_operations idecd_ops = { .owner = THIS_MODULE, .open = idecd_open, -- cgit v1.2.3 From fec2cf607ba9305770436b1e5c485963a9f0a7bd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 16:53:42 +0200 Subject: ide-gd: stop using the disk events mechanism ide-gd is only using the disk events mechanism to be able to force an invalidation and partition scan on opening removable media. Just open code the logic without invoving the block layer. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/ide/ide-disk.c | 5 +---- drivers/ide/ide-floppy.c | 2 -- drivers/ide/ide-gd.c | 48 ++++++------------------------------------------ include/linux/ide.h | 2 -- 4 files changed, 7 insertions(+), 50 deletions(-) (limited to 'drivers') diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 1d3407d7e095..34b9441084f8 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -739,12 +739,9 @@ static void ide_disk_setup(ide_drive_t *drive) set_wcache(drive, 1); if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 && - (drive->head == 0 || drive->head > 16)) { + (drive->head == 0 || drive->head > 16)) printk(KERN_ERR "%s: invalid geometry: %d physical heads?\n", drive->name, drive->head); - drive->dev_flags &= ~IDE_DFLAG_ATTACH; - } else - drive->dev_flags |= IDE_DFLAG_ATTACH; } static void ide_disk_flush(ide_drive_t *drive) diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index af7503b47dbe..f5a2870aaf54 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -516,8 +516,6 @@ static void ide_floppy_setup(ide_drive_t *drive) (void) ide_floppy_get_capacity(drive); ide_proc_register_driver(drive, floppy->driver); - - drive->dev_flags |= IDE_DFLAG_ATTACH; } static void ide_floppy_flush(ide_drive_t *drive) diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c index 05c26986637b..661e2aa9c967 100644 --- a/drivers/ide/ide-gd.c +++ b/drivers/ide/ide-gd.c @@ -225,8 +225,12 @@ static int ide_gd_open(struct block_device *bdev, fmode_t mode) * and the door_lock is irrelevant at this point. */ drive->disk_ops->set_doorlock(drive, disk, 1); - drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED; - check_disk_change(bdev); + if (__invalidate_device(bdev, true)) + pr_warn("VFS: busy inodes on changed media %s\n", + bdev->bd_disk->disk_name); + drive->disk_ops->get_capacity(drive); + set_capacity(disk, ide_gd_capacity(drive)); + set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); } else if (drive->dev_flags & IDE_DFLAG_FORMAT_IN_PROGRESS) { ret = -EBUSY; goto out_put_idkp; @@ -284,32 +288,6 @@ static int ide_gd_getgeo(struct block_device *bdev, struct hd_geometry *geo) return 0; } -static unsigned int ide_gd_check_events(struct gendisk *disk, - unsigned int clearing) -{ - struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj); - ide_drive_t *drive = idkp->drive; - bool ret; - - /* do not scan partitions twice if this is a removable device */ - if (drive->dev_flags & IDE_DFLAG_ATTACH) { - drive->dev_flags &= ~IDE_DFLAG_ATTACH; - return 0; - } - - /* - * The following is used to force revalidation on the first open on - * removeable devices, and never gets reported to userland as - * DISK_EVENT_FLAG_UEVENT isn't set in genhd->event_flags. - * This is intended as removable ide disk can't really detect - * MEDIA_CHANGE events. - */ - ret = drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED; - drive->dev_flags &= ~IDE_DFLAG_MEDIA_CHANGED; - - return ret ? DISK_EVENT_MEDIA_CHANGE : 0; -} - static void ide_gd_unlock_native_capacity(struct gendisk *disk) { struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj); @@ -320,18 +298,6 @@ static void ide_gd_unlock_native_capacity(struct gendisk *disk) disk_ops->unlock_native_capacity(drive); } -static int ide_gd_revalidate_disk(struct gendisk *disk) -{ - struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj); - ide_drive_t *drive = idkp->drive; - - if (ide_gd_check_events(disk, 0)) - drive->disk_ops->get_capacity(drive); - - set_capacity(disk, ide_gd_capacity(drive)); - return 0; -} - static int ide_gd_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { @@ -364,9 +330,7 @@ static const struct block_device_operations ide_gd_ops = { .compat_ioctl = ide_gd_compat_ioctl, #endif .getgeo = ide_gd_getgeo, - .check_events = ide_gd_check_events, .unlock_native_capacity = ide_gd_unlock_native_capacity, - .revalidate_disk = ide_gd_revalidate_disk }; static int ide_gd_probe(ide_drive_t *drive) diff --git a/include/linux/ide.h b/include/linux/ide.h index a254841bd315..62653769509f 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -490,8 +490,6 @@ enum { IDE_DFLAG_NOPROBE = BIT(9), /* need to do check_media_change() */ IDE_DFLAG_REMOVABLE = BIT(10), - /* needed for removable devices */ - IDE_DFLAG_ATTACH = BIT(11), IDE_DFLAG_FORCED_GEOM = BIT(12), /* disallow setting unmask bit */ IDE_DFLAG_NO_UNMASK = BIT(13), -- cgit v1.2.3 From 818077d6e0125b4a7b728457b60b5d89e02a0ce1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 16:53:43 +0200 Subject: md: use bdev_check_media_change The md driver does not have a ->revalidate_disk method, so it can just use bdev_check_media_change without any additional changes. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Acked-by: Song Liu Signed-off-by: Jens Axboe --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 9562ef598ae1..27ed61197014 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7848,7 +7848,7 @@ static int md_open(struct block_device *bdev, fmode_t mode) atomic_inc(&mddev->openers); mutex_unlock(&mddev->open_mutex); - check_disk_change(bdev); + bdev_check_media_change(bdev); out: if (err) mddev_put(mddev); -- cgit v1.2.3 From 471bd0af544bcd80450d61659a1ffabdd1820d7d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 16:53:44 +0200 Subject: sd: use bdev_check_media_change Switch to use bdev_check_media_change instead of check_disk_change and call sd_revalidate_disk manually. As sd also calls sd_revalidate_disk manually during probe and open, the extra call into ->revalidate_disk from bdev_disk_changed is not required either, so stop wiring up the method. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/scsi/sd.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 2bec8cd52616..d020639c28c6 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1381,8 +1381,10 @@ static int sd_open(struct block_device *bdev, fmode_t mode) if (!scsi_block_when_processing_errors(sdev)) goto error_out; - if (sdev->removable || sdkp->write_prot) - check_disk_change(bdev); + if (sdev->removable || sdkp->write_prot) { + if (bdev_check_media_change(bdev)) + sd_revalidate_disk(bdev->bd_disk); + } /* * If the drive is empty, just let the open fail. @@ -1843,7 +1845,6 @@ static const struct block_device_operations sd_fops = { .compat_ioctl = sd_compat_ioctl, #endif .check_events = sd_check_events, - .revalidate_disk = sd_revalidate_disk, .unlock_native_capacity = sd_unlock_native_capacity, .report_zones = sd_zbc_report_zones, .pr_ops = &sd_pr_ops, -- cgit v1.2.3 From afd35c4f573d7eb1df9dbb7e0af0f4d93bd01f31 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 16:53:45 +0200 Subject: sr: use bdev_check_media_change Switch to use bdev_check_media_change instead of check_disk_change and call sr_block_revalidate_disk manually. Also add an explicit call to sr_block_revalidate_disk just before disk_add() to ensure we always read check for a ready unit and read the TOC and then stop wiring up ->revalidate_disk. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- drivers/scsi/sr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 3b3a53c6a0de..34be94b62523 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -86,6 +86,7 @@ static int sr_remove(struct device *); static blk_status_t sr_init_command(struct scsi_cmnd *SCpnt); static int sr_done(struct scsi_cmnd *); static int sr_runtime_suspend(struct device *dev); +static int sr_block_revalidate_disk(struct gendisk *disk); static const struct dev_pm_ops sr_pm_ops = { .runtime_suspend = sr_runtime_suspend, @@ -529,7 +530,8 @@ static int sr_block_open(struct block_device *bdev, fmode_t mode) sdev = cd->device; scsi_autopm_get_device(sdev); - check_disk_change(bdev); + if (bdev_check_media_change(bdev)) + sr_block_revalidate_disk(bdev->bd_disk); mutex_lock(&cd->lock); ret = cdrom_open(&cd->cdi, bdev, mode); @@ -688,7 +690,6 @@ static const struct block_device_operations sr_bdops = .compat_ioctl = sr_block_compat_ioctl, #endif .check_events = sr_block_check_events, - .revalidate_disk = sr_block_revalidate_disk, }; static int sr_open(struct cdrom_device_info *cdi, int purpose) @@ -802,6 +803,7 @@ static int sr_probe(struct device *dev) dev_set_drvdata(dev, cd); disk->flags |= GENHD_FL_REMOVABLE; + sr_block_revalidate_disk(disk); device_add_disk(&sdev->sdev_gendev, disk, NULL); sdev_printk(KERN_DEBUG, sdev, -- cgit v1.2.3 From 38a2b557e23874dd5ed3c767d57d28b2d10b7683 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 16:53:46 +0200 Subject: sr: simplify sr_block_revalidate_disk Both callers have a valid CD struture available, so rely on that instead of getting another reference. Also move the function to avoid a forward declaration. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/scsi/sr.c | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 34be94b62523..2b43c0f97442 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -86,7 +86,6 @@ static int sr_remove(struct device *); static blk_status_t sr_init_command(struct scsi_cmnd *SCpnt); static int sr_done(struct scsi_cmnd *); static int sr_runtime_suspend(struct device *dev); -static int sr_block_revalidate_disk(struct gendisk *disk); static const struct dev_pm_ops sr_pm_ops = { .runtime_suspend = sr_runtime_suspend, @@ -518,6 +517,17 @@ static blk_status_t sr_init_command(struct scsi_cmnd *SCpnt) return ret; } +static void sr_revalidate_disk(struct scsi_cd *cd) +{ + struct scsi_sense_hdr sshdr; + + /* if the unit is not ready, nothing more to do */ + if (scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr)) + return; + sr_cd_check(&cd->cdi); + get_sectorsize(cd); +} + static int sr_block_open(struct block_device *bdev, fmode_t mode) { struct scsi_cd *cd; @@ -531,7 +541,7 @@ static int sr_block_open(struct block_device *bdev, fmode_t mode) sdev = cd->device; scsi_autopm_get_device(sdev); if (bdev_check_media_change(bdev)) - sr_block_revalidate_disk(bdev->bd_disk); + sr_revalidate_disk(cd); mutex_lock(&cd->lock); ret = cdrom_open(&cd->cdi, bdev, mode); @@ -660,26 +670,6 @@ static unsigned int sr_block_check_events(struct gendisk *disk, return ret; } -static int sr_block_revalidate_disk(struct gendisk *disk) -{ - struct scsi_sense_hdr sshdr; - struct scsi_cd *cd; - - cd = scsi_cd_get(disk); - if (!cd) - return -ENXIO; - - /* if the unit is not ready, nothing more to do */ - if (scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr)) - goto out; - - sr_cd_check(&cd->cdi); - get_sectorsize(cd); -out: - scsi_cd_put(cd); - return 0; -} - static const struct block_device_operations sr_bdops = { .owner = THIS_MODULE, @@ -803,7 +793,7 @@ static int sr_probe(struct device *dev) dev_set_drvdata(dev, cd); disk->flags |= GENHD_FL_REMOVABLE; - sr_block_revalidate_disk(disk); + sr_revalidate_disk(cd); device_add_disk(&sdev->sdev_gendev, disk, NULL); sdev_printk(KERN_DEBUG, sdev, -- cgit v1.2.3 From 00fe60eae94efa5629bb6caf3839045cb22017e0 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 31 Aug 2020 15:27:24 -0700 Subject: md: use part_[begin|end]_io_acct instead of disk_[begin|end]_io_acct This enables proper statistics in /proc/diskstats for md partitions. Signed-off-by: Song Liu Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/md/md.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 27ed61197014..64bc22d2b606 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -464,6 +464,7 @@ struct md_io { bio_end_io_t *orig_bi_end_io; void *orig_bi_private; unsigned long start_time; + struct hd_struct *part; }; static void md_end_io(struct bio *bio) @@ -471,7 +472,7 @@ static void md_end_io(struct bio *bio) struct md_io *md_io = bio->bi_private; struct mddev *mddev = md_io->mddev; - disk_end_io_acct(mddev->gendisk, bio_op(bio), md_io->start_time); + part_end_io_acct(md_io->part, bio, md_io->start_time); bio->bi_end_io = md_io->orig_bi_end_io; bio->bi_private = md_io->orig_bi_private; @@ -517,9 +518,8 @@ static blk_qc_t md_submit_bio(struct bio *bio) bio->bi_end_io = md_end_io; bio->bi_private = md_io; - md_io->start_time = disk_start_io_acct(mddev->gendisk, - bio_sectors(bio), - bio_op(bio)); + md_io->start_time = part_start_io_acct(mddev->gendisk, + &md_io->part, bio); } /* bio could be mergeable after passing to underlayer */ -- cgit v1.2.3 From 0806e60f31ec10822cd230d12603169d1bb7840e Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 31 Aug 2020 15:27:25 -0700 Subject: bcache: use part_[begin|end]_io_acct instead of disk_[begin|end]_io_acct This enables proper statistics in /proc/diskstats for bcache partitions. Signed-off-by: Song Liu Reviewed-by: Coly Li Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/md/bcache/request.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index c7cadaafa947..7f54ae223644 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -475,6 +475,7 @@ struct search { unsigned int read_dirty_data:1; unsigned int cache_missed:1; + struct hd_struct *part; unsigned long start_time; struct btree_op op; @@ -669,7 +670,7 @@ static void bio_complete(struct search *s) { if (s->orig_bio) { /* Count on bcache device */ - disk_end_io_acct(s->d->disk, bio_op(s->orig_bio), s->start_time); + part_end_io_acct(s->part, s->orig_bio, s->start_time); trace_bcache_request_end(s->d, s->orig_bio); s->orig_bio->bi_status = s->iop.status; @@ -731,7 +732,7 @@ static inline struct search *search_alloc(struct bio *bio, s->write = op_is_write(bio_op(bio)); s->read_dirty_data = 0; /* Count on the bcache device */ - s->start_time = disk_start_io_acct(d->disk, bio_sectors(bio), bio_op(bio)); + s->start_time = part_start_io_acct(d->disk, &s->part, bio); s->iop.c = d->c; s->iop.bio = NULL; s->iop.inode = d->id; @@ -1072,6 +1073,7 @@ struct detached_dev_io_private { unsigned long start_time; bio_end_io_t *bi_end_io; void *bi_private; + struct hd_struct *part; }; static void detached_dev_end_io(struct bio *bio) @@ -1083,7 +1085,7 @@ static void detached_dev_end_io(struct bio *bio) bio->bi_private = ddip->bi_private; /* Count on the bcache device */ - disk_end_io_acct(ddip->d->disk, bio_op(bio), ddip->start_time); + part_end_io_acct(ddip->part, bio, ddip->start_time); if (bio->bi_status) { struct cached_dev *dc = container_of(ddip->d, @@ -1109,7 +1111,7 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio) ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO); ddip->d = d; /* Count on the bcache device */ - ddip->start_time = disk_start_io_acct(d->disk, bio_sectors(bio), bio_op(bio)); + ddip->start_time = part_start_io_acct(d->disk, &ddip->part, bio); ddip->bi_end_io = bio->bi_end_io; ddip->bi_private = bio->bi_private; bio->bi_end_io = detached_dev_end_io; -- cgit v1.2.3 From 38430f0876fa8b9549ec434f569dce03e057c076 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 21 Sep 2020 09:19:45 +0200 Subject: block: move the NEED_PART_SCAN flag to struct gendisk We can only scan for partitions on the whole disk, so move the flag from struct block_device to struct gendisk. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/genhd.c | 4 ++-- drivers/block/nbd.c | 8 ++++---- drivers/ide/ide-gd.c | 2 +- fs/block_dev.c | 7 +++---- include/linux/blk_types.h | 4 +--- include/linux/genhd.h | 2 ++ 6 files changed, 13 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/block/genhd.c b/block/genhd.c index 9d060e79eb31..7b56203c90a3 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -731,7 +731,7 @@ static void register_disk(struct device *parent, struct gendisk *disk, if (!bdev) goto exit; - set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); + set_bit(GD_NEED_PART_SCAN, &disk->state); err = blkdev_get(bdev, FMODE_READ, NULL); if (err < 0) goto exit; @@ -2112,7 +2112,7 @@ bool bdev_check_media_change(struct block_device *bdev) if (__invalidate_device(bdev, true)) pr_warn("VFS: busy inodes on changed media %s\n", bdev->bd_disk->disk_name); - set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); + set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); return true; } EXPORT_SYMBOL(bdev_check_media_change); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 15eed210feef..2dca0aab0a9a 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -315,7 +315,7 @@ static void nbd_size_update(struct nbd_device *nbd) bd_set_nr_sectors(bdev, nr_sectors); set_blocksize(bdev, config->blksize); } else - set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); + set_bit(GD_NEED_PART_SCAN, &nbd->disk->state); bdput(bdev); } kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); @@ -1322,7 +1322,7 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b return ret; if (max_part) - set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); + set_bit(GD_NEED_PART_SCAN, &nbd->disk->state); mutex_unlock(&nbd->config_lock); ret = wait_event_interruptible(config->recv_wq, atomic_read(&config->recv_threads) == 0); @@ -1500,9 +1500,9 @@ static int nbd_open(struct block_device *bdev, fmode_t mode) refcount_set(&nbd->config_refs, 1); refcount_inc(&nbd->refs); mutex_unlock(&nbd->config_lock); - set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); + set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); } else if (nbd_disconnected(nbd->config)) { - set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); + set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); } out: mutex_unlock(&nbd_index_mutex); diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c index 661e2aa9c967..e2b6c82586ce 100644 --- a/drivers/ide/ide-gd.c +++ b/drivers/ide/ide-gd.c @@ -230,7 +230,7 @@ static int ide_gd_open(struct block_device *bdev, fmode_t mode) bdev->bd_disk->disk_name); drive->disk_ops->get_capacity(drive); set_capacity(disk, ide_gd_capacity(drive)); - set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); + set_bit(GD_NEED_PART_SCAN, &disk->state); } else if (drive->dev_flags & IDE_DFLAG_FORMAT_IN_PROGRESS) { ret = -EBUSY; goto out_put_idkp; diff --git a/fs/block_dev.c b/fs/block_dev.c index 0b34955b9e36..1a9325f43157 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -910,7 +910,6 @@ struct block_device *bdget(dev_t dev) bdev->bd_super = NULL; bdev->bd_inode = inode; bdev->bd_part_count = 0; - bdev->bd_flags = 0; inode->i_mode = S_IFBLK; inode->i_rdev = dev; inode->i_bdev = bdev; @@ -1385,7 +1384,7 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate) lockdep_assert_held(&bdev->bd_mutex); - clear_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); + clear_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); rescan: ret = blk_drop_partitions(bdev); @@ -1509,7 +1508,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, * The latter is necessary to prevent ghost * partitions on a removed medium. */ - if (test_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags) && + if (test_bit(GD_NEED_PART_SCAN, &disk->state) && (!ret || ret == -ENOMEDIUM)) bdev_disk_changed(bdev, ret == -ENOMEDIUM); @@ -1539,7 +1538,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, if (bdev->bd_disk->fops->open) ret = bdev->bd_disk->fops->open(bdev, mode); /* the same as first opener case, read comment there */ - if (test_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags) && + if (test_bit(GD_NEED_PART_SCAN, &disk->state) && (!ret || ret == -ENOMEDIUM)) bdev_disk_changed(bdev, ret == -ENOMEDIUM); if (ret) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 6ffa783e1633..eb20e28184ab 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -19,8 +19,6 @@ struct cgroup_subsys_state; typedef void (bio_end_io_t) (struct bio *); struct bio_crypt_ctx; -#define BDEV_NEED_PART_SCAN 0 - struct block_device { dev_t bd_dev; int bd_openers; @@ -39,7 +37,7 @@ struct block_device { struct hd_struct * bd_part; /* number of times partitions within this device have been opened. */ unsigned bd_part_count; - unsigned long bd_flags; + spinlock_t bd_size_lock; /* for bd_inode->i_size updates */ struct gendisk * bd_disk; struct backing_dev_info *bd_bdi; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 1c97cf84f011..38f23d757013 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -191,6 +191,8 @@ struct gendisk { void *private_data; int flags; + unsigned long state; +#define GD_NEED_PART_SCAN 0 struct rw_semaphore lookup_sem; struct kobject *slave_dir; -- cgit v1.2.3 From 2d9e28a9a10c2381a267a5184560ddc9f673f71b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 21 Sep 2020 09:19:48 +0200 Subject: pktcdvd: remove the if 0'ed pkt_start_recovery function Remove code which has been dead since the initial commit. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/pktcdvd.c | 67 ++----------------------------------------------- 1 file changed, 2 insertions(+), 65 deletions(-) (limited to 'drivers') diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 17f2e6ff1223..bc870a5f15f7 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -1082,65 +1082,6 @@ static void pkt_put_packet_data(struct pktcdvd_device *pd, struct packet_data *p } } -/* - * recover a failed write, query for relocation if possible - * - * returns 1 if recovery is possible, or 0 if not - * - */ -static int pkt_start_recovery(struct packet_data *pkt) -{ - /* - * FIXME. We need help from the file system to implement - * recovery handling. - */ - return 0; -#if 0 - struct request *rq = pkt->rq; - struct pktcdvd_device *pd = rq->rq_disk->private_data; - struct block_device *pkt_bdev; - struct super_block *sb = NULL; - unsigned long old_block, new_block; - sector_t new_sector; - - pkt_bdev = bdget(kdev_t_to_nr(pd->pkt_dev)); - if (pkt_bdev) { - sb = get_super(pkt_bdev); - bdput(pkt_bdev); - } - - if (!sb) - return 0; - - if (!sb->s_op->relocate_blocks) - goto out; - - old_block = pkt->sector / (CD_FRAMESIZE >> 9); - if (sb->s_op->relocate_blocks(sb, old_block, &new_block)) - goto out; - - new_sector = new_block * (CD_FRAMESIZE >> 9); - pkt->sector = new_sector; - - bio_reset(pkt->bio); - bio_set_dev(pkt->bio, pd->bdev); - bio_set_op_attrs(pkt->bio, REQ_OP_WRITE, 0); - pkt->bio->bi_iter.bi_sector = new_sector; - pkt->bio->bi_iter.bi_size = pkt->frames * CD_FRAMESIZE; - pkt->bio->bi_vcnt = pkt->frames; - - pkt->bio->bi_end_io = pkt_end_io_packet_write; - pkt->bio->bi_private = pkt; - - drop_super(sb); - return 1; - -out: - drop_super(sb); - return 0; -#endif -} - static inline void pkt_set_state(struct packet_data *pkt, enum packet_data_state state) { #if PACKET_DEBUG > 1 @@ -1357,12 +1298,8 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data break; case PACKET_RECOVERY_STATE: - if (pkt_start_recovery(pkt)) { - pkt_start_write(pd, pkt); - } else { - pkt_dbg(2, pd, "No recovery possible\n"); - pkt_set_state(pkt, PACKET_FINISHED_STATE); - } + pkt_dbg(2, pd, "No recovery possible\n"); + pkt_set_state(pkt, PACKET_FINISHED_STATE); break; case PACKET_FINISHED_STATE: -- cgit v1.2.3 From b8d954849cf4689d88ad84fd5cdb2d71d060f66a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 21 Sep 2020 09:19:49 +0200 Subject: pktcdvd: use blkdev_get_by_dev instead of open coding it Replace bdget + blkdev_get by blkdev_get_by_dev. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/pktcdvd.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index bc870a5f15f7..467dbd06b7cd 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2110,16 +2110,18 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) int ret; long lba; struct request_queue *q; + struct block_device *bdev; /* * We need to re-open the cdrom device without O_NONBLOCK to be able * to read/write from/to it. It is already opened in O_NONBLOCK mode - * so bdget() can't fail. + * so open should not fail. */ - bdget(pd->bdev->bd_dev); - ret = blkdev_get(pd->bdev, FMODE_READ | FMODE_EXCL, pd); - if (ret) + bdev = blkdev_get_by_dev(pd->bdev->bd_dev, FMODE_READ | FMODE_EXCL, pd); + if (IS_ERR(bdev)) { + ret = PTR_ERR(bdev); goto out; + } ret = pkt_get_last_written(pd, &lba); if (ret) { @@ -2163,7 +2165,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) return 0; out_putdev: - blkdev_put(pd->bdev, FMODE_READ | FMODE_EXCL); + blkdev_put(bdev, FMODE_READ | FMODE_EXCL); out: return ret; } @@ -2500,7 +2502,6 @@ static int pkt_seq_show(struct seq_file *m, void *p) static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) { int i; - int ret = 0; char b[BDEVNAME_SIZE]; struct block_device *bdev; @@ -2523,12 +2524,9 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) } } - bdev = bdget(dev); - if (!bdev) - return -ENOMEM; - ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY, NULL); - if (ret) - return ret; + bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_NDELAY, NULL); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); if (!blk_queue_scsi_passthrough(bdev_get_queue(bdev))) { blkdev_put(bdev, FMODE_READ | FMODE_NDELAY); return -EINVAL; @@ -2546,7 +2544,6 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) pd->cdrw.thread = kthread_run(kcdrwd, pd, "%s", pd->name); if (IS_ERR(pd->cdrw.thread)) { pkt_err(pd, "can't start kernel thread\n"); - ret = -ENOMEM; goto out_mem; } @@ -2558,7 +2555,7 @@ out_mem: blkdev_put(bdev, FMODE_READ | FMODE_NDELAY); /* This is safe: open() is still holding a reference. */ module_put(THIS_MODULE); - return ret; + return -ENOMEM; } static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) -- cgit v1.2.3 From 0fc66c9d63143f9437cd57cf5eab50a258fa40ec Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 21 Sep 2020 09:19:50 +0200 Subject: zram: cleanup backing_dev_store Use blkdev_get_by_dev instead of bdgrab + blkdev_get. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/zram/zram_drv.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index a356275605b1..91ccfe444525 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -491,9 +491,10 @@ static ssize_t backing_dev_store(struct device *dev, goto out; } - bdev = bdgrab(I_BDEV(inode)); - err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram); - if (err < 0) { + bdev = blkdev_get_by_dev(inode->i_rdev, + FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram); + if (IS_ERR(bdev)) { + err = PTR_ERR(bdev); bdev = NULL; goto out; } -- cgit v1.2.3 From 5a56ad7882a11624043db539a2c2076923148aa6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 21 Sep 2020 09:19:51 +0200 Subject: raw: don't keep unopened block device around Turn binding into a normal dev_t as the struct block device doesn't buy us anything and use blkdev_open_by_dev to actually open it. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/char/raw.c | 51 +++++++++++++++++++-------------------------------- 1 file changed, 19 insertions(+), 32 deletions(-) (limited to 'drivers') diff --git a/drivers/char/raw.c b/drivers/char/raw.c index ccf5bd528642..5d52a1f4738c 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c @@ -28,7 +28,8 @@ #include struct raw_device_data { - struct block_device *binding; + dev_t binding; + struct block_device *bdev; int inuse; }; @@ -73,14 +74,15 @@ static int raw_open(struct inode *inode, struct file *filp) /* * All we need to do on open is check that the device is bound. */ - bdev = raw_devices[minor].binding; err = -ENODEV; - if (!bdev) + if (!raw_devices[minor].binding) goto out; - bdgrab(bdev); - err = blkdev_get(bdev, filp->f_mode | FMODE_EXCL, raw_open); - if (err) + bdev = blkdev_get_by_dev(raw_devices[minor].binding, + filp->f_mode | FMODE_EXCL, raw_open); + if (IS_ERR(bdev)) { + err = PTR_ERR(bdev); goto out; + } err = set_blocksize(bdev, bdev_logical_block_size(bdev)); if (err) goto out1; @@ -90,6 +92,7 @@ static int raw_open(struct inode *inode, struct file *filp) file_inode(filp)->i_mapping = bdev->bd_inode->i_mapping; filp->private_data = bdev; + raw_devices[minor].bdev = bdev; mutex_unlock(&raw_mutex); return 0; @@ -110,7 +113,7 @@ static int raw_release(struct inode *inode, struct file *filp) struct block_device *bdev; mutex_lock(&raw_mutex); - bdev = raw_devices[minor].binding; + bdev = raw_devices[minor].bdev; if (--raw_devices[minor].inuse == 0) /* Here inode->i_mapping == bdev->bd_inode->i_mapping */ inode->i_mapping = &inode->i_data; @@ -133,6 +136,7 @@ raw_ioctl(struct file *filp, unsigned int command, unsigned long arg) static int bind_set(int number, u64 major, u64 minor) { dev_t dev = MKDEV(major, minor); + dev_t raw = MKDEV(RAW_MAJOR, number); struct raw_device_data *rawdev; int err = 0; @@ -166,25 +170,17 @@ static int bind_set(int number, u64 major, u64 minor) mutex_unlock(&raw_mutex); return -EBUSY; } - if (rawdev->binding) { - bdput(rawdev->binding); + if (rawdev->binding) module_put(THIS_MODULE); - } + + rawdev->binding = dev; if (!dev) { /* unbind */ - rawdev->binding = NULL; - device_destroy(raw_class, MKDEV(RAW_MAJOR, number)); + device_destroy(raw_class, raw); } else { - rawdev->binding = bdget(dev); - if (rawdev->binding == NULL) { - err = -ENOMEM; - } else { - dev_t raw = MKDEV(RAW_MAJOR, number); - __module_get(THIS_MODULE); - device_destroy(raw_class, raw); - device_create(raw_class, NULL, raw, NULL, - "raw%d", number); - } + __module_get(THIS_MODULE); + device_destroy(raw_class, raw); + device_create(raw_class, NULL, raw, NULL, "raw%d", number); } mutex_unlock(&raw_mutex); return err; @@ -192,18 +188,9 @@ static int bind_set(int number, u64 major, u64 minor) static int bind_get(int number, dev_t *dev) { - struct raw_device_data *rawdev; - struct block_device *bdev; - if (number <= 0 || number >= max_raw_minors) return -EINVAL; - - rawdev = &raw_devices[number]; - - mutex_lock(&raw_mutex); - bdev = rawdev->binding; - *dev = bdev ? bdev->bd_dev : 0; - mutex_unlock(&raw_mutex); + *dev = raw_devices[number].binding; return 0; } -- cgit v1.2.3 From 841ab72cb5be3bbf6544f28329b81c73f0f835b3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 21 Sep 2020 09:19:52 +0200 Subject: dasd: cleanup dasd_scan_partitions Use blkdev_get_by_dev instead of bdget_disk + blkdev_get. Signed-off-by: Christoph Hellwig Reviewed-by: Stefan Haberland Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_genhd.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index af5b0ecb8f89..a9698fba9b76 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -101,18 +101,11 @@ int dasd_scan_partitions(struct dasd_block *block) struct block_device *bdev; int rc; - bdev = bdget_disk(block->gdp, 0); - if (!bdev) { - DBF_DEV_EVENT(DBF_ERR, block->base, "%s", - "scan partitions error, bdget returned NULL"); - return -ENODEV; - } - - rc = blkdev_get(bdev, FMODE_READ, NULL); - if (rc < 0) { + bdev = blkdev_get_by_dev(disk_devt(block->gdp), FMODE_READ, NULL); + if (IS_ERR(bdev)) { DBF_DEV_EVENT(DBF_ERR, block->base, - "scan partitions error, blkdev_get returned %d", - rc); + "scan partitions error, blkdev_get returned %ld", + PTR_ERR(bdev)); return -ENODEV; } -- cgit v1.2.3 From b807a2c5e0e2e81c96160682977c6f497cfcee96 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Sep 2020 08:51:29 +0200 Subject: drbd: remove dead code in device_to_statistics Ever since the switch to blk-mq, a lower device not used for VM writeback will not be marked congested, so the check will never trigger. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_nl.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 43c8ae4d9fca..aaff5bde3915 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -3370,7 +3370,6 @@ static void device_to_statistics(struct device_statistics *s, if (get_ldev(device)) { struct drbd_md *md = &device->ldev->md; u64 *history_uuids = (u64 *)s->history_uuids; - struct request_queue *q; int n; spin_lock_irq(&md->uuid_lock); @@ -3384,11 +3383,6 @@ static void device_to_statistics(struct device_statistics *s, spin_unlock_irq(&md->uuid_lock); s->dev_disk_flags = md->flags; - q = bdev_get_queue(device->ldev->backing_bdev); - s->dev_lower_blocked = - bdi_congested(q->backing_dev_info, - (1 << WB_async_congested) | - (1 << WB_sync_congested)); put_ldev(device); } s->dev_size = drbd_get_capacity(device->this_bdev); -- cgit v1.2.3 From 5d4ce78b254cde8aaeb829420c3f15d3606ad411 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Sep 2020 08:51:30 +0200 Subject: bcache: inherit the optimal I/O size Inherit the optimal I/O size setting just like the readahead window, as any reason to do larger I/O does not apply to just readahead. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Martin K. Petersen Acked-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 1bbdc410ee3c..48113005ed86 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1430,6 +1430,8 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size) dc->disk.disk->queue->backing_dev_info->ra_pages = max(dc->disk.disk->queue->backing_dev_info->ra_pages, q->backing_dev_info->ra_pages); + blk_queue_io_opt(dc->disk.disk->queue, + max(queue_io_opt(dc->disk.disk->queue), queue_io_opt(q))); atomic_set(&dc->io_errors, 0); dc->io_disable = false; -- cgit v1.2.3 From 9e82d35b95e55aa50d264caee64a17840e3981b7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Sep 2020 08:51:31 +0200 Subject: aoe: set an optimal I/O size aoe forces a larger readahead size, but any reason to do larger I/O is not limited to readahead. Also set the optimal I/O size, and remove the local constants in favor of just using SZ_2G. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Martin K. Petersen Signed-off-by: Jens Axboe --- drivers/block/aoe/aoeblk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 5ca7216e9e01..d8cfc233e64b 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -347,7 +347,6 @@ aoeblk_gdalloc(void *vp) mempool_t *mp; struct request_queue *q; struct blk_mq_tag_set *set; - enum { KB = 1024, MB = KB * KB, READ_AHEAD = 2 * MB, }; ulong flags; int late = 0; int err; @@ -407,7 +406,8 @@ aoeblk_gdalloc(void *vp) WARN_ON(d->gd); WARN_ON(d->flags & DEVFL_UP); blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS); - q->backing_dev_info->ra_pages = READ_AHEAD / PAGE_SIZE; + q->backing_dev_info->ra_pages = SZ_2M / PAGE_SIZE; + blk_queue_io_opt(q, SZ_2M); d->bufpool = mp; d->blkq = gd->queue = q; q->queuedata = d; -- cgit v1.2.3 From 55b2598e84e97efc5d952958cb5e34236c43276b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Sep 2020 08:51:32 +0200 Subject: bdi: initialize ->ra_pages and ->io_pages in bdi_init Set up a readahead size by default, as very few users have a good reason to change it. This means code, ecryptfs, and orangefs now set up the values while they were previously missing it, while ubifs, mtd and vboxsf manually set it to 0 to avoid readahead. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Acked-by: David Sterba [btrfs] Acked-by: Richard Weinberger [ubifs, mtd] Signed-off-by: Jens Axboe --- block/blk-core.c | 2 -- drivers/mtd/mtdcore.c | 2 ++ fs/9p/vfs_super.c | 6 ++++-- fs/afs/super.c | 1 - fs/btrfs/disk-io.c | 1 - fs/fuse/inode.c | 1 - fs/nfs/super.c | 9 +-------- fs/ubifs/super.c | 2 ++ fs/vboxsf/super.c | 2 ++ mm/backing-dev.c | 2 ++ 10 files changed, 13 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/block/blk-core.c b/block/blk-core.c index ca3f0f00c943..865d39e5be2b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -538,8 +538,6 @@ struct request_queue *blk_alloc_queue(int node_id) if (!q->stats) goto fail_stats; - q->backing_dev_info->ra_pages = VM_READAHEAD_PAGES; - q->backing_dev_info->io_pages = VM_READAHEAD_PAGES; q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK; q->node = node_id; diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 7d930569a7df..b5e5d3140f57 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -2196,6 +2196,8 @@ static struct backing_dev_info * __init mtd_bdi_init(char *name) bdi = bdi_alloc(NUMA_NO_NODE); if (!bdi) return ERR_PTR(-ENOMEM); + bdi->ra_pages = 0; + bdi->io_pages = 0; /* * We put '-0' suffix to the name to get the same name format as we diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 74df32be4c6a..e34fa20acf61 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -80,8 +80,10 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, if (ret) return ret; - if (v9ses->cache) - sb->s_bdi->ra_pages = VM_READAHEAD_PAGES; + if (!v9ses->cache) { + sb->s_bdi->ra_pages = 0; + sb->s_bdi->io_pages = 0; + } sb->s_flags |= SB_ACTIVE | SB_DIRSYNC; if (!v9ses->cache) diff --git a/fs/afs/super.c b/fs/afs/super.c index b552357b1d13..3a40ee752c1e 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -456,7 +456,6 @@ static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx) ret = super_setup_bdi(sb); if (ret) return ret; - sb->s_bdi->ra_pages = VM_READAHEAD_PAGES; /* allocate the root inode and dentry */ if (as->dyn_root) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f6bba7eb1fa1..047934cea25e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3092,7 +3092,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device } sb->s_bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK; - sb->s_bdi->ra_pages = VM_READAHEAD_PAGES; sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super); sb->s_bdi->ra_pages = max(sb->s_bdi->ra_pages, SZ_4M / PAGE_SIZE); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index bba747520e9b..17b00670fb53 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1049,7 +1049,6 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) if (err) return err; - sb->s_bdi->ra_pages = VM_READAHEAD_PAGES; /* fuse does it's own writeback accounting */ sb->s_bdi->capabilities = BDI_CAP_NO_ACCT_WB | BDI_CAP_STRICTLIMIT; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 7a70287f21a2..f943e37853fa 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1200,13 +1200,6 @@ static void nfs_get_cache_cookie(struct super_block *sb, } #endif -static void nfs_set_readahead(struct backing_dev_info *bdi, - unsigned long iomax_pages) -{ - bdi->ra_pages = VM_READAHEAD_PAGES; - bdi->io_pages = iomax_pages; -} - int nfs_get_tree_common(struct fs_context *fc) { struct nfs_fs_context *ctx = nfs_fc2context(fc); @@ -1251,7 +1244,7 @@ int nfs_get_tree_common(struct fs_context *fc) MINOR(server->s_dev)); if (error) goto error_splat_super; - nfs_set_readahead(s->s_bdi, server->rpages); + s->s_bdi->io_pages = server->rpages; server->super = s; } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index a2420c900275..fbddb2a1c03f 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -2177,6 +2177,8 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) c->vi.vol_id); if (err) goto out_close; + sb->s_bdi->ra_pages = 0; + sb->s_bdi->io_pages = 0; sb->s_fs_info = c; sb->s_magic = UBIFS_SUPER_MAGIC; diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c index 8fe03b4a0d2b..8e3792177a85 100644 --- a/fs/vboxsf/super.c +++ b/fs/vboxsf/super.c @@ -167,6 +167,8 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc) err = super_setup_bdi_name(sb, "vboxsf-%d", sbi->bdi_id); if (err) goto fail_free; + sb->s_bdi->ra_pages = 0; + sb->s_bdi->io_pages = 0; /* Turn source into a shfl_string and map the folder */ size = strlen(fc->source) + 1; diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 8e8b00627bb2..2dac3be61271 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -746,6 +746,8 @@ struct backing_dev_info *bdi_alloc(int node_id) kfree(bdi); return NULL; } + bdi->ra_pages = VM_READAHEAD_PAGES; + bdi->io_pages = VM_READAHEAD_PAGES; return bdi; } EXPORT_SYMBOL(bdi_alloc); -- cgit v1.2.3 From 16ef510139315a2147ee7525796f8dbd4e4b7864 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Sep 2020 08:51:33 +0200 Subject: md: update the optimal I/O size on reshape The raid5 and raid10 drivers currently update the read-ahead size, but not the optimal I/O size on reshape. To prepare for deriving the read-ahead size from the optimal I/O size make sure it is updated as well. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Martin K. Petersen Acked-by: Song Liu Signed-off-by: Jens Axboe --- drivers/md/raid10.c | 22 ++++++++++++++-------- drivers/md/raid5.c | 10 ++++++++-- 2 files changed, 22 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index e8fa32733917..9956a04ac13b 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3703,10 +3703,20 @@ static struct r10conf *setup_conf(struct mddev *mddev) return ERR_PTR(err); } +static void raid10_set_io_opt(struct r10conf *conf) +{ + int raid_disks = conf->geo.raid_disks; + + if (!(conf->geo.raid_disks % conf->geo.near_copies)) + raid_disks /= conf->geo.near_copies; + blk_queue_io_opt(conf->mddev->queue, (conf->mddev->chunk_sectors << 9) * + raid_disks); +} + static int raid10_run(struct mddev *mddev) { struct r10conf *conf; - int i, disk_idx, chunk_size; + int i, disk_idx; struct raid10_info *disk; struct md_rdev *rdev; sector_t size; @@ -3742,18 +3752,13 @@ static int raid10_run(struct mddev *mddev) mddev->thread = conf->thread; conf->thread = NULL; - chunk_size = mddev->chunk_sectors << 9; if (mddev->queue) { blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors); blk_queue_max_write_same_sectors(mddev->queue, 0); blk_queue_max_write_zeroes_sectors(mddev->queue, 0); - blk_queue_io_min(mddev->queue, chunk_size); - if (conf->geo.raid_disks % conf->geo.near_copies) - blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks); - else - blk_queue_io_opt(mddev->queue, chunk_size * - (conf->geo.raid_disks / conf->geo.near_copies)); + blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); + raid10_set_io_opt(conf); } rdev_for_each(rdev, mddev) { @@ -4727,6 +4732,7 @@ static void end_reshape(struct r10conf *conf) stripe /= conf->geo.near_copies; if (conf->mddev->queue->backing_dev_info->ra_pages < 2 * stripe) conf->mddev->queue->backing_dev_info->ra_pages = 2 * stripe; + raid10_set_io_opt(conf); } conf->fullsync = 0; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 225380efd1e2..9a7d1250894e 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7232,6 +7232,12 @@ static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded return 0; } +static void raid5_set_io_opt(struct r5conf *conf) +{ + blk_queue_io_opt(conf->mddev->queue, (conf->chunk_sectors << 9) * + (conf->raid_disks - conf->max_degraded)); +} + static int raid5_run(struct mddev *mddev) { struct r5conf *conf; @@ -7521,8 +7527,7 @@ static int raid5_run(struct mddev *mddev) chunk_size = mddev->chunk_sectors << 9; blk_queue_io_min(mddev->queue, chunk_size); - blk_queue_io_opt(mddev->queue, chunk_size * - (conf->raid_disks - conf->max_degraded)); + raid5_set_io_opt(conf); mddev->queue->limits.raid_partial_stripes_expensive = 1; /* * We can only discard a whole stripe. It doesn't make sense to @@ -8115,6 +8120,7 @@ static void end_reshape(struct r5conf *conf) / PAGE_SIZE); if (conf->mddev->queue->backing_dev_info->ra_pages < 2 * stripe) conf->mddev->queue->backing_dev_info->ra_pages = 2 * stripe; + raid5_set_io_opt(conf); } } } -- cgit v1.2.3 From c2e4cd57cfa1f627b786c764d185fff85fd12be9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Sep 2020 08:51:34 +0200 Subject: block: lift setting the readahead size into the block layer Drivers shouldn't really mess with the readahead size, as that is a VM concept. Instead set it based on the optimal I/O size by lifting the algorithm from the md driver when registering the disk. Also set bdi->io_pages there as well by applying the same scheme based on max_sectors. To ensure the limits work well for stacking drivers a new helper is added to update the readahead limits from the block limits, which is also called from disk_stack_limits. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Jan Kara Reviewed-by: Mike Snitzer Reviewed-by: Martin K. Petersen Acked-by: Coly Li Signed-off-by: Jens Axboe --- block/blk-settings.c | 18 ++++++++++++++++-- block/blk-sysfs.c | 2 ++ drivers/block/aoe/aoeblk.c | 1 - drivers/block/drbd/drbd_nl.c | 10 +--------- drivers/md/bcache/super.c | 3 --- drivers/md/dm-table.c | 3 +-- drivers/md/raid0.c | 16 ---------------- drivers/md/raid10.c | 24 +----------------------- drivers/md/raid5.c | 13 +------------ drivers/nvme/host/core.c | 1 + include/linux/blkdev.h | 1 + 11 files changed, 24 insertions(+), 68 deletions(-) (limited to 'drivers') diff --git a/block/blk-settings.c b/block/blk-settings.c index 5ea3de48afba..4f6eb4bb1723 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -372,6 +372,19 @@ void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset) } EXPORT_SYMBOL(blk_queue_alignment_offset); +void blk_queue_update_readahead(struct request_queue *q) +{ + /* + * For read-ahead of large files to be effective, we need to read ahead + * at least twice the optimal I/O size. + */ + q->backing_dev_info->ra_pages = + max(queue_io_opt(q) * 2 / PAGE_SIZE, VM_READAHEAD_PAGES); + q->backing_dev_info->io_pages = + queue_max_sectors(q) >> (PAGE_SHIFT - 9); +} +EXPORT_SYMBOL_GPL(blk_queue_update_readahead); + /** * blk_limits_io_min - set minimum request size for a device * @limits: the queue limits @@ -450,6 +463,8 @@ EXPORT_SYMBOL(blk_limits_io_opt); void blk_queue_io_opt(struct request_queue *q, unsigned int opt) { blk_limits_io_opt(&q->limits, opt); + q->backing_dev_info->ra_pages = + max(queue_io_opt(q) * 2 / PAGE_SIZE, VM_READAHEAD_PAGES); } EXPORT_SYMBOL(blk_queue_io_opt); @@ -631,8 +646,7 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, top, bottom); } - t->backing_dev_info->io_pages = - t->limits.max_sectors >> (PAGE_SHIFT - 9); + blk_queue_update_readahead(disk->queue); } EXPORT_SYMBOL(disk_stack_limits); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 81722cdcf0cb..869ed21a9edc 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -854,6 +854,8 @@ int blk_register_queue(struct gendisk *disk) percpu_ref_switch_to_percpu(&q->q_usage_counter); } + blk_queue_update_readahead(q); + ret = blk_trace_init_sysfs(dev); if (ret) return ret; diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index d8cfc233e64b..c34e71b0c4a9 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -406,7 +406,6 @@ aoeblk_gdalloc(void *vp) WARN_ON(d->gd); WARN_ON(d->flags & DEVFL_UP); blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS); - q->backing_dev_info->ra_pages = SZ_2M / PAGE_SIZE; blk_queue_io_opt(q, SZ_2M); d->bufpool = mp; d->blkq = gd->queue = q; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index aaff5bde3915..54a4930c04fe 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1362,15 +1362,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi if (b) { blk_stack_limits(&q->limits, &b->limits, 0); - - if (q->backing_dev_info->ra_pages != - b->backing_dev_info->ra_pages) { - drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n", - q->backing_dev_info->ra_pages, - b->backing_dev_info->ra_pages); - q->backing_dev_info->ra_pages = - b->backing_dev_info->ra_pages; - } + blk_queue_update_readahead(q); } fixup_discard_if_not_supported(q); fixup_write_zeroes(device, q); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 48113005ed86..6bfa77167362 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1427,9 +1427,6 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size) if (ret) return ret; - dc->disk.disk->queue->backing_dev_info->ra_pages = - max(dc->disk.disk->queue->backing_dev_info->ra_pages, - q->backing_dev_info->ra_pages); blk_queue_io_opt(dc->disk.disk->queue, max(queue_io_opt(dc->disk.disk->queue), queue_io_opt(q))); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 5edc3079e7c1..ef2757012f59 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1925,8 +1925,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, } #endif - /* Allow reads to exceed readahead limits */ - q->backing_dev_info->io_pages = limits->max_sectors >> (PAGE_SHIFT - 9); + blk_queue_update_readahead(q); } unsigned int dm_table_get_num_targets(struct dm_table *t) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index f54a449f97aa..aa2d72791768 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -410,22 +410,6 @@ static int raid0_run(struct mddev *mddev) mdname(mddev), (unsigned long long)mddev->array_sectors); - if (mddev->queue) { - /* calculate the max read-ahead size. - * For read-ahead of large files to be effective, we need to - * readahead at least twice a whole stripe. i.e. number of devices - * multiplied by chunk size times 2. - * If an individual device has an ra_pages greater than the - * chunk size, then we will not drive that device as hard as it - * wants. We consider this a configuration error: a larger - * chunksize should be used in that case. - */ - int stripe = mddev->raid_disks * - (mddev->chunk_sectors << 9) / PAGE_SIZE; - if (mddev->queue->backing_dev_info->ra_pages < 2* stripe) - mddev->queue->backing_dev_info->ra_pages = 2* stripe; - } - dump_zones(mddev); ret = md_integrity_register(mddev); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 9956a04ac13b..5d1bdee313ec 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3873,19 +3873,6 @@ static int raid10_run(struct mddev *mddev) mddev->resync_max_sectors = size; set_bit(MD_FAILFAST_SUPPORTED, &mddev->flags); - if (mddev->queue) { - int stripe = conf->geo.raid_disks * - ((mddev->chunk_sectors << 9) / PAGE_SIZE); - - /* Calculate max read-ahead size. - * We need to readahead at least twice a whole stripe.... - * maybe... - */ - stripe /= conf->geo.near_copies; - if (mddev->queue->backing_dev_info->ra_pages < 2 * stripe) - mddev->queue->backing_dev_info->ra_pages = 2 * stripe; - } - if (md_integrity_register(mddev)) goto out_free_conf; @@ -4723,17 +4710,8 @@ static void end_reshape(struct r10conf *conf) conf->reshape_safe = MaxSector; spin_unlock_irq(&conf->device_lock); - /* read-ahead size must cover two whole stripes, which is - * 2 * (datadisks) * chunksize where 'n' is the number of raid devices - */ - if (conf->mddev->queue) { - int stripe = conf->geo.raid_disks * - ((conf->mddev->chunk_sectors << 9) / PAGE_SIZE); - stripe /= conf->geo.near_copies; - if (conf->mddev->queue->backing_dev_info->ra_pages < 2 * stripe) - conf->mddev->queue->backing_dev_info->ra_pages = 2 * stripe; + if (conf->mddev->queue) raid10_set_io_opt(conf); - } conf->fullsync = 0; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 9a7d1250894e..7ace1f76b147 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7522,8 +7522,6 @@ static int raid5_run(struct mddev *mddev) int data_disks = conf->previous_raid_disks - conf->max_degraded; int stripe = data_disks * ((mddev->chunk_sectors << 9) / PAGE_SIZE); - if (mddev->queue->backing_dev_info->ra_pages < 2 * stripe) - mddev->queue->backing_dev_info->ra_pages = 2 * stripe; chunk_size = mddev->chunk_sectors << 9; blk_queue_io_min(mddev->queue, chunk_size); @@ -8111,17 +8109,8 @@ static void end_reshape(struct r5conf *conf) spin_unlock_irq(&conf->device_lock); wake_up(&conf->wait_for_overlap); - /* read-ahead size must cover two whole stripes, which is - * 2 * (datadisks) * chunksize where 'n' is the number of raid devices - */ - if (conf->mddev->queue) { - int data_disks = conf->raid_disks - conf->max_degraded; - int stripe = data_disks * ((conf->chunk_sectors << 9) - / PAGE_SIZE); - if (conf->mddev->queue->backing_dev_info->ra_pages < 2 * stripe) - conf->mddev->queue->backing_dev_info->ra_pages = 2 * stripe; + if (conf->mddev->queue) raid5_set_io_opt(conf); - } } } diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ea1fa41fbba8..741c9bfa8e14 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2147,6 +2147,7 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) nvme_update_disk_info(ns->head->disk, ns, id); blk_stack_limits(&ns->head->disk->queue->limits, &ns->queue->limits, 0); + blk_queue_update_readahead(ns->head->disk->queue); nvme_update_bdev_size(ns->head->disk); } #endif diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index be5ef6f4ba19..282f5ca424f1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1140,6 +1140,7 @@ extern void blk_queue_max_zone_append_sectors(struct request_queue *q, extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); extern void blk_queue_alignment_offset(struct request_queue *q, unsigned int alignment); +void blk_queue_update_readahead(struct request_queue *q); extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); extern void blk_queue_io_min(struct request_queue *q, unsigned int min); extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); -- cgit v1.2.3 From a8b456d01cd6b37191f14248f3e2bdbe5ce3a89e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Sep 2020 08:51:36 +0200 Subject: bdi: remove BDI_CAP_SYNCHRONOUS_IO BDI_CAP_SYNCHRONOUS_IO is only checked in the swap code, and used to decided if ->rw_page can be used on a block device. Just check up for the method instead. The only complication is that zram needs a second set of block_device_operations as it can switch between modes that actually support ->rw_page and those who don't. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- drivers/block/brd.c | 1 - drivers/block/zram/zram_drv.c | 19 +++++++++++++------ drivers/nvdimm/btt.c | 2 -- drivers/nvdimm/pmem.c | 1 - include/linux/backing-dev.h | 9 --------- mm/swapfile.c | 2 +- 6 files changed, 14 insertions(+), 20 deletions(-) (limited to 'drivers') diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 2723a70eb855..cc49a921339f 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -403,7 +403,6 @@ static struct brd_device *brd_alloc(int i) disk->flags = GENHD_FL_EXT_DEVT; sprintf(disk->disk_name, "ram%d", i); set_capacity(disk, rd_size * 2); - brd->brd_queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO; /* Tell the block layer that this is not a rotational device */ blk_queue_flag_set(QUEUE_FLAG_NONROT, brd->brd_queue); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 91ccfe444525..e21ca844d7c2 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -52,6 +52,9 @@ static unsigned int num_devices = 1; */ static size_t huge_class_size; +static const struct block_device_operations zram_devops; +static const struct block_device_operations zram_wb_devops; + static void zram_free_page(struct zram *zram, size_t index); static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, u32 index, int offset, struct bio *bio); @@ -408,8 +411,7 @@ static void reset_bdev(struct zram *zram) zram->backing_dev = NULL; zram->old_block_size = 0; zram->bdev = NULL; - zram->disk->queue->backing_dev_info->capabilities |= - BDI_CAP_SYNCHRONOUS_IO; + zram->disk->fops = &zram_devops; kvfree(zram->bitmap); zram->bitmap = NULL; } @@ -529,8 +531,7 @@ static ssize_t backing_dev_store(struct device *dev, * freely but in fact, IO is going on so finally could cause * use-after-free when the IO is really done. */ - zram->disk->queue->backing_dev_info->capabilities &= - ~BDI_CAP_SYNCHRONOUS_IO; + zram->disk->fops = &zram_wb_devops; up_write(&zram->init_lock); pr_info("setup backing device %s\n", file_name); @@ -1820,6 +1821,13 @@ static const struct block_device_operations zram_devops = { .owner = THIS_MODULE }; +static const struct block_device_operations zram_wb_devops = { + .open = zram_open, + .submit_bio = zram_submit_bio, + .swap_slot_free_notify = zram_slot_free_notify, + .owner = THIS_MODULE +}; + static DEVICE_ATTR_WO(compact); static DEVICE_ATTR_RW(disksize); static DEVICE_ATTR_RO(initstate); @@ -1947,8 +1955,7 @@ static int zram_add(void) if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); - zram->disk->queue->backing_dev_info->capabilities |= - (BDI_CAP_STABLE_WRITES | BDI_CAP_SYNCHRONOUS_IO); + zram->disk->queue->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; device_add_disk(NULL, zram->disk, zram_disk_attr_groups); strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 0d710140bf93..12ff6f8784ac 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1537,8 +1537,6 @@ static int btt_blk_init(struct btt *btt) btt->btt_disk->private_data = btt; btt->btt_disk->queue = btt->btt_queue; btt->btt_disk->flags = GENHD_FL_EXT_DEVT; - btt->btt_disk->queue->backing_dev_info->capabilities |= - BDI_CAP_SYNCHRONOUS_IO; blk_queue_logical_block_size(btt->btt_queue, btt->sector_size); blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 140cf3b9000c..1711fdfd8d28 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -475,7 +475,6 @@ static int pmem_attach_disk(struct device *dev, disk->queue = q; disk->flags = GENHD_FL_EXT_DEVT; disk->private_data = pmem; - disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO; nvdimm_namespace_disk_name(ndns, disk->disk_name); set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset) / 512); diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 52583b6f2ea0..860ea33571bc 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -122,9 +122,6 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); * BDI_CAP_NO_WRITEBACK: Don't write pages back * BDI_CAP_NO_ACCT_WB: Don't automatically account writeback pages * BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold. - * - * BDI_CAP_SYNCHRONOUS_IO: Device is so fast that asynchronous IO would be - * inefficient. */ #define BDI_CAP_NO_ACCT_DIRTY 0x00000001 #define BDI_CAP_NO_WRITEBACK 0x00000002 @@ -132,7 +129,6 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); #define BDI_CAP_STABLE_WRITES 0x00000008 #define BDI_CAP_STRICTLIMIT 0x00000010 #define BDI_CAP_CGROUP_WRITEBACK 0x00000020 -#define BDI_CAP_SYNCHRONOUS_IO 0x00000040 #define BDI_CAP_NO_ACCT_AND_WRITEBACK \ (BDI_CAP_NO_WRITEBACK | BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_ACCT_WB) @@ -174,11 +170,6 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits) long congestion_wait(int sync, long timeout); long wait_iff_congested(int sync, long timeout); -static inline bool bdi_cap_synchronous_io(struct backing_dev_info *bdi) -{ - return bdi->capabilities & BDI_CAP_SYNCHRONOUS_IO; -} - static inline bool bdi_cap_stable_pages_required(struct backing_dev_info *bdi) { return bdi->capabilities & BDI_CAP_STABLE_WRITES; diff --git a/mm/swapfile.c b/mm/swapfile.c index b90f86920743..96a7c47dd533 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3240,7 +3240,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) if (bdi_cap_stable_pages_required(inode_to_bdi(inode))) p->flags |= SWP_STABLE_WRITES; - if (bdi_cap_synchronous_io(inode_to_bdi(inode))) + if (p->bdev && p->bdev->bd_disk->fops->rw_page) p->flags |= SWP_SYNCHRONOUS_IO; if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) { -- cgit v1.2.3 From 1cb039f3dc1619eb795c54aad0a98fdb379b4237 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Sep 2020 08:51:38 +0200 Subject: bdi: replace BDI_CAP_STABLE_WRITES with a queue and a sb flag The BDI_CAP_STABLE_WRITES is one of the few bits of information in the backing_dev_info shared between the block drivers and the writeback code. To help untangling the dependency replace it with a queue flag and a superblock flag derived from it. This also helps with the case of e.g. a file system requiring stable writes due to its own checksumming, but not forcing it on other users of the block device like the swap code. One downside is that we an't support the stable_pages_required bdi attribute in sysfs anymore. It is replaced with a queue attribute which also is writable for easier testing. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- block/blk-integrity.c | 4 ++-- block/blk-mq-debugfs.c | 1 + block/blk-sysfs.c | 3 +++ drivers/block/rbd.c | 2 +- drivers/block/zram/zram_drv.c | 2 +- drivers/md/dm-table.c | 6 +++--- drivers/md/raid5.c | 8 ++++---- drivers/mmc/core/queue.c | 3 +-- drivers/nvme/host/core.c | 3 +-- drivers/nvme/host/multipath.c | 10 +++------- drivers/scsi/iscsi_tcp.c | 4 ++-- fs/super.c | 2 ++ include/linux/backing-dev.h | 6 ------ include/linux/blkdev.h | 3 +++ include/linux/fs.h | 1 + mm/backing-dev.c | 7 +++---- mm/page-writeback.c | 2 +- mm/swapfile.c | 2 +- 18 files changed, 33 insertions(+), 36 deletions(-) (limited to 'drivers') diff --git a/block/blk-integrity.c b/block/blk-integrity.c index c03705cbb9c9..2b36a8f9b813 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -408,7 +408,7 @@ void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template bi->tuple_size = template->tuple_size; bi->tag_size = template->tag_size; - disk->queue->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue); #ifdef CONFIG_BLK_INLINE_ENCRYPTION if (disk->queue->ksm) { @@ -428,7 +428,7 @@ EXPORT_SYMBOL(blk_integrity_register); */ void blk_integrity_unregister(struct gendisk *disk) { - disk->queue->backing_dev_info->capabilities &= ~BDI_CAP_STABLE_WRITES; + blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, disk->queue); memset(&disk->queue->integrity, 0, sizeof(struct blk_integrity)); } EXPORT_SYMBOL(blk_integrity_unregister); diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 645b7f800cb8..3094542e12ae 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -116,6 +116,7 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(SAME_FORCE), QUEUE_FLAG_NAME(DEAD), QUEUE_FLAG_NAME(INIT_DONE), + QUEUE_FLAG_NAME(STABLE_WRITES), QUEUE_FLAG_NAME(POLL), QUEUE_FLAG_NAME(WC), QUEUE_FLAG_NAME(FUA), diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 869ed21a9edc..76b54c7750b0 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -287,6 +287,7 @@ queue_##name##_store(struct request_queue *q, const char *page, size_t count) \ QUEUE_SYSFS_BIT_FNS(nonrot, NONROT, 1); QUEUE_SYSFS_BIT_FNS(random, ADD_RANDOM, 0); QUEUE_SYSFS_BIT_FNS(iostats, IO_STAT, 0); +QUEUE_SYSFS_BIT_FNS(stable_writes, STABLE_WRITES, 0); #undef QUEUE_SYSFS_BIT_FNS static ssize_t queue_zoned_show(struct request_queue *q, char *page) @@ -613,6 +614,7 @@ static struct queue_sysfs_entry queue_hw_sector_size_entry = { QUEUE_RW_ENTRY(queue_nonrot, "rotational"); QUEUE_RW_ENTRY(queue_iostats, "iostats"); QUEUE_RW_ENTRY(queue_random, "add_random"); +QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes"); static struct attribute *queue_attrs[] = { &queue_requests_entry.attr, @@ -645,6 +647,7 @@ static struct attribute *queue_attrs[] = { &queue_nomerges_entry.attr, &queue_rq_affinity_entry.attr, &queue_iostats_entry.attr, + &queue_stable_writes_entry.attr, &queue_random_entry.attr, &queue_poll_entry.attr, &queue_wc_entry.attr, diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 5d3923c0997c..cf5b016358cd 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -5022,7 +5022,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) } if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) - q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q); /* * disk_release() expects a queue ref from add_disk() and will diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index e21ca844d7c2..bff3d4021c18 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1955,7 +1955,7 @@ static int zram_add(void) if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); - zram->disk->queue->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue); device_add_disk(NULL, zram->disk, zram_disk_attr_groups); strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index ef2757012f59..405d7cf10eb9 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1815,7 +1815,7 @@ static int device_requires_stable_pages(struct dm_target *ti, { struct request_queue *q = bdev_get_queue(dev->bdev); - return q && bdi_cap_stable_pages_required(q->backing_dev_info); + return q && blk_queue_stable_writes(q); } /* @@ -1900,9 +1900,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, * because they do their own checksumming. */ if (dm_table_requires_stable_pages(t)) - q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q); else - q->backing_dev_info->capabilities &= ~BDI_CAP_STABLE_WRITES; + blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q); /* * Determine whether or not this queue's I/O timings contribute diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7ace1f76b147..d589d26c86ea 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6638,14 +6638,14 @@ raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len) if (!conf) err = -ENODEV; else if (new != conf->skip_copy) { + struct request_queue *q = mddev->queue; + mddev_suspend(mddev); conf->skip_copy = new; if (new) - mddev->queue->backing_dev_info->capabilities |= - BDI_CAP_STABLE_WRITES; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q); else - mddev->queue->backing_dev_info->capabilities &= - ~BDI_CAP_STABLE_WRITES; + blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q); mddev_resume(mddev); } mddev_unlock(mddev); diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 6c022ef0f84d..80fe3852ce0f 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -472,8 +472,7 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card) } if (mmc_host_is_spi(host) && host->use_spi_crc) - mq->queue->backing_dev_info->capabilities |= - BDI_CAP_STABLE_WRITES; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, mq->queue); mq->queue->queuedata = mq; blk_queue_rq_timeout(mq->queue, 60 * HZ); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 741c9bfa8e14..c190c56bf702 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3926,8 +3926,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) goto out_free_ns; if (ctrl->opts && ctrl->opts->data_digest) - ns->queue->backing_dev_info->capabilities - |= BDI_CAP_STABLE_WRITES; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, ns->queue); blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue); if (ctrl->ops->flags & NVME_F_PCI_P2PDMA) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index d4ba736c6c89..74896be40c17 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -673,13 +673,9 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id) nvme_mpath_set_live(ns); } - if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) { - struct gendisk *disk = ns->head->disk; - - if (disk) - disk->queue->backing_dev_info->capabilities |= - BDI_CAP_STABLE_WRITES; - } + if (blk_queue_stable_writes(ns->queue) && ns->head->disk) + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, + ns->head->disk->queue); } void nvme_mpath_remove_disk(struct nvme_ns_head *head) diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index b5dd1caae5e9..a622f334c933 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -962,8 +962,8 @@ static int iscsi_sw_tcp_slave_configure(struct scsi_device *sdev) struct iscsi_conn *conn = session->leadconn; if (conn->datadgst_en) - sdev->request_queue->backing_dev_info->capabilities - |= BDI_CAP_STABLE_WRITES; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, + sdev->request_queue); blk_queue_dma_alignment(sdev->request_queue, 0); return 0; } diff --git a/fs/super.c b/fs/super.c index 904459b35119..a51c2083cd6b 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1256,6 +1256,8 @@ static int set_bdev_super(struct super_block *s, void *data) s->s_dev = s->s_bdev->bd_dev; s->s_bdi = bdi_get(s->s_bdev->bd_bdi); + if (blk_queue_stable_writes(s->s_bdev->bd_disk->queue)) + s->s_iflags |= SB_I_STABLE_WRITES; return 0; } diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 860ea33571bc..5da4ea3dd0cc 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -126,7 +126,6 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); #define BDI_CAP_NO_ACCT_DIRTY 0x00000001 #define BDI_CAP_NO_WRITEBACK 0x00000002 #define BDI_CAP_NO_ACCT_WB 0x00000004 -#define BDI_CAP_STABLE_WRITES 0x00000008 #define BDI_CAP_STRICTLIMIT 0x00000010 #define BDI_CAP_CGROUP_WRITEBACK 0x00000020 @@ -170,11 +169,6 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits) long congestion_wait(int sync, long timeout); long wait_iff_congested(int sync, long timeout); -static inline bool bdi_cap_stable_pages_required(struct backing_dev_info *bdi) -{ - return bdi->capabilities & BDI_CAP_STABLE_WRITES; -} - static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi) { return !(bdi->capabilities & BDI_CAP_NO_WRITEBACK); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 282f5ca424f1..8e77f12de522 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -606,6 +606,7 @@ struct request_queue { #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ #define QUEUE_FLAG_DEAD 13 /* queue tear-down finished */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ +#define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ #define QUEUE_FLAG_WC 17 /* Write back caching */ #define QUEUE_FLAG_FUA 18 /* device supports FUA writes */ @@ -635,6 +636,8 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_noxmerges(q) \ test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) +#define blk_queue_stable_writes(q) \ + test_bit(QUEUE_FLAG_STABLE_WRITES, &(q)->queue_flags) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) diff --git a/include/linux/fs.h b/include/linux/fs.h index fbd74df5ce5f..222465b7cf41 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1385,6 +1385,7 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */ #define SB_I_NOEXEC 0x00000002 /* Ignore executables on this fs */ #define SB_I_NODEV 0x00000004 /* Ignore devices on this fs */ +#define SB_I_STABLE_WRITES 0x00000008 /* don't modify blks until WB is done */ /* sb->s_iflags to limit user namespace mounts */ #define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */ diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 2dac3be61271..8e3802bf03a9 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -204,10 +204,9 @@ static ssize_t stable_pages_required_show(struct device *dev, struct device_attribute *attr, char *page) { - struct backing_dev_info *bdi = dev_get_drvdata(dev); - - return snprintf(page, PAGE_SIZE-1, "%d\n", - bdi_cap_stable_pages_required(bdi) ? 1 : 0); + dev_warn_once(dev, + "the stable_pages_required attribute has been removed. Use the stable_writes queue attribute instead.\n"); + return snprintf(page, PAGE_SIZE-1, "%d\n", 0); } static DEVICE_ATTR_RO(stable_pages_required); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 4e4ddd67b71e..e9c36521461a 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2849,7 +2849,7 @@ EXPORT_SYMBOL_GPL(wait_on_page_writeback); */ void wait_for_stable_page(struct page *page) { - if (bdi_cap_stable_pages_required(inode_to_bdi(page->mapping->host))) + if (page->mapping->host->i_sb->s_iflags & SB_I_STABLE_WRITES) wait_on_page_writeback(page); } EXPORT_SYMBOL_GPL(wait_for_stable_page); diff --git a/mm/swapfile.c b/mm/swapfile.c index 96a7c47dd533..65ef407512b5 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3237,7 +3237,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) goto bad_swap_unlock_inode; } - if (bdi_cap_stable_pages_required(inode_to_bdi(inode))) + if (p->bdev && blk_queue_stable_writes(p->bdev->bd_disk->queue)) p->flags |= SWP_STABLE_WRITES; if (p->bdev && p->bdev->bd_disk->fops->rw_page) -- cgit v1.2.3 From fa01b1e9733fd59ecb8b5b6d85dfb481d2025fbf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Sep 2020 07:40:57 +0200 Subject: block: add a bdev_is_partition helper Add a littler helper to make the somewhat arcane bd_contains checks a little more obvious. Signed-off-by: Christoph Hellwig Acked-by: Ulf Hansson Signed-off-by: Jens Axboe --- block/blk-lib.c | 2 +- block/ioctl.c | 4 ++-- block/scsi_ioctl.c | 2 +- drivers/ide/ide-ioctls.c | 4 ++-- drivers/md/dm-table.c | 2 +- drivers/mmc/core/block.c | 2 +- drivers/s390/block/dasd_ioctl.c | 8 ++++---- fs/nfsd/blocklayout.c | 4 ++-- include/linux/blkdev.h | 9 +++++++-- kernel/trace/blktrace.c | 2 +- 10 files changed, 22 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/block/blk-lib.c b/block/blk-lib.c index 0d1811e57ac7..e90614fd8d6a 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -64,7 +64,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, return -EINVAL; /* In case the discard request is in a partition */ - if (bdev->bd_partno) + if (bdev_is_partition(bdev)) part_offset = bdev->bd_part->start_sect; while (nr_sects) { diff --git a/block/ioctl.c b/block/ioctl.c index 06262c28f0c6..3fbc382eb926 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -23,7 +23,7 @@ static int blkpg_do_ioctl(struct block_device *bdev, return -EACCES; if (copy_from_user(&p, upart, sizeof(struct blkpg_partition))) return -EFAULT; - if (bdev != bdev->bd_contains) + if (bdev_is_partition(bdev)) return -EINVAL; if (p.pno <= 0) @@ -94,7 +94,7 @@ static int blkdev_reread_part(struct block_device *bdev) { int ret; - if (!disk_part_scan_enabled(bdev->bd_disk) || bdev != bdev->bd_contains) + if (!disk_part_scan_enabled(bdev->bd_disk) || bdev_is_partition(bdev)) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EACCES; diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 4421e61c1af1..d4abd1ed5a2d 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -852,7 +852,7 @@ EXPORT_SYMBOL(scsi_cmd_ioctl); int scsi_verify_blk_ioctl(struct block_device *bd, unsigned int cmd) { - if (bd && bd == bd->bd_contains) + if (bd && !bdev_is_partition(bd)) return 0; if (capable(CAP_SYS_RAWIO)) diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c index 09491098047b..58994da10c06 100644 --- a/drivers/ide/ide-ioctls.c +++ b/drivers/ide/ide-ioctls.c @@ -49,7 +49,7 @@ read_val: return err >= 0 ? put_user_long(err, arg) : err; set_val: - if (bdev != bdev->bd_contains) + if (bdev_is_partition(bdev)) err = -EINVAL; else { if (!capable(CAP_SYS_ADMIN)) @@ -257,7 +257,7 @@ int generic_ide_ioctl(ide_drive_t *drive, struct block_device *bdev, switch (cmd) { case HDIO_OBSOLETE_IDENTITY: case HDIO_GET_IDENTITY: - if (bdev != bdev->bd_contains) + if (bdev_is_partition(bdev)) return -EINVAL; return ide_get_identity_ioctl(drive, cmd, argp); case HDIO_GET_NICE: diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 405d7cf10eb9..12d909044c10 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -903,7 +903,7 @@ static int device_is_rq_stackable(struct dm_target *ti, struct dm_dev *dev, struct request_queue *q = bdev_get_queue(bdev); /* request-based cannot stack on partitions! */ - if (bdev != bdev->bd_contains) + if (bdev_is_partition(bdev)) return false; return queue_is_mq(q); diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index fa313b634135..8d3df0be0355 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -723,7 +723,7 @@ static int mmc_blk_check_blkdev(struct block_device *bdev) * whole block device, not on a partition. This prevents overspray * between sibling partitions. */ - if ((!capable(CAP_SYS_RAWIO)) || (bdev != bdev->bd_contains)) + if (!capable(CAP_SYS_RAWIO) || bdev_is_partition(bdev)) return -EPERM; return 0; } diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index faaf5596e31c..cb6427fb9f3d 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -277,7 +277,7 @@ dasd_ioctl_format(struct block_device *bdev, void __user *argp) dasd_put_device(base); return -EFAULT; } - if (bdev != bdev->bd_contains) { + if (bdev_is_partition(bdev)) { pr_warn("%s: The specified DASD is a partition and cannot be formatted\n", dev_name(&base->cdev->dev)); dasd_put_device(base); @@ -304,7 +304,7 @@ static int dasd_ioctl_check_format(struct block_device *bdev, void __user *argp) base = dasd_device_from_gendisk(bdev->bd_disk); if (!base) return -ENODEV; - if (bdev != bdev->bd_contains) { + if (bdev_is_partition(bdev)) { pr_warn("%s: The specified DASD is a partition and cannot be checked\n", dev_name(&base->cdev->dev)); rc = -EINVAL; @@ -362,7 +362,7 @@ static int dasd_ioctl_release_space(struct block_device *bdev, void __user *argp rc = -EROFS; goto out_err; } - if (bdev != bdev->bd_contains) { + if (bdev_is_partition(bdev)) { pr_warn("%s: The specified DASD is a partition and tracks cannot be released\n", dev_name(&base->cdev->dev)); rc = -EINVAL; @@ -540,7 +540,7 @@ dasd_ioctl_set_ro(struct block_device *bdev, void __user *argp) if (!capable(CAP_SYS_ADMIN)) return -EACCES; - if (bdev != bdev->bd_contains) + if (bdev_is_partition(bdev)) // ro setting is not allowed for partitions return -EINVAL; if (get_user(intval, (int __user *)argp)) diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index 311e5ce80cfc..a07c39c94bbd 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -170,7 +170,7 @@ nfsd4_block_proc_getdeviceinfo(struct super_block *sb, struct nfs4_client *clp, struct nfsd4_getdeviceinfo *gdp) { - if (sb->s_bdev != sb->s_bdev->bd_contains) + if (bdev_is_partition(sb->s_bdev)) return nfserr_inval; return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp)); } @@ -382,7 +382,7 @@ nfsd4_scsi_proc_getdeviceinfo(struct super_block *sb, struct nfs4_client *clp, struct nfsd4_getdeviceinfo *gdp) { - if (sb->s_bdev != sb->s_bdev->bd_contains) + if (bdev_is_partition(sb->s_bdev)) return nfserr_inval; return nfserrno(nfsd4_block_get_device_info_scsi(sb, clp, gdp)); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8e77f12de522..33f283885ba5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1353,6 +1353,11 @@ static inline int sb_issue_zeroout(struct super_block *sb, sector_t block, extern int blk_verify_command(unsigned char *cmd, fmode_t mode); +static inline bool bdev_is_partition(struct block_device *bdev) +{ + return bdev->bd_partno; +} + enum blk_default_limits { BLK_MAX_SEGMENTS = 128, BLK_SAFE_MAX_SECTORS = 255, @@ -1469,7 +1474,7 @@ static inline int bdev_alignment_offset(struct block_device *bdev) if (q->limits.misaligned) return -1; - if (bdev != bdev->bd_contains) + if (bdev_is_partition(bdev)) return queue_limit_alignment_offset(&q->limits, bdev->bd_part->start_sect); return q->limits.alignment_offset; @@ -1510,7 +1515,7 @@ static inline int bdev_discard_alignment(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); - if (bdev != bdev->bd_contains) + if (bdev_is_partition(bdev)) return queue_limit_discard_alignment(&q->limits, bdev->bd_part->start_sect); return q->limits.discard_alignment; diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index c0887f32f647..ec874ea04092 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -527,7 +527,7 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, * and scsi-generic block devices we create a temporary new debugfs * directory that will be removed once the trace ends. */ - if (bdev && bdev == bdev->bd_contains) + if (bdev && !bdev_is_partition(bdev)) dir = q->debugfs_dir; else bt->dir = dir = debugfs_create_dir(buts->name, blk_debugfs_root); -- cgit v1.2.3 From 61a27e1f521ca0dd4a38302958ccc43fd95d8879 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Sep 2020 07:40:58 +0200 Subject: md: compare bd_disk instead of bd_contains To check for partitions of the same disk bd_contains works as well, but bd_disk is way more obvious. Signed-off-by: Christoph Hellwig Acked-by: Song Liu Signed-off-by: Jens Axboe --- drivers/md/md.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 64bc22d2b606..09ed6d0c7dbe 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2322,8 +2322,7 @@ static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2) test_bit(Journal, &rdev2->flags) || rdev2->raid_disk == -1) continue; - if (rdev->bdev->bd_contains == - rdev2->bdev->bd_contains) { + if (rdev->bdev->bd_disk == rdev2->bdev->bd_disk) { rcu_read_unlock(); return 1; } @@ -5944,8 +5943,8 @@ int md_run(struct mddev *mddev) rdev_for_each(rdev, mddev) rdev_for_each(rdev2, mddev) { if (rdev < rdev2 && - rdev->bdev->bd_contains == - rdev2->bdev->bd_contains) { + rdev->bdev->bd_disk == + rdev2->bdev->bd_disk) { pr_warn("%s: WARNING: %s appears to be on the same physical disk as %s.\n", mdname(mddev), bdevname(rdev->bdev,b), -- cgit v1.2.3 From 4245e52d255418278fc4a5eec7425ad0bfeebd31 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Sep 2020 07:40:59 +0200 Subject: md: don't detour through bd_contains for the gendisk bd_disk is set on all block devices, including those for partitions. Signed-off-by: Christoph Hellwig Acked-by: Song Liu Signed-off-by: Jens Axboe --- drivers/md/md.c | 2 +- drivers/md/md.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 09ed6d0c7dbe..de8419b7ae98 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8444,7 +8444,7 @@ static int is_mddev_idle(struct mddev *mddev, int init) idle = 1; rcu_read_lock(); rdev_for_each_rcu(rdev, mddev) { - struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; + struct gendisk *disk = rdev->bdev->bd_disk; curr_events = (int)part_stat_read_accum(&disk->part0, sectors) - atomic_read(&disk->sync_io); /* sync IO will cause sync_io to increase before the disk_stats diff --git a/drivers/md/md.h b/drivers/md/md.h index f9e2ccdd22c4..2175a5ac4f7c 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -551,7 +551,7 @@ extern void mddev_unlock(struct mddev *mddev); static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors) { - atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io); + atomic_add(nr_sectors, &bdev->bd_disk->sync_io); } static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors) -- cgit v1.2.3 From 8c40c7c4836a1ff9e5e840e5ab28526392a5724c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Sep 2020 07:41:00 +0200 Subject: drbd: don't detour through bd_contains for the gendisk bd_disk is set on all block devices, including those for partitions. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_receiver.c | 2 +- drivers/block/drbd/drbd_worker.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 422363daa618..2d16fb090599 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2789,7 +2789,7 @@ bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector, bool drbd_rs_c_min_rate_throttle(struct drbd_device *device) { - struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk; + struct gendisk *disk = device->ldev->backing_bdev->bd_disk; unsigned long db, dt, dbdt; unsigned int c_min_rate; int curr_events; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 7c903de5c4e1..9107b109ba7b 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1672,7 +1672,7 @@ void drbd_resync_after_changed(struct drbd_device *device) void drbd_rs_controller_reset(struct drbd_device *device) { - struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk; + struct gendisk *disk = device->ldev->backing_bdev->bd_disk; struct fifo_buffer *plan; atomic_set(&device->rs_sect_in, 0); -- cgit v1.2.3 From 74f94454099e89322f82fce35f7c73142516d17d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Sep 2020 07:41:01 +0200 Subject: drbd: don't set ->bd_contains The ->bd_contains field is set by __blkdev_get and drivers have no business manipulating it. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_main.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 04b6bde9419d..bd8b129196d4 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2766,8 +2766,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig disk->private_data = device; device->this_bdev = bdget(MKDEV(DRBD_MAJOR, minor)); - /* we have no partitions. we contain only ourselves. */ - device->this_bdev->bd_contains = device->this_bdev; blk_queue_write_cache(q, true, true); /* Setting the max_hw_sectors to an odd value of 8kibyte here -- cgit v1.2.3 From 57ba10592030be8c8b17184ca093db5e45cdbb2b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Sep 2020 07:41:02 +0200 Subject: target/iblock: fix holder printing in iblock_show_configfs_dev_params bd_contains is never NULL for an open block device. In addition ibd_bd is always set to a block device that was exclusively opened by the target code, so the holder is guranteed to be ib_dev as well. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/target/target_core_iblock.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 1c181d31f4c8..f2bd2e207e0b 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -611,9 +611,8 @@ static ssize_t iblock_show_configfs_dev_params(struct se_device *dev, char *b) bl += sprintf(b + bl, " "); if (bd) { bl += sprintf(b + bl, "Major: %d Minor: %d %s\n", - MAJOR(bd->bd_dev), MINOR(bd->bd_dev), (!bd->bd_contains) ? - "" : (bd->bd_holder == ib_dev) ? - "CLAIMED: IBLOCK" : "CLAIMED: OS"); + MAJOR(bd->bd_dev), MINOR(bd->bd_dev), + "CLAIMED: IBLOCK"); } else { bl += sprintf(b + bl, "Major: 0 Minor: 0\n"); } -- cgit v1.2.3 From 6abc49468eeaad2dd481779906aaa29605139087 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 23 Sep 2020 16:06:52 -0400 Subject: dm: add support for REQ_NOWAIT and enable it for linear target Add DM target feature flag DM_TARGET_NOWAIT which advertises that target works with REQ_NOWAIT bios. Add dm_table_supports_nowait() and update dm_table_set_restrictions() to set/clear QUEUE_FLAG_NOWAIT accordingly. Signed-off-by: Konstantin Khlebnikov Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- drivers/md/dm-linear.c | 5 +++-- drivers/md/dm-table.c | 32 ++++++++++++++++++++++++++++++++ drivers/md/dm.c | 4 +++- include/linux/device-mapper.h | 6 ++++++ 4 files changed, 44 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index e1db43446327..00774b5d7668 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -228,10 +228,11 @@ static struct target_type linear_target = { .name = "linear", .version = {1, 4, 0}, #ifdef CONFIG_BLK_DEV_ZONED - .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_ZONED_HM, + .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_NOWAIT | + DM_TARGET_ZONED_HM, .report_zones = linear_report_zones, #else - .features = DM_TARGET_PASSES_INTEGRITY, + .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_NOWAIT, #endif .module = THIS_MODULE, .ctr = linear_ctr, diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 12d909044c10..07c319e98581 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1748,6 +1748,33 @@ static bool dm_table_supports_write_zeroes(struct dm_table *t) return true; } +static int device_not_nowait_capable(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + + return q && !blk_queue_nowait(q); +} + +static bool dm_table_supports_nowait(struct dm_table *t) +{ + struct dm_target *ti; + unsigned i = 0; + + while (i < dm_table_get_num_targets(t)) { + ti = dm_table_get_target(t, i++); + + if (!dm_target_supports_nowait(ti->type)) + return false; + + if (!ti->type->iterate_devices || + ti->type->iterate_devices(ti, device_not_nowait_capable, NULL)) + return false; + } + + return true; +} + static int device_not_discard_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { @@ -1850,6 +1877,11 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, */ q->limits = *limits; + if (dm_table_supports_nowait(t)) + blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q); + else + blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, q); + if (!dm_table_supports_discards(t)) { blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q); /* Must also clear discard limits... */ diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 3dedd9cc4fb6..59b0687f9235 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1802,7 +1802,9 @@ static blk_qc_t dm_submit_bio(struct bio *bio) if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { dm_put_live_table(md, srcu_idx); - if (!(bio->bi_opf & REQ_RAHEAD)) + if (bio->bi_opf & REQ_NOWAIT) + bio_wouldblock_error(bio); + else if (!(bio->bi_opf & REQ_RAHEAD)) queue_io(md, bio); else bio_io_error(bio); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 93096e524e43..d6f8d4ba8d48 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -252,6 +252,12 @@ struct target_type { #define DM_TARGET_ZONED_HM 0x00000040 #define dm_target_supports_zoned_hm(type) ((type)->features & DM_TARGET_ZONED_HM) +/* + * A target handles REQ_NOWAIT + */ +#define DM_TARGET_NOWAIT 0x00000080 +#define dm_target_supports_nowait(type) ((type)->features & DM_TARGET_NOWAIT) + struct dm_target { struct dm_table *table; struct target_type *type; -- cgit v1.2.3