From c2b4bb8cb3741c0bacf3683e4c1ecd04c977ada3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 23 Aug 2020 11:10:42 +0200 Subject: block: fix locking for struct block_device size updates Two different callers use two different mutexes for updating the block device size, which obviously doesn't help to actually protect against concurrent updates from the different callers. In addition, one of the locks, bd_mutex, is rather prone to deadlocks with other parts of the block stack that use it for high-level synchronization. Switch to using a new spinlock protecting just the size updates, as that is all we need, and make sure everyone does the update through the proper helper. This fixes a bug reported with the nvme revalidating disks during a hot removal operation, which can currently deadlock on bd_mutex. Reported-by: Xianting Tian Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- block/partitions/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'block/partitions') diff --git a/block/partitions/core.c b/block/partitions/core.c index 5b4869c08fb3..b1c0b50ca92d 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -592,8 +592,8 @@ int bdev_resize_partition(struct block_device *bdev, int partno, if (partition_overlaps(bdev->bd_disk, start, length, partno)) goto out_unlock; - part_nr_sects_write(part, (sector_t)length); - i_size_write(bdevp->bd_inode, length << SECTOR_SHIFT); + part_nr_sects_write(part, length); + bd_set_nr_sectors(bdevp, length); ret = 0; out_unlock: -- cgit v1.2.3 From 7b8917f5e29c377be1db5680249fe30e038cb3eb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 31 Aug 2020 20:02:33 +0200 Subject: block: remove the alignment_offset field from struct hd_struct The alignment offset is only used in slow path callers, so just calculate it on the fly. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/partitions/core.c | 7 ++++--- include/linux/blkdev.h | 5 ++--- include/linux/genhd.h | 1 - 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 'block/partitions') diff --git a/block/partitions/core.c b/block/partitions/core.c index b1c0b50ca92d..94c2fb39e602 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -199,7 +199,10 @@ static ssize_t part_alignment_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); - return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset); + + return sprintf(buf, "%u\n", + queue_limit_alignment_offset(&part_to_disk(p)->queue->limits, + p->start_sect)); } static ssize_t part_discard_alignment_show(struct device *dev, @@ -405,8 +408,6 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, pdev = part_to_dev(p); p->start_sect = start; - p->alignment_offset = - queue_limit_alignment_offset(&disk->queue->limits, start); p->discard_alignment = queue_limit_discard_alignment(&disk->queue->limits, start); p->nr_sects = len; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0a1730b30ad2..ba1f5f5e11c6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1456,10 +1456,9 @@ static inline int bdev_alignment_offset(struct block_device *bdev) if (q->limits.misaligned) return -1; - if (bdev != bdev->bd_contains) - return bdev->bd_part->alignment_offset; - + return queue_limit_alignment_offset(&q->limits, + bdev->bd_part->start_sect); return q->limits.alignment_offset; } diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 39025dc0397c..bfa411c80dbb 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -65,7 +65,6 @@ struct hd_struct { struct disk_stats __percpu *dkstats; struct percpu_ref ref; - sector_t alignment_offset; unsigned int discard_alignment; struct device __dev; struct kobject *holder_dir; -- 
cgit v1.2.3 From 7cf34d97ab45203b975396393ded9d3867dfa8bf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 31 Aug 2020 20:02:34 +0200 Subject: block: remove the discard_alignment field from struct hd_struct The discard alignment is only used in slow path callers, so just calculate it on the fly. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/partitions/core.c | 7 ++++--- include/linux/blkdev.h | 4 ++-- include/linux/genhd.h | 1 - 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'block/partitions') diff --git a/block/partitions/core.c b/block/partitions/core.c index 94c2fb39e602..e596a85bba1e 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -209,7 +209,10 @@ static ssize_t part_discard_alignment_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); - return sprintf(buf, "%u\n", p->discard_alignment); + + return sprintf(buf, "%u\n", + queue_limit_discard_alignment(&part_to_disk(p)->queue->limits, + p->start_sect)); } static DEVICE_ATTR(partition, 0444, part_partition_show, NULL); @@ -408,8 +411,6 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, pdev = part_to_dev(p); p->start_sect = start; - p->discard_alignment = - queue_limit_discard_alignment(&disk->queue->limits, start); p->nr_sects = len; p->partno = partno; p->policy = get_disk_ro(disk); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ba1f5f5e11c6..d0d61bc81615 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1498,8 +1498,8 @@ static inline int bdev_discard_alignment(struct block_device *bdev) struct request_queue *q = bdev_get_queue(bdev); if (bdev != bdev->bd_contains) - return bdev->bd_part->discard_alignment; - + return queue_limit_discard_alignment(&q->limits, + bdev->bd_part->start_sect); return q->limits.discard_alignment; } diff --git a/include/linux/genhd.h b/include/linux/genhd.h index bfa411c80dbb..9ea2ca31c278 100644 
--- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -65,7 +65,6 @@ struct hd_struct { struct disk_stats __percpu *dkstats; struct percpu_ref ref; - unsigned int discard_alignment; struct device __dev; struct kobject *holder_dir; int policy, partno; -- cgit v1.2.3 From 8328eb28369a7dbfab6ff26366dbe8094425acc4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 31 Aug 2020 20:02:38 +0200 Subject: block: remove the disk argument to delete_partition We can trivially derive the gendisk from the hd_struct. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk.h | 2 +- block/genhd.c | 2 +- block/partitions/core.c | 9 +++++---- 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'block/partitions') diff --git a/block/blk.h b/block/blk.h index a180443ac13f..c08762e10b04 100644 --- a/block/blk.h +++ b/block/blk.h @@ -361,7 +361,7 @@ char *disk_name(struct gendisk *hd, int partno, char *buf); #define ADDPART_FLAG_NONE 0 #define ADDPART_FLAG_RAID 1 #define ADDPART_FLAG_WHOLEDISK 2 -void delete_partition(struct gendisk *disk, struct hd_struct *part); +void delete_partition(struct hd_struct *part); int bdev_add_partition(struct block_device *bdev, int partno, sector_t start, sector_t length); int bdev_del_partition(struct block_device *bdev, int partno); diff --git a/block/genhd.c b/block/genhd.c index 055ce9cf1835..2055b5bf637a 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -913,7 +913,7 @@ void del_gendisk(struct gendisk *disk) DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); while ((part = disk_part_iter_next(&piter))) { invalidate_partition(disk, part->partno); - delete_partition(disk, part); + delete_partition(part); } disk_part_iter_exit(&piter); diff --git a/block/partitions/core.c b/block/partitions/core.c index e596a85bba1e..dd6811422a87 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -324,8 +324,9 @@ int hd_ref_init(struct hd_struct *part) * Must be called either with bd_mutex held, before a disk can be 
opened or * after all disk users are gone. */ -void delete_partition(struct gendisk *disk, struct hd_struct *part) +void delete_partition(struct hd_struct *part) { + struct gendisk *disk = part_to_disk(part); struct disk_part_tbl *ptbl = rcu_dereference_protected(disk->part_tbl, 1); @@ -333,7 +334,7 @@ void delete_partition(struct gendisk *disk, struct hd_struct *part) * ->part_tbl is referenced in this part's release handler, so * we have to hold the disk device */ - get_device(disk_to_dev(part_to_disk(part))); + get_device(disk_to_dev(disk)); rcu_assign_pointer(ptbl->part[part->partno], NULL); kobject_put(part->holder_dir); device_del(part_to_dev(part)); @@ -556,7 +557,7 @@ int bdev_del_partition(struct block_device *bdev, int partno) sync_blockdev(bdevp); invalidate_bdev(bdevp); - delete_partition(bdev->bd_disk, part); + delete_partition(part); ret = 0; out_unlock: mutex_unlock(&bdev->bd_mutex); @@ -636,7 +637,7 @@ int blk_drop_partitions(struct block_device *bdev) disk_part_iter_init(&piter, bdev->bd_disk, DISK_PITER_INCL_EMPTY); while ((part = disk_part_iter_next(&piter))) - delete_partition(bdev->bd_disk, part); + delete_partition(part); disk_part_iter_exit(&piter); return 0; -- cgit v1.2.3 From 10ed16662da9e28a33b6c991c36c6b323b03dd5b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 Sep 2020 18:06:18 +0200 Subject: block: add a bdget_part helper All remaining callers of bdget() outside of fs/block_dev.c want to get a reference to the struct block_device for a given struct hd_struct. Add a helper just for that and then mark bdget static. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/genhd.c | 2 +- block/partitions/core.c | 2 +- fs/block_dev.c | 9 ++++++--- include/linux/blkdev.h | 2 +- kernel/trace/blktrace.c | 9 ++------- 5 files changed, 11 insertions(+), 13 deletions(-) (limited to 'block/partitions') diff --git a/block/genhd.c b/block/genhd.c index e5f17f022ec7..0a273211fec2 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1048,7 +1048,7 @@ struct block_device *bdget_disk(struct gendisk *disk, int partno) part = disk_get_part(disk, partno); if (part) - bdev = bdget(part_devt(part)); + bdev = bdget_part(part); disk_put_part(part); return bdev; diff --git a/block/partitions/core.c b/block/partitions/core.c index dd6811422a87..5309e0f44ba3 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -580,7 +580,7 @@ int bdev_resize_partition(struct block_device *bdev, int partno, return -ENXIO; ret = -ENOMEM; - bdevp = bdget(part_devt(part)); + bdevp = bdget_part(part); if (!bdevp) goto out_put_part; diff --git a/fs/block_dev.c b/fs/block_dev.c index 6b9d19ffa5af..9e84b1928b94 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -891,7 +891,7 @@ static int bdev_set(struct inode *inode, void *data) return 0; } -struct block_device *bdget(dev_t dev) +static struct block_device *bdget(dev_t dev) { struct block_device *bdev; struct inode *inode; @@ -920,8 +920,6 @@ struct block_device *bdget(dev_t dev) return bdev; } -EXPORT_SYMBOL(bdget); - /** * bdgrab -- Grab a reference to an already referenced block device * @bdev: Block device to grab a reference to. 
@@ -933,6 +931,11 @@ struct block_device *bdgrab(struct block_device *bdev) } EXPORT_SYMBOL(bdgrab); +struct block_device *bdget_part(struct hd_struct *part) +{ + return bdget(part_devt(part)); +} + long nr_blockdev_pages(void) { struct inode *inode; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d5a3e1a4c2f7..cf80e61b4c5e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -2003,7 +2003,7 @@ void bd_abort_claiming(struct block_device *bdev, struct block_device *whole, void blkdev_put(struct block_device *bdev, fmode_t mode); struct block_device *I_BDEV(struct inode *inode); -struct block_device *bdget(dev_t); +struct block_device *bdget_part(struct hd_struct *part); struct block_device *bdgrab(struct block_device *bdev); void bdput(struct block_device *); diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index ec874ea04092..f1022945e346 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1827,13 +1827,11 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); + struct block_device *bdev = bdget_part(dev_to_part(dev)); struct request_queue *q; - struct block_device *bdev; struct blk_trace *bt; ssize_t ret = -ENXIO; - bdev = bdget(part_devt(p)); if (bdev == NULL) goto out; @@ -1875,7 +1873,6 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, { struct block_device *bdev; struct request_queue *q; - struct hd_struct *p; struct blk_trace *bt; u64 value; ssize_t ret = -EINVAL; @@ -1895,9 +1892,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, goto out; ret = -ENXIO; - - p = dev_to_part(dev); - bdev = bdget(part_devt(p)); + bdev = bdget_part(dev_to_part(dev)); if (bdev == NULL) goto out; -- cgit v1.2.3