From 7f8d12ea96352275c2850c24a1367166179392d2 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 29 Mar 2022 15:55:59 +0900 Subject: fs: add a lockdep check function for sb_start_write() Add a function sb_write_started() to allow callers to verify if sb_start_write() is properly called. It will be used for assertions in btrfs. Reviewed-by: Filipe Manana Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/linux/fs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index bbde95387a23..01d61984ce7a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1708,6 +1708,11 @@ static inline bool __sb_start_write_trylock(struct super_block *sb, int level) #define __sb_writers_release(sb, lev) \ percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) +static inline bool sb_write_started(const struct super_block *sb) +{ + return lockdep_is_held_type(sb->s_writers.rw_sem + SB_FREEZE_WRITE - 1, 1); +} + /** * sb_end_write - drop write access to a superblock * @sb: the super we wrote to -- cgit v1.2.3 From a31b4a4368d28c5e780f0906588fbd1dcfe4ad54 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Apr 2022 06:43:09 +0200 Subject: btrfs: simplify WQ_HIGHPRI handling in struct btrfs_workqueue Just let the one caller that wants optional WQ_HIGHPRI handling allocate a separate btrfs_workqueue for that. This allows renaming struct __btrfs_workqueue to btrfs_workqueue, removing a pointer indirection and a separate allocation for all btrfs_workqueue users, and generally simplifies the code. 
Reviewed-by: Qu Wenruo Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/async-thread.c | 122 ++++++++----------------------------------- fs/btrfs/async-thread.h | 7 +-- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 15 +++--- fs/btrfs/super.c | 1 + include/trace/events/btrfs.h | 30 +++++------ 6 files changed, 47 insertions(+), 129 deletions(-) (limited to 'include') diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 43c89952b7d2..aac240430efe 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -15,13 +15,12 @@ enum { WORK_DONE_BIT, WORK_ORDER_DONE_BIT, - WORK_HIGH_PRIO_BIT, }; #define NO_THRESHOLD (-1) #define DFT_THRESHOLD (32) -struct __btrfs_workqueue { +struct btrfs_workqueue { struct workqueue_struct *normal_wq; /* File system this workqueue services */ @@ -48,12 +47,7 @@ struct __btrfs_workqueue { spinlock_t thres_lock; }; -struct btrfs_workqueue { - struct __btrfs_workqueue *normal; - struct __btrfs_workqueue *high; -}; - -struct btrfs_fs_info * __pure btrfs_workqueue_owner(const struct __btrfs_workqueue *wq) +struct btrfs_fs_info * __pure btrfs_workqueue_owner(const struct btrfs_workqueue *wq) { return wq->fs_info; } @@ -66,22 +60,22 @@ struct btrfs_fs_info * __pure btrfs_work_owner(const struct btrfs_work *work) bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq) { /* - * We could compare wq->normal->pending with num_online_cpus() + * We could compare wq->pending with num_online_cpus() * to support "thresh == NO_THRESHOLD" case, but it requires * moving up atomic_inc/dec in thresh_queue/exec_hook. Let's * postpone it until someone needs the support of that case. 
*/ - if (wq->normal->thresh == NO_THRESHOLD) + if (wq->thresh == NO_THRESHOLD) return false; - return atomic_read(&wq->normal->pending) > wq->normal->thresh * 2; + return atomic_read(&wq->pending) > wq->thresh * 2; } -static struct __btrfs_workqueue * -__btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info, const char *name, - unsigned int flags, int limit_active, int thresh) +struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info, + const char *name, unsigned int flags, + int limit_active, int thresh) { - struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_KERNEL); + struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -105,12 +99,8 @@ __btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info, const char *name, ret->thresh = thresh; } - if (flags & WQ_HIGHPRI) - ret->normal_wq = alloc_workqueue("btrfs-%s-high", flags, - ret->current_active, name); - else - ret->normal_wq = alloc_workqueue("btrfs-%s", flags, - ret->current_active, name); + ret->normal_wq = alloc_workqueue("btrfs-%s", flags, ret->current_active, + name); if (!ret->normal_wq) { kfree(ret); return NULL; @@ -119,41 +109,7 @@ __btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info, const char *name, INIT_LIST_HEAD(&ret->ordered_list); spin_lock_init(&ret->list_lock); spin_lock_init(&ret->thres_lock); - trace_btrfs_workqueue_alloc(ret, name, flags & WQ_HIGHPRI); - return ret; -} - -static inline void -__btrfs_destroy_workqueue(struct __btrfs_workqueue *wq); - -struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info, - const char *name, - unsigned int flags, - int limit_active, - int thresh) -{ - struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_KERNEL); - - if (!ret) - return NULL; - - ret->normal = __btrfs_alloc_workqueue(fs_info, name, - flags & ~WQ_HIGHPRI, - limit_active, thresh); - if (!ret->normal) { - kfree(ret); - return NULL; - } - - if (flags & WQ_HIGHPRI) { - ret->high = __btrfs_alloc_workqueue(fs_info, name, 
flags, - limit_active, thresh); - if (!ret->high) { - __btrfs_destroy_workqueue(ret->normal); - kfree(ret); - return NULL; - } - } + trace_btrfs_workqueue_alloc(ret, name); return ret; } @@ -162,7 +118,7 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info, * This hook WILL be called in IRQ handler context, * so workqueue_set_max_active MUST NOT be called in this hook */ -static inline void thresh_queue_hook(struct __btrfs_workqueue *wq) +static inline void thresh_queue_hook(struct btrfs_workqueue *wq) { if (wq->thresh == NO_THRESHOLD) return; @@ -174,7 +130,7 @@ static inline void thresh_queue_hook(struct __btrfs_workqueue *wq) * This hook is called in kthread content. * So workqueue_set_max_active is called here. */ -static inline void thresh_exec_hook(struct __btrfs_workqueue *wq) +static inline void thresh_exec_hook(struct btrfs_workqueue *wq) { int new_current_active; long pending; @@ -217,7 +173,7 @@ out: } } -static void run_ordered_work(struct __btrfs_workqueue *wq, +static void run_ordered_work(struct btrfs_workqueue *wq, struct btrfs_work *self) { struct list_head *list = &wq->ordered_list; @@ -305,7 +261,7 @@ static void btrfs_work_helper(struct work_struct *normal_work) { struct btrfs_work *work = container_of(normal_work, struct btrfs_work, normal_work); - struct __btrfs_workqueue *wq; + struct btrfs_workqueue *wq = work->wq; int need_order = 0; /* @@ -318,7 +274,6 @@ static void btrfs_work_helper(struct work_struct *normal_work) */ if (work->ordered_func) need_order = 1; - wq = work->wq; trace_btrfs_work_sched(work); thresh_exec_hook(wq); @@ -350,8 +305,7 @@ void btrfs_init_work(struct btrfs_work *work, btrfs_func_t func, work->flags = 0; } -static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq, - struct btrfs_work *work) +void btrfs_queue_work(struct btrfs_workqueue *wq, struct btrfs_work *work) { unsigned long flags; @@ -366,54 +320,22 @@ static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq, 
queue_work(wq->normal_wq, &work->normal_work); } -void btrfs_queue_work(struct btrfs_workqueue *wq, - struct btrfs_work *work) -{ - struct __btrfs_workqueue *dest_wq; - - if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags) && wq->high) - dest_wq = wq->high; - else - dest_wq = wq->normal; - __btrfs_queue_work(dest_wq, work); -} - -static inline void -__btrfs_destroy_workqueue(struct __btrfs_workqueue *wq) -{ - destroy_workqueue(wq->normal_wq); - trace_btrfs_workqueue_destroy(wq); - kfree(wq); -} - void btrfs_destroy_workqueue(struct btrfs_workqueue *wq) { if (!wq) return; - if (wq->high) - __btrfs_destroy_workqueue(wq->high); - __btrfs_destroy_workqueue(wq->normal); + destroy_workqueue(wq->normal_wq); + trace_btrfs_workqueue_destroy(wq); kfree(wq); } void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int limit_active) { - if (!wq) - return; - wq->normal->limit_active = limit_active; - if (wq->high) - wq->high->limit_active = limit_active; -} - -void btrfs_set_work_high_priority(struct btrfs_work *work) -{ - set_bit(WORK_HIGH_PRIO_BIT, &work->flags); + if (wq) + wq->limit_active = limit_active; } void btrfs_flush_workqueue(struct btrfs_workqueue *wq) { - if (wq->high) - flush_workqueue(wq->high->normal_wq); - - flush_workqueue(wq->normal->normal_wq); + flush_workqueue(wq->normal_wq); } diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 3204daa51b95..07960529b360 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -11,8 +11,6 @@ struct btrfs_fs_info; struct btrfs_workqueue; -/* Internal use only */ -struct __btrfs_workqueue; struct btrfs_work; typedef void (*btrfs_func_t)(struct btrfs_work *arg); typedef void (*btrfs_work_func_t)(struct work_struct *arg); @@ -25,7 +23,7 @@ struct btrfs_work { /* Don't touch things below */ struct work_struct normal_work; struct list_head ordered_list; - struct __btrfs_workqueue *wq; + struct btrfs_workqueue *wq; unsigned long flags; }; @@ -40,9 +38,8 @@ void btrfs_queue_work(struct btrfs_workqueue 
*wq, struct btrfs_work *work); void btrfs_destroy_workqueue(struct btrfs_workqueue *wq); void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max); -void btrfs_set_work_high_priority(struct btrfs_work *work); struct btrfs_fs_info * __pure btrfs_work_owner(const struct btrfs_work *work); -struct btrfs_fs_info * __pure btrfs_workqueue_owner(const struct __btrfs_workqueue *wq); +struct btrfs_fs_info * __pure btrfs_workqueue_owner(const struct btrfs_workqueue *wq); bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq); void btrfs_flush_workqueue(struct btrfs_workqueue *wq); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index dd23f78664f1..aa3aea042ec5 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -847,6 +847,7 @@ struct btrfs_fs_info { * two */ struct btrfs_workqueue *workers; + struct btrfs_workqueue *hipri_workers; struct btrfs_workqueue *delalloc_workers; struct btrfs_workqueue *flush_workers; struct btrfs_workqueue *endio_workers; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d70289ef581a..807e7b272896 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -874,9 +874,9 @@ blk_status_t btrfs_wq_submit_bio(struct inode *inode, struct bio *bio, async->status = 0; if (op_is_sync(bio->bi_opf)) - btrfs_set_work_high_priority(&async->work); - - btrfs_queue_work(fs_info->workers, &async->work); + btrfs_queue_work(fs_info->hipri_workers, &async->work); + else + btrfs_queue_work(fs_info->workers, &async->work); return 0; } @@ -2279,6 +2279,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) { btrfs_destroy_workqueue(fs_info->fixup_workers); btrfs_destroy_workqueue(fs_info->delalloc_workers); + btrfs_destroy_workqueue(fs_info->hipri_workers); btrfs_destroy_workqueue(fs_info->workers); btrfs_destroy_workqueue(fs_info->endio_workers); btrfs_destroy_workqueue(fs_info->endio_raid56_workers); @@ -2457,7 +2458,9 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info) unsigned int flags = 
WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND; fs_info->workers = - btrfs_alloc_workqueue(fs_info, "worker", + btrfs_alloc_workqueue(fs_info, "worker", flags, max_active, 16); + fs_info->hipri_workers = + btrfs_alloc_workqueue(fs_info, "worker-high", flags | WQ_HIGHPRI, max_active, 16); fs_info->delalloc_workers = @@ -2505,8 +2508,8 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info) fs_info->discard_ctl.discard_workers = alloc_workqueue("btrfs_discard", WQ_UNBOUND | WQ_FREEZABLE, 1); - if (!(fs_info->workers && fs_info->delalloc_workers && - fs_info->flush_workers && + if (!(fs_info->workers && fs_info->hipri_workers && + fs_info->delalloc_workers && fs_info->flush_workers && fs_info->endio_workers && fs_info->endio_meta_workers && fs_info->endio_meta_write_workers && fs_info->endio_write_workers && fs_info->endio_raid56_workers && diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 206f44005c52..2236024aca64 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1903,6 +1903,7 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, old_pool_size, new_pool_size); btrfs_workqueue_set_max(fs_info->workers, new_pool_size); + btrfs_workqueue_set_max(fs_info->hipri_workers, new_pool_size); btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size); btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size); btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size); diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index f068ff30d654..290f07eb050a 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -24,7 +24,7 @@ struct btrfs_free_cluster; struct map_lookup; struct extent_buffer; struct btrfs_work; -struct __btrfs_workqueue; +struct btrfs_workqueue; struct btrfs_qgroup_extent_record; struct btrfs_qgroup; struct extent_io_tree; @@ -1457,42 +1457,36 @@ DEFINE_EVENT(btrfs__work, btrfs_ordered_sched, TP_ARGS(work) ); -DECLARE_EVENT_CLASS(btrfs__workqueue, 
+DECLARE_EVENT_CLASS(btrfs_workqueue, - TP_PROTO(const struct __btrfs_workqueue *wq, - const char *name, int high), + TP_PROTO(const struct btrfs_workqueue *wq, const char *name), - TP_ARGS(wq, name, high), + TP_ARGS(wq, name), TP_STRUCT__entry_btrfs( __field( const void *, wq ) __string( name, name ) - __field( int , high ) ), TP_fast_assign_btrfs(btrfs_workqueue_owner(wq), __entry->wq = wq; __assign_str(name, name); - __entry->high = high; ), - TP_printk_btrfs("name=%s%s wq=%p", __get_str(name), - __print_flags(__entry->high, "", - {(WQ_HIGHPRI), "-high"}), + TP_printk_btrfs("name=%s wq=%p", __get_str(name), __entry->wq) ); -DEFINE_EVENT(btrfs__workqueue, btrfs_workqueue_alloc, +DEFINE_EVENT(btrfs_workqueue, btrfs_workqueue_alloc, - TP_PROTO(const struct __btrfs_workqueue *wq, - const char *name, int high), + TP_PROTO(const struct btrfs_workqueue *wq, const char *name), - TP_ARGS(wq, name, high) + TP_ARGS(wq, name) ); -DECLARE_EVENT_CLASS(btrfs__workqueue_done, +DECLARE_EVENT_CLASS(btrfs_workqueue_done, - TP_PROTO(const struct __btrfs_workqueue *wq), + TP_PROTO(const struct btrfs_workqueue *wq), TP_ARGS(wq), @@ -1507,9 +1501,9 @@ DECLARE_EVENT_CLASS(btrfs__workqueue_done, TP_printk_btrfs("wq=%p", __entry->wq) ); -DEFINE_EVENT(btrfs__workqueue_done, btrfs_workqueue_destroy, +DEFINE_EVENT(btrfs_workqueue_done, btrfs_workqueue_destroy, - TP_PROTO(const struct __btrfs_workqueue *wq), + TP_PROTO(const struct btrfs_workqueue *wq), TP_ARGS(wq) ); -- cgit v1.2.3 From f04fbcc64e4be16185151f9fca44ea1b3d074bd0 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 20 Apr 2022 16:08:27 +0800 Subject: btrfs: move definition of btrfs_raid_types to volumes.h It's only used internally as another way to represent btrfs profiles and is not exposed through any on-disk format; in fact, btrfs_raid_types diverges from the on-disk format values. Furthermore, since it's an internal structure, its definition can change in the future. 
Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/space-info.h | 2 ++ fs/btrfs/volumes.h | 13 +++++++++++++ include/uapi/linux/btrfs_tree.h | 13 ------------- 3 files changed, 15 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h index a803e29bd781..c096695598c1 100644 --- a/fs/btrfs/space-info.h +++ b/fs/btrfs/space-info.h @@ -3,6 +3,8 @@ #ifndef BTRFS_SPACE_INFO_H #define BTRFS_SPACE_INFO_H +#include "volumes.h" + struct btrfs_space_info { spinlock_t lock; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 197877e684df..7b82aae89454 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -17,6 +17,19 @@ extern struct mutex uuid_mutex; #define BTRFS_STRIPE_LEN SZ_64K +enum btrfs_raid_types { + BTRFS_RAID_RAID10, + BTRFS_RAID_RAID1, + BTRFS_RAID_DUP, + BTRFS_RAID_RAID0, + BTRFS_RAID_SINGLE, + BTRFS_RAID_RAID5, + BTRFS_RAID_RAID6, + BTRFS_RAID_RAID1C3, + BTRFS_RAID_RAID1C4, + BTRFS_NR_RAID_TYPES +}; + struct btrfs_io_geometry { /* remaining bytes before crossing a stripe */ u64 len; diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index b069752a8ecf..d4117152d907 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -880,19 +880,6 @@ struct btrfs_dev_replace_item { #define BTRFS_BLOCK_GROUP_RESERVED (BTRFS_AVAIL_ALLOC_BIT_SINGLE | \ BTRFS_SPACE_INFO_GLOBAL_RSV) -enum btrfs_raid_types { - BTRFS_RAID_RAID10, - BTRFS_RAID_RAID1, - BTRFS_RAID_DUP, - BTRFS_RAID_RAID0, - BTRFS_RAID_SINGLE, - BTRFS_RAID_RAID5, - BTRFS_RAID_RAID6, - BTRFS_RAID_RAID1C3, - BTRFS_RAID_RAID1C4, - BTRFS_NR_RAID_TYPES -}; - #define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \ BTRFS_BLOCK_GROUP_SYSTEM | \ BTRFS_BLOCK_GROUP_METADATA) -- cgit v1.2.3 From 908c54909ae72dcbf1d7e1440f7297187d06c275 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 5 May 2022 15:11:10 -0500 
Subject: iomap: allow the file system to provide a bio_set for direct I/O Allow the file system to provide a specific bio_set for allocating direct I/O bios. This will allow file systems that use the ->submit_io hook to stash away additional information for file system use. To make use of this additional space for information in the completion path, the file system needs to override the ->bi_end_io callback and then call back into iomap, so export iomap_dio_bio_end_io for that. Reviewed-by: Darrick J. Wong Reviewed-by: Nikolay Borisov Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/iomap/direct-io.c | 17 +++++++++++++---- include/linux/iomap.h | 11 +++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index b08f5dc31780..314d8235f4ab 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -51,6 +51,15 @@ struct iomap_dio { }; }; +static struct bio *iomap_dio_alloc_bio(const struct iomap_iter *iter, + struct iomap_dio *dio, unsigned short nr_vecs, unsigned int opf) +{ + if (dio->dops && dio->dops->bio_set) + return bio_alloc_bioset(iter->iomap.bdev, nr_vecs, opf, + GFP_KERNEL, dio->dops->bio_set); + return bio_alloc(iter->iomap.bdev, nr_vecs, opf, GFP_KERNEL); +} + static void iomap_dio_submit_bio(const struct iomap_iter *iter, struct iomap_dio *dio, struct bio *bio, loff_t pos) { @@ -144,7 +153,7 @@ static inline void iomap_dio_set_error(struct iomap_dio *dio, int ret) cmpxchg(&dio->error, 0, ret); } -static void iomap_dio_bio_end_io(struct bio *bio) +void iomap_dio_bio_end_io(struct bio *bio) { struct iomap_dio *dio = bio->bi_private; bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY); @@ -176,16 +185,16 @@ static void iomap_dio_bio_end_io(struct bio *bio) bio_put(bio); } } +EXPORT_SYMBOL_GPL(iomap_dio_bio_end_io); static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio, loff_t pos, unsigned len) { 
struct inode *inode = file_inode(dio->iocb->ki_filp); struct page *page = ZERO_PAGE(0); - int flags = REQ_SYNC | REQ_IDLE; struct bio *bio; - bio = bio_alloc(iter->iomap.bdev, 1, REQ_OP_WRITE | flags, GFP_KERNEL); + bio = iomap_dio_alloc_bio(iter, dio, 1, REQ_OP_WRITE | REQ_SYNC | REQ_IDLE); fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits, GFP_KERNEL); bio->bi_iter.bi_sector = iomap_sector(&iter->iomap, pos); @@ -311,7 +320,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter, goto out; } - bio = bio_alloc(iomap->bdev, nr_pages, bio_opf, GFP_KERNEL); + bio = iomap_dio_alloc_bio(iter, dio, nr_pages, bio_opf); fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits, GFP_KERNEL); bio->bi_iter.bi_sector = iomap_sector(iomap, pos); diff --git a/include/linux/iomap.h b/include/linux/iomap.h index b76f0dd149fb..cf903f1a230f 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -320,6 +320,16 @@ struct iomap_dio_ops { unsigned flags); void (*submit_io)(const struct iomap_iter *iter, struct bio *bio, loff_t file_offset); + + /* + * Filesystems wishing to attach private information to a direct io bio + * must provide a ->submit_io method that attaches the additional + * information to the bio and changes the ->bi_end_io callback to a + * custom function. This function should, at a minimum, perform any + * relevant post-processing of the bio and end with a call to + * iomap_dio_bio_end_io. 
+ */ + struct bio_set *bio_set; }; /* @@ -349,6 +359,7 @@ struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, const struct iomap_dio_ops *dops, unsigned int dio_flags, size_t done_before); ssize_t iomap_dio_complete(struct iomap_dio *dio); +void iomap_dio_bio_end_io(struct bio *bio); #ifdef CONFIG_SWAP struct file; -- cgit v1.2.3 From 786f847f43a54e63161474fe85a4f1764d871a35 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 5 May 2022 15:11:11 -0500 Subject: iomap: add per-iomap_iter private data Allow the file system to keep state for all iterations. For now only wire it up for direct I/O as there is an immediate need for it there. Reviewed-by: Darrick J. Wong Reviewed-by: Nikolay Borisov Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- fs/erofs/data.c | 2 +- fs/ext4/file.c | 4 ++-- fs/f2fs/file.c | 4 ++-- fs/gfs2/file.c | 4 ++-- fs/iomap/direct-io.c | 8 +++++--- fs/xfs/xfs_file.c | 6 +++--- fs/zonefs/super.c | 4 ++-- include/linux/iomap.h | 5 +++-- 9 files changed, 21 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index eb951ac52d22..954e91b74d21 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8167,7 +8167,7 @@ static const struct iomap_dio_ops btrfs_dio_ops = { ssize_t btrfs_dio_rw(struct kiocb *iocb, struct iov_iter *iter, size_t done_before) { return iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, &btrfs_dio_ops, - IOMAP_DIO_PARTIAL, done_before); + IOMAP_DIO_PARTIAL, NULL, done_before); } static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 780db1e5f4b7..91c11d5bb999 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -385,7 +385,7 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) if (!err) return iomap_dio_rw(iocb, to, &erofs_iomap_ops, - NULL, 0, 0); + 
NULL, 0, NULL, 0); if (err < 0) return err; } diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 6feb07e3e1eb..109d07629f81 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -76,7 +76,7 @@ static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) return generic_file_read_iter(iocb, to); } - ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL, 0, 0); + ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL, 0, NULL, 0); inode_unlock_shared(inode); file_accessed(iocb->ki_filp); @@ -565,7 +565,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) iomap_ops = &ext4_iomap_overwrite_ops; ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops, (unaligned_io || extend) ? IOMAP_DIO_FORCE_WAIT : 0, - 0); + NULL, 0); if (ret == -ENOTBLK) ret = 0; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 5b89af0f27f0..04bc8709314b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4309,7 +4309,7 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) */ inc_page_count(sbi, F2FS_DIO_READ); dio = __iomap_dio_rw(iocb, to, &f2fs_iomap_ops, - &f2fs_iomap_dio_read_ops, 0, 0); + &f2fs_iomap_dio_read_ops, 0, NULL, 0); if (IS_ERR_OR_NULL(dio)) { ret = PTR_ERR_OR_ZERO(dio); if (ret != -EIOCBQUEUED) @@ -4527,7 +4527,7 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, if (pos + count > inode->i_size) dio_flags |= IOMAP_DIO_FORCE_WAIT; dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops, - &f2fs_iomap_dio_write_ops, dio_flags, 0); + &f2fs_iomap_dio_write_ops, dio_flags, NULL, 0); if (IS_ERR_OR_NULL(dio)) { ret = PTR_ERR_OR_ZERO(dio); if (ret == -ENOTBLK) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 2556ae1f92ea..0b07d5a7bb81 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -835,7 +835,7 @@ retry: pagefault_disable(); to->nofault = true; ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL, - IOMAP_DIO_PARTIAL, read); + IOMAP_DIO_PARTIAL, NULL, read); to->nofault = false; 
pagefault_enable(); if (ret <= 0 && ret != -EFAULT) @@ -898,7 +898,7 @@ retry: from->nofault = true; ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL, - IOMAP_DIO_PARTIAL, written); + IOMAP_DIO_PARTIAL, NULL, written); from->nofault = false; if (ret <= 0) { if (ret == -ENOTBLK) diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 314d8235f4ab..cf224a8bb311 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -483,7 +483,7 @@ static loff_t iomap_dio_iter(const struct iomap_iter *iter, struct iomap_dio * __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, const struct iomap_dio_ops *dops, - unsigned int dio_flags, size_t done_before) + unsigned int dio_flags, void *private, size_t done_before) { struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = file_inode(iocb->ki_filp); @@ -492,6 +492,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, .pos = iocb->ki_pos, .len = iov_iter_count(iter), .flags = IOMAP_DIRECT, + .private = private, }; loff_t end = iomi.pos + iomi.len - 1, ret = 0; bool wait_for_completion = @@ -683,11 +684,12 @@ EXPORT_SYMBOL_GPL(__iomap_dio_rw); ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, const struct iomap_dio_ops *dops, - unsigned int dio_flags, size_t done_before) + unsigned int dio_flags, void *private, size_t done_before) { struct iomap_dio *dio; - dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags, done_before); + dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags, private, + done_before); if (IS_ERR_OR_NULL(dio)) return PTR_ERR_OR_ZERO(dio); return iomap_dio_complete(dio); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 5bddb1e9e0b3..85c412107a10 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -225,7 +225,7 @@ xfs_file_dio_read( ret = xfs_ilock_iocb(iocb, XFS_IOLOCK_SHARED); if (ret) return ret; - ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, 0, 0); + ret = 
iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, 0, NULL, 0); xfs_iunlock(ip, XFS_IOLOCK_SHARED); return ret; @@ -534,7 +534,7 @@ xfs_file_dio_write_aligned( } trace_xfs_file_direct_write(iocb, from); ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops, - &xfs_dio_write_ops, 0, 0); + &xfs_dio_write_ops, 0, NULL, 0); out_unlock: if (iolock) xfs_iunlock(ip, iolock); @@ -612,7 +612,7 @@ retry_exclusive: trace_xfs_file_direct_write(iocb, from); ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops, - &xfs_dio_write_ops, flags, 0); + &xfs_dio_write_ops, flags, NULL, 0); /* * Retry unaligned I/O with exclusive blocking semantics if the DIO diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index e20e7c841489..777fe626c2b3 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -861,7 +861,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) ret = zonefs_file_dio_append(iocb, from); else ret = iomap_dio_rw(iocb, from, &zonefs_iomap_ops, - &zonefs_write_dio_ops, 0, 0); + &zonefs_write_dio_ops, 0, NULL, 0); if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && (ret > 0 || ret == -EIOCBQUEUED)) { if (ret > 0) @@ -996,7 +996,7 @@ static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) } file_accessed(iocb->ki_filp); ret = iomap_dio_rw(iocb, to, &zonefs_iomap_ops, - &zonefs_read_dio_ops, 0, 0); + &zonefs_read_dio_ops, 0, NULL, 0); } else { ret = generic_file_read_iter(iocb, to); if (ret == -EIO) diff --git a/include/linux/iomap.h b/include/linux/iomap.h index cf903f1a230f..5b6f64f4d771 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -188,6 +188,7 @@ struct iomap_iter { unsigned flags; struct iomap iomap; struct iomap srcmap; + void *private; }; int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops); @@ -354,10 +355,10 @@ struct iomap_dio_ops { ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, const struct iomap_dio_ops *dops, - unsigned int 
dio_flags, size_t done_before); + unsigned int dio_flags, void *private, size_t done_before); struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, const struct iomap_dio_ops *dops, - unsigned int dio_flags, size_t done_before); + unsigned int dio_flags, void *private, size_t done_before); ssize_t iomap_dio_complete(struct iomap_dio *dio); void iomap_dio_bio_end_io(struct bio *bio); -- cgit v1.2.3