diff options
46 files changed, 200 insertions, 224 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 619af9bfdcb3..75eea7ce3d7c 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -194,7 +194,7 @@ prototypes: void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, int); void (*freepage)(struct page *); - int (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset); + int (*direct_IO)(struct kiocb *, struct iov_iter *iter); int (*migratepage)(struct address_space *, struct page *, struct page *); int (*launder_page)(struct page *); int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long); diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 4164bd6397a2..c61a223ef3ff 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -591,7 +591,7 @@ struct address_space_operations { void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, int); void (*freepage)(struct page *); - ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset); + ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter); /* migrate the contents of a page to the specified target */ int (*migratepage) (struct page *, struct page *); int (*launder_page) (struct page *); diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c index 69aa15e8e3ef..0c3459c1a518 100644 --- a/drivers/staging/lustre/lustre/llite/rw26.c +++ b/drivers/staging/lustre/lustre/llite/rw26.c @@ -358,14 +358,14 @@ static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io, */ #define MAX_DIO_SIZE ((KMALLOC_MAX_SIZE / sizeof(struct brw_page) * \ PAGE_SIZE) & ~(DT_MAX_BRW_SIZE - 1)) -static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter, - loff_t file_offset) +static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter) { struct lu_env *env; struct cl_io *io; struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct ccc_object *obj = cl_inode2ccc(inode); + loff_t file_offset = iocb->ki_pos; ssize_t count = iov_iter_count(iter); ssize_t tot_bytes = 0, result = 0; struct ll_inode_info *lli = ll_i2info(inode); diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index ac9225e86bf3..c37fb9c08970 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -245,9 +245,10 @@ static int v9fs_launder_page(struct page *page) * */ static ssize_t -v9fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t pos) +v9fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; + loff_t pos = iocb->ki_pos; ssize_t n; int err = 0; if (iov_iter_rw(iter) == WRITE) { diff --git a/fs/affs/file.c b/fs/affs/file.c index 0cde550050e8..0deec9cc2362 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -389,12 +389,13 @@ static void affs_write_failed(struct address_space *mapping, loff_t to) } static ssize_t -affs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) +affs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; size_t count = iov_iter_count(iter); + loff_t offset = iocb->ki_pos; ssize_t ret; if (iov_iter_rw(iter) == WRITE) { @@ -404,7 +405,7 @@ affs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) return 0; } - ret = blockdev_direct_IO(iocb, inode, iter, offset, affs_get_block); + ret = blockdev_direct_IO(iocb, inode, iter, affs_get_block); if (ret < 0 && iov_iter_rw(iter) == WRITE) affs_write_failed(mapping, offset + count); return ret; diff --git a/fs/block_dev.c b/fs/block_dev.c index 20a2c02b77c4..a063d4d8ac39 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -162,15 +162,15 @@ static struct inode *bdev_file_inode(struct file *file) } static ssize_t -blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) +blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct inode *inode = bdev_file_inode(file); if (IS_DAX(inode)) - return dax_do_io(iocb, inode, iter, offset, blkdev_get_block, + return dax_do_io(iocb, inode, iter, blkdev_get_block, NULL, DIO_SKIP_DIO_COUNT); - return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, offset, + return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, blkdev_get_block, NULL, NULL, DIO_SKIP_DIO_COUNT); } @@ -1660,12 +1660,8 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) blk_start_plug(&plug); ret = __generic_file_write_iter(iocb, from); - if (ret > 0) { - ssize_t err; - err = generic_write_sync(file, iocb->ki_pos - ret, ret); - if (err < 0) - ret = err; - } + if (ret > 0) + ret = generic_write_sync(iocb, ret); blk_finish_plug(&plug); return ret; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8d7b5a45c005..ea9f10bb089c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1703,18 +1703,17 @@ again: return num_written ? num_written : ret; } -static ssize_t __btrfs_direct_write(struct kiocb *iocb, - struct iov_iter *from, - loff_t pos) +static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); + loff_t pos = iocb->ki_pos; ssize_t written; ssize_t written_buffered; loff_t endbyte; int err; - written = generic_file_direct_write(iocb, from, pos); + written = generic_file_direct_write(iocb, from); if (written < 0 || !iov_iter_count(from)) return written; @@ -1832,7 +1831,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, atomic_inc(&BTRFS_I(inode)->sync_writers); if (iocb->ki_flags & IOCB_DIRECT) { - num_written = __btrfs_direct_write(iocb, from, pos); + num_written = __btrfs_direct_write(iocb, from); } else { num_written = __btrfs_buffered_write(file, from, pos); if (num_written > 0) @@ -1852,11 +1851,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, spin_lock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->last_sub_trans = root->log_transid; spin_unlock(&BTRFS_I(inode)->lock); - if (num_written > 0) { - err = generic_write_sync(file, pos, num_written); - if (err < 0) - num_written = err; - } + if (num_written > 0) + num_written = generic_write_sync(iocb, num_written); if (sync) atomic_dec(&BTRFS_I(inode)->sync_writers); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3e2ada1267f3..da5799c56160 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8541,13 +8541,13 @@ out: return retval; } -static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_dio_data dio_data = { 0 }; + loff_t offset = iocb->ki_pos; size_t count = 0; int flags = 0; bool wakeup = true; @@ -8607,7 +8607,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, ret = __blockdev_direct_IO(iocb, inode, BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, - iter, offset, btrfs_get_blocks_direct, NULL, + iter, btrfs_get_blocks_direct, NULL, btrfs_submit_direct, flags); if (iov_iter_rw(iter) == WRITE) { current->journal_info = NULL; diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 4801571f51cb..43098cd9602b 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1292,8 +1292,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, * intercept O_DIRECT reads and writes early, this function should * never get called. */ -static ssize_t ceph_direct_io(struct kiocb *iocb, struct iov_iter *iter, - loff_t pos) +static ssize_t ceph_direct_io(struct kiocb *iocb, struct iov_iter *iter) { WARN_ON(1); return -EINVAL; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index a79f9269831e..4f1dc7120916 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1382,12 +1382,11 @@ retry_snap: ceph_cap_string(got)); ceph_put_cap_refs(ci, got); - if (written >= 0 && - ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) || - ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { - err = vfs_fsync_range(file, pos, pos + written - 1, 1); - if (err < 0) - written = err; + if (written >= 0) { + if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL)) + iocb->ki_flags |= IOCB_DSYNC; + + written = generic_write_sync(iocb, written); } goto out_unlocked; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index c03d0744648b..9b51d4936a29 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2687,11 +2687,8 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) out: inode_unlock(inode); - if (rc > 0) { - ssize_t err = generic_write_sync(file, iocb->ki_pos - rc, rc); - if (err < 0) - rc = err; - } + if (rc > 0) + rc = generic_write_sync(iocb, rc); up_read(&cinode->lock_sem); return rc; } @@ -3854,7 +3851,7 @@ void cifs_oplock_break(struct work_struct *work) * Direct IO is not yet supported in the cached mode. */ static ssize_t -cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter, loff_t pos) +cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter) { /* * FIXME @@ -244,7 +244,6 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, * @iocb: The control block for this I/O * @inode: The file which the I/O is directed at * @iter: The addresses to do I/O from or to - * @pos: The file offset where the I/O starts * @get_block: The filesystem method used to translate file offsets to blocks * @end_io: A filesystem callback for I/O completion * @flags: See below @@ -257,11 +256,12 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, * is in progress. */ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode, - struct iov_iter *iter, loff_t pos, get_block_t get_block, + struct iov_iter *iter, get_block_t get_block, dio_iodone_t end_io, int flags) { struct buffer_head bh; ssize_t retval = -EINVAL; + loff_t pos = iocb->ki_pos; loff_t end = pos + iov_iter_count(iter); memset(&bh, 0, sizeof(bh)); diff --git a/fs/direct-io.c b/fs/direct-io.c index 472037732daf..3bf3f20f8ecc 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -224,9 +224,9 @@ static inline struct page *dio_get_page(struct dio *dio, * filesystems can use it to hold additional state between get_block calls and * dio_complete. */ -static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, - bool is_async) +static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) { + loff_t offset = dio->iocb->ki_pos; ssize_t transferred = 0; /* @@ -256,6 +256,7 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, if (dio->end_io) { int err; + // XXX: ki_pos?? err = dio->end_io(dio->iocb, offset, ret, dio->private); if (err) ret = err; @@ -265,15 +266,15 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, inode_dio_end(dio->inode); if (is_async) { - if (dio->rw & WRITE) { - int err; - - err = generic_write_sync(dio->iocb->ki_filp, offset, - transferred); - if (err < 0 && ret > 0) - ret = err; - } + /* + * generic_write_sync expects ki_pos to have been updated + * already, but the submission path only does this for + * synchronous I/O. + */ + dio->iocb->ki_pos += transferred; + if (dio->rw & WRITE) + ret = generic_write_sync(dio->iocb, transferred); dio->iocb->ki_complete(dio->iocb, ret, 0); } @@ -285,7 +286,7 @@ static void dio_aio_complete_work(struct work_struct *work) { struct dio *dio = container_of(work, struct dio, complete_work); - dio_complete(dio, dio->iocb->ki_pos, 0, true); + dio_complete(dio, 0, true); } static int dio_bio_complete(struct dio *dio, struct bio *bio); @@ -314,7 +315,7 @@ static void dio_bio_end_aio(struct bio *bio) queue_work(dio->inode->i_sb->s_dio_done_wq, &dio->complete_work); } else { - dio_complete(dio, dio->iocb->ki_pos, 0, true); + dio_complete(dio, 0, true); } } } @@ -1113,7 +1114,7 @@ static inline int drop_refcount(struct dio *dio) static inline ssize_t do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, struct block_device *bdev, struct iov_iter *iter, - loff_t offset, get_block_t get_block, dio_iodone_t end_io, + get_block_t get_block, dio_iodone_t end_io, dio_submit_t submit_io, int flags) { unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits); @@ -1121,6 +1122,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, unsigned blocksize_mask = (1 << blkbits) - 1; ssize_t retval = -EINVAL; size_t count = iov_iter_count(iter); + loff_t offset = iocb->ki_pos; loff_t end = offset + count; struct dio *dio; struct dio_submit sdio = { 0, }; @@ -1318,7 +1320,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, dio_await_completion(dio); if (drop_refcount(dio) == 0) { - retval = dio_complete(dio, offset, retval, false); + retval = dio_complete(dio, retval, false); } else BUG_ON(retval != -EIOCBQUEUED); @@ -1328,7 +1330,7 @@ out: ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, struct block_device *bdev, struct iov_iter *iter, - loff_t offset, get_block_t get_block, + get_block_t get_block, dio_iodone_t end_io, dio_submit_t submit_io, int flags) { @@ -1344,7 +1346,7 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, prefetch(bdev->bd_queue); prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES); - return do_blockdev_direct_IO(iocb, inode, bdev, iter, offset, get_block, + return do_blockdev_direct_IO(iocb, inode, bdev, iter, get_block, end_io, submit_io, flags); } diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 49e1bd00b4ec..9dc4c6dbf3c9 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -960,8 +960,7 @@ static void exofs_invalidatepage(struct page *page, unsigned int offset, /* TODO: Should be easy enough to do proprly */ -static ssize_t exofs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +static ssize_t exofs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { return 0; } diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 6bd58e6ff038..b675610391b8 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -854,20 +854,20 @@ static sector_t ext2_bmap(struct address_space *mapping, sector_t block) } static ssize_t -ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) +ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; size_t count = iov_iter_count(iter); + loff_t offset = iocb->ki_pos; ssize_t ret; if (IS_DAX(inode)) - ret = dax_do_io(iocb, inode, iter, offset, ext2_get_block, NULL, + ret = dax_do_io(iocb, inode, iter, ext2_get_block, NULL, DIO_LOCKING); else - ret = blockdev_direct_IO(iocb, inode, iter, offset, - ext2_get_block); + ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block); if (ret < 0 && iov_iter_rw(iter) == WRITE) ext2_write_failed(mapping, offset + count); return ret; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 349afebe21ee..72f4c9e00e97 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2581,8 +2581,7 @@ extern int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk, /* indirect.c */ extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, int flags); -extern ssize_t ext4_ind_direct_IO(struct kiocb *iocb, struct iov_iter *iter, - loff_t offset); +extern ssize_t ext4_ind_direct_IO(struct kiocb *iocb, struct iov_iter *iter); extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks); extern void ext4_ind_truncate(handle_t *, struct inode *inode); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index fa2208bae2e1..00ff6912adb3 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -169,13 +169,8 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = __generic_file_write_iter(iocb, from); inode_unlock(inode); - if (ret > 0) { - ssize_t err; - - err = generic_write_sync(file, iocb->ki_pos - ret, ret); - if (err < 0) - ret = err; - } + if (ret > 0) + ret = generic_write_sync(iocb, ret); if (o_direct) blk_finish_plug(&plug); diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 3027fa681de5..627b7e8f9ef3 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -659,12 +659,12 @@ out: * crashes then stale disk data _may_ be exposed inside the file. But current * VFS code falls back into buffered path in that case so we are safe. */ -ssize_t ext4_ind_direct_IO(struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +ssize_t ext4_ind_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct ext4_inode_info *ei = EXT4_I(inode); + loff_t offset = iocb->ki_pos; handle_t *handle; ssize_t ret; int orphan = 0; @@ -707,21 +707,21 @@ retry: goto locked; } if (IS_DAX(inode)) - ret = dax_do_io(iocb, inode, iter, offset, + ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block, NULL, 0); else ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, - offset, ext4_dio_get_block, + ext4_dio_get_block, NULL, NULL, 0); inode_dio_end(inode); } else { locked: if (IS_DAX(inode)) - ret = dax_do_io(iocb, inode, iter, offset, + ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block, NULL, DIO_LOCKING); else - ret = blockdev_direct_IO(iocb, inode, iter, offset, + ret = blockdev_direct_IO(iocb, inode, iter, ext4_dio_get_block); if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 981a1fc30eaa..79b298d397b4 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3334,12 +3334,12 @@ static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset, * if the machine crashes during the write. * */ -static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; ssize_t ret; + loff_t offset = iocb->ki_pos; size_t count = iov_iter_count(iter); int overwrite = 0; get_block_t *get_block_func = NULL; @@ -3348,7 +3348,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, /* Use the old path for reads and writes beyond i_size. */ if (iov_iter_rw(iter) != WRITE || final_size > inode->i_size) - return ext4_ind_direct_IO(iocb, iter, offset); + return ext4_ind_direct_IO(iocb, iter); BUG_ON(iocb->private == NULL); @@ -3400,11 +3400,11 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)); #endif if (IS_DAX(inode)) - ret = dax_do_io(iocb, inode, iter, offset, get_block_func, + ret = dax_do_io(iocb, inode, iter, get_block_func, ext4_end_io_dio, dio_flags); else ret = __blockdev_direct_IO(iocb, inode, - inode->i_sb->s_bdev, iter, offset, + inode->i_sb->s_bdev, iter, get_block_func, ext4_end_io_dio, NULL, dio_flags); @@ -3431,12 +3431,12 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, return ret; } -static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; size_t count = iov_iter_count(iter); + loff_t offset = iocb->ki_pos; ssize_t ret; #ifdef CONFIG_EXT4_FS_ENCRYPTION @@ -3456,9 +3456,9 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter, trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) - ret = ext4_ext_direct_IO(iocb, iter, offset); + ret = ext4_ext_direct_IO(iocb, iter); else - ret = ext4_ind_direct_IO(iocb, iter, offset); + ret = ext4_ind_direct_IO(iocb, iter); trace_ext4_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret); return ret; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 5dafb9cef12e..bb376c3bca62 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1665,12 +1665,12 @@ static int check_direct_IO(struct inode *inode, struct iov_iter *iter, return 0; } -static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = mapping->host; size_t count = iov_iter_count(iter); + loff_t offset = iocb->ki_pos; int err; err = check_direct_IO(inode, iter, offset); @@ -1682,7 +1682,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); - err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio); + err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio); if (err < 0 && iov_iter_rw(iter) == WRITE) f2fs_write_failed(mapping, offset + count); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 90d1157a09f9..eb9d027e5981 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1886,13 +1886,8 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) } inode_unlock(inode); - if (ret > 0) { - ssize_t err; - - err = generic_write_sync(file, iocb->ki_pos - ret, ret); - if (err < 0) - ret = err; - } + if (ret > 0) + ret = generic_write_sync(iocb, ret); return ret; } diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 226281068a46..3bcf57925dca 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -244,13 +244,13 @@ static int fat_write_end(struct file *file, struct address_space *mapping, return err; } -static ssize_t fat_direct_IO(struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +static ssize_t fat_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; size_t count = iov_iter_count(iter); + loff_t offset = iocb->ki_pos; ssize_t ret; if (iov_iter_rw(iter) == WRITE) { @@ -272,7 +272,7 @@ static ssize_t fat_direct_IO(struct kiocb *iocb, struct iov_iter *iter, * FAT need to use the DIO_LOCKING for avoiding the race * condition of fat_get_block() and ->truncate(). */ - ret = blockdev_direct_IO(iocb, inode, iter, offset, fat_get_block); + ret = blockdev_direct_IO(iocb, inode, iter, fat_get_block); if (ret < 0 && iov_iter_rw(iter) == WRITE) fat_write_failed(mapping, offset + count); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index dcad5e210525..9154f8679024 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1186,7 +1186,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (iocb->ki_flags & IOCB_DIRECT) { loff_t pos = iocb->ki_pos; - written = generic_file_direct_write(iocb, from, pos); + written = generic_file_direct_write(iocb, from); if (written < 0 || !iov_iter_count(from)) goto out; @@ -2837,7 +2837,7 @@ static inline loff_t fuse_round_up(loff_t off) } static ssize_t -fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) +fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { DECLARE_COMPLETION_ONSTACK(wait); ssize_t ret = 0; @@ -2848,6 +2848,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) struct inode *inode; loff_t i_size; size_t count = iov_iter_count(iter); + loff_t offset = iocb->ki_pos; struct fuse_io_priv *io; bool is_sync = is_sync_kiocb(iocb); diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 1bbbee945f46..8524c0e322fc 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -1042,13 +1042,13 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, loff_t offset) -static ssize_t gfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +static ssize_t gfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct address_space *mapping = inode->i_mapping; struct gfs2_inode *ip = GFS2_I(inode); + loff_t offset = iocb->ki_pos; struct gfs2_holder gh; int rv; @@ -1099,7 +1099,7 @@ static ssize_t gfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, } rv = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, - offset, gfs2_get_block_direct, NULL, NULL, 0); + gfs2_get_block_direct, NULL, NULL, 0); out: gfs2_glock_dq(&gh); gfs2_holder_uninit(&gh); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index a3e7358e3cd1..e53b723abd3b 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -895,7 +895,10 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t mark_inode_dirty(inode); } - return generic_write_sync(file, pos, count); + if ((file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host)) + return vfs_fsync_range(file, pos, pos + count - 1, + (file->f_flags & __O_SYNC) ? 0 : 1); + return 0; out_trans_fail: gfs2_inplace_release(ip); diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index ba533c79a806..8eed66af5b82 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -124,8 +124,7 @@ static int hfs_releasepage(struct page *page, gfp_t mask) return res ? try_to_free_buffers(page) : 0; } -static ssize_t hfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +static ssize_t hfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -133,7 +132,7 @@ static ssize_t hfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(iocb, inode, iter, offset, hfs_get_block); + ret = blockdev_direct_IO(iocb, inode, iter, hfs_get_block); /* * In case of error extending write may have instantiated a few @@ -141,7 +140,7 @@ static ssize_t hfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, */ if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { loff_t isize = i_size_read(inode); - loff_t end = offset + count; + loff_t end = iocb->ki_pos + count; if (end > isize) hfs_write_failed(mapping, end); diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 037f738c5871..ef9fefe364a6 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -122,8 +122,7 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask) return res ? try_to_free_buffers(page) : 0; } -static ssize_t hfsplus_direct_IO(struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +static ssize_t hfsplus_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -131,7 +130,7 @@ static ssize_t hfsplus_direct_IO(struct kiocb *iocb, struct iov_iter *iter, size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(iocb, inode, iter, offset, hfsplus_get_block); + ret = blockdev_direct_IO(iocb, inode, iter, hfsplus_get_block); /* * In case of error extending write may have instantiated a few @@ -139,7 +138,7 @@ static ssize_t hfsplus_direct_IO(struct kiocb *iocb, struct iov_iter *iter, */ if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { loff_t isize = i_size_read(inode); - loff_t end = offset + count; + loff_t end = iocb->ki_pos + count; if (end > isize) hfsplus_write_failed(mapping, end); diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index da1d02116a19..ad3e7b1effc4 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -332,8 +332,7 @@ static sector_t jfs_bmap(struct address_space *mapping, sector_t block) return generic_block_bmap(mapping, block, jfs_get_block); } -static ssize_t jfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +static ssize_t jfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -341,7 +340,7 @@ static ssize_t jfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(iocb, inode, iter, offset, jfs_get_block); + ret = blockdev_direct_IO(iocb, inode, iter, jfs_get_block); /* * In case of error extending write may have instantiated a few @@ -349,7 +348,7 @@ static ssize_t jfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, */ if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { loff_t isize = i_size_read(inode); - loff_t end = offset + count; + loff_t end = iocb->ki_pos + count; if (end > isize) jfs_write_failed(mapping, end); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 438d4e70742f..741a92c470bb 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -250,7 +250,7 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, * shunt off direct read and write requests before the VFS gets them, * so this method is only ever called for swap. */ -ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t pos) +ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct inode *inode = iocb->ki_filp->f_mapping->host; @@ -261,7 +261,7 @@ ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t pos) VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE); if (iov_iter_rw(iter) == READ) - return nfs_file_direct_read(iocb, iter, pos); + return nfs_file_direct_read(iocb, iter); return nfs_file_direct_write(iocb, iter); } @@ -545,7 +545,6 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, * nfs_file_direct_read - file direct read operation for NFS files * @iocb: target I/O control block * @iter: vector of user buffers into which to read data - * @pos: byte offset in file where reading starts * * We use this function for direct reads instead of calling * generic_file_aio_read() in order to avoid gfar's check to see if @@ -561,8 +560,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, * client must read the updated atime from the server back into its * cache. */ -ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, - loff_t pos) +ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -574,7 +572,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n", - file, count, (long long) pos); + file, count, (long long) iocb->ki_pos); result = 0; if (!count) @@ -594,7 +592,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, dreq->inode = inode; dreq->bytes_left = count; - dreq->io_start = pos; + dreq->io_start = iocb->ki_pos; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); l_ctx = nfs_get_lock_context(dreq->ctx); if (IS_ERR(l_ctx)) { @@ -606,14 +604,14 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, dreq->iocb = iocb; NFS_I(inode)->read_io += count; - result = nfs_direct_read_schedule_iovec(dreq, iter, pos); + result = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos); inode_unlock(inode); if (!result) { result = nfs_direct_wait(dreq); if (result > 0) - iocb->ki_pos = pos + result; + iocb->ki_pos += result; } nfs_direct_req_release(dreq); @@ -969,7 +967,6 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, * nfs_file_direct_write - file direct write operation for NFS files * @iocb: target I/O control block * @iter: vector of user buffers from which to write data - * @pos: byte offset in file where writing starts * * We use this function for direct writes instead of calling * generic_file_aio_write() in order to avoid taking the inode @@ -1057,7 +1054,9 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) if (i_size_read(inode) < iocb->ki_pos) i_size_write(inode, iocb->ki_pos); spin_unlock(&inode->i_lock); - generic_write_sync(file, pos, result); + + /* XXX: should check the generic_write_sync retval */ + generic_write_sync(iocb, result); } } nfs_direct_req_release(dreq); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index be01095b97ae..717a8d6af52d 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -164,7 +164,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to) ssize_t result; if (iocb->ki_flags & IOCB_DIRECT) - return nfs_file_direct_read(iocb, to, iocb->ki_pos); + return nfs_file_direct_read(iocb, to); dprintk("NFS: read(%pD2, %zu@%lu)\n", iocb->ki_filp, diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index d40010e4f1a9..6fbd81ecb410 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -935,8 +935,8 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, int stable = *stablep; int use_wgather; loff_t pos = offset; - loff_t end = LLONG_MAX; unsigned int pflags = current->flags; + int flags = 0; if (test_bit(RQ_LOCAL, &rqstp->rq_flags)) /* @@ -955,9 +955,12 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, if (!EX_ISSYNC(exp)) stable = 0; + if (stable && !use_wgather) + flags |= RWF_SYNC; + /* Write the data. */ oldfs = get_fs(); set_fs(KERNEL_DS); - host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos, 0); + host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos, flags); set_fs(oldfs); if (host_err < 0) goto out_nfserr; @@ -965,15 +968,8 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, nfsdstats.io_write += host_err; fsnotify_modify(file); - if (stable) { - if (use_wgather) { - host_err = wait_for_concurrent_writes(file); - } else { - if (*cnt) - end = offset + *cnt - 1; - host_err = vfs_fsync_range(file, offset, end, 0); - } - } + if (stable && use_wgather) + host_err = wait_for_concurrent_writes(file); out_nfserr: dprintk("nfsd: write complete host_err=%d\n", host_err); diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 534631358b13..cfebcd2fc7f3 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -305,7 +305,7 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping, } static ssize_t -nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) +nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct inode *inode = file_inode(iocb->ki_filp); @@ -313,7 +313,7 @@ nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) return 0; /* Needs synchronization with the cleaner */ - return blockdev_direct_IO(iocb, inode, iter, offset, nilfs_get_block); + return blockdev_direct_IO(iocb, inode, iter, nilfs_get_block); } const struct address_space_operations nilfs_aops = { diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 91117ada8528..5622ed5a201e 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -1952,12 +1952,9 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) written = ntfs_perform_write(file, from, iocb->ki_pos); current->backing_dev_info = NULL; inode_unlock(vi); - if (likely(written > 0)) { - err = generic_write_sync(file, iocb->ki_pos, written); - if (err < 0) - written = 0; - } iocb->ki_pos += written; + if (likely(written > 0)) + written = generic_write_sync(iocb, written); return written ? written : err; } diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index abb0b0bf7c7f..c034edf3ef38 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2423,13 +2423,11 @@ static int ocfs2_dio_end_io(struct kiocb *iocb, return 0; } -static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file)->i_mapping->host; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - loff_t end = offset + iter->count; get_block_t *get_block; /* @@ -2440,7 +2438,8 @@ static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, return 0; /* Fallback to buffered I/O if we do not support append dio. */ - if (end > i_size_read(inode) && !ocfs2_supports_append_dio(osb)) + if (iocb->ki_pos + iter->count > i_size_read(inode) && + !ocfs2_supports_append_dio(osb)) return 0; if (iov_iter_rw(iter) == READ) @@ -2449,7 +2448,7 @@ static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, get_block = ocfs2_dio_get_block; return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, - iter, offset, get_block, + iter, get_block, ocfs2_dio_end_io, NULL, 0); } diff --git a/fs/read_write.c b/fs/read_write.c index 69c7c3c2955c..56425cd5c9e7 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -686,12 +686,16 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, struct kiocb kiocb; ssize_t ret; - if (flags & ~RWF_HIPRI) + if (flags & ~(RWF_HIPRI | RWF_DSYNC | RWF_SYNC)) return -EOPNOTSUPP; init_sync_kiocb(&kiocb, filp); if (flags & RWF_HIPRI) kiocb.ki_flags |= IOCB_HIPRI; + if (flags & RWF_DSYNC) + kiocb.ki_flags |= IOCB_DSYNC; + if (flags & RWF_SYNC) + kiocb.ki_flags |= (IOCB_DSYNC | IOCB_SYNC); kiocb.ki_pos = *ppos; ret = fn(&kiocb, iter); diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index d5c2e9c865de..825455d3e4ba 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -3279,15 +3279,14 @@ static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags) * We thank Mingming Cao for helping us understand in great detail what * to do in this section of the code. */ -static ssize_t reiserfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +static ssize_t reiserfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(iocb, inode, iter, offset, + ret = blockdev_direct_IO(iocb, inode, iter, reiserfs_get_blocks_direct_io); /* @@ -3296,7 +3295,7 @@ static ssize_t reiserfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, */ if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { loff_t isize = i_size_read(inode); - loff_t end = offset + count; + loff_t end = iocb->ki_pos + count; if ((end > isize) && inode_newsize_ok(inode, isize) == 0) { truncate_setsize(inode, isize); diff --git a/fs/udf/file.c b/fs/udf/file.c index 877ba1c9b461..632570617327 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -99,8 +99,7 @@ static int udf_adinicb_write_begin(struct file *file, return 0; } -static ssize_t udf_adinicb_direct_IO(struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +static ssize_t udf_adinicb_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { /* Fallback to buffered I/O. */ return 0; @@ -153,9 +152,7 @@ out: if (retval > 0) { mark_inode_dirty(inode); - err = generic_write_sync(file, iocb->ki_pos - retval, retval); - if (err < 0) - retval = err; + retval = generic_write_sync(iocb, retval); } return retval; diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 2dc461eeb415..f323aff740ef 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -214,8 +214,7 @@ static int udf_write_begin(struct file *file, struct address_space *mapping, return ret; } -static ssize_t udf_direct_IO(struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +static ssize_t udf_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -223,9 +222,9 @@ static ssize_t udf_direct_IO(struct kiocb *iocb, struct iov_iter *iter, size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(iocb, inode, iter, offset, udf_get_block); + ret = blockdev_direct_IO(iocb, inode, iter, udf_get_block); if (unlikely(ret < 0 && iov_iter_rw(iter) == WRITE)) - udf_write_failed(mapping, offset + count); + udf_write_failed(mapping, iocb->ki_pos + count); return ret; } diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index e49b2406d15d..c535887c60a8 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1406,8 +1406,7 @@ xfs_end_io_direct_write( STATIC ssize_t xfs_vm_direct_IO( struct kiocb *iocb, - struct iov_iter *iter, - loff_t offset) + struct iov_iter *iter) { struct inode *inode = iocb->ki_filp->f_mapping->host; dio_iodone_t *endio = NULL; @@ -1420,12 +1419,12 @@ xfs_vm_direct_IO( } if (IS_DAX(inode)) { - return dax_do_io(iocb, inode, iter, offset, + return dax_do_io(iocb, inode, iter, xfs_get_blocks_direct, endio, 0); } bdev = xfs_find_bdev_for_inode(inode); - return __blockdev_direct_IO(iocb, inode, bdev, iter, offset, + return __blockdev_direct_IO(iocb, inode, bdev, iter, xfs_get_blocks_direct, endio, NULL, flags); } diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 345fd85a1997..85ce3032f815 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -718,18 +718,19 @@ xfs_file_dio_aio_write( int unaligned_io = 0; int iolock; size_t count = iov_iter_count(from); - loff_t pos = iocb->ki_pos; loff_t end; struct iov_iter data; struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; /* DIO must be aligned to device logical sector size */ - if (!IS_DAX(inode) && ((pos | count) & target->bt_logical_sectormask)) + if (!IS_DAX(inode) && + ((iocb->ki_pos | count) & target->bt_logical_sectormask)) return -EINVAL; /* "unaligned" here means not aligned to a filesystem block */ - if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask)) + if ((iocb->ki_pos & mp->m_blockmask) || + ((iocb->ki_pos + count) & mp->m_blockmask)) unaligned_io = 1; /* @@ -760,8 +761,7 @@ xfs_file_dio_aio_write( if (ret) goto out; count = iov_iter_count(from); - pos = iocb->ki_pos; - end = pos + count - 1; + end = iocb->ki_pos + count - 1; /* * See xfs_file_read_iter() for why we do a full-file flush here. @@ -794,19 +794,18 @@ xfs_file_dio_aio_write( trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); data = *from; - ret = mapping->a_ops->direct_IO(iocb, &data, pos); + ret = mapping->a_ops->direct_IO(iocb, &data); /* see generic_file_direct_write() for why this is necessary */ if (mapping->nrpages) { invalidate_inode_pages2_range(mapping, - pos >> PAGE_SHIFT, + iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT); } if (ret > 0) { - pos += ret; + iocb->ki_pos += ret; iov_iter_advance(from, ret); - iocb->ki_pos = pos; } out: xfs_rw_iunlock(ip, iolock); @@ -904,14 +903,10 @@ xfs_file_write_iter( ret = xfs_file_buffered_aio_write(iocb, from); if (ret > 0) { - ssize_t err; - XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret); /* Handle various SYNC-type writes */ - err = generic_write_sync(file, iocb->ki_pos - ret, ret); - if (err < 0) - ret = err; + ret = generic_write_sync(iocb, ret); } return ret; } diff --git a/include/linux/dax.h b/include/linux/dax.h index 636dd59ab505..982a6c4a62f3 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -5,7 +5,7 @@ #include <linux/mm.h> #include <asm/pgtable.h> -ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t, +ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, get_block_t, dio_iodone_t, int flags); int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size); int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); diff --git a/include/linux/fs.h b/include/linux/fs.h index 98a18ccf3fc0..851390c8d75b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -323,6 +323,8 @@ struct writeback_control; #define IOCB_APPEND (1 << 1) #define IOCB_DIRECT (1 << 2) #define IOCB_HIPRI (1 << 3) +#define IOCB_DSYNC (1 << 4) +#define IOCB_SYNC (1 << 5) struct kiocb { struct file *ki_filp; @@ -394,7 +396,7 @@ struct address_space_operations { void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, gfp_t); void (*freepage)(struct page *); - ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset); + ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter); /* * migrate the contents of a page to the specified target. If * migrate_mode is MIGRATE_ASYNC, it must not block. @@ -2515,13 +2517,25 @@ extern int filemap_fdatawrite_range(struct address_space *mapping, extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync); extern int vfs_fsync(struct file *file, int datasync); -static inline int generic_write_sync(struct file *file, loff_t pos, loff_t count) -{ - if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host)) - return 0; - return vfs_fsync_range(file, pos, pos + count - 1, - (file->f_flags & __O_SYNC) ? 0 : 1); + +/* + * Sync the bytes written if this was a synchronous write. Expect ki_pos + * to already be updated for the write, and will return either the amount + * of bytes passed in, or an error if syncing the file failed. + */ +static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count) +{ + if (iocb->ki_flags & IOCB_DSYNC) { + int ret = vfs_fsync_range(iocb->ki_filp, + iocb->ki_pos - count, iocb->ki_pos - 1, + (iocb->ki_flags & IOCB_SYNC) ? 0 : 1); + if (ret) + return ret; + } + + return count; } + extern void emergency_sync(void); extern void emergency_remount(void); #ifdef CONFIG_BLOCK @@ -2733,7 +2747,7 @@ extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); -extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *, loff_t); +extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *); extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos); @@ -2796,18 +2810,17 @@ void dio_end_io(struct bio *bio, int error); ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, struct block_device *bdev, struct iov_iter *iter, - loff_t offset, get_block_t get_block, + get_block_t get_block, dio_iodone_t end_io, dio_submit_t submit_io, int flags); static inline ssize_t blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, - struct iov_iter *iter, loff_t offset, + struct iov_iter *iter, get_block_t get_block) { return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, - offset, get_block, NULL, NULL, - DIO_LOCKING | DIO_SKIP_HOLES); + get_block, NULL, NULL, DIO_LOCKING | DIO_SKIP_HOLES); } #endif @@ -2973,6 +2986,10 @@ static inline int iocb_flags(struct file *file) res |= IOCB_APPEND; if (io_is_direct(file)) res |= IOCB_DIRECT; + if ((file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host)) + res |= IOCB_DSYNC; + if (file->f_flags & __O_SYNC) + res |= IOCB_SYNC; return res; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index fa167f25465d..d71278c3c5bd 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -443,10 +443,9 @@ static inline struct rpc_cred *nfs_file_cred(struct file *file) /* * linux/fs/nfs/direct.c */ -extern ssize_t nfs_direct_IO(struct kiocb *, struct iov_iter *, loff_t); +extern ssize_t nfs_direct_IO(struct kiocb *, struct iov_iter *); extern ssize_t nfs_file_direct_read(struct kiocb *iocb, - struct iov_iter *iter, - loff_t pos); + struct iov_iter *iter); extern ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter); diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index a079d50376e1..e21fe04acc12 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -324,5 +324,7 @@ struct fscrypt_policy { /* flags for preadv2/pwritev2: */ #define RWF_HIPRI 0x00000001 /* high priority request, poll if possible */ +#define RWF_DSYNC 0x00000002 /* per-IO O_DSYNC */ +#define RWF_SYNC 0x00000004 /* per-IO O_SYNC */ #endif /* _UAPI_LINUX_FS_H */ diff --git a/mm/filemap.c b/mm/filemap.c index f2479af09da9..182b21825255 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1838,8 +1838,6 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; ssize_t retval = 0; - loff_t *ppos = &iocb->ki_pos; - loff_t pos = *ppos; size_t count = iov_iter_count(iter); if (!count) @@ -1851,15 +1849,15 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) loff_t size; size = i_size_read(inode); - retval = filemap_write_and_wait_range(mapping, pos, - pos + count - 1); + retval = filemap_write_and_wait_range(mapping, iocb->ki_pos, + iocb->ki_pos + count - 1); if (!retval) { struct iov_iter data = *iter; - retval = mapping->a_ops->direct_IO(iocb, &data, pos); + retval = mapping->a_ops->direct_IO(iocb, &data); } if (retval > 0) { - *ppos = pos + retval; + iocb->ki_pos += retval; iov_iter_advance(iter, retval); } @@ -1872,14 +1870,14 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) * the rest of the read. Buffered reads will not work for * DAX files, so don't bother trying. */ - if (retval < 0 || !iov_iter_count(iter) || *ppos >= size || + if (retval < 0 || !iov_iter_count(iter) || iocb->ki_pos >= size || IS_DAX(inode)) { file_accessed(file); goto out; } } - retval = do_generic_file_read(file, ppos, iter, retval); + retval = do_generic_file_read(file, &iocb->ki_pos, iter, retval); out: return retval; } @@ -2500,11 +2498,12 @@ int pagecache_write_end(struct file *file, struct address_space *mapping, EXPORT_SYMBOL(pagecache_write_end); ssize_t -generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) +generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; + loff_t pos = iocb->ki_pos; ssize_t written; size_t write_len; pgoff_t end; @@ -2538,7 +2537,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) } data = *from; - written = mapping->a_ops->direct_IO(iocb, &data, pos); + written = mapping->a_ops->direct_IO(iocb, &data); /* * Finally, try again to invalidate clean pages which might have been @@ -2718,7 +2717,7 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (iocb->ki_flags & IOCB_DIRECT) { loff_t pos, endbyte; - written = generic_file_direct_write(iocb, from, iocb->ki_pos); + written = generic_file_direct_write(iocb, from); /* * If the write stopped short of completing, fall back to * buffered writes. Some filesystems do this for writes to @@ -2792,13 +2791,8 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = __generic_file_write_iter(iocb, from); inode_unlock(inode); - if (ret > 0) { - ssize_t err; - - err = generic_write_sync(file, iocb->ki_pos - ret, ret); - if (err < 0) - ret = err; - } + if (ret > 0) + ret = generic_write_sync(iocb, ret); return ret; } EXPORT_SYMBOL(generic_file_write_iter); diff --git a/mm/page_io.c b/mm/page_io.c index 985f23cfa79b..242dba07545b 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -279,7 +279,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, set_page_writeback(page); unlock_page(page); - ret = mapping->a_ops->direct_IO(&kiocb, &from, kiocb.ki_pos); + ret = mapping->a_ops->direct_IO(&kiocb, &from); if (ret == PAGE_SIZE) { count_vm_event(PSWPOUT); ret = 0; |