author    Jan Kara <jack@suse.cz>    2016-09-30 01:03:17 -0400
committer Theodore Ts'o <tytso@mit.edu>    2016-09-30 01:03:17 -0400
commit    16c54688592ce8eea85d2a26d37b64fa07e3e233 (patch)
tree      9c8b9ce94df5b69a6c1befcc735f9ba665b6cba2
parent    cca32b7eeb4ea24fa6596650e06279ad9130af98 (diff)
ext4: Allow parallel DIO reads
We can easily support parallel direct IO reads. We only have to make
sure we cannot expose uninitialized data by reading an allocated block
to which data has not yet been written, or which has already been
truncated. That is easily achieved by holding inode_lock in shared
mode - it excludes all writes, truncates, and hole punches. We also
have to guard against page writeback allocating blocks for
delay-allocated pages - that race is handled by the fact that we write
back all pages in the affected range and the lock protects us from new
pages being created there.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
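The exclusion the message relies on is ordinary reader/writer locking:
DIO reads take the inode lock shared and so run concurrently with each
other, while writes, truncates, and hole punches take it exclusive. As
a rough userspace analogy (a hypothetical sketch, not the kernel
implementation), inode_lock_shared()/inode_lock() behave like
pthread_rwlock_rdlock()/pthread_rwlock_wrlock():

#include <pthread.h>
#include <stdio.h>

/* Hypothetical analogy only: inode_lock_shared() ~ rdlock (many DIO
 * readers in parallel), inode_lock() ~ wrlock (one writer/truncate,
 * excluding all readers). */
static pthread_rwlock_t inode_lock = PTHREAD_RWLOCK_INITIALIZER;

static void *dio_read(void *arg)
{
	pthread_rwlock_rdlock(&inode_lock);	/* shared: readers overlap */
	printf("reader %ld: reading blocks\n", (long)arg);
	pthread_rwlock_unlock(&inode_lock);
	return NULL;
}

static void *truncate_op(void *arg)
{
	(void)arg;
	pthread_rwlock_wrlock(&inode_lock);	/* exclusive: no readers */
	printf("truncate: all readers excluded\n");
	pthread_rwlock_unlock(&inode_lock);
	return NULL;
}

int main(void)
{
	pthread_t r1, r2, w;

	pthread_create(&r1, NULL, dio_read, (void *)1L);
	pthread_create(&r2, NULL, dio_read, (void *)2L);
	pthread_create(&w, NULL, truncate_op, NULL);
	pthread_join(r1, NULL);
	pthread_join(r2, NULL);
	pthread_join(w, NULL);
	return 0;
}

(Compile with: cc -pthread analogy.c)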
 fs/ext4/inode.c | 40 ++++++++++++++++++----------------------
 1 file changed, 18 insertions(+), 22 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 39883158b970..d8a4afc5eedb 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3528,35 +3528,31 @@ out:
static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
{
- int unlocked = 0;
- struct inode *inode = iocb->ki_filp->f_mapping->host;
+ struct address_space *mapping = iocb->ki_filp->f_mapping;
+ struct inode *inode = mapping->host;
ssize_t ret;
- if (ext4_should_dioread_nolock(inode)) {
- /*
- * Nolock dioread optimization may be dynamically disabled
- * via ext4_inode_block_unlocked_dio(). Check inode's state
- * while holding extra i_dio_count ref.
- */
- inode_dio_begin(inode);
- smp_mb();
- if (unlikely(ext4_test_inode_state(inode,
- EXT4_STATE_DIOREAD_LOCK)))
- inode_dio_end(inode);
- else
- unlocked = 1;
- }
+ /*
+ * Shared inode_lock is enough for us - it protects against concurrent
+ * writes & truncates and since we take care of writing back page cache,
+ * we are protected against page writeback as well.
+ */
+ inode_lock_shared(inode);
if (IS_DAX(inode)) {
- ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block,
- NULL, unlocked ? 0 : DIO_LOCKING);
+ ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block, NULL, 0);
} else {
+ size_t count = iov_iter_count(iter);
+
+ ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
+ iocb->ki_pos + count - 1);
+ if (ret)
+ goto out_unlock;
ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
iter, ext4_dio_get_block,
- NULL, NULL,
- unlocked ? 0 : DIO_LOCKING);
+ NULL, NULL, 0);
}
- if (unlocked)
- inode_dio_end(inode);
+out_unlock:
+ inode_unlock_shared(inode);
return ret;
}
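For a sense of the workload this change helps, here is a hypothetical
userspace test (not part of the patch): several threads issuing aligned
O_DIRECT preads against one file. Before this commit such reads
serialized on the exclusive inode lock unless dioread_nolock applied;
with it they proceed in parallel under the shared lock. The thread
count, buffer size, and file argument below are illustrative, and the
usual O_DIRECT alignment rules still apply.

#define _GNU_SOURCE		/* for O_DIRECT */
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define NTHREADS 4
#define BUFSIZE  4096		/* multiple of the block size */

static int fd;

static void *reader(void *arg)
{
	long id = (long)arg;
	void *buf;

	/* O_DIRECT requires suitably aligned buffers */
	if (posix_memalign(&buf, 4096, BUFSIZE))
		return NULL;
	/* each thread reads its own aligned region of the file */
	if (pread(fd, buf, BUFSIZE, (off_t)id * BUFSIZE) < 0)
		perror("pread");
	free(buf);
	return NULL;
}

int main(int argc, char **argv)
{
	pthread_t tid[NTHREADS];
	long i;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY | O_DIRECT);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	for (i = 0; i < NTHREADS; i++)
		pthread_create(&tid[i], NULL, reader, (void *)i);
	for (i = 0; i < NTHREADS; i++)
		pthread_join(tid[i], NULL);
	close(fd);
	return 0;
}

(Compile with: cc -pthread -o diotest diotest.c; run as ./diotest
<file> on an ext4 filesystem. All threads hold the inode lock in
shared mode at once, so the preads can be in flight simultaneously.)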