diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-01-30 15:17:05 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-01-30 15:17:05 -0800 |
commit | e5da4c933c50d98d7990a7c1ca0bbf8946e80c4a (patch) | |
tree | 98ebcdbe2d053e4a1cd60d006dfd2ea6b6bc99a2 /fs/ext4/extents.c | |
parent | 9f68e3655aae6d49d6ba05dd263f99f33c2567af (diff) | |
parent | 7f6225e446cc8dfa4c3c7959a4de3dd03ec277bf (diff) | |
download | linux-e5da4c933c50d98d7990a7c1ca0bbf8946e80c4a.tar.bz2 |
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o:
"This merge window, we've added some performance improvements in how we
handle inode locking in the read/write paths, and improving the
performance of Direct I/O overwrites.
We also now record the error code which caused the first and most
recent ext4_error() report in the superblock, to make it easier to
root cause problems in production systems.
There are also many of the usual cleanups and miscellaneous bug fixes"
* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (49 commits)
jbd2: clean __jbd2_journal_abort_hard() and __journal_abort_soft()
jbd2: make sure ESHUTDOWN to be recorded in the journal superblock
ext4, jbd2: ensure panic when aborting with zero errno
jbd2: switch to use jbd2_journal_abort() when failed to submit the commit record
jbd2_seq_info_next should increase position index
jbd2: remove pointless assertion in __journal_remove_journal_head
ext4,jbd2: fix comment and code style
jbd2: delete the duplicated words in the comments
ext4: fix extent_status trace points
ext4: fix symbolic enum printing in trace output
ext4: choose hardlimit when softlimit is larger than hardlimit in ext4_statfs_project()
ext4: fix race conditions in ->d_compare() and ->d_hash()
ext4: make dioread_nolock the default
ext4: fix extent_status fragmentation for plain files
jbd2: clear JBD2_ABORT flag before journal_reset to update log tail info when load journal
ext4: drop ext4_kvmalloc()
ext4: Add EXT4_IOC_FSGETXATTR/EXT4_IOC_FSSETXATTR to compat_ioctl
ext4: remove unused macro MPAGE_DA_EXTENT_TAIL
ext4: add missing braces in ext4_ext_drop_refs()
ext4: fix some nonstandard indentation in extents.c
...
Diffstat (limited to 'fs/ext4/extents.c')
-rw-r--r-- | fs/ext4/extents.c | 205 |
1 files changed, 76 insertions, 129 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 0e8708b77da6..954013d6076b 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -161,8 +161,9 @@ static int ext4_ext_get_access(handle_t *handle, struct inode *inode, * - ENOMEM * - EIO */ -int __ext4_ext_dirty(const char *where, unsigned int line, handle_t *handle, - struct inode *inode, struct ext4_ext_path *path) +static int __ext4_ext_dirty(const char *where, unsigned int line, + handle_t *handle, struct inode *inode, + struct ext4_ext_path *path) { int err; @@ -179,6 +180,9 @@ int __ext4_ext_dirty(const char *where, unsigned int line, handle_t *handle, return err; } +#define ext4_ext_dirty(handle, inode, path) \ + __ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path)) + static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, struct ext4_ext_path *path, ext4_lblk_t block) @@ -309,53 +313,6 @@ ext4_force_split_extent_at(handle_t *handle, struct inode *inode, (nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL:0)); } -/* - * Calculate the number of metadata blocks needed - * to allocate @blocks - * Worse case is one block per extent - */ -int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) -{ - struct ext4_inode_info *ei = EXT4_I(inode); - int idxs; - - idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) - / sizeof(struct ext4_extent_idx)); - - /* - * If the new delayed allocation block is contiguous with the - * previous da block, it can share index blocks with the - * previous block, so we only need to allocate a new index - * block every idxs leaf blocks. At ldxs**2 blocks, we need - * an additional index block, and at ldxs**3 blocks, yet - * another index blocks. - */ - if (ei->i_da_metadata_calc_len && - ei->i_da_metadata_calc_last_lblock+1 == lblock) { - int num = 0; - - if ((ei->i_da_metadata_calc_len % idxs) == 0) - num++; - if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0) - num++; - if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) { - num++; - ei->i_da_metadata_calc_len = 0; - } else - ei->i_da_metadata_calc_len++; - ei->i_da_metadata_calc_last_lblock++; - return num; - } - - /* - * In the worst case we need a new set of index blocks at - * every level of the inode's extent tree. - */ - ei->i_da_metadata_calc_len = 1; - ei->i_da_metadata_calc_last_lblock = lblock; - return ext_depth(inode) + 1; -} - static int ext4_ext_max_entries(struct inode *inode, int depth) { @@ -492,6 +449,7 @@ static int __ext4_ext_check(const char *function, unsigned int line, return 0; corrupted: + ext4_set_errno(inode->i_sb, -err); ext4_error_inode(inode, function, line, 0, "pblk %llu bad header/extent: %s - magic %x, " "entries %u, max %u(%u), depth %u(%u)", @@ -510,6 +468,30 @@ int ext4_ext_check_inode(struct inode *inode) return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode), 0); } +static void ext4_cache_extents(struct inode *inode, + struct ext4_extent_header *eh) +{ + struct ext4_extent *ex = EXT_FIRST_EXTENT(eh); + ext4_lblk_t prev = 0; + int i; + + for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) { + unsigned int status = EXTENT_STATUS_WRITTEN; + ext4_lblk_t lblk = le32_to_cpu(ex->ee_block); + int len = ext4_ext_get_actual_len(ex); + + if (prev && (prev != lblk)) + ext4_es_cache_extent(inode, prev, lblk - prev, ~0, + EXTENT_STATUS_HOLE); + + if (ext4_ext_is_unwritten(ex)) + status = EXTENT_STATUS_UNWRITTEN; + ext4_es_cache_extent(inode, lblk, len, + ext4_ext_pblock(ex), status); + prev = lblk + len; + } +} + static struct buffer_head * __read_extent_tree_block(const char *function, unsigned int line, struct inode *inode, ext4_fsblk_t pblk, int depth, @@ -544,26 +526,7 @@ __read_extent_tree_block(const char *function, unsigned int line, */ if (!(flags & EXT4_EX_NOCACHE) && depth == 0) { struct ext4_extent_header *eh = ext_block_hdr(bh); - struct ext4_extent *ex = EXT_FIRST_EXTENT(eh); - ext4_lblk_t prev = 0; - int i; - - for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) { - unsigned int status = EXTENT_STATUS_WRITTEN; - ext4_lblk_t lblk = le32_to_cpu(ex->ee_block); - int len = ext4_ext_get_actual_len(ex); - - if (prev && (prev != lblk)) - ext4_es_cache_extent(inode, prev, - lblk - prev, ~0, - EXTENT_STATUS_HOLE); - - if (ext4_ext_is_unwritten(ex)) - status = EXTENT_STATUS_UNWRITTEN; - ext4_es_cache_extent(inode, lblk, len, - ext4_ext_pblock(ex), status); - prev = lblk + len; - } + ext4_cache_extents(inode, eh); } return bh; errout: @@ -649,8 +612,9 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) ext_debug("path:"); for (k = 0; k <= l; k++, path++) { if (path->p_idx) { - ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), - ext4_idx_pblock(path->p_idx)); + ext_debug(" %d->%llu", + le32_to_cpu(path->p_idx->ei_block), + ext4_idx_pblock(path->p_idx)); } else if (path->p_ext) { ext_debug(" %d:[%d]%d:%llu ", le32_to_cpu(path->p_ext->ee_block), @@ -731,11 +695,12 @@ void ext4_ext_drop_refs(struct ext4_ext_path *path) if (!path) return; depth = path->p_depth; - for (i = 0; i <= depth; i++, path++) + for (i = 0; i <= depth; i++, path++) { if (path->p_bh) { brelse(path->p_bh); path->p_bh = NULL; } + } } /* @@ -777,8 +742,8 @@ ext4_ext_binsearch_idx(struct inode *inode, chix = ix = EXT_FIRST_INDEX(eh); for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) { - if (k != 0 && - le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) { + if (k != 0 && le32_to_cpu(ix->ei_block) <= + le32_to_cpu(ix[-1].ei_block)) { printk(KERN_DEBUG "k=%d, ix=0x%p, " "first=0x%p\n", k, ix, EXT_FIRST_INDEX(eh)); @@ -911,6 +876,8 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, path[0].p_bh = NULL; i = depth; + if (!(flags & EXT4_EX_NOCACHE) && depth == 0) + ext4_cache_extents(inode, eh); /* walk through the tree */ while (i) { ext_debug("depth %d: num %d, max %d\n", @@ -1632,17 +1599,16 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path) return EXT_MAX_BLOCKS; while (depth >= 0) { + struct ext4_ext_path *p = &path[depth]; + if (depth == path->p_depth) { /* leaf */ - if (path[depth].p_ext && - path[depth].p_ext != - EXT_LAST_EXTENT(path[depth].p_hdr)) - return le32_to_cpu(path[depth].p_ext[1].ee_block); + if (p->p_ext && p->p_ext != EXT_LAST_EXTENT(p->p_hdr)) + return le32_to_cpu(p->p_ext[1].ee_block); } else { /* index */ - if (path[depth].p_idx != - EXT_LAST_INDEX(path[depth].p_hdr)) - return le32_to_cpu(path[depth].p_idx[1].ei_block); + if (p->p_idx != EXT_LAST_INDEX(p->p_hdr)) + return le32_to_cpu(p->p_idx[1].ei_block); } depth--; } @@ -1742,9 +1708,9 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, return err; } -int -ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, - struct ext4_extent *ex2) +static int ext4_can_extents_be_merged(struct inode *inode, + struct ext4_extent *ex1, + struct ext4_extent *ex2) { unsigned short ext1_ee_len, ext2_ee_len; @@ -1758,11 +1724,6 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, le32_to_cpu(ex2->ee_block)) return 0; - /* - * To allow future support for preallocated extents to be added - * as an RO_COMPAT feature, refuse to merge to extents if - * this can result in the top bit of ee_len being set. - */ if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) return 0; @@ -1870,13 +1831,14 @@ static void ext4_ext_try_to_merge_up(handle_t *handle, } /* - * This function tries to merge the @ex extent to neighbours in the tree. - * return 1 if merge left else 0. + * This function tries to merge the @ex extent to neighbours in the tree, then + * tries to collapse the extent tree into the inode. */ static void ext4_ext_try_to_merge(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, - struct ext4_extent *ex) { + struct ext4_extent *ex) +{ struct ext4_extent_header *eh; unsigned int depth; int merge_done = 0; @@ -3718,9 +3680,6 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, max_zeroout = sbi->s_extent_max_zeroout_kb >> (inode->i_sb->s_blocksize_bits - 10); - if (IS_ENCRYPTED(inode)) - max_zeroout = 0; - /* * five cases: * 1. split the extent into three extents. @@ -4706,6 +4665,10 @@ retry: return ret > 0 ? ret2 : ret; } +static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len); + +static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len); + static long ext4_zero_range(struct file *file, loff_t offset, loff_t len, int mode) { @@ -4723,9 +4686,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, trace_ext4_zero_range(inode, offset, len, mode); - if (!S_ISREG(inode->i_mode)) - return -EINVAL; - /* Call ext4_force_commit to flush all data in case of data=journal. */ if (ext4_should_journal_data(inode)) { ret = ext4_force_commit(inode->i_sb); @@ -4765,7 +4725,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, } if (!(mode & FALLOC_FL_KEEP_SIZE) && - (offset + len > i_size_read(inode) || + (offset + len > inode->i_size || offset + len > EXT4_I(inode)->i_disksize)) { new_size = offset + len; ret = inode_newsize_ok(inode, new_size); @@ -4849,7 +4809,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, * Mark that we allocate beyond EOF so the subsequent truncate * can proceed even if the new size is the same as i_size. */ - if ((offset + len) > i_size_read(inode)) + if (offset + len > inode->i_size) ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); } ext4_mark_inode_dirty(handle, inode); @@ -4890,14 +4850,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) * range since we would need to re-encrypt blocks with a * different IV or XTS tweak (which are based on the logical * block number). - * - * XXX It's not clear why zero range isn't working, but we'll - * leave it disabled for encrypted inodes for now. This is a - * bug we should fix.... */ if (IS_ENCRYPTED(inode) && - (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE | - FALLOC_FL_ZERO_RANGE))) + (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE))) return -EOPNOTSUPP; /* Return error if mode is not supported */ @@ -4941,7 +4896,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) } if (!(mode & FALLOC_FL_KEEP_SIZE) && - (offset + len > i_size_read(inode) || + (offset + len > inode->i_size || offset + len > EXT4_I(inode)->i_disksize)) { new_size = offset + len; ret = inode_newsize_ok(inode, new_size); @@ -5268,7 +5223,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, { int depth, err = 0; struct ext4_extent *ex_start, *ex_last; - bool update = 0; + bool update = false; depth = path->p_depth; while (depth >= 0) { @@ -5284,7 +5239,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, goto out; if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) - update = 1; + update = true; while (ex_start <= ex_last) { if (SHIFT == SHIFT_LEFT) { @@ -5472,7 +5427,7 @@ out: * This implements the fallocate's collapse range functionality for ext4 * Returns: 0 and non-zero on error. */ -int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) +static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) { struct super_block *sb = inode->i_sb; ext4_lblk_t punch_start, punch_stop; @@ -5489,12 +5444,8 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) return -EOPNOTSUPP; - /* Collapse range works only on fs block size aligned offsets. */ - if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) || - len & (EXT4_CLUSTER_SIZE(sb) - 1)) - return -EINVAL; - - if (!S_ISREG(inode->i_mode)) + /* Collapse range works only on fs cluster size aligned regions. */ + if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb))) return -EINVAL; trace_ext4_collapse_range(inode, offset, len); @@ -5514,7 +5465,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) * There is no need to overlap collapse range with EOF, in which case * it is effectively a truncate operation */ - if (offset + len >= i_size_read(inode)) { + if (offset + len >= inode->i_size) { ret = -EINVAL; goto out_mutex; } @@ -5592,7 +5543,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) goto out_stop; } - new_size = i_size_read(inode) - len; + new_size = inode->i_size - len; i_size_write(inode, new_size); EXT4_I(inode)->i_disksize = new_size; @@ -5620,7 +5571,7 @@ out_mutex: * by len bytes. * Returns 0 on success, error otherwise. */ -int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) +static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) { struct super_block *sb = inode->i_sb; handle_t *handle; @@ -5639,14 +5590,10 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) return -EOPNOTSUPP; - /* Insert range works only on fs block size aligned offsets. */ - if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) || - len & (EXT4_CLUSTER_SIZE(sb) - 1)) + /* Insert range works only on fs cluster size aligned regions. */ + if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb))) return -EINVAL; - if (!S_ISREG(inode->i_mode)) - return -EOPNOTSUPP; - trace_ext4_insert_range(inode, offset, len); offset_lblk = offset >> EXT4_BLOCK_SIZE_BITS(sb); @@ -5666,14 +5613,14 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) goto out_mutex; } - /* Check for wrap through zero */ - if (inode->i_size + len > inode->i_sb->s_maxbytes) { + /* Check whether the maximum file size would be exceeded */ + if (len > inode->i_sb->s_maxbytes - inode->i_size) { ret = -EFBIG; goto out_mutex; } - /* Offset should be less than i_size */ - if (offset >= i_size_read(inode)) { + /* Offset must be less than i_size */ + if (offset >= inode->i_size) { ret = -EINVAL; goto out_mutex; } |