From 1e21196c8e3e210cbef130e23e40c4adc3d9ce10 Mon Sep 17 00:00:00 2001 From: Eric Whitney Date: Sun, 30 Jul 2017 22:26:40 -0400 Subject: ext4: correct comment references to ext4_ext_direct_IO() Commit 914f82a32d0268847 "ext4: refactor direct IO code" deleted ext4_ext_direct_IO(), but references to that function remain in comments. Update them to refer to ext4_direct_IO_write(). Signed-off-by: Eric Whitney Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger Reviewed-by: Jan Kara --- fs/ext4/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3c600f02673f..2e6c02258ee2 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -892,7 +892,7 @@ static int ext4_dio_get_block_unwritten_async(struct inode *inode, /* * Get block function for non-AIO DIO writes when we create unwritten extent if * blocks are not allocated yet. The extent will be converted to written - * after IO is complete from ext4_ext_direct_IO() function. + * after IO is complete by ext4_direct_IO_write(). */ static int ext4_dio_get_block_unwritten_sync(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) @@ -907,7 +907,7 @@ static int ext4_dio_get_block_unwritten_sync(struct inode *inode, /* * Mark inode as having pending DIO writes to unwritten extents. - * ext4_ext_direct_IO() checks this flag and converts extents to + * ext4_direct_IO_write() checks this flag and converts extents to * written. */ if (!ret && buffer_unwritten(bh_result)) -- cgit v1.2.3 From a627b0a7c15ee4d2c87a86d5be5c8167382e8d0d Mon Sep 17 00:00:00 2001 From: Eric Whitney Date: Sun, 30 Jul 2017 22:30:11 -0400 Subject: ext4: remove unused metadata accounting variables Two variables in ext4_inode_info, i_reserved_meta_blocks and i_allocated_meta_blocks, are unused. Removing them saves a little memory per in-memory inode and cleans up clutter in several tracepoints. Adjust tracepoint output from ext4_alloc_da_blocks() for consistency and fix a typo and whitespace near these changes. Signed-off-by: Eric Whitney Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/ext4/ext4.h | 6 ++---- fs/ext4/super.c | 2 -- include/trace/events/ext4.h | 35 ++++++++--------------------------- 3 files changed, 10 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 9ebde0cd632e..dcbcd9d444d1 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -961,7 +961,7 @@ struct ext4_inode_info { /* * i_block_group is the number of the block group which contains * this file's inode. Constant across the lifetime of the inode, - * it is ued for making block allocation decisions - we try to + * it is used for making block allocation decisions - we try to * place a file's data blocks near its inode block, and new inodes * near to their parent directory's inode. */ @@ -1049,10 +1049,8 @@ struct ext4_inode_info { ext4_group_t i_last_alloc_group; /* allocation reservation info for delalloc */ - /* In case of bigalloc, these refer to clusters rather than blocks */ + /* In case of bigalloc, this refer to clusters rather than blocks */ unsigned int i_reserved_data_blocks; - unsigned int i_reserved_meta_blocks; - unsigned int i_allocated_meta_blocks; ext4_lblk_t i_da_metadata_calc_last_lblock; int i_da_metadata_calc_len; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0886fe82e9c4..d61a70e2193a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -978,8 +978,6 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ei->i_es_shk_nr = 0; ei->i_es_shrink_lblk = 0; ei->i_reserved_data_blocks = 0; - ei->i_reserved_meta_blocks = 0; - ei->i_allocated_meta_blocks = 0; ei->i_da_metadata_calc_len = 0; ei->i_da_metadata_calc_last_lblock = 0; spin_lock_init(&(ei->i_block_reservation_lock)); diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index dfae175ddebc..9c3bc3883d2f 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -937,21 +937,19 @@ TRACE_EVENT(ext4_alloc_da_blocks, TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( unsigned int, data_blocks ) - __field( unsigned int, meta_blocks ) + __field( unsigned int, data_blocks ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->data_blocks = EXT4_I(inode)->i_reserved_data_blocks; - __entry->meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks; ), - TP_printk("dev %d,%d ino %lu data_blocks %u meta_blocks %u", + TP_printk("dev %d,%d ino %lu reserved_data_blocks %u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, - __entry->data_blocks, __entry->meta_blocks) + __entry->data_blocks) ); TRACE_EVENT(ext4_mballoc_alloc, @@ -1153,8 +1151,6 @@ TRACE_EVENT(ext4_da_update_reserve_space, __field( __u64, i_blocks ) __field( int, used_blocks ) __field( int, reserved_data_blocks ) - __field( int, reserved_meta_blocks ) - __field( int, allocated_meta_blocks ) __field( int, quota_claim ) __field( __u16, mode ) ), @@ -1166,22 +1162,16 @@ TRACE_EVENT(ext4_da_update_reserve_space, __entry->used_blocks = used_blocks; __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks; - __entry->reserved_meta_blocks = - EXT4_I(inode)->i_reserved_meta_blocks; - __entry->allocated_meta_blocks = - EXT4_I(inode)->i_allocated_meta_blocks; __entry->quota_claim = quota_claim; __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d " - "reserved_data_blocks %d reserved_meta_blocks %d " - "allocated_meta_blocks %d quota_claim %d", + "reserved_data_blocks %d quota_claim %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->i_blocks, __entry->used_blocks, __entry->reserved_data_blocks, - __entry->reserved_meta_blocks, __entry->allocated_meta_blocks, __entry->quota_claim) ); @@ -1195,7 +1185,6 @@ TRACE_EVENT(ext4_da_reserve_space, __field( ino_t, ino ) __field( __u64, i_blocks ) __field( int, reserved_data_blocks ) - __field( int, reserved_meta_blocks ) __field( __u16, mode ) ), @@ -1204,17 +1193,15 @@ TRACE_EVENT(ext4_da_reserve_space, __entry->ino = inode->i_ino; __entry->i_blocks = inode->i_blocks; __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks; - __entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks; __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu " - "reserved_data_blocks %d reserved_meta_blocks %d", + "reserved_data_blocks %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->i_blocks, - __entry->reserved_data_blocks, - __entry->reserved_meta_blocks) + __entry->reserved_data_blocks) ); TRACE_EVENT(ext4_da_release_space, @@ -1228,8 +1215,6 @@ TRACE_EVENT(ext4_da_release_space, __field( __u64, i_blocks ) __field( int, freed_blocks ) __field( int, reserved_data_blocks ) - __field( int, reserved_meta_blocks ) - __field( int, allocated_meta_blocks ) __field( __u16, mode ) ), @@ -1239,19 +1224,15 @@ TRACE_EVENT(ext4_da_release_space, __entry->i_blocks = inode->i_blocks; __entry->freed_blocks = freed_blocks; __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks; - __entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks; - __entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks; __entry->mode = inode->i_mode; ), TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu freed_blocks %d " - "reserved_data_blocks %d reserved_meta_blocks %d " - "allocated_meta_blocks %d", + "reserved_data_blocks %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->i_blocks, - __entry->freed_blocks, __entry->reserved_data_blocks, - __entry->reserved_meta_blocks, __entry->allocated_meta_blocks) + __entry->freed_blocks, __entry->reserved_data_blocks) ); DECLARE_EVENT_CLASS(ext4__bitmap_load, -- cgit v1.2.3 From 397e434176bb62bc6068d2210af1d876c6212a7e Mon Sep 17 00:00:00 2001 From: "Ernesto A. Fernández" Date: Sun, 30 Jul 2017 22:43:41 -0400 Subject: ext4: preserve i_mode if __ext4_set_acl() fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When changing a file's acl mask, __ext4_set_acl() will first set the group bits of i_mode to the value of the mask, and only then set the actual extended attribute representing the new acl. If the second part fails (due to lack of space, for example) and the file had no acl attribute to begin with, the system will from now on assume that the mask permission bits are actual group permission bits, potentially granting access to the wrong users. Prevent this by only changing the inode mode after the acl has been set. Signed-off-by: Ernesto A. Fernández Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/ext4/acl.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 09441ae07a5b..2985cd0a640d 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -189,16 +189,17 @@ __ext4_set_acl(handle_t *handle, struct inode *inode, int type, void *value = NULL; size_t size = 0; int error; + int update_mode = 0; + umode_t mode = inode->i_mode; switch (type) { case ACL_TYPE_ACCESS: name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; if (acl) { - error = posix_acl_update_mode(inode, &inode->i_mode, &acl); + error = posix_acl_update_mode(inode, &mode, &acl); if (error) return error; - inode->i_ctime = current_time(inode); - ext4_mark_inode_dirty(handle, inode); + update_mode = 1; } break; @@ -221,8 +222,14 @@ __ext4_set_acl(handle_t *handle, struct inode *inode, int type, value, size, xattr_flags); kfree(value); - if (!error) + if (!error) { set_cached_acl(inode, type, acl); + if (update_mode) { + inode->i_mode = mode; + inode->i_ctime = current_time(inode); + ext4_mark_inode_dirty(handle, inode); + } + } return error; } -- cgit v1.2.3 From a3bb2d5587521eea6dab2d05326abb0afb460abd Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sun, 30 Jul 2017 23:33:01 -0400 Subject: ext4: Don't clear SGID when inheriting ACLs When new directory 'DIR1' is created in a directory 'DIR0' with SGID bit set, DIR1 is expected to have SGID bit set (and owning group equal to the owning group of 'DIR0'). However when 'DIR0' also has some default ACLs that 'DIR1' inherits, setting these ACLs will result in SGID bit on 'DIR1' to get cleared if user is not member of the owning group. Fix the problem by moving posix_acl_update_mode() out of __ext4_set_acl() into ext4_set_acl(). That way the function will not be called when inheriting ACLs which is what we want as it prevents SGID bit clearing and the mode has been properly set by posix_acl_create() anyway. Fixes: 073931017b49d9458aa351605b43a7e34598caef CC: stable@vger.kernel.org Signed-off-by: Theodore Ts'o Signed-off-by: Jan Kara Reviewed-by: Andreas Gruenbacher --- fs/ext4/acl.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 2985cd0a640d..46ff2229ff5e 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -189,18 +189,10 @@ __ext4_set_acl(handle_t *handle, struct inode *inode, int type, void *value = NULL; size_t size = 0; int error; - int update_mode = 0; - umode_t mode = inode->i_mode; switch (type) { case ACL_TYPE_ACCESS: name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; - if (acl) { - error = posix_acl_update_mode(inode, &mode, &acl); - if (error) - return error; - update_mode = 1; - } break; case ACL_TYPE_DEFAULT: @@ -224,11 +216,6 @@ __ext4_set_acl(handle_t *handle, struct inode *inode, int type, kfree(value); if (!error) { set_cached_acl(inode, type, acl); - if (update_mode) { - inode->i_mode = mode; - inode->i_ctime = current_time(inode); - ext4_mark_inode_dirty(handle, inode); - } } return error; @@ -240,6 +227,8 @@ ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type) handle_t *handle; int error, credits, retries = 0; size_t acl_size = acl ? ext4_acl_size(acl->a_count) : 0; + umode_t mode = inode->i_mode; + int update_mode = 0; error = dquot_initialize(inode); if (error) @@ -254,7 +243,20 @@ retry: if (IS_ERR(handle)) return PTR_ERR(handle); + if ((type == ACL_TYPE_ACCESS) && acl) { + error = posix_acl_update_mode(inode, &mode, &acl); + if (error) + goto out_stop; + update_mode = 1; + } + error = __ext4_set_acl(handle, inode, type, acl, 0 /* xattr_flags */); + if (!error && update_mode) { + inode->i_mode = mode; + inode->i_ctime = current_time(inode); + ext4_mark_inode_dirty(handle, inode); + } +out_stop: ext4_journal_stop(handle); if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; -- cgit v1.2.3 From 191eac33009e6a6d31e87cfa425a20d0e79704b4 Mon Sep 17 00:00:00 2001 From: Emoly Liu Date: Mon, 31 Jul 2017 00:40:22 -0400 Subject: ext4: error should be cleared if ea_inode isn't added to the cache For Lustre, if ea_inode fails in hash validation but passes parent inode and generation checks, it won't be added to the cache as well as the error "-EFSCORRUPTED" should be cleared, otherwise it will cause "Structure needs cleaning" when running getfattr command. Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-9723 Cc: stable@vger.kernel.org Fixes: dec214d00e0d78a08b947d7dccdfdb84407a9f4d Signed-off-by: Emoly Liu Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger Reviewed-by: tahsin@google.com --- fs/ext4/xattr.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index cff4f41ced61..de217a094733 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -451,6 +451,7 @@ ext4_xattr_inode_get(struct inode *inode, struct ext4_xattr_entry *entry, } /* Do not add ea_inode to the cache. */ ea_inode_cache = NULL; + err = 0; } else if (err) goto out; -- cgit v1.2.3 From 9c5d58fb9e1df8c9ead4daf9e770ce61e4256e02 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 31 Jul 2017 00:55:34 -0400 Subject: ext4: convert swap_inode_data() over to use swap() on most of the fields For some odd reason, it forces a byte-by-byte copy of each field. A plain old swap() on most of these fields would be more efficient. We do need to retain the memswap of i_data however as that field is an array. Signed-off-by: Theodore Ts'o Signed-off-by: Jeff Layton Reviewed-by: Jan Kara --- fs/ext4/ioctl.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 42b3a73143cf..4f4a8391585c 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -64,18 +64,16 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2) ei1 = EXT4_I(inode1); ei2 = EXT4_I(inode2); - memswap(&inode1->i_flags, &inode2->i_flags, sizeof(inode1->i_flags)); - memswap(&inode1->i_version, &inode2->i_version, - sizeof(inode1->i_version)); - memswap(&inode1->i_blocks, &inode2->i_blocks, - sizeof(inode1->i_blocks)); - memswap(&inode1->i_bytes, &inode2->i_bytes, sizeof(inode1->i_bytes)); - memswap(&inode1->i_atime, &inode2->i_atime, sizeof(inode1->i_atime)); - memswap(&inode1->i_mtime, &inode2->i_mtime, sizeof(inode1->i_mtime)); + swap(inode1->i_flags, inode2->i_flags); + swap(inode1->i_version, inode2->i_version); + swap(inode1->i_blocks, inode2->i_blocks); + swap(inode1->i_bytes, inode2->i_bytes); + swap(inode1->i_atime, inode2->i_atime); + swap(inode1->i_mtime, inode2->i_mtime); memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data)); - memswap(&ei1->i_flags, &ei2->i_flags, sizeof(ei1->i_flags)); - memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize)); + swap(ei1->i_flags, ei2->i_flags); + swap(ei1->i_disksize, ei2->i_disksize); ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS); ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS); -- cgit v1.2.3 From e45105772db41c5318b2a7ec1c420183183414e3 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Sat, 5 Aug 2017 13:11:57 -0400 Subject: ext4: release discard bio after sending discard commands We've changed the discard command handling into parallel manner. But, in this change, I forgot decreasing the usage count of the bio which was used to send discard request. I'm sorry about that. Fixes: a015434480dc ("ext4: send parallel discards on commit completions") Signed-off-by: Daeho Jeong Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/ext4/mballoc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 581e357e8406..8779893d74e5 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2892,8 +2892,10 @@ void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid) break; } - if (discard_bio) + if (discard_bio) { submit_bio_wait(discard_bio); + bio_put(discard_bio); + } } list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list) -- cgit v1.2.3 From fcf5ea10992fbac3c7473a1db33d56a139333cd1 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sat, 5 Aug 2017 17:43:24 -0400 Subject: ext4: fix SEEK_HOLE/SEEK_DATA for blocksize < pagesize ext4_find_unwritten_pgoff() does not properly handle a situation when starting index is in the middle of a page and blocksize < pagesize. The following command shows the bug on filesystem with 1k blocksize: xfs_io -f -c "falloc 0 4k" \ -c "pwrite 1k 1k" \ -c "pwrite 3k 1k" \ -c "seek -a -r 0" foo In this example, neither lseek(fd, 1024, SEEK_HOLE) nor lseek(fd, 2048, SEEK_DATA) will return the correct result. Fix the problem by neglecting buffers in a page before starting offset. Reported-by: Andreas Gruenbacher Signed-off-by: Theodore Ts'o Signed-off-by: Jan Kara CC: stable@vger.kernel.org # 3.8+ --- fs/ext4/file.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 58294c9a7e1d..0d7cf0cc9b87 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -537,6 +537,8 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, lastoff = page_offset(page); bh = head = page_buffers(page); do { + if (lastoff + bh->b_size <= startoff) + goto next; if (buffer_uptodate(bh) || buffer_unwritten(bh)) { if (whence == SEEK_DATA) @@ -551,6 +553,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, unlock_page(page); goto out; } +next: lastoff += bh->b_size; bh = bh->b_this_page; } while (bh != head); -- cgit v1.2.3 From 381cebfe72e17e495281c07c976797f5a397515a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 5 Aug 2017 19:00:31 -0400 Subject: ext4: silence array overflow warning I get a static checker warning: fs/ext4/ext4.h:3091 ext4_set_de_type() error: buffer overflow 'ext4_type_by_mode' 15 <= 15 It seems unlikely that we would hit this read overflow in real life, but it's also simple enough to make the array 16 bytes instead of 15. Signed-off-by: Dan Carpenter Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index dcbcd9d444d1..2d6d4c952492 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3072,7 +3072,7 @@ extern int ext4_handle_dirty_dirent_node(handle_t *handle, struct inode *inode, struct buffer_head *bh); #define S_SHIFT 12 -static const unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = { +static const unsigned char ext4_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = { [S_IFREG >> S_SHIFT] = EXT4_FT_REG_FILE, [S_IFDIR >> S_SHIFT] = EXT4_FT_DIR, [S_IFCHR >> S_SHIFT] = EXT4_FT_CHRDEV, -- cgit v1.2.3 From c7414892067204fcb8f8ebb4309d0fdd8c7242fe Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Sat, 5 Aug 2017 19:47:34 -0400 Subject: ext4: fix dir_nlink behaviour The dir_nlink feature has been enabled by default for new ext4 filesystems since e2fsprogs-1.41 in 2008, and was automatically enabled by the kernel for older ext4 filesystems since the dir_nlink feature was added with ext4 in kernel 2.6.28+ when the subdirectory count exceeded EXT4_LINK_MAX-1. Automatically adding the file system features such as dir_nlink is generally frowned upon, since it could cause the file system to not be mountable on older kernel, thus preventing the administrator from rolling back to an older kernel if necessary. In this case, the administrator might also want to disable the feature because glibc's fts_read() function does not correctly optimize directory traversal for directories that use st_nlinks field of 1 to indicate that the number of links in the directory are not tracked by the file system, and could fail to traverse the full directory hierarchy. Fortunately, in the past ten years very few users have complained about incomplete file system traversal by glibc's fts_read(). This commit also changes ext4_inc_count() to allow i_nlinks to reach the full EXT4_LINK_MAX links on the parent directory (including "." and "..") before changing i_links_count to be 1. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=196405 Signed-off-by: Andreas Dilger Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 3 ++- fs/ext4/namei.c | 21 ++++++++++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 2d6d4c952492..d50229e92c55 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2020,7 +2020,8 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) #define is_dx(dir) (ext4_has_feature_dir_index((dir)->i_sb) && \ ext4_test_inode_flag((dir), EXT4_INODE_INDEX)) -#define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX) +#define EXT4_DIR_LINK_MAX(dir) unlikely((dir)->i_nlink >= EXT4_LINK_MAX && \ + !(ext4_has_feature_dir_nlink((dir)->i_sb) && is_dx(dir))) #define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) /* Legal values for the dx_root hash_version field: */ diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 13f0cadb1238..cc986b824181 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2395,19 +2395,22 @@ out: } /* - * DIR_NLINK feature is set if 1) nlinks > EXT4_LINK_MAX or 2) nlinks == 2, - * since this indicates that nlinks count was previously 1. + * Set directory link count to 1 if nlinks > EXT4_LINK_MAX, or if nlinks == 2 + * since this indicates that nlinks count was previously 1 to avoid overflowing + * the 16-bit i_links_count field on disk. Directories with i_nlink == 1 mean + * that subdirectory link counts are not being maintained accurately. + * + * The caller has already checked for i_nlink overflow in case the DIR_LINK + * feature is not enabled and returned -EMLINK. The is_dx() check is a proxy + * for checking S_ISDIR(inode) (since the INODE_INDEX feature will not be set + * on regular files) and to avoid creating huge/slow non-HTREE directories. */ static void ext4_inc_count(handle_t *handle, struct inode *inode) { inc_nlink(inode); - if (is_dx(inode) && inode->i_nlink > 1) { - /* limit is 16-bit i_links_count */ - if (inode->i_nlink >= EXT4_LINK_MAX || inode->i_nlink == 2) { - set_nlink(inode, 1); - ext4_set_feature_dir_nlink(inode->i_sb); - } - } + if (is_dx(inode) && + (inode->i_nlink > EXT4_LINK_MAX || inode->i_nlink == 2)) + set_nlink(inode, 1); } /* -- cgit v1.2.3 From 2df2c3402fc81918a888e1ec711369f6014471f2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 5 Aug 2017 21:57:46 -0400 Subject: ext4: fix warning about stack corruption After commit 62d1034f53e3 ("fortify: use WARN instead of BUG for now"), we get a warning about possible stack overflow from a memcpy that was not strictly bounded to the size of the local variable: inlined from 'ext4_mb_seq_groups_show' at fs/ext4/mballoc.c:2322:2: include/linux/string.h:309:9: error: '__builtin_memcpy': writing between 161 and 1116 bytes into a region of size 160 overflows the destination [-Werror=stringop-overflow=] We actually had a bug here that would have been found by the warning, but it was already fixed last year in commit 30a9d7afe70e ("ext4: fix stack memory corruption with 64k block size"). This replaces the fixed-length structure on the stack with a variable-length structure, using the correct upper bound that tells the compiler that everything is really fine here. I also change the loop count to check for the same upper bound for consistency, but the existing code is already correct here. Note that while clang won't allow certain kinds of variable-length arrays in structures, this particular instance is fine, as the array is at the end of the structure, and the size is strictly bounded. Signed-off-by: Arnd Bergmann Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 8779893d74e5..5a1052627a81 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2295,9 +2295,12 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) int err, buddy_loaded = 0; struct ext4_buddy e4b; struct ext4_group_info *grinfo; + unsigned char blocksize_bits = min_t(unsigned char, + sb->s_blocksize_bits, + EXT4_MAX_BLOCK_LOG_SIZE); struct sg { struct ext4_group_info info; - ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2]; + ext4_grpblk_t counters[blocksize_bits + 2]; } sg; group--; @@ -2306,8 +2309,6 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) " 2^0 2^1 2^2 2^3 2^4 2^5 2^6 " " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]\n"); - i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) + - sizeof(struct ext4_group_info); grinfo = ext4_get_group_info(sb, group); /* Load the group info in memory only if not already loaded. */ if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) { @@ -2319,7 +2320,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) buddy_loaded = 1; } - memcpy(&sg, ext4_get_group_info(sb, group), i); + memcpy(&sg, ext4_get_group_info(sb, group), sizeof(sg)); if (buddy_loaded) ext4_mb_unload_buddy(&e4b); @@ -2327,7 +2328,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free, sg.info.bb_fragments, sg.info.bb_first_free); for (i = 0; i <= 13; i++) - seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ? + seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ? sg.info.bb_counters[i] : 0); seq_printf(seq, " ]\n"); -- cgit v1.2.3 From 77a2e84d51729da7957d19283523af7b571d61f6 Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Sat, 5 Aug 2017 22:15:45 -0400 Subject: ext4: remove unused mode parameter ext4_alloc_file_blocks() does not use its mode parameter. Remove it. Signed-off-by: Tahsin Erdogan Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e0a8425ff74d..230ca74c4841 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4652,7 +4652,7 @@ retry: static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, ext4_lblk_t len, loff_t new_size, - int flags, int mode) + int flags) { struct inode *inode = file_inode(file); handle_t *handle; @@ -4815,7 +4815,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, round_down(offset, 1 << blkbits) >> blkbits, (round_up((offset + len), 1 << blkbits) - round_down(offset, 1 << blkbits)) >> blkbits, - new_size, flags, mode); + new_size, flags); if (ret) goto out_dio; @@ -4841,7 +4841,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, inode->i_mtime = inode->i_ctime = current_time(inode); ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, - flags, mode); + flags); up_write(&EXT4_I(inode)->i_mmap_sem); if (ret) goto out_dio; @@ -4976,8 +4976,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) ext4_inode_block_unlocked_dio(inode); inode_dio_wait(inode); - ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, - flags, mode); + ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags); ext4_inode_resume_unlocked_dio(inode); if (ret) goto out; -- cgit v1.2.3 From ec00022030da5761518476096626338bd67df57a Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Sat, 5 Aug 2017 22:41:42 -0400 Subject: ext4: inplace xattr block update fails to deduplicate blocks When an xattr block has a single reference, block is updated inplace and it is reinserted to the cache. Later, a cache lookup is performed to see whether an existing block has the same contents. This cache lookup will most of the time return the just inserted entry so deduplication is not achieved. Running the following test script will produce two xattr blocks which can be observed in "File ACL: " line of debugfs output: mke2fs -b 1024 -I 128 -F -O extent /dev/sdb 1G mount /dev/sdb /mnt/sdb touch /mnt/sdb/{x,y} setfattr -n user.1 -v aaa /mnt/sdb/x setfattr -n user.2 -v bbb /mnt/sdb/x setfattr -n user.1 -v aaa /mnt/sdb/y setfattr -n user.2 -v bbb /mnt/sdb/y debugfs -R 'stat x' /dev/sdb | cat debugfs -R 'stat y' /dev/sdb | cat This patch defers the reinsertion to the cache so that we can locate other blocks with the same contents. Signed-off-by: Tahsin Erdogan Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger --- fs/ext4/xattr.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index de217a094733..4025666c5991 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1816,9 +1816,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, ea_bdebug(bs->bh, "modifying in-place"); error = ext4_xattr_set_entry(i, s, handle, inode, true /* is_block */); - if (!error) - ext4_xattr_block_cache_insert(ea_block_cache, - bs->bh); ext4_xattr_block_csum_set(inode, bs->bh); unlock_buffer(bs->bh); if (error == -EFSCORRUPTED) @@ -1974,6 +1971,7 @@ inserted: } else if (bs->bh && s->base == bs->bh->b_data) { /* We were modifying this block in-place. */ ea_bdebug(bs->bh, "keeping this block"); + ext4_xattr_block_cache_insert(ea_block_cache, bs->bh); new_bh = bs->bh; get_bh(new_bh); } else { -- cgit v1.2.3 From 9699d4f91d9bd2f70dcc37afe3c9f18145ab2dba Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Sun, 6 Aug 2017 00:07:01 -0400 Subject: ext4: make xattr inode reads faster ext4_xattr_inode_read() currently reads each block sequentially while waiting for io operation to complete before moving on to the next block. This prevents request merging in block layer. Add a ext4_bread_batch() function that starts reads for all blocks then optionally waits for them to complete. A similar logic is used in ext4_find_entry(), so update that code to use the new function. Signed-off-by: Tahsin Erdogan Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 2 ++ fs/ext4/inode.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ fs/ext4/namei.c | 43 ++++++++++++++----------------------------- fs/ext4/xattr.c | 51 ++++++++++++++++++++++++++++++++------------------- 4 files changed, 92 insertions(+), 48 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index d50229e92c55..a2bb7d2870e4 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2461,6 +2461,8 @@ extern void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid); int ext4_inode_is_fast_symlink(struct inode *inode); struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int); struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int); +int ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count, + bool wait, struct buffer_head **bhs); int ext4_get_block_unwritten(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); int ext4_get_block(struct inode *inode, sector_t iblock, diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2e6c02258ee2..56bca45bcdf4 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1015,6 +1015,50 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, return ERR_PTR(-EIO); } +/* Read a contiguous batch of blocks. */ +int ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count, + bool wait, struct buffer_head **bhs) +{ + int i, err; + + for (i = 0; i < bh_count; i++) { + bhs[i] = ext4_getblk(NULL, inode, block + i, 0 /* map_flags */); + if (IS_ERR(bhs[i])) { + err = PTR_ERR(bhs[i]); + bh_count = i; + goto out_brelse; + } + } + + for (i = 0; i < bh_count; i++) + /* Note that NULL bhs[i] is valid because of holes. */ + if (bhs[i] && !buffer_uptodate(bhs[i])) + ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, + &bhs[i]); + + if (!wait) + return 0; + + for (i = 0; i < bh_count; i++) + if (bhs[i]) + wait_on_buffer(bhs[i]); + + for (i = 0; i < bh_count; i++) { + if (bhs[i] && !buffer_uptodate(bhs[i])) { + err = -EIO; + goto out_brelse; + } + } + return 0; + +out_brelse: + for (i = 0; i < bh_count; i++) { + brelse(bhs[i]); + bhs[i] = NULL; + } + return err; +} + int ext4_walk_page_buffers(handle_t *handle, struct buffer_head *head, unsigned from, diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index cc986b824181..c1cf020d1889 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1342,13 +1342,12 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, struct super_block *sb; struct buffer_head *bh_use[NAMEI_RA_SIZE]; struct buffer_head *bh, *ret = NULL; - ext4_lblk_t start, block, b; + ext4_lblk_t start, block; const u8 *name = d_name->name; - int ra_max = 0; /* Number of bh's in the readahead + size_t ra_max = 0; /* Number of bh's in the readahead buffer, bh_use[] */ - int ra_ptr = 0; /* Current index into readahead + size_t ra_ptr = 0; /* Current index into readahead buffer */ - int num = 0; ext4_lblk_t nblocks; int i, namelen, retval; struct ext4_filename fname; @@ -1411,31 +1410,17 @@ restart: if (ra_ptr >= ra_max) { /* Refill the readahead buffer */ ra_ptr = 0; - b = block; - for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) { - /* - * Terminate if we reach the end of the - * directory and must wrap, or if our - * search has finished at this block. - */ - if (b >= nblocks || (num && block == start)) { - bh_use[ra_max] = NULL; - break; - } - num++; - bh = ext4_getblk(NULL, dir, b++, 0); - if (IS_ERR(bh)) { - if (ra_max == 0) { - ret = bh; - goto cleanup_and_exit; - } - break; - } - bh_use[ra_max] = bh; - if (bh) - ll_rw_block(REQ_OP_READ, - REQ_META | REQ_PRIO, - 1, &bh); + if (block < start) + ra_max = start - block; + else + ra_max = nblocks - block; + ra_max = min(ra_max, ARRAY_SIZE(bh_use)); + retval = ext4_bread_batch(dir, block, ra_max, + false /* wait */, bh_use); + if (retval) { + ret = ERR_PTR(retval); + ra_max = 0; + goto cleanup_and_exit; } } if ((bh = bh_use[ra_ptr++]) == NULL) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 4025666c5991..5fa912e5d2a6 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -317,28 +317,41 @@ static void ext4_xattr_inode_set_hash(struct inode *ea_inode, u32 hash) */ static int ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t size) { - unsigned long block = 0; - struct buffer_head *bh; - int blocksize = ea_inode->i_sb->s_blocksize; - size_t csize, copied = 0; - void *copy_pos = buf; - - while (copied < size) { - csize = (size - copied) > blocksize ? blocksize : size - copied; - bh = ext4_bread(NULL, ea_inode, block, 0); - if (IS_ERR(bh)) - return PTR_ERR(bh); - if (!bh) - return -EFSCORRUPTED; + int blocksize = 1 << ea_inode->i_blkbits; + int bh_count = (size + blocksize - 1) >> ea_inode->i_blkbits; + int tail_size = (size % blocksize) ?: blocksize; + struct buffer_head *bhs_inline[8]; + struct buffer_head **bhs = bhs_inline; + int i, ret; + + if (bh_count > ARRAY_SIZE(bhs_inline)) { + bhs = kmalloc_array(bh_count, sizeof(*bhs), GFP_NOFS); + if (!bhs) + return -ENOMEM; + } - memcpy(copy_pos, bh->b_data, csize); - brelse(bh); + ret = ext4_bread_batch(ea_inode, 0 /* block */, bh_count, + true /* wait */, bhs); + if (ret) + goto free_bhs; - copy_pos += csize; - block += 1; - copied += csize; + for (i = 0; i < bh_count; i++) { + /* There shouldn't be any holes in ea_inode. */ + if (!bhs[i]) { + ret = -EFSCORRUPTED; + goto put_bhs; + } + memcpy((char *)buf + blocksize * i, bhs[i]->b_data, + i < bh_count - 1 ? blocksize : tail_size); } - return 0; + ret = 0; +put_bhs: + for (i = 0; i < bh_count; i++) + brelse(bhs[i]); +free_bhs: + if (bhs != bhs_inline) + kfree(bhs); + return ret; } static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, -- cgit v1.2.3 From 3b10fdc6d8bd048f4fb14af5eda2051ace7b8b16 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Sun, 6 Aug 2017 00:27:38 -0400 Subject: ext4: fix forgetten xattr lock protection in ext4_expand_extra_isize We should avoid the contention between the i_extra_isize update and the inline data insertion, so move the xattr trylock in front of i_extra_isize update. Signed-off-by: Miao Xie Reviewed-by: Wang Shilong --- fs/ext4/inode.c | 18 ++++++++++++++++-- fs/ext4/xattr.c | 10 ---------- 2 files changed, 16 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 56bca45bcdf4..5dabbf276651 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5713,10 +5713,15 @@ static int ext4_expand_extra_isize(struct inode *inode, { struct ext4_inode *raw_inode; struct ext4_xattr_ibody_header *header; + int no_expand; + int error; if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) return 0; + if (ext4_write_trylock_xattr(inode, &no_expand) == 0) + return 0; + raw_inode = ext4_raw_inode(&iloc); header = IHDR(inode, raw_inode); @@ -5728,12 +5733,21 @@ static int ext4_expand_extra_isize(struct inode *inode, EXT4_I(inode)->i_extra_isize, 0, new_extra_isize - EXT4_I(inode)->i_extra_isize); EXT4_I(inode)->i_extra_isize = new_extra_isize; + ext4_write_unlock_xattr(inode, &no_expand); return 0; } /* try to expand with EAs present */ - return ext4_expand_extra_isize_ea(inode, new_extra_isize, - raw_inode, handle); + error = ext4_expand_extra_isize_ea(inode, new_extra_isize, + raw_inode, handle); + if (error) { + /* + * Inode size expansion failed; don't try again + */ + no_expand = 1; + } + ext4_write_unlock_xattr(inode, &no_expand); + return error; } /* diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 5fa912e5d2a6..862ba3891398 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2645,10 +2645,6 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, int error = 0, tried_min_extra_isize = 0; int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize); int isize_diff; /* How much do we need to grow i_extra_isize */ - int no_expand; - - if (ext4_write_trylock_xattr(inode, &no_expand) == 0) - return 0; retry: isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize; @@ -2731,16 +2727,10 @@ shift: EXT4_I(inode)->i_extra_isize = new_extra_isize; brelse(bh); out: - ext4_write_unlock_xattr(inode, &no_expand); return 0; cleanup: brelse(bh); - /* - * Inode size expansion failed; don't try again - */ - no_expand = 1; - ext4_write_unlock_xattr(inode, &no_expand); return error; } -- cgit v1.2.3 From cf0a5e818fe216dbdf5da4e829e157d27ebfc8a4 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Sun, 6 Aug 2017 00:40:01 -0400 Subject: ext4: restructure ext4_expand_extra_isize Current ext4_expand_extra_isize just tries to expand extra isize, if someone is holding xattr lock or some check fails, it will give up. So rename its name to ext4_try_to_expand_extra_isize. Besides that, we clean up unnecessary check and move some relative checks into it. Signed-off-by: Miao Xie Signed-off-by: Theodore Ts'o Reviewed-by: Wang Shilong --- fs/ext4/inode.c | 67 ++++++++++++++++++++++++--------------------------------- fs/ext4/xattr.c | 9 +++++++- 2 files changed, 36 insertions(+), 40 deletions(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5dabbf276651..713b67e85f73 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5706,21 +5706,35 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode, * Expand an inode by new_extra_isize bytes. * Returns 0 on success or negative error number on failure. */ -static int ext4_expand_extra_isize(struct inode *inode, - unsigned int new_extra_isize, - struct ext4_iloc iloc, - handle_t *handle) +static int ext4_try_to_expand_extra_isize(struct inode *inode, + unsigned int new_extra_isize, + struct ext4_iloc iloc, + handle_t *handle) { struct ext4_inode *raw_inode; struct ext4_xattr_ibody_header *header; int no_expand; int error; - if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) - return 0; + if (ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) + return -EOVERFLOW; + + /* + * In nojournal mode, we can immediately attempt to expand + * the inode. When journaled, we first need to obtain extra + * buffer credits since we may write into the EA block + * with this same handle. If journal_extend fails, then it will + * only result in a minor loss of functionality for that inode. + * If this is felt to be critical, then e2fsck should be run to + * force a large enough s_min_extra_isize. + */ + if (ext4_handle_valid(handle) && + jbd2_journal_extend(handle, + EXT4_DATA_TRANS_BLOCKS(inode->i_sb)) != 0) + return -ENOSPC; if (ext4_write_trylock_xattr(inode, &no_expand) == 0) - return 0; + return -EBUSY; raw_inode = ext4_raw_inode(&iloc); @@ -5747,6 +5761,7 @@ static int ext4_expand_extra_isize(struct inode *inode, no_expand = 1; } ext4_write_unlock_xattr(inode, &no_expand); + return error; } @@ -5767,44 +5782,18 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) { struct ext4_iloc iloc; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - static unsigned int mnt_count; - int err, ret; + int err; might_sleep(); trace_ext4_mark_inode_dirty(inode, _RET_IP_); err = ext4_reserve_inode_write(handle, inode, &iloc); if (err) return err; - if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && - !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) { - /* - * In nojournal mode, we can immediately attempt to expand - * the inode. When journaled, we first need to obtain extra - * buffer credits since we may write into the EA block - * with this same handle. If journal_extend fails, then it will - * only result in a minor loss of functionality for that inode. - * If this is felt to be critical, then e2fsck should be run to - * force a large enough s_min_extra_isize. - */ - if (!ext4_handle_valid(handle) || - jbd2_journal_extend(handle, - EXT4_DATA_TRANS_BLOCKS(inode->i_sb)) == 0) { - ret = ext4_expand_extra_isize(inode, - sbi->s_want_extra_isize, - iloc, handle); - if (ret) { - if (mnt_count != - le16_to_cpu(sbi->s_es->s_mnt_count)) { - ext4_warning(inode->i_sb, - "Unable to expand inode %lu. Delete" - " some EAs or run e2fsck.", - inode->i_ino); - mnt_count = - le16_to_cpu(sbi->s_es->s_mnt_count); - } - } - } - } + + if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize) + ext4_try_to_expand_extra_isize(inode, sbi->s_want_extra_isize, + iloc, handle); + return ext4_mark_iloc_dirty(handle, inode, &iloc); } diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 862ba3891398..7f5f4b63782b 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2638,12 +2638,14 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, { struct ext4_xattr_ibody_header *header; struct buffer_head *bh = NULL; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + static unsigned int mnt_count; size_t min_offs; size_t ifree, bfree; int total_ino; void *base, *end; int error = 0, tried_min_extra_isize = 0; - int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize); + int s_min_extra_isize = le16_to_cpu(sbi->s_es->s_min_extra_isize); int isize_diff; /* How much do we need to grow i_extra_isize */ retry: @@ -2731,6 +2733,11 @@ out: cleanup: brelse(bh); + if (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count)) { + ext4_warning(inode->i_sb, "Unable to expand inode %lu. Delete some EAs or run e2fsck.", + inode->i_ino); + mnt_count = le16_to_cpu(sbi->s_es->s_mnt_count); + } return error; } -- cgit v1.2.3 From b640b2c51b26459fc08f2185a385495b0f509a80 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Sun, 6 Aug 2017 00:55:48 -0400 Subject: ext4: cleanup ext4_expand_extra_isize_ea() Clean up some goto statement, make ext4_expand_extra_isize_ea() clearer. Signed-off-by: Miao Xie Signed-off-by: Theodore Ts'o Reviewed-by: Wang Shilong --- fs/ext4/xattr.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 7f5f4b63782b..82a5af9f6668 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2637,7 +2637,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, struct ext4_inode *raw_inode, handle_t *handle) { struct ext4_xattr_ibody_header *header; - struct buffer_head *bh = NULL; + struct buffer_head *bh; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); static unsigned int mnt_count; size_t min_offs; @@ -2651,7 +2651,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, retry: isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize; if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) - goto out; + return 0; header = IHDR(inode, raw_inode); @@ -2686,6 +2686,7 @@ retry: EXT4_ERROR_INODE(inode, "bad block %llu", EXT4_I(inode)->i_file_acl); error = -EFSCORRUPTED; + brelse(bh); goto cleanup; } base = BHDR(bh); @@ -2693,11 +2694,11 @@ retry: min_offs = end - base; bfree = ext4_xattr_free_space(BFIRST(bh), &min_offs, base, NULL); + brelse(bh); if (bfree + ifree < isize_diff) { if (!tried_min_extra_isize && s_min_extra_isize) { tried_min_extra_isize++; new_extra_isize = s_min_extra_isize; - brelse(bh); goto retry; } error = -ENOSPC; @@ -2715,7 +2716,6 @@ retry: s_min_extra_isize) { tried_min_extra_isize++; new_extra_isize = s_min_extra_isize; - brelse(bh); goto retry; } goto cleanup; @@ -2727,13 +2727,9 @@ shift: EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize, (void *)header, total_ino); EXT4_I(inode)->i_extra_isize = new_extra_isize; - brelse(bh); -out: - return 0; cleanup: - brelse(bh); - if (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count)) { + if (error && (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count))) { ext4_warning(inode->i_sb, "Unable to expand inode %lu. Delete some EAs or run e2fsck.", inode->i_ino); mnt_count = le16_to_cpu(sbi->s_es->s_mnt_count); -- cgit v1.2.3 From c03b45b853f5829816d871283c792e7527a7ded1 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Sun, 6 Aug 2017 01:00:49 -0400 Subject: ext4, project: expand inode extra size if possible When upgrading from old format, try to set project id to old file first time, it will return EOVERFLOW, but if that file is dirtied(touch etc), changing project id will be allowed, this might be confusing for users, we could try to expand @i_extra_isize here too. Reported-by: Zhang Yi Signed-off-by: Miao Xie Signed-off-by: Wang Shilong Signed-off-by: Theodore Ts'o --- fs/ext4/ext4_jbd2.h | 3 ++ fs/ext4/inode.c | 97 +++++++++++++++++++++++++++++++++++++++++------------ fs/ext4/ioctl.c | 9 +++-- 3 files changed, 85 insertions(+), 24 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index dabad1bc8617..48143e32411c 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -227,6 +227,9 @@ int ext4_reserve_inode_write(handle_t *handle, struct inode *inode, int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode); +int ext4_expand_extra_isize(struct inode *inode, + unsigned int new_extra_isize, + struct ext4_iloc *iloc); /* * Wrapper functions with which ext4 calls into JBD. */ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 713b67e85f73..c774bdc22759 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5702,6 +5702,42 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode, return err; } +static int __ext4_expand_extra_isize(struct inode *inode, + unsigned int new_extra_isize, + struct ext4_iloc *iloc, + handle_t *handle, int *no_expand) +{ + struct ext4_inode *raw_inode; + struct ext4_xattr_ibody_header *header; + int error; + + raw_inode = ext4_raw_inode(iloc); + + header = IHDR(inode, raw_inode); + + /* No extended attributes present */ + if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || + header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { + memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE + + EXT4_I(inode)->i_extra_isize, 0, + new_extra_isize - EXT4_I(inode)->i_extra_isize); + EXT4_I(inode)->i_extra_isize = new_extra_isize; + return 0; + } + + /* try to expand with EAs present */ + error = ext4_expand_extra_isize_ea(inode, new_extra_isize, + raw_inode, handle); + if (error) { + /* + * Inode size expansion failed; don't try again + */ + *no_expand = 1; + } + + return error; +} + /* * Expand an inode by new_extra_isize bytes. * Returns 0 on success or negative error number on failure. @@ -5711,8 +5747,6 @@ static int ext4_try_to_expand_extra_isize(struct inode *inode, struct ext4_iloc iloc, handle_t *handle) { - struct ext4_inode *raw_inode; - struct ext4_xattr_ibody_header *header; int no_expand; int error; @@ -5736,32 +5770,53 @@ static int ext4_try_to_expand_extra_isize(struct inode *inode, if (ext4_write_trylock_xattr(inode, &no_expand) == 0) return -EBUSY; - raw_inode = ext4_raw_inode(&iloc); + error = __ext4_expand_extra_isize(inode, new_extra_isize, &iloc, + handle, &no_expand); + ext4_write_unlock_xattr(inode, &no_expand); - header = IHDR(inode, raw_inode); + return error; +} - /* No extended attributes present */ - if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || - header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { - memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE + - EXT4_I(inode)->i_extra_isize, 0, - new_extra_isize - EXT4_I(inode)->i_extra_isize); - EXT4_I(inode)->i_extra_isize = new_extra_isize; - ext4_write_unlock_xattr(inode, &no_expand); - return 0; +int ext4_expand_extra_isize(struct inode *inode, + unsigned int new_extra_isize, + struct ext4_iloc *iloc) +{ + handle_t *handle; + int no_expand; + int error, rc; + + if (ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) { + brelse(iloc->bh); + return -EOVERFLOW; } - /* try to expand with EAs present */ - error = ext4_expand_extra_isize_ea(inode, new_extra_isize, - raw_inode, handle); + handle = ext4_journal_start(inode, EXT4_HT_INODE, + EXT4_DATA_TRANS_BLOCKS(inode->i_sb)); + if (IS_ERR(handle)) { + error = PTR_ERR(handle); + brelse(iloc->bh); + return error; + } + + ext4_write_lock_xattr(inode, &no_expand); + + BUFFER_TRACE(iloc.bh, "get_write_access"); + error = ext4_journal_get_write_access(handle, iloc->bh); if (error) { - /* - * Inode size expansion failed; don't try again - */ - no_expand = 1; + brelse(iloc->bh); + goto out_stop; } - ext4_write_unlock_xattr(inode, &no_expand); + error = __ext4_expand_extra_isize(inode, new_extra_isize, iloc, + handle, &no_expand); + + rc = ext4_mark_iloc_dirty(handle, inode, iloc); + if (!error) + error = rc; + + ext4_write_unlock_xattr(inode, &no_expand); +out_stop: + ext4_journal_stop(handle); return error; } diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 4f4a8391585c..afb66d4ab5cf 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -349,11 +349,14 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid) raw_inode = ext4_raw_inode(&iloc); if (!EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) { - err = -EOVERFLOW; + err = ext4_expand_extra_isize(inode, + EXT4_SB(sb)->s_want_extra_isize, + &iloc); + if (err) + goto out_unlock; + } else { brelse(iloc.bh); - goto out_unlock; } - brelse(iloc.bh); dquot_initialize(inode); -- cgit v1.2.3 From aec51758ce10a9c847a62a48a168f8c804c6e053 Mon Sep 17 00:00:00 2001 From: Jerry Lee Date: Sun, 6 Aug 2017 01:18:31 -0400 Subject: ext4: fix overflow caused by missing cast in ext4_resize_fs() On a 32-bit platform, the value of n_blcoks_count may be wrong during the file system is resized to size larger than 2^32 blocks. This may caused the superblock being corrupted with zero blocks count. Fixes: 1c6bd7173d66 Signed-off-by: Jerry Lee Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org # 3.7+ --- fs/ext4/resize.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index c3ed9021b781..035cd3f4785e 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1927,7 +1927,8 @@ retry: n_desc_blocks = o_desc_blocks + le16_to_cpu(es->s_reserved_gdt_blocks); n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb); - n_blocks_count = n_group * EXT4_BLOCKS_PER_GROUP(sb); + n_blocks_count = (ext4_fsblk_t)n_group * + EXT4_BLOCKS_PER_GROUP(sb); n_group--; /* set to last group number */ } -- cgit v1.2.3 From 4e5620132144666ab41fc2799fbc055830187b7e Mon Sep 17 00:00:00 2001 From: Maninder Singh Date: Sun, 6 Aug 2017 01:33:07 -0400 Subject: ext4: fix copy paste error in ext4_swap_extents() This bug was found by a static code checker tool for copy paste problems. Signed-off-by: Maninder Singh Signed-off-by: Vaneet Narang Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 230ca74c4841..97f0fd06728d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5836,7 +5836,7 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1, if (e1_blk > lblk1) next1 = e1_blk; if (e2_blk > lblk2) - next2 = e1_blk; + next2 = e2_blk; /* Do we have something to swap */ if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS) goto finish; -- cgit v1.2.3