From a4d2aadca184ece182418950d45ba4ffc7b652d2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 29 Jul 2018 15:48:00 -0400 Subject: ext4: sysfs: print ext4_super_block fields as little-endian While working on extended rand for last_error/first_error timestamps, I noticed that the endianess is wrong; we access the little-endian fields in struct ext4_super_block as native-endian when we print them. This adds a special case in ext4_attr_show() and ext4_attr_store() to byteswap the superblock fields if needed. In older kernels, this code was part of super.c, it got moved to sysfs.c in linux-4.4. Cc: stable@vger.kernel.org Fixes: 52c198c6820f ("ext4: add sysfs entry showing whether the fs contains errors") Reviewed-by: Andreas Dilger Signed-off-by: Arnd Bergmann Signed-off-by: Theodore Ts'o --- fs/ext4/sysfs.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index f34da0bb8f17..b970a200f20c 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -274,8 +274,12 @@ static ssize_t ext4_attr_show(struct kobject *kobj, case attr_pointer_ui: if (!ptr) return 0; - return snprintf(buf, PAGE_SIZE, "%u\n", - *((unsigned int *) ptr)); + if (a->attr_ptr == ptr_ext4_super_block_offset) + return snprintf(buf, PAGE_SIZE, "%u\n", + le32_to_cpup(ptr)); + else + return snprintf(buf, PAGE_SIZE, "%u\n", + *((unsigned int *) ptr)); case attr_pointer_atomic: if (!ptr) return 0; @@ -308,7 +312,10 @@ static ssize_t ext4_attr_store(struct kobject *kobj, ret = kstrtoul(skip_spaces(buf), 0, &t); if (ret) return ret; - *((unsigned int *) ptr) = t; + if (a->attr_ptr == ptr_ext4_super_block_offset) + *((__le32 *) ptr) = cpu_to_le32(t); + else + *((unsigned int *) ptr) = t; return len; case attr_inode_readahead: return inode_readahead_blks_store(sbi, buf, len); -- cgit v1.2.3 From af123b3718592a66a24716ed4724dc214220492b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 29 Jul 2018 15:49:00 -0400 Subject: ext4: use 64-bit timestamps for mmp_time The mmp_time field is 64 bits wide, which is good, but calling get_seconds() results in a 32-bit value on 32-bit architectures. Using ktime_get_real_seconds() instead returns 64 bits everywhere. Reviewed-by: Andreas Dilger Reviewed-by: Jan Kara Signed-off-by: Arnd Bergmann Signed-off-by: Theodore Ts'o --- fs/ext4/mmp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 638ad4743477..39b07c2d3384 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -147,7 +147,7 @@ static int kmmpd(void *data) mmp_block = le64_to_cpu(es->s_mmp_block); mmp = (struct mmp_struct *)(bh->b_data); - mmp->mmp_time = cpu_to_le64(get_seconds()); + mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds()); /* * Start with the higher mmp_check_interval and reduce it if * the MMP block is being updated on time. @@ -165,7 +165,7 @@ static int kmmpd(void *data) seq = 1; mmp->mmp_seq = cpu_to_le32(seq); - mmp->mmp_time = cpu_to_le64(get_seconds()); + mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds()); last_update_time = jiffies; retval = write_mmp_block(sb, bh); @@ -241,7 +241,7 @@ static int kmmpd(void *data) * Unmount seems to be clean. */ mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN); - mmp->mmp_time = cpu_to_le64(get_seconds()); + mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds()); retval = write_mmp_block(sb, bh); -- cgit v1.2.3 From 5ffff834322281f550b10c958fd9dd85679b8dbb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 29 Jul 2018 15:50:00 -0400 Subject: ext4: use ktime_get_real_seconds for i_dtime We only care about the low 32-bit for i_dtime as explained in commit b5f515735bea ("ext4: avoid Y2038 overflow in recently_deleted()"), so the use of get_seconds() is correct here, but that function is getting removed in the process of the y2038 fixes, so let's use the modern ktime_get_real_seconds() here. Reviewed-by: Andreas Dilger Signed-off-by: Arnd Bergmann Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 4efe77286ecd..ba0de19fb1ad 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -317,7 +317,7 @@ stop_handle: * (Well, we could do this if we need to, but heck - it works) */ ext4_orphan_del(handle, inode); - EXT4_I(inode)->i_dtime = get_seconds(); + EXT4_I(inode)->i_dtime = (__u32)ktime_get_real_seconds(); /* * One subtle ordering requirement: if anything has gone wrong -- cgit v1.2.3 From 7b62b293200ffaba5b281668ba7102cb4209774f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 29 Jul 2018 15:51:00 -0400 Subject: ext4: use timespec64 for all inode times This is the last missing piece for the inode times on 32-bit systems: now that VFS interfaces use timespec64, we just need to stop truncating the tv_sec values for y2038 compatibililty. Reviewed-by: Andreas Dilger Signed-off-by: Arnd Bergmann Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 22 +++++++++------------- fs/ext4/ialloc.c | 2 +- 2 files changed, 10 insertions(+), 14 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 7c7123f265c2..1d7dac2df6e8 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -789,17 +789,16 @@ struct move_extent { * affected filesystem before 2242. */ -static inline __le32 ext4_encode_extra_time(struct timespec *time) +static inline __le32 ext4_encode_extra_time(struct timespec64 *time) { - u32 extra = sizeof(time->tv_sec) > 4 ? - ((time->tv_sec - (s32)time->tv_sec) >> 32) & EXT4_EPOCH_MASK : 0; + u32 extra =((time->tv_sec - (s32)time->tv_sec) >> 32) & EXT4_EPOCH_MASK; return cpu_to_le32(extra | (time->tv_nsec << EXT4_EPOCH_BITS)); } -static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra) +static inline void ext4_decode_extra_time(struct timespec64 *time, + __le32 extra) { - if (unlikely(sizeof(time->tv_sec) > 4 && - (extra & cpu_to_le32(EXT4_EPOCH_MASK)))) { + if (unlikely(extra & cpu_to_le32(EXT4_EPOCH_MASK))) { #if 1 /* Handle legacy encoding of pre-1970 dates with epoch @@ -821,9 +820,8 @@ static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra) do { \ (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \ if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) {\ - struct timespec ts = timespec64_to_timespec((inode)->xtime); \ (raw_inode)->xtime ## _extra = \ - ext4_encode_extra_time(&ts); \ + ext4_encode_extra_time(&(inode)->xtime); \ } \ } while (0) @@ -840,10 +838,8 @@ do { \ do { \ (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \ if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) { \ - struct timespec ts = timespec64_to_timespec((inode)->xtime); \ - ext4_decode_extra_time(&ts, \ + ext4_decode_extra_time(&(inode)->xtime, \ raw_inode->xtime ## _extra); \ - (inode)->xtime = timespec_to_timespec64(ts); \ } \ else \ (inode)->xtime.tv_nsec = 0; \ @@ -993,9 +989,9 @@ struct ext4_inode_info { /* * File creation time. Its function is same as that of - * struct timespec i_{a,c,m}time in the generic inode. + * struct timespec64 i_{a,c,m}time in the generic inode. */ - struct timespec i_crtime; + struct timespec64 i_crtime; /* mballoc */ struct list_head i_prealloc_list; diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index f336cbc6e932..dffd21de2694 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1086,7 +1086,7 @@ got: /* This is the optimal IO size (for stat), not the fs block size */ inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); - ei->i_crtime = timespec64_to_timespec(inode->i_mtime); + ei->i_crtime = inode->i_mtime; memset(ei->i_data, 0, sizeof(ei->i_data)); ei->i_dir_start_lookup = 0; -- cgit v1.2.3 From 6a0678a79bb3a4e5fc1b680e7afc78727e21aff3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 29 Jul 2018 15:51:48 -0400 Subject: ext4: super: extend timestamps to 40 bits The inode timestamps use 34 bits in ext4, but the various timestamps in the superblock are limited to 32 bits. If every user accesses these as 'unsigned', then this is good until year 2106, but it seems better to extend this a bit further in the process of removing the deprecated get_seconds() function. This adds another byte for each timestamp in the superblock, making them long enough to store timestamps beyond what is in the inodes, which seems good enough here (in ocfs2, they are already 64-bit wide, which is appropriate for a new layout). I did not modify e2fsprogs, which obviously needs the same change to actually interpret future timestamps correctly. Signed-off-by: Arnd Bergmann Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 9 ++++++++- fs/ext4/super.c | 39 ++++++++++++++++++++++++++++++--------- fs/ext4/sysfs.c | 19 +++++++++++++++++-- 3 files changed, 55 insertions(+), 12 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1d7dac2df6e8..6d7dec48372b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1295,7 +1295,14 @@ struct ext4_super_block { __le32 s_lpf_ino; /* Location of the lost+found inode */ __le32 s_prj_quota_inum; /* inode for tracking project quota */ __le32 s_checksum_seed; /* crc32c(uuid) if csum_seed set */ - __le32 s_reserved[98]; /* Padding to the end of the block */ + __u8 s_wtime_hi; + __u8 s_mtime_hi; + __u8 s_mkfs_time_hi; + __u8 s_lastcheck_hi; + __u8 s_first_error_time_hi; + __u8 s_last_error_time_hi; + __u8 s_pad[2]; + __le32 s_reserved[96]; /* Padding to the end of the block */ __le32 s_checksum; /* crc32c(superblock) */ }; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b7f7922061be..67f4310edd0a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -312,6 +312,24 @@ void ext4_itable_unused_set(struct super_block *sb, bg->bg_itable_unused_hi = cpu_to_le16(count >> 16); } +static void __ext4_update_tstamp(__le32 *lo, __u8 *hi) +{ + time64_t now = ktime_get_real_seconds(); + + now = clamp_val(now, 0, (1ull << 40) - 1); + + *lo = cpu_to_le32(lower_32_bits(now)); + *hi = upper_32_bits(now); +} + +static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi) +{ + return ((time64_t)(*hi) << 32) + le32_to_cpu(*lo); +} +#define ext4_update_tstamp(es, tstamp) \ + __ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi) +#define ext4_get_tstamp(es, tstamp) \ + __ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi) static void __save_error_info(struct super_block *sb, const char *func, unsigned int line) @@ -322,11 +340,12 @@ static void __save_error_info(struct super_block *sb, const char *func, if (bdev_read_only(sb->s_bdev)) return; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - es->s_last_error_time = cpu_to_le32(get_seconds()); + ext4_update_tstamp(es, s_last_error_time); strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); es->s_last_error_line = cpu_to_le32(line); if (!es->s_first_error_time) { es->s_first_error_time = es->s_last_error_time; + es->s_first_error_time_hi = es->s_last_error_time_hi; strncpy(es->s_first_error_func, func, sizeof(es->s_first_error_func)); es->s_first_error_line = cpu_to_le32(line); @@ -2174,8 +2193,8 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, "warning: maximal mount count reached, " "running e2fsck is recommended"); else if (le32_to_cpu(es->s_checkinterval) && - (le32_to_cpu(es->s_lastcheck) + - le32_to_cpu(es->s_checkinterval) <= get_seconds())) + (ext4_get_tstamp(es, s_lastcheck) + + le32_to_cpu(es->s_checkinterval) <= ktime_get_real_seconds())) ext4_msg(sb, KERN_WARNING, "warning: checktime reached, " "running e2fsck is recommended"); @@ -2184,7 +2203,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); le16_add_cpu(&es->s_mnt_count, 1); - es->s_mtime = cpu_to_le32(get_seconds()); + ext4_update_tstamp(es, s_mtime); ext4_update_dynamic_rev(sb); if (sbi->s_journal) ext4_set_feature_journal_needs_recovery(sb); @@ -2875,8 +2894,9 @@ static void print_daily_error_info(struct timer_list *t) ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u", le32_to_cpu(es->s_error_count)); if (es->s_first_error_time) { - printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %u: %.*s:%d", - sb->s_id, le32_to_cpu(es->s_first_error_time), + printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %llu: %.*s:%d", + sb->s_id, + ext4_get_tstamp(es, s_first_error_time), (int) sizeof(es->s_first_error_func), es->s_first_error_func, le32_to_cpu(es->s_first_error_line)); @@ -2889,8 +2909,9 @@ static void print_daily_error_info(struct timer_list *t) printk(KERN_CONT "\n"); } if (es->s_last_error_time) { - printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d", - sb->s_id, le32_to_cpu(es->s_last_error_time), + printk(KERN_NOTICE "EXT4-fs (%s): last error at time %llu: %.*s:%d", + sb->s_id, + ext4_get_tstamp(es, s_last_error_time), (int) sizeof(es->s_last_error_func), es->s_last_error_func, le32_to_cpu(es->s_last_error_line)); @@ -4813,7 +4834,7 @@ static int ext4_commit_super(struct super_block *sb, int sync) * to complain and force a full file system check. */ if (!(sb->s_flags & SB_RDONLY)) - es->s_wtime = cpu_to_le32(get_seconds()); + ext4_update_tstamp(es, s_wtime); if (sb->s_bdev->bd_part) es->s_kbytes_written = cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index b970a200f20c..e60cc5e89023 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -25,6 +25,8 @@ typedef enum { attr_reserved_clusters, attr_inode_readahead, attr_trigger_test_error, + attr_first_error_time, + attr_last_error_time, attr_feature, attr_pointer_ui, attr_pointer_atomic, @@ -182,8 +184,8 @@ EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst); EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval); EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); EXT4_RO_ATTR_ES_UI(errors_count, s_error_count); -EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time); -EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time); +EXT4_ATTR(first_error_time, 0444, first_error_time); +EXT4_ATTR(last_error_time, 0444, last_error_time); static unsigned int old_bump_val = 128; EXT4_ATTR_PTR(max_writeback_mb_bump, 0444, pointer_ui, &old_bump_val); @@ -249,6 +251,15 @@ static void *calc_ptr(struct ext4_attr *a, struct ext4_sb_info *sbi) return NULL; } +static ssize_t __print_tstamp(char *buf, __le32 lo, __u8 hi) +{ + return snprintf(buf, PAGE_SIZE, "%lld", + ((time64_t)hi << 32) + le32_to_cpu(lo)); +} + +#define print_tstamp(buf, es, tstamp) \ + __print_tstamp(buf, (es)->tstamp, (es)->tstamp ## _hi) + static ssize_t ext4_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -287,6 +298,10 @@ static ssize_t ext4_attr_show(struct kobject *kobj, atomic_read((atomic_t *) ptr)); case attr_feature: return snprintf(buf, PAGE_SIZE, "supported\n"); + case attr_first_error_time: + return print_tstamp(buf, sbi->s_es, s_first_error_time); + case attr_last_error_time: + return print_tstamp(buf, sbi->s_es, s_last_error_time); } return 0; -- cgit v1.2.3 From 7f144fd046d967ff2fbba59203b42b888ad5aae7 Mon Sep 17 00:00:00 2001 From: Junichi Uekawa Date: Sun, 29 Jul 2018 15:51:52 -0400 Subject: ext4: fix warning message in ext4_enable_quotas() Output the warning message before we clobber type and be -1 all the time. The error message would now be [ 1.519791] EXT4-fs warning (device vdb): ext4_enable_quotas:5402: Failed to enable quota tracking (type=0, err=-3). Please run e2fsck to fix. Signed-off-by: Junichi Uekawa Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger --- fs/ext4/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 67f4310edd0a..3e64d8d51ac4 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5686,13 +5686,13 @@ static int ext4_enable_quotas(struct super_block *sb) DQUOT_USAGE_ENABLED | (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0)); if (err) { - for (type--; type >= 0; type--) - dquot_quota_off(sb, type); - ext4_warning(sb, "Failed to enable quota tracking " "(type=%d, err=%d). Please run " "e2fsck to fix.", type, err); + for (type--; type >= 0; type--) + dquot_quota_off(sb, type); + return err; } } -- cgit v1.2.3 From 21ac738ede0b49004b53b4a44fe3df7bb4a78280 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Sun, 29 Jul 2018 15:51:54 -0400 Subject: ext4: check allocation failure when duplicating "data" in ext4_remount() There is no check for allocation failure when duplicating "data" in ext4_remount(). Check for failure and return error -ENOMEM in this case. Signed-off-by: Chengguang Xu Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger --- fs/ext4/super.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3e64d8d51ac4..d4a218ba626c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5101,6 +5101,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) #endif char *orig_data = kstrdup(data, GFP_KERNEL); + if (data && !orig_data) + return -ENOMEM; + /* Store the original options */ old_sb_flags = sb->s_flags; old_opts.s_mount_opt = sbi->s_mount_opt; -- cgit v1.2.3 From 62bbdd9974678513fee113f09f6b672623521179 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sun, 29 Jul 2018 16:11:59 -0400 Subject: ext4: use swap macro in mext_page_double_lock Make use of the swap macro and remove unnecessary variable *tmp*. This makes the code easier to read and maintain. This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Theodore Ts'o --- fs/ext4/move_extent.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 8e17efdcbf11..a409ff70d67b 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -134,9 +134,7 @@ mext_page_double_lock(struct inode *inode1, struct inode *inode2, mapping[0] = inode1->i_mapping; mapping[1] = inode2->i_mapping; } else { - pgoff_t tmp = index1; - index1 = index2; - index2 = tmp; + swap(index1, index2); mapping[0] = inode2->i_mapping; mapping[1] = inode1->i_mapping; } -- cgit v1.2.3 From 430657b6be896db57d974375cc499ca212c7f01d Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Sun, 29 Jul 2018 17:00:22 -0400 Subject: ext4: handle layout changes to pinned DAX mappings Follow the lead of xfs_break_dax_layouts() and add synchronization between operations in ext4 which remove blocks from an inode (hole punch, truncate down, etc.) and pages which are pinned due to DAX DMA operations. Signed-off-by: Ross Zwisler Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Reviewed-by: Lukas Czerner --- fs/ext4/ext4.h | 1 + fs/ext4/extents.c | 17 +++++++++++++++++ fs/ext4/inode.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ fs/ext4/truncate.h | 4 ++++ 4 files changed, 68 insertions(+) (limited to 'fs/ext4') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 6d7dec48372b..1fc013f3d944 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2459,6 +2459,7 @@ extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); extern int ext4_inode_attach_jinode(struct inode *inode); extern int ext4_can_truncate(struct inode *inode); extern int ext4_truncate(struct inode *); +extern int ext4_break_layouts(struct inode *); extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length); extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks); extern void ext4_set_inode_flags(struct inode *); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 8ce6fd5b10dd..72a361d5ef74 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4826,6 +4826,13 @@ static long ext4_zero_range(struct file *file, loff_t offset, * released from page cache. */ down_write(&EXT4_I(inode)->i_mmap_sem); + + ret = ext4_break_layouts(inode); + if (ret) { + up_write(&EXT4_I(inode)->i_mmap_sem); + goto out_mutex; + } + ret = ext4_update_disksize_before_punch(inode, offset, len); if (ret) { up_write(&EXT4_I(inode)->i_mmap_sem); @@ -5499,6 +5506,11 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) * page cache. */ down_write(&EXT4_I(inode)->i_mmap_sem); + + ret = ext4_break_layouts(inode); + if (ret) + goto out_mmap; + /* * Need to round down offset to be aligned with page size boundary * for page size > block size. @@ -5647,6 +5659,11 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) * page cache. */ down_write(&EXT4_I(inode)->i_mmap_sem); + + ret = ext4_break_layouts(inode); + if (ret) + goto out_mmap; + /* * Need to round down to align start offset to page size boundary * for page size > block size. diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ba0de19fb1ad..60432498acfb 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4191,6 +4191,39 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset, return 0; } +static void ext4_wait_dax_page(struct ext4_inode_info *ei, bool *did_unlock) +{ + *did_unlock = true; + up_write(&ei->i_mmap_sem); + schedule(); + down_write(&ei->i_mmap_sem); +} + +int ext4_break_layouts(struct inode *inode) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + struct page *page; + bool retry; + int error; + + if (WARN_ON_ONCE(!rwsem_is_locked(&ei->i_mmap_sem))) + return -EINVAL; + + do { + retry = false; + page = dax_layout_busy_page(inode->i_mapping); + if (!page) + return 0; + + error = ___wait_var_event(&page->_refcount, + atomic_read(&page->_refcount) == 1, + TASK_INTERRUPTIBLE, 0, 0, + ext4_wait_dax_page(ei, &retry)); + } while (error == 0 && retry); + + return error; +} + /* * ext4_punch_hole: punches a hole in a file by releasing the blocks * associated with the given offset and length @@ -4264,6 +4297,11 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) * page cache. */ down_write(&EXT4_I(inode)->i_mmap_sem); + + ret = ext4_break_layouts(inode); + if (ret) + goto out_dio; + first_block_offset = round_up(offset, sb->s_blocksize); last_block_offset = round_down((offset + length), sb->s_blocksize) - 1; @@ -5553,6 +5591,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) ext4_wait_for_tail_page_commit(inode); } down_write(&EXT4_I(inode)->i_mmap_sem); + + rc = ext4_break_layouts(inode); + if (rc) { + up_write(&EXT4_I(inode)->i_mmap_sem); + error = rc; + goto err_out; + } + /* * Truncate pagecache after we've waited for commit * in data=journal mode to make pages freeable. diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h index 0cb13badf473..bcbe3668c1d4 100644 --- a/fs/ext4/truncate.h +++ b/fs/ext4/truncate.h @@ -11,6 +11,10 @@ */ static inline void ext4_truncate_failed_write(struct inode *inode) { + /* + * We don't need to call ext4_break_layouts() because the blocks we + * are truncating were never visible to userspace. + */ down_write(&EXT4_I(inode)->i_mmap_sem); truncate_inode_pages(inode->i_mapping, inode->i_size); ext4_truncate(inode); -- cgit v1.2.3 From f39b3f45dbcb0343822cce31ea7636ad66e60bc2 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Sun, 29 Jul 2018 17:13:42 -0400 Subject: ext4: reset error code in ext4_find_entry in fallback When ext4_find_entry() falls back to "searching the old fashioned way" due to a corrupt dx dir, it needs to reset the error code to NULL so that the nonstandard ERR_BAD_DX_DIR code isn't returned to userspace. https://bugzilla.kernel.org/show_bug.cgi?id=199947 Reported-by: Anatoly Trosinenko Reviewed-by: Andreas Dilger Signed-off-by: Eric Sandeen Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/namei.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/ext4') diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 2a4c25c4681d..116ff68c5bd4 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1398,6 +1398,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, goto cleanup_and_exit; dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " "falling back\n")); + ret = NULL; } nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); if (!nblocks) { -- cgit v1.2.3 From 9af0b3d1257756394ebbd06b14937b557e3a756b Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Sun, 29 Jul 2018 17:27:45 -0400 Subject: ext4: fix race when setting the bitmap corrupted flag Whenever we hit block or inode bitmap corruptions we set bit and then reduce this block group free inode/clusters counter to expose right available space. However some of ext4_mark_group_bitmap_corrupted() is called inside group spinlock, some are not, this could make it happen that we double reduce one block group free counters from system. Always hold group spinlock for it could fix it, but it looks a little heavy, we could use test_and_set_bit() to fix race problems here. Signed-off-by: Wang Shilong Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/super.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d4a218ba626c..f7750bc5b85a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -795,26 +795,26 @@ void ext4_mark_group_bitmap_corrupted(struct super_block *sb, struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_group_info *grp = ext4_get_group_info(sb, group); struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); + int ret; - if ((flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) && - !EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) { - percpu_counter_sub(&sbi->s_freeclusters_counter, - grp->bb_free); - set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, - &grp->bb_state); + if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) { + ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, + &grp->bb_state); + if (!ret) + percpu_counter_sub(&sbi->s_freeclusters_counter, + grp->bb_free); } - if ((flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) && - !EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) { - if (gdp) { + if (flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) { + ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, + &grp->bb_state); + if (!ret && gdp) { int count; count = ext4_free_inodes_count(sb, gdp); percpu_counter_sub(&sbi->s_freeinodes_counter, count); } - set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, - &grp->bb_state); } } -- cgit v1.2.3 From 5ef2a69993676a0dfd49bf60ae1323eb8a288366 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Wed, 1 Aug 2018 12:02:31 -0400 Subject: ext4: use ext4_warning() for sb_getblk failure Out of memory should not be considered as critical errors; so replace ext4_error() with ext4_warnig(). Signed-off-by: Wang Shilong Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/balloc.c | 6 +++--- fs/ext4/ialloc.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index aa52d87985aa..e5d6ee61ff48 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -426,9 +426,9 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) } bh = sb_getblk(sb, bitmap_blk); if (unlikely(!bh)) { - ext4_error(sb, "Cannot get buffer for block bitmap - " - "block_group = %u, block_bitmap = %llu", - block_group, bitmap_blk); + ext4_warning(sb, "Cannot get buffer for block bitmap - " + "block_group = %u, block_bitmap = %llu", + block_group, bitmap_blk); return ERR_PTR(-ENOMEM); } diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index dffd21de2694..2addcb8730e1 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -138,9 +138,9 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) } bh = sb_getblk(sb, bitmap_blk); if (unlikely(!bh)) { - ext4_error(sb, "Cannot read inode bitmap - " - "block_group = %u, inode_bitmap = %llu", - block_group, bitmap_blk); + ext4_warning(sb, "Cannot read inode bitmap - " + "block_group = %u, inode_bitmap = %llu", + block_group, bitmap_blk); return ERR_PTR(-ENOMEM); } if (bitmap_uptodate(bh)) -- cgit v1.2.3 From 7d95178c77014dbd8dce36ee40bbbc5e6c121ff5 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 1 Aug 2018 12:36:52 -0400 Subject: ext4: check for NUL characters in extended attribute's name Extended attribute names are defined to be NUL-terminated, so the name must not contain a NUL character. This is important because there are places when remove extended attribute, the code uses strlen to determine the length of the entry. That should probably be fixed at some point, but code is currently really messy, so the simplest fix for now is to simply validate that the extended attributes are sane. https://bugzilla.kernel.org/show_bug.cgi?id=200401 Reported-by: Wen Xu Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/xattr.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/ext4') diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 723df14f4084..f36fc5d5b257 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -190,6 +190,8 @@ ext4_xattr_check_entries(struct ext4_xattr_entry *entry, void *end, struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e); if ((void *)next >= end) return -EFSCORRUPTED; + if (strnlen(e->e_name, e->e_name_len) != e->e_name_len) + return -EFSCORRUPTED; e = next; } -- cgit v1.2.3 From 1a5d5e5d51e75a5bca67dadbcea8c841934b7b85 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Thu, 2 Aug 2018 00:03:40 -0400 Subject: ext4: fix spectre gadget in ext4_mb_regular_allocator() 'ac->ac_g_ex.fe_len' is a user-controlled value which is used in the derivation of 'ac->ac_2order'. 'ac->ac_2order', in turn, is used to index arrays which makes it a potential spectre gadget. Fix this by sanitizing the value assigned to 'ac->ac2_order'. This covers the following accesses found with the help of smatch: * fs/ext4/mballoc.c:1896 ext4_mb_simple_scan_group() warn: potential spectre issue 'grp->bb_counters' [w] (local cap) * fs/ext4/mballoc.c:445 mb_find_buddy() warn: potential spectre issue 'EXT4_SB(e4b->bd_sb)->s_mb_offsets' [r] (local cap) * fs/ext4/mballoc.c:446 mb_find_buddy() warn: potential spectre issue 'EXT4_SB(e4b->bd_sb)->s_mb_maxs' [r] (local cap) Suggested-by: Josh Poimboeuf Signed-off-by: Jeremy Cline Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/mballoc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index f7ab34088162..8b24d3d42cb3 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -2140,7 +2141,8 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) * This should tell if fe_len is exactly power of 2 */ if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0) - ac->ac_2order = i - 1; + ac->ac_2order = array_index_nospec(i - 1, + sb->s_blocksize_bits + 2); } /* if stream allocation is enabled, use global goal */ -- cgit v1.2.3 From bc716523462f98ea6bfef3e1a5926daadbf32e9e Mon Sep 17 00:00:00 2001 From: Liu Song Date: Thu, 2 Aug 2018 00:11:16 -0400 Subject: ext4: improve code readability in ext4_iget() Merge the duplicated complex conditions to improve code readability. Signed-off-by: Liu Song Signed-off-by: Theodore Ts'o Reviewed-by: Jiang Biao --- fs/ext4/inode.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 60432498acfb..8f6ad7667974 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4982,17 +4982,14 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ret = -EFSCORRUPTED; goto bad_inode; } else if (!ext4_has_inline_data(inode)) { - if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { - if ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - (S_ISLNK(inode->i_mode) && - !ext4_inode_is_fast_symlink(inode)))) - /* Validate extent which is part of inode */ + /* validate the block references in the inode */ + if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + (S_ISLNK(inode->i_mode) && + !ext4_inode_is_fast_symlink(inode))) { + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) ret = ext4_ext_check_inode(inode); - } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - (S_ISLNK(inode->i_mode) && - !ext4_inode_is_fast_symlink(inode))) { - /* Validate block references which are part of inode */ - ret = ext4_ind_check_inode(inode); + else + ret = ext4_ind_check_inode(inode); } } if (ret) -- cgit v1.2.3 From 863c37fcb14f8b66ea831b45fb35a53ac4a8d69e Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Sat, 4 Aug 2018 17:34:07 -0400 Subject: ext4: remove unneeded variable "err" in ext4_mb_release_inode_pa() The err is not used after initalization. So just remove the variable. Signed-off-by: zhong jiang Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 8b24d3d42cb3..e29fce2fbf25 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3801,7 +3801,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, ext4_group_t group; ext4_grpblk_t bit; unsigned long long grp_blk_start; - int err = 0; int free = 0; BUG_ON(pa->pa_deleted == 0); @@ -3842,7 +3841,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, } atomic_add(free, &sbi->s_mb_discarded); - return err; + return 0; } static noinline_for_stack int -- cgit v1.2.3