summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-03-26 10:32:13 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-03-26 10:32:13 -0700
commit65ae689329c5d6a149b9201df9321368fbdb6a5c (patch)
treed5eec2c81b1d50ff7af1319e190617962a2386d1
parent26a3b01be87f7890125933eea03eb997d335a921 (diff)
parent0ccc3876e4b2a1559a4dbe3126dda4459d38a83b (diff)
downloadlinux-65ae689329c5d6a149b9201df9321368fbdb6a5c.tar.bz2
Merge tag 'for-5.1-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba: - fsync fixes: i_size for truncate vs fsync, dio vs buffered during snapshotting, remove complicated but incomplete assertion - removed excessive warnigs, misreported device stats updates - fix raid56 page mapping for 32bit arch - fixes reported by static analyzer * tag 'for-5.1-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: Btrfs: fix assertion failure on fsync with NO_HOLES enabled btrfs: Avoid possible qgroup_rsv_size overflow in btrfs_calculate_inode_block_rsv_size btrfs: Fix bound checking in qgroup_trace_new_subtree_blocks btrfs: raid56: properly unmap parity page in finish_parity_scrub() btrfs: don't report readahead errors and don't update statistics Btrfs: fix file corruption after snapshotting due to mix of buffered/DIO writes btrfs: remove WARN_ON in log_dir_items Btrfs: fix incorrect file size after shrinking truncate and fsync
-rw-r--r--fs/btrfs/extent-tree.c2
-rw-r--r--fs/btrfs/qgroup.c4
-rw-r--r--fs/btrfs/raid56.c3
-rw-r--r--fs/btrfs/transaction.c49
-rw-r--r--fs/btrfs/tree-log.c33
-rw-r--r--fs/btrfs/volumes.c2
6 files changed, 72 insertions, 21 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1d49694e6ae3..c5880329ae37 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -6174,7 +6174,7 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
*
* This is overestimating in most cases.
*/
- qgroup_rsv_size = outstanding_extents * fs_info->nodesize;
+ qgroup_rsv_size = (u64)outstanding_extents * fs_info->nodesize;
spin_lock(&block_rsv->lock);
block_rsv->size = reserve_size;
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index eb680b715dd6..e659d9d61107 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1922,8 +1922,8 @@ static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans,
int i;
/* Level sanity check */
- if (cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL ||
- root_level < 0 || root_level >= BTRFS_MAX_LEVEL ||
+ if (cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL - 1 ||
+ root_level < 0 || root_level >= BTRFS_MAX_LEVEL - 1 ||
root_level < cur_level) {
btrfs_err_rl(fs_info,
"%s: bad levels, cur_level=%d root_level=%d",
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 1869ba8e5981..67a6f7d47402 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -2430,8 +2430,9 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
bitmap_clear(rbio->dbitmap, pagenr, 1);
kunmap(p);
- for (stripe = 0; stripe < rbio->real_stripes; stripe++)
+ for (stripe = 0; stripe < nr_data; stripe++)
kunmap(page_in_rbio(rbio, stripe, pagenr, 0));
+ kunmap(p_page);
}
__free_page(p_page);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index acdad6d658f5..e4e665f422fc 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1886,8 +1886,10 @@ static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
}
}
-static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
+static inline int btrfs_start_delalloc_flush(struct btrfs_trans_handle *trans)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+
/*
* We use writeback_inodes_sb here because if we used
* btrfs_start_delalloc_roots we would deadlock with fs freeze.
@@ -1897,15 +1899,50 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
* from already being in a transaction and our join_transaction doesn't
* have to re-take the fs freeze lock.
*/
- if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
+ if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) {
writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC);
+ } else {
+ struct btrfs_pending_snapshot *pending;
+ struct list_head *head = &trans->transaction->pending_snapshots;
+
+ /*
+ * Flush dellaloc for any root that is going to be snapshotted.
+ * This is done to avoid a corrupted version of files, in the
+ * snapshots, that had both buffered and direct IO writes (even
+ * if they were done sequentially) due to an unordered update of
+ * the inode's size on disk.
+ */
+ list_for_each_entry(pending, head, list) {
+ int ret;
+
+ ret = btrfs_start_delalloc_snapshot(pending->root);
+ if (ret)
+ return ret;
+ }
+ }
return 0;
}
-static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
+static inline void btrfs_wait_delalloc_flush(struct btrfs_trans_handle *trans)
{
- if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+
+ if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) {
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
+ } else {
+ struct btrfs_pending_snapshot *pending;
+ struct list_head *head = &trans->transaction->pending_snapshots;
+
+ /*
+ * Wait for any dellaloc that we started previously for the roots
+ * that are going to be snapshotted. This is to avoid a corrupted
+ * version of files in the snapshots that had both buffered and
+ * direct IO writes (even if they were done sequentially).
+ */
+ list_for_each_entry(pending, head, list)
+ btrfs_wait_ordered_extents(pending->root,
+ U64_MAX, 0, U64_MAX);
+ }
}
int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
@@ -2023,7 +2060,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
extwriter_counter_dec(cur_trans, trans->type);
- ret = btrfs_start_delalloc_flush(fs_info);
+ ret = btrfs_start_delalloc_flush(trans);
if (ret)
goto cleanup_transaction;
@@ -2039,7 +2076,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
if (ret)
goto cleanup_transaction;
- btrfs_wait_delalloc_flush(fs_info);
+ btrfs_wait_delalloc_flush(trans);
btrfs_scrub_pause(fs_info);
/*
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index f06454a55e00..561884f60d35 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3578,9 +3578,16 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
}
btrfs_release_path(path);
- /* find the first key from this transaction again */
+ /*
+ * Find the first key from this transaction again. See the note for
+ * log_new_dir_dentries, if we're logging a directory recursively we
+ * won't be holding its i_mutex, which means we can modify the directory
+ * while we're logging it. If we remove an entry between our first
+ * search and this search we'll not find the key again and can just
+ * bail.
+ */
ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
- if (WARN_ON(ret != 0))
+ if (ret != 0)
goto done;
/*
@@ -4544,6 +4551,19 @@ static int logged_inode_size(struct btrfs_root *log, struct btrfs_inode *inode,
item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
*size_ret = btrfs_inode_size(path->nodes[0], item);
+ /*
+ * If the in-memory inode's i_size is smaller then the inode
+ * size stored in the btree, return the inode's i_size, so
+ * that we get a correct inode size after replaying the log
+ * when before a power failure we had a shrinking truncate
+ * followed by addition of a new name (rename / new hard link).
+ * Otherwise return the inode size from the btree, to avoid
+ * data loss when replaying a log due to previously doing a
+ * write that expands the inode's size and logging a new name
+ * immediately after.
+ */
+ if (*size_ret > inode->vfs_inode.i_size)
+ *size_ret = inode->vfs_inode.i_size;
}
btrfs_release_path(path);
@@ -4705,15 +4725,8 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
struct btrfs_file_extent_item);
if (btrfs_file_extent_type(leaf, extent) ==
- BTRFS_FILE_EXTENT_INLINE) {
- len = btrfs_file_extent_ram_bytes(leaf, extent);
- ASSERT(len == i_size ||
- (len == fs_info->sectorsize &&
- btrfs_file_extent_compression(leaf, extent) !=
- BTRFS_COMPRESS_NONE) ||
- (len < i_size && i_size < fs_info->sectorsize));
+ BTRFS_FILE_EXTENT_INLINE)
return 0;
- }
len = btrfs_file_extent_num_bytes(leaf, extent);
/* Last extent goes beyond i_size, no need to log a hole. */
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9024eee889b9..db934ceae9c1 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6407,7 +6407,7 @@ static void btrfs_end_bio(struct bio *bio)
if (bio_op(bio) == REQ_OP_WRITE)
btrfs_dev_stat_inc_and_print(dev,
BTRFS_DEV_STAT_WRITE_ERRS);
- else
+ else if (!(bio->bi_opf & REQ_RAHEAD))
btrfs_dev_stat_inc_and_print(dev,
BTRFS_DEV_STAT_READ_ERRS);
if (bio->bi_opf & REQ_PREFLUSH)