Merge tag 'for-5.7-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba: "A number of core changes that make things work better in general, code is simpler and cleaner. Core changes: - per-inode file extent tree, for in memory tracking of contiguous extent ranges to make sure i_size adjustments are accurate - tree root structures are protected by reference counts, replacing SRCU that did not cover some cases - leak detector for tree root structures - per-transaction pinned extent tracking - buffer heads are replaced by bios for super block access - speedup of extent back reference resolution, on an example test scenario the runtime of send went down from a hour to minutes - factor out locking scheme used for subvolume writer and NOCOW exclusion, abstracted as DREW lock, double reader-writer exclusion (allow either readers or writers) - cleanup and abstract extent allocation policies, preparation for zoned device support - make reflink/clone_range work on inline extents - add more cancellation point for relocation, improves long response from 'balance cancel' - add page migration callback for data pages - switch to guid for uuids, with additional cleanups of the interface - make ranged full fsyncs more efficient - removal of obsolete ioctl flag BTRFS_SUBVOL_CREATE_ASYNC - remove b-tree readahead from delayed refs paths, avoiding seek and read unnecessary blocks Features: - v2 of ioctl to delete subvolumes, allowing to delete by id and more future extensions Fixes: - fix qgroup rescan worker that could block umount - fix crash during unmount due to race with delayed inode workers - fix dellaloc flushing logic that could create unnecessary chunks under heavy load - fix missing file extent item for hole after ranged fsync - several fixes in relocation error handling Other: - more documentation of relocation, device replace, space reservations - many random cleanups" * tag 'for-5.7-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (210 commits) btrfs: fix missing semaphore unlock in btrfs_sync_file btrfs: use nofs allocations for running delayed items btrfs: sysfs: Use scnprintf() instead of snprintf() btrfs: do not resolve backrefs for roots that are being deleted btrfs: track reloc roots based on their commit root bytenr btrfs: restart relocate_tree_blocks properly btrfs: reloc: reorder reservation before root selection btrfs: do not readahead in build_backref_tree btrfs: do not use readahead for running delayed refs btrfs: Remove async_transid from btrfs_mksubvol/create_subvol/create_snapshot btrfs: Remove transid argument from btrfs_ioctl_snap_create_transid btrfs: Remove BTRFS_SUBVOL_CREATE_ASYNC support btrfs: kill the subvol_srcu btrfs: make btrfs_cleanup_fs_roots use the radix tree lock btrfs: don't take an extra root ref at allocation time btrfs: hold a ref on the root on the dead roots list btrfs: make inodes hold a ref on their roots btrfs: move the root freeing stuff into btrfs_put_root btrfs: move ino_cache_inode dropping out of btrfs_free_fs_root btrfs: make the extent buffer leak check per fs info ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2020-03-31 13:00:16 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2020-03-31 13:00:16 -0700
commit: 15c981d16d70e8a5be297fa4af07a64ab7e080ed (patch)
tree: 9487ba1525d75501cd1a3896dac4e6321efd3a55 /fs/btrfs/file.c
parent: 1455c69900c8c6442b182a74087931f4ffb1cac4 (diff)
parent: 6ff06729c22ec0b7498d900d79cc88cfb8aceaeb (diff)
download: linux-15c981d16d70e8a5be297fa4af07a64ab7e080ed.tar.bz2
1 files changed, 53 insertions, 27 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index a16da274c9aa..8a144f9cb7ac 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -27,6 +27,7 @@
 #include "qgroup.h"
 #include "compression.h"
 #include "delalloc-space.h"
+#include "reflink.h"
 
 static struct kmem_cache *btrfs_inode_defrag_cachep;
 /*
@@ -277,7 +278,6 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
 	struct btrfs_key key;
 	struct btrfs_ioctl_defrag_range_args range;
 	int num_defrag;
-	int index;
 	int ret;
 
 	/* get the inode */
@@ -285,9 +285,7 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
 	key.type = BTRFS_ROOT_ITEM_KEY;
 	key.offset = (u64)-1;
 
-	index = srcu_read_lock(&fs_info->subvol_srcu);
-
-	inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
+	inode_root = btrfs_get_fs_root(fs_info, &key, true);
 	if (IS_ERR(inode_root)) {
 		ret = PTR_ERR(inode_root);
 		goto cleanup;
@@ -297,11 +295,11 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
 	key.type = BTRFS_INODE_ITEM_KEY;
 	key.offset = 0;
 	inode = btrfs_iget(fs_info->sb, &key, inode_root);
+	btrfs_put_root(inode_root);
 	if (IS_ERR(inode)) {
 		ret = PTR_ERR(inode);
 		goto cleanup;
 	}
-	srcu_read_unlock(&fs_info->subvol_srcu, index);
 
 	/* do a chunk of defrag */
 	clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
@@ -337,7 +335,6 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
 	iput(inode);
 	return 0;
 cleanup:
-	srcu_read_unlock(&fs_info->subvol_srcu, index);
 	kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
 	return ret;
 }
@@ -1552,15 +1549,14 @@ static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
 	u64 num_bytes;
 	int ret;
 
-	ret = btrfs_start_write_no_snapshotting(root);
-	if (!ret)
+	if (!btrfs_drew_try_write_lock(&root->snapshot_lock))
 		return -EAGAIN;
 
 	lockstart = round_down(pos, fs_info->sectorsize);
 	lockend = round_up(pos + *write_bytes,
 			   fs_info->sectorsize) - 1;
 
-	btrfs_lock_and_flush_ordered_range(&inode->io_tree, inode, lockstart,
+	btrfs_lock_and_flush_ordered_range(inode, lockstart,
 					   lockend, NULL);
 
 	num_bytes = lockend - lockstart + 1;
@@ -1568,7 +1564,7 @@ static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
 			NULL, NULL, NULL);
 	if (ret <= 0) {
 		ret = 0;
-		btrfs_end_write_no_snapshotting(root);
+		btrfs_drew_write_unlock(&root->snapshot_lock);
 	} else {
 		*write_bytes = min_t(size_t, *write_bytes ,
 				     num_bytes - pos + lockstart);
@@ -1674,7 +1670,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
 						data_reserved, pos,
 						write_bytes);
 			else
-				btrfs_end_write_no_snapshotting(root);
+				btrfs_drew_write_unlock(&root->snapshot_lock);
 			break;
 		}
 
@@ -1778,7 +1774,7 @@ again:
 
 		release_bytes = 0;
 		if (only_release_metadata)
-			btrfs_end_write_no_snapshotting(root);
+			btrfs_drew_write_unlock(&root->snapshot_lock);
 
 		if (only_release_metadata && copied > 0) {
 			lockstart = round_down(pos,
@@ -1807,7 +1803,7 @@ again:
 
 	if (release_bytes) {
 		if (only_release_metadata) {
-			btrfs_end_write_no_snapshotting(root);
+			btrfs_drew_write_unlock(&root->snapshot_lock);
 			btrfs_delalloc_release_metadata(BTRFS_I(inode),
 					release_bytes, true);
 		} else {
@@ -2071,6 +2067,16 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	btrfs_init_log_ctx(&ctx, inode);
 
 	/*
+	 * Set the range to full if the NO_HOLES feature is not enabled.
+	 * This is to avoid missing file extent items representing holes after
+	 * replaying the log.
+	 */
+	if (!btrfs_fs_incompat(fs_info, NO_HOLES)) {
+		start = 0;
+		end = LLONG_MAX;
+	}
+
+	/*
 	 * We write the dirty pages in the range and wait until they complete
 	 * out of the ->i_mutex. If so, we can flush the dirty pages by
 	 * multi-task, and make the performance up.  See
@@ -2092,19 +2098,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	atomic_inc(&root->log_batch);
 
 	/*
-	 * If the inode needs a full sync, make sure we use a full range to
-	 * avoid log tree corruption, due to hole detection racing with ordered
-	 * extent completion for adjacent ranges, and assertion failures during
-	 * hole detection. Do this while holding the inode lock, to avoid races
-	 * with other tasks.
-	 */
-	if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
-		     &BTRFS_I(inode)->runtime_flags)) {
-		start = 0;
-		end = LLONG_MAX;
-	}
-
-	/*
 	 * Before we acquired the inode's lock, someone may have dirtied more
 	 * pages in the target range. We need to make sure that writeback for
 	 * any such pages does not start while we are logging the inode, because
@@ -2124,6 +2117,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	 */
 	ret = start_ordered_ops(inode, start, end);
 	if (ret) {
+		up_write(&BTRFS_I(inode)->dio_sem);
 		inode_unlock(inode);
 		goto out;
 	}
@@ -2486,6 +2480,11 @@ static int btrfs_insert_clone_extent(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(leaf);
 	btrfs_release_path(path);
 
+	ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode),
+			clone_info->file_offset, clone_len);
+	if (ret)
+		return ret;
+
 	/* If it's a hole, nothing more needs to be done. */
 	if (clone_info->disk_offset == 0)
 		return 0;
@@ -2596,6 +2595,24 @@ int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path,
 				btrfs_abort_transaction(trans, ret);
 				break;
 			}
+		} else if (!clone_info && cur_offset < drop_end) {
+			/*
+			 * We are past the i_size here, but since we didn't
+			 * insert holes we need to clear the mapped area so we
+			 * know to not set disk_i_size in this area until a new
+			 * file extent is inserted here.
+			 */
+			ret = btrfs_inode_clear_file_extent_range(BTRFS_I(inode),
+					cur_offset, drop_end - cur_offset);
+			if (ret) {
+				/*
+				 * We couldn't clear our area, so we could
+				 * presumably adjust up and corrupt the fs, so
+				 * we need to abort.
+				 */
+				btrfs_abort_transaction(trans, ret);
+				break;
+			}
 		}
 
 		if (clone_info && drop_end > clone_info->file_offset) {
@@ -2686,6 +2703,15 @@ int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path,
 			btrfs_abort_transaction(trans, ret);
 			goto out_trans;
 		}
+	} else if (!clone_info && cur_offset < drop_end) {
+		/* See the comment in the loop above for the reasoning here. */
+		ret = btrfs_inode_clear_file_extent_range(BTRFS_I(inode),
+					cur_offset, drop_end - cur_offset);
+		if (ret) {
+			btrfs_abort_transaction(trans, ret);
+			goto out_trans;
+		}
+
 	}
 	if (clone_info) {
 		ret = btrfs_insert_clone_extent(trans, inode, path, clone_info,
@@ -2935,7 +2961,7 @@ static int btrfs_fallocate_update_isize(struct inode *inode,
 
 	inode->i_ctime = current_time(inode);
 	i_size_write(inode, end);
-	btrfs_ordered_update_i_size(inode, end, NULL);
+	btrfs_inode_safe_disk_i_size_write(inode, 0);
 	ret = btrfs_update_inode(trans, root, inode);
 	ret2 = btrfs_end_transaction(trans);
author	Linus Torvalds <torvalds@linux-foundation.org>	2020-03-31 13:00:16 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2020-03-31 13:00:16 -0700
commit	15c981d16d70e8a5be297fa4af07a64ab7e080ed (patch)
tree	9487ba1525d75501cd1a3896dac4e6321efd3a55 /fs/btrfs/file.c
parent	1455c69900c8c6442b182a74087931f4ffb1cac4 (diff)
parent	6ff06729c22ec0b7498d900d79cc88cfb8aceaeb (diff)
download	linux-15c981d16d70e8a5be297fa4af07a64ab7e080ed.tar.bz2