author    | Linus Torvalds <torvalds@linux-foundation.org> | 2020-03-31 13:00:16 -0700
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-03-31 13:00:16 -0700
commit    | 15c981d16d70e8a5be297fa4af07a64ab7e080ed
tree      | 9487ba1525d75501cd1a3896dac4e6321efd3a55 /fs/btrfs/locking.c
parent    | 1455c69900c8c6442b182a74087931f4ffb1cac4
parent    | 6ff06729c22ec0b7498d900d79cc88cfb8aceaeb
download  | linux-15c981d16d70e8a5be297fa4af07a64ab7e080ed.tar.bz2
Merge tag 'for-5.7-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"A number of core changes that make things work better in general, code
is simpler and cleaner.
Core changes:
- per-inode file extent tree, for in memory tracking of contiguous
extent ranges to make sure i_size adjustments are accurate
- tree root structures are protected by reference counts, replacing
SRCU that did not cover some cases
- leak detector for tree root structures
- per-transaction pinned extent tracking
- buffer heads are replaced by bios for super block access
- speedup of extent back reference resolution; in an example test
scenario the runtime of send went down from an hour to minutes
- factor out the locking scheme used for subvolume writer and NOCOW
exclusion, abstracted as a DREW lock, double reader-writer exclusion
(allows either readers or writers, but not both at once)
- cleanup and abstract extent allocation policies, preparation for
zoned device support
- make reflink/clone_range work on inline extents
- add more cancellation points for relocation, improving the long
response time of 'balance cancel'
- add page migration callback for data pages
- switch to guid for uuids, with additional cleanups of the interface
- make ranged full fsyncs more efficient
- removal of obsolete ioctl flag BTRFS_SUBVOL_CREATE_ASYNC
- remove b-tree readahead from delayed refs paths, avoiding seeks and
reads of unnecessary blocks
Features:
- v2 of the ioctl to delete subvolumes, allowing deletion by id and
leaving room for future extensions
Fixes:
- fix qgroup rescan worker that could block umount
- fix crash during unmount due to race with delayed inode workers
- fix delalloc flushing logic that could create unnecessary chunks
under heavy load
- fix missing file extent item for hole after ranged fsync
- several fixes in relocation error handling
Other:
- more documentation of relocation, device replace, space
reservations
- many random cleanups"
* tag 'for-5.7-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (210 commits)
btrfs: fix missing semaphore unlock in btrfs_sync_file
btrfs: use nofs allocations for running delayed items
btrfs: sysfs: Use scnprintf() instead of snprintf()
btrfs: do not resolve backrefs for roots that are being deleted
btrfs: track reloc roots based on their commit root bytenr
btrfs: restart relocate_tree_blocks properly
btrfs: reloc: reorder reservation before root selection
btrfs: do not readahead in build_backref_tree
btrfs: do not use readahead for running delayed refs
btrfs: Remove async_transid from btrfs_mksubvol/create_subvol/create_snapshot
btrfs: Remove transid argument from btrfs_ioctl_snap_create_transid
btrfs: Remove BTRFS_SUBVOL_CREATE_ASYNC support
btrfs: kill the subvol_srcu
btrfs: make btrfs_cleanup_fs_roots use the radix tree lock
btrfs: don't take an extra root ref at allocation time
btrfs: hold a ref on the root on the dead roots list
btrfs: make inodes hold a ref on their roots
btrfs: move the root freeing stuff into btrfs_put_root
btrfs: move ino_cache_inode dropping out of btrfs_free_fs_root
btrfs: make the extent buffer leak check per fs info
...
Diffstat (limited to 'fs/btrfs/locking.c')
-rw-r--r-- | fs/btrfs/locking.c | 135
1 file changed, 135 insertions, 0 deletions
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 571c4826c428..fb647d8cf527 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -523,3 +523,138 @@ void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
 		path->locks[i] = 0;
 	}
 }
+
+/*
+ * Loop around taking references on and locking the root node of the tree until
+ * we end up with a lock on the root node.
+ *
+ * Return: root extent buffer with write lock held
+ */
+struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
+{
+	struct extent_buffer *eb;
+
+	while (1) {
+		eb = btrfs_root_node(root);
+		btrfs_tree_lock(eb);
+		if (eb == root->node)
+			break;
+		btrfs_tree_unlock(eb);
+		free_extent_buffer(eb);
+	}
+	return eb;
+}
+
+/*
+ * Loop around taking references on and locking the root node of the tree until
+ * we end up with a lock on the root node.
+ *
+ * Return: root extent buffer with read lock held
+ */
+struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
+{
+	struct extent_buffer *eb;
+
+	while (1) {
+		eb = btrfs_root_node(root);
+		btrfs_tree_read_lock(eb);
+		if (eb == root->node)
+			break;
+		btrfs_tree_read_unlock(eb);
+		free_extent_buffer(eb);
+	}
+	return eb;
+}
+
+/*
+ * DREW locks
+ * ==========
+ *
+ * DREW stands for double-reader-writer-exclusion lock. It's used in situations
+ * where you want to provide A-B exclusion but not AA or BB.
+ *
+ * The current implementation gives more priority to readers. If a reader
+ * and a writer both race to acquire their respective sides of the lock,
+ * the writer yields its lock as soon as it detects a concurrent reader.
+ * Additionally, if there are pending readers, no new writers are allowed
+ * to come in and acquire the lock.
+ */
+
+int btrfs_drew_lock_init(struct btrfs_drew_lock *lock)
+{
+	int ret;
+
+	ret = percpu_counter_init(&lock->writers, 0, GFP_KERNEL);
+	if (ret)
+		return ret;
+
+	atomic_set(&lock->readers, 0);
+	init_waitqueue_head(&lock->pending_readers);
+	init_waitqueue_head(&lock->pending_writers);
+
+	return 0;
+}
+
+void btrfs_drew_lock_destroy(struct btrfs_drew_lock *lock)
+{
+	percpu_counter_destroy(&lock->writers);
+}
+
+/* Return true if acquisition is successful, false otherwise */
+bool btrfs_drew_try_write_lock(struct btrfs_drew_lock *lock)
+{
+	if (atomic_read(&lock->readers))
+		return false;
+
+	percpu_counter_inc(&lock->writers);
+
+	/* Ensure writers count is updated before we check for pending readers */
+	smp_mb();
+	if (atomic_read(&lock->readers)) {
+		btrfs_drew_write_unlock(lock);
+		return false;
+	}
+
+	return true;
+}
+
+void btrfs_drew_write_lock(struct btrfs_drew_lock *lock)
+{
+	while (true) {
+		if (btrfs_drew_try_write_lock(lock))
+			return;
+		wait_event(lock->pending_writers, !atomic_read(&lock->readers));
+	}
+}
+
+void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock)
+{
+	percpu_counter_dec(&lock->writers);
+	cond_wake_up(&lock->pending_readers);
+}
+
+void btrfs_drew_read_lock(struct btrfs_drew_lock *lock)
+{
+	atomic_inc(&lock->readers);
+
+	/*
+	 * Ensure the pending reader count is perceived BEFORE this reader
+	 * goes to sleep in case of active writers. This guarantees new writers
+	 * won't be allowed and that the current reader will be woken up when
+	 * the last active writer finishes its job.
+	 */
+	smp_mb__after_atomic();
+
+	wait_event(lock->pending_readers,
+		   percpu_counter_sum(&lock->writers) == 0);
+}
+
+void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock)
+{
+	/*
+	 * atomic_dec_and_test() implies a full barrier, so woken up writers
+	 * are guaranteed to see the decrement.
+	 */
+	if (atomic_dec_and_test(&lock->readers))
+		wake_up(&lock->pending_writers);
+}
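
A note on the two root-locking helpers above: btrfs_root_node() returns a referenced copy of the current root node pointer, but the root can be swapped (for example by a COW of the root) between taking that reference and acquiring the tree lock, hence the re-check of eb == root->node and the retry loop. Below is a minimal sketch of how a caller might use the write-lock variant; only btrfs_lock_root_node(), btrfs_tree_unlock() and free_extent_buffer() come from the code above, the surrounding function is hypothetical.

/*
 * Hypothetical caller of btrfs_lock_root_node(); everything except the
 * btrfs_* and free_extent_buffer() calls is illustrative.
 */
static void example_use_locked_root(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	/* Referenced and write-locked view of the current root node */
	eb = btrfs_lock_root_node(root);

	/* ... operate on the root node; it cannot be swapped underneath us ... */

	/* Release the lock and the reference taken inside the helper */
	btrfs_tree_unlock(eb);
	free_extent_buffer(eb);
}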
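The DREW lock added above provides A-B exclusion without limiting concurrency within each class; per the merge message it abstracts the scheme used for subvolume writer vs. NOCOW exclusion. The following is a minimal, hypothetical pairing of the API: struct example_ctx and the example_* functions are illustrative only, just the btrfs_drew_* calls come from the diff.

/* Hypothetical embedding of the DREW lock; initialized once during setup. */
struct example_ctx {
	struct btrfs_drew_lock lock;
};

static int example_init(struct example_ctx *ctx)
{
	/* Can fail: the per-cpu writer counter needs an allocation */
	return btrfs_drew_lock_init(&ctx->lock);
}

static void example_writer_side(struct example_ctx *ctx)
{
	/* Excludes all readers; other writers may still run concurrently */
	btrfs_drew_write_lock(&ctx->lock);
	/* ... operation of class A ... */
	btrfs_drew_write_unlock(&ctx->lock);
}

static void example_reader_side(struct example_ctx *ctx)
{
	/* Excludes all writers; other readers may still run concurrently */
	btrfs_drew_read_lock(&ctx->lock);
	/* ... operation of class B ... */
	btrfs_drew_read_unlock(&ctx->lock);
}

static void example_destroy(struct example_ctx *ctx)
{
	btrfs_drew_lock_destroy(&ctx->lock);
}

A non-blocking writer would instead call btrfs_drew_try_write_lock() and fall back to some other path if it returns false.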