author    | Linus Torvalds <torvalds@linux-foundation.org> | 2020-03-31 13:00:16 -0700
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-03-31 13:00:16 -0700
commit    | 15c981d16d70e8a5be297fa4af07a64ab7e080ed
tree      | 9487ba1525d75501cd1a3896dac4e6321efd3a55 /fs/btrfs/locking.c
parent    | 1455c69900c8c6442b182a74087931f4ffb1cac4
parent    | 6ff06729c22ec0b7498d900d79cc88cfb8aceaeb
download  | linux-15c981d16d70e8a5be297fa4af07a64ab7e080ed.tar.bz2
Merge tag 'for-5.7-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"A number of core changes that make things work better in general, code
is simpler and cleaner.
Core changes:
- per-inode file extent tree, for in memory tracking of contiguous
extent ranges to make sure i_size adjustments are accurate
- tree root structures are protected by reference counts, replacing
SRCU that did not cover some cases
- leak detector for tree root structures
- per-transaction pinned extent tracking
- buffer heads are replaced by bios for super block access
- speedup of extent back reference resolution; in an example test
scenario the runtime of send went down from an hour to minutes
- factor out the locking scheme used for subvolume writer and NOCOW
exclusion, abstracted as a DREW lock, double reader-writer exclusion
(allows either readers or writers, but not both at once)
- cleanup and abstract extent allocation policies, preparation for
zoned device support
- make reflink/clone_range work on inline extents
- add more cancellation points for relocation, improving the long
response time of 'balance cancel'
- add page migration callback for data pages
- switch to guid for uuids, with additional cleanups of the interface
- make ranged full fsyncs more efficient
- removal of obsolete ioctl flag BTRFS_SUBVOL_CREATE_ASYNC
- remove b-tree readahead from delayed refs paths, avoiding seeks and
reads of unnecessary blocks
Features:
- v2 of the ioctl to delete subvolumes, allowing deletion by id and
leaving room for future extensions
Fixes:
- fix qgroup rescan worker that could block umount
- fix crash during unmount due to race with delayed inode workers
- fix delalloc flushing logic that could create unnecessary chunks
under heavy load
- fix missing file extent item for hole after ranged fsync
- several fixes in relocation error handling
Other:
- more documentation of relocation, device replace, space
reservations
- many random cleanups"
* tag 'for-5.7-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (210 commits)
btrfs: fix missing semaphore unlock in btrfs_sync_file
btrfs: use nofs allocations for running delayed items
btrfs: sysfs: Use scnprintf() instead of snprintf()
btrfs: do not resolve backrefs for roots that are being deleted
btrfs: track reloc roots based on their commit root bytenr
btrfs: restart relocate_tree_blocks properly
btrfs: reloc: reorder reservation before root selection
btrfs: do not readahead in build_backref_tree
btrfs: do not use readahead for running delayed refs
btrfs: Remove async_transid from btrfs_mksubvol/create_subvol/create_snapshot
btrfs: Remove transid argument from btrfs_ioctl_snap_create_transid
btrfs: Remove BTRFS_SUBVOL_CREATE_ASYNC support
btrfs: kill the subvol_srcu
btrfs: make btrfs_cleanup_fs_roots use the radix tree lock
btrfs: don't take an extra root ref at allocation time
btrfs: hold a ref on the root on the dead roots list
btrfs: make inodes hold a ref on their roots
btrfs: move the root freeing stuff into btrfs_put_root
btrfs: move ino_cache_inode dropping out of btrfs_free_fs_root
btrfs: make the extent buffer leak check per fs info
...
Diffstat (limited to 'fs/btrfs/locking.c')
-rw-r--r-- | fs/btrfs/locking.c | 135
1 file changed, 135 insertions, 0 deletions
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 571c4826c428..fb647d8cf527 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -523,3 +523,138 @@ void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
 		path->locks[i] = 0;
 	}
 }
+
+/*
+ * Loop around taking references on and locking the root node of the tree until
+ * we end up with a lock on the root node.
+ *
+ * Return: root extent buffer with write lock held
+ */
+struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
+{
+	struct extent_buffer *eb;
+
+	while (1) {
+		eb = btrfs_root_node(root);
+		btrfs_tree_lock(eb);
+		if (eb == root->node)
+			break;
+		btrfs_tree_unlock(eb);
+		free_extent_buffer(eb);
+	}
+	return eb;
+}
+
+/*
+ * Loop around taking references on and locking the root node of the tree until
+ * we end up with a lock on the root node.
+ *
+ * Return: root extent buffer with read lock held
+ */
+struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
+{
+	struct extent_buffer *eb;
+
+	while (1) {
+		eb = btrfs_root_node(root);
+		btrfs_tree_read_lock(eb);
+		if (eb == root->node)
+			break;
+		btrfs_tree_read_unlock(eb);
+		free_extent_buffer(eb);
+	}
+	return eb;
+}
+
+/*
+ * DREW locks
+ * ==========
+ *
+ * DREW stands for double-reader-writer-exclusion lock. It's used in situations
+ * where you want to provide A-B exclusion but not AA or BB.
+ *
+ * The current implementation gives more priority to readers. If a reader
+ * and a writer both race to acquire their respective sides of the lock,
+ * the writer yields its lock as soon as it detects a concurrent reader.
+ * Additionally, if there are pending readers, no new writers are allowed
+ * to come in and acquire the lock.
+ */
+
+int btrfs_drew_lock_init(struct btrfs_drew_lock *lock)
+{
+	int ret;
+
+	ret = percpu_counter_init(&lock->writers, 0, GFP_KERNEL);
+	if (ret)
+		return ret;
+
+	atomic_set(&lock->readers, 0);
+	init_waitqueue_head(&lock->pending_readers);
+	init_waitqueue_head(&lock->pending_writers);
+
+	return 0;
+}
+
+void btrfs_drew_lock_destroy(struct btrfs_drew_lock *lock)
+{
+	percpu_counter_destroy(&lock->writers);
+}
+
+/* Return true if acquisition is successful, false otherwise */
+bool btrfs_drew_try_write_lock(struct btrfs_drew_lock *lock)
+{
+	if (atomic_read(&lock->readers))
+		return false;
+
+	percpu_counter_inc(&lock->writers);
+
+	/* Ensure writers count is updated before we check for pending readers */
+	smp_mb();
+	if (atomic_read(&lock->readers)) {
+		btrfs_drew_write_unlock(lock);
+		return false;
+	}
+
+	return true;
+}
+
+void btrfs_drew_write_lock(struct btrfs_drew_lock *lock)
+{
+	while (true) {
+		if (btrfs_drew_try_write_lock(lock))
+			return;
+		wait_event(lock->pending_writers, !atomic_read(&lock->readers));
+	}
+}
+
+void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock)
+{
+	percpu_counter_dec(&lock->writers);
+	cond_wake_up(&lock->pending_readers);
+}
+
+void btrfs_drew_read_lock(struct btrfs_drew_lock *lock)
+{
+	atomic_inc(&lock->readers);
+
+	/*
+	 * Ensure the pending reader count is perceived BEFORE this reader
+	 * goes to sleep in case of active writers. This guarantees new writers
+	 * won't be allowed and that the current reader will be woken up when
+	 * the last active writer finishes its job.
+	 */
+	smp_mb__after_atomic();
+
+	wait_event(lock->pending_readers,
+		   percpu_counter_sum(&lock->writers) == 0);
+}
+
+void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock)
+{
+	/*
+	 * atomic_dec_and_test() implies a full barrier, so woken up writers
+	 * are guaranteed to see the decrement.
+	 */
+	if (atomic_dec_and_test(&lock->readers))
+		wake_up(&lock->pending_writers);
+}
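
A note on the two root-locking helpers above: btrfs_root_node() returns a referenced copy of the current root node pointer, but the root can be swapped (for example by a COW of the root) between taking that reference and acquiring the tree lock, hence the re-check of eb == root->node and the retry loop. Below is a minimal sketch of how a caller might use the write-lock variant; only btrfs_lock_root_node(), btrfs_tree_unlock() and free_extent_buffer() come from the code above, the surrounding function is hypothetical.

/*
 * Hypothetical caller of btrfs_lock_root_node(); everything except the
 * btrfs_* and free_extent_buffer() calls is illustrative.
 */
static void example_use_locked_root(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	/* Referenced and write-locked view of the current root node */
	eb = btrfs_lock_root_node(root);

	/* ... operate on the root node; it cannot be swapped underneath us ... */

	/* Release the lock and the reference taken inside the helper */
	btrfs_tree_unlock(eb);
	free_extent_buffer(eb);
}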
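The DREW lock added above provides A-B exclusion without limiting concurrency within each class; per the merge message it abstracts the scheme used for subvolume writer vs. NOCOW exclusion. The following is a minimal, hypothetical pairing of the API: struct example_ctx and the example_* functions are illustrative only, just the btrfs_drew_* calls come from the diff.

/* Hypothetical embedding of the DREW lock; initialized once during setup. */
struct example_ctx {
	struct btrfs_drew_lock lock;
};

static int example_init(struct example_ctx *ctx)
{
	/* Can fail: the per-cpu writer counter needs an allocation */
	return btrfs_drew_lock_init(&ctx->lock);
}

static void example_writer_side(struct example_ctx *ctx)
{
	/* Excludes all readers; other writers may still run concurrently */
	btrfs_drew_write_lock(&ctx->lock);
	/* ... operation of class A ... */
	btrfs_drew_write_unlock(&ctx->lock);
}

static void example_reader_side(struct example_ctx *ctx)
{
	/* Excludes all writers; other readers may still run concurrently */
	btrfs_drew_read_lock(&ctx->lock);
	/* ... operation of class B ... */
	btrfs_drew_read_unlock(&ctx->lock);
}

static void example_destroy(struct example_ctx *ctx)
{
	btrfs_drew_lock_destroy(&ctx->lock);
}

A non-blocking writer would instead call btrfs_drew_try_write_lock() and fall back to some other path if it returns false.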