diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-02 19:59:25 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-02 19:59:25 -0700 |
commit | f3cdc8ae116e27d84e1f33c7a2995960cebb73ac (patch) | |
tree | db3dbbbbf82b76590f601b5caee5de3bef151c4b /fs/btrfs/space-info.c | |
parent | 8eeae5bae1239c030ba0b34cac97ebd5e7ec1886 (diff) | |
parent | 2166e5edce9ac1edf3b113d6091ef72fcac2d6c4 (diff) | |
download | linux-f3cdc8ae116e27d84e1f33c7a2995960cebb73ac.tar.bz2 |
Merge tag 'for-5.8-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"Highlights:
- speedup dead root detection during orphan cleanup, eg. when there
are many deleted subvolumes waiting to be cleaned, the trees are
now looked up in radix tree instead of a O(N^2) search
- snapshot creation with inherited qgroup will mark the qgroup
inconsistent, requires a rescan
- send will emit file capabilities after chown, this produces a
stream that does not need postprocessing to set the capabilities
again
- direct io ported to iomap infrastructure, cleaned up and simplified
code, notably removing last use of struct buffer_head in btrfs code
Core changes:
- factor out backreference iteration, to be used by ordinary
backreferences and relocation code
- improved global block reserve utilization
* better logic to serialize requests
* increased maximum available for unlink
* improved handling on large pages (64K)
- direct io cleanups and fixes
* simplify layering, where cloned bios were unnecessarily created
for some cases
* error handling fixes (submit, endio)
* remove repair worker thread, used to avoid deadlocks during
repair
- refactored block group reading code, preparatory work for new type
of block group storage that should improve mount time on large
filesystems
Cleanups:
- cleaned up (and slightly sped up) set/get helpers for metadata data
structure members
- root bit REF_COWS got renamed to SHAREABLE to reflect the that the
blocks of the tree get shared either among subvolumes or with the
relocation trees
Fixes:
- when subvolume deletion fails due to ENOSPC, the filesystem is not
turned read-only
- device scan deals with devices from other filesystems that changed
ownership due to overwrite (mkfs)
- fix a race between scrub and block group removal/allocation
- fix long standing bug of a runaway balance operation, printing the
same line to the syslog, caused by a stale status bit on a reloc
tree that prevented progress
- fix corrupt log due to concurrent fsync of inodes with shared
extents
- fix space underflow for NODATACOW and buffered writes when it for
some reason needs to fallback to COW mode"
* tag 'for-5.8-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (133 commits)
btrfs: fix space_info bytes_may_use underflow during space cache writeout
btrfs: fix space_info bytes_may_use underflow after nocow buffered write
btrfs: fix wrong file range cleanup after an error filling dealloc range
btrfs: remove redundant local variable in read_block_for_search
btrfs: open code key_search
btrfs: split btrfs_direct_IO to read and write part
btrfs: remove BTRFS_INODE_READDIO_NEED_LOCK
fs: remove dio_end_io()
btrfs: switch to iomap_dio_rw() for dio
iomap: remove lockdep_assert_held()
iomap: add a filesystem hook for direct I/O bio submission
fs: export generic_file_buffered_read()
btrfs: turn space cache writeout failure messages into debug messages
btrfs: include error on messages about failure to write space/inode caches
btrfs: remove useless 'fail_unlock' label from btrfs_csum_file_blocks()
btrfs: do not ignore error from btrfs_next_leaf() when inserting checksums
btrfs: make checksum item extension more efficient
btrfs: fix corrupt log due to concurrent fsync of inodes with shared extents
btrfs: unexport btrfs_compress_set_level()
btrfs: simplify iget helpers
...
Diffstat (limited to 'fs/btrfs/space-info.c')
-rw-r--r-- | fs/btrfs/space-info.c | 81 |
1 files changed, 73 insertions, 8 deletions
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index ff17a4420358..41ee88633769 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -626,6 +626,7 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info, struct reserve_ticket *ticket = NULL; struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv; struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv; + struct btrfs_block_rsv *trans_rsv = &fs_info->trans_block_rsv; struct btrfs_trans_handle *trans; u64 bytes_needed; u64 reclaim_bytes = 0; @@ -688,6 +689,11 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info, spin_lock(&delayed_refs_rsv->lock); reclaim_bytes += delayed_refs_rsv->reserved; spin_unlock(&delayed_refs_rsv->lock); + + spin_lock(&trans_rsv->lock); + reclaim_bytes += trans_rsv->reserved; + spin_unlock(&trans_rsv->lock); + if (reclaim_bytes >= bytes_needed) goto commit; bytes_needed -= reclaim_bytes; @@ -856,6 +862,34 @@ static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info, !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state)); } +static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *space_info, + struct reserve_ticket *ticket) +{ + struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; + u64 min_bytes; + + if (global_rsv->space_info != space_info) + return false; + + spin_lock(&global_rsv->lock); + min_bytes = div_factor(global_rsv->size, 1); + if (global_rsv->reserved < min_bytes + ticket->bytes) { + spin_unlock(&global_rsv->lock); + return false; + } + global_rsv->reserved -= ticket->bytes; + ticket->bytes = 0; + list_del_init(&ticket->list); + wake_up(&ticket->wait); + space_info->tickets_id++; + if (global_rsv->reserved < global_rsv->size) + global_rsv->full = 0; + spin_unlock(&global_rsv->lock); + + return true; +} + /* * maybe_fail_all_tickets - we've exhausted our flushing, start failing tickets * @fs_info - fs_info for this fs @@ -888,6 +922,10 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info, ticket = list_first_entry(&space_info->tickets, struct reserve_ticket, list); + if (ticket->steal && + steal_from_global_rsv(fs_info, space_info, ticket)) + return true; + /* * may_commit_transaction will avoid committing the transaction * if it doesn't feel like the space reclaimed by the commit @@ -1104,6 +1142,7 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info, switch (flush) { case BTRFS_RESERVE_FLUSH_ALL: + case BTRFS_RESERVE_FLUSH_ALL_STEAL: wait_reserve_ticket(fs_info, space_info, ticket); break; case BTRFS_RESERVE_FLUSH_LIMIT: @@ -1125,11 +1164,17 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info, ret = ticket->error; if (ticket->bytes || ticket->error) { /* - * Need to delete here for priority tickets. For regular tickets - * either the async reclaim job deletes the ticket from the list - * or we delete it ourselves at wait_reserve_ticket(). + * We were a priority ticket, so we need to delete ourselves + * from the list. Because we could have other priority tickets + * behind us that require less space, run + * btrfs_try_granting_tickets() to see if their reservations can + * now be made. */ - remove_ticket(space_info, ticket); + if (!list_empty(&ticket->list)) { + remove_ticket(space_info, ticket); + btrfs_try_granting_tickets(fs_info, space_info); + } + if (!ret) ret = -ENOSPC; } @@ -1145,6 +1190,16 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info, return ret; } +/* + * This returns true if this flush state will go through the ordinary flushing + * code. + */ +static inline bool is_normal_flushing(enum btrfs_reserve_flush_enum flush) +{ + return (flush == BTRFS_RESERVE_FLUSH_ALL) || + (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL); +} + /** * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space * @root - the root we're allocating for @@ -1175,8 +1230,17 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info, spin_lock(&space_info->lock); ret = -ENOSPC; used = btrfs_space_info_used(space_info, true); - pending_tickets = !list_empty(&space_info->tickets) || - !list_empty(&space_info->priority_tickets); + + /* + * We don't want NO_FLUSH allocations to jump everybody, they can + * generally handle ENOSPC in a different way, so treat them the same as + * normal flushers when it comes to skipping pending tickets. + */ + if (is_normal_flushing(flush) || (flush == BTRFS_RESERVE_NO_FLUSH)) + pending_tickets = !list_empty(&space_info->tickets) || + !list_empty(&space_info->priority_tickets); + else + pending_tickets = !list_empty(&space_info->priority_tickets); /* * Carry on if we have enough space (short-circuit) OR call @@ -1198,12 +1262,13 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info, * the list and we will do our own flushing further down. */ if (ret && flush != BTRFS_RESERVE_NO_FLUSH) { - ASSERT(space_info->reclaim_size >= 0); ticket.bytes = orig_bytes; ticket.error = 0; space_info->reclaim_size += ticket.bytes; init_waitqueue_head(&ticket.wait); - if (flush == BTRFS_RESERVE_FLUSH_ALL) { + ticket.steal = (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL); + if (flush == BTRFS_RESERVE_FLUSH_ALL || + flush == BTRFS_RESERVE_FLUSH_ALL_STEAL) { list_add_tail(&ticket.list, &space_info->tickets); if (!space_info->flush) { space_info->flush = 1; |