summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/space-info.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-06-02 19:59:25 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-06-02 19:59:25 -0700
commitf3cdc8ae116e27d84e1f33c7a2995960cebb73ac (patch)
treedb3dbbbbf82b76590f601b5caee5de3bef151c4b /fs/btrfs/space-info.c
parent8eeae5bae1239c030ba0b34cac97ebd5e7ec1886 (diff)
parent2166e5edce9ac1edf3b113d6091ef72fcac2d6c4 (diff)
downloadlinux-f3cdc8ae116e27d84e1f33c7a2995960cebb73ac.tar.bz2
Merge tag 'for-5.8-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "Highlights: - speedup dead root detection during orphan cleanup, eg. when there are many deleted subvolumes waiting to be cleaned, the trees are now looked up in radix tree instead of a O(N^2) search - snapshot creation with inherited qgroup will mark the qgroup inconsistent, requires a rescan - send will emit file capabilities after chown, this produces a stream that does not need postprocessing to set the capabilities again - direct io ported to iomap infrastructure, cleaned up and simplified code, notably removing last use of struct buffer_head in btrfs code Core changes: - factor out backreference iteration, to be used by ordinary backreferences and relocation code - improved global block reserve utilization * better logic to serialize requests * increased maximum available for unlink * improved handling on large pages (64K) - direct io cleanups and fixes * simplify layering, where cloned bios were unnecessarily created for some cases * error handling fixes (submit, endio) * remove repair worker thread, used to avoid deadlocks during repair - refactored block group reading code, preparatory work for new type of block group storage that should improve mount time on large filesystems Cleanups: - cleaned up (and slightly sped up) set/get helpers for metadata data structure members - root bit REF_COWS got renamed to SHAREABLE to reflect the that the blocks of the tree get shared either among subvolumes or with the relocation trees Fixes: - when subvolume deletion fails due to ENOSPC, the filesystem is not turned read-only - device scan deals with devices from other filesystems that changed ownership due to overwrite (mkfs) - fix a race between scrub and block group removal/allocation - fix long standing bug of a runaway balance operation, printing the same line to the syslog, caused by a stale status bit on a reloc tree that prevented progress - fix corrupt log due to concurrent fsync of inodes with shared extents - fix space underflow for NODATACOW and buffered writes when it for some reason needs to fallback to COW mode" * tag 'for-5.8-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (133 commits) btrfs: fix space_info bytes_may_use underflow during space cache writeout btrfs: fix space_info bytes_may_use underflow after nocow buffered write btrfs: fix wrong file range cleanup after an error filling dealloc range btrfs: remove redundant local variable in read_block_for_search btrfs: open code key_search btrfs: split btrfs_direct_IO to read and write part btrfs: remove BTRFS_INODE_READDIO_NEED_LOCK fs: remove dio_end_io() btrfs: switch to iomap_dio_rw() for dio iomap: remove lockdep_assert_held() iomap: add a filesystem hook for direct I/O bio submission fs: export generic_file_buffered_read() btrfs: turn space cache writeout failure messages into debug messages btrfs: include error on messages about failure to write space/inode caches btrfs: remove useless 'fail_unlock' label from btrfs_csum_file_blocks() btrfs: do not ignore error from btrfs_next_leaf() when inserting checksums btrfs: make checksum item extension more efficient btrfs: fix corrupt log due to concurrent fsync of inodes with shared extents btrfs: unexport btrfs_compress_set_level() btrfs: simplify iget helpers ...
Diffstat (limited to 'fs/btrfs/space-info.c')
-rw-r--r--fs/btrfs/space-info.c81
1 files changed, 73 insertions, 8 deletions
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index ff17a4420358..41ee88633769 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -626,6 +626,7 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
struct reserve_ticket *ticket = NULL;
struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv;
struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
+ struct btrfs_block_rsv *trans_rsv = &fs_info->trans_block_rsv;
struct btrfs_trans_handle *trans;
u64 bytes_needed;
u64 reclaim_bytes = 0;
@@ -688,6 +689,11 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
spin_lock(&delayed_refs_rsv->lock);
reclaim_bytes += delayed_refs_rsv->reserved;
spin_unlock(&delayed_refs_rsv->lock);
+
+ spin_lock(&trans_rsv->lock);
+ reclaim_bytes += trans_rsv->reserved;
+ spin_unlock(&trans_rsv->lock);
+
if (reclaim_bytes >= bytes_needed)
goto commit;
bytes_needed -= reclaim_bytes;
@@ -856,6 +862,34 @@ static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
!test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
}
+static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info,
+ struct reserve_ticket *ticket)
+{
+ struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
+ u64 min_bytes;
+
+ if (global_rsv->space_info != space_info)
+ return false;
+
+ spin_lock(&global_rsv->lock);
+ min_bytes = div_factor(global_rsv->size, 1);
+ if (global_rsv->reserved < min_bytes + ticket->bytes) {
+ spin_unlock(&global_rsv->lock);
+ return false;
+ }
+ global_rsv->reserved -= ticket->bytes;
+ ticket->bytes = 0;
+ list_del_init(&ticket->list);
+ wake_up(&ticket->wait);
+ space_info->tickets_id++;
+ if (global_rsv->reserved < global_rsv->size)
+ global_rsv->full = 0;
+ spin_unlock(&global_rsv->lock);
+
+ return true;
+}
+
/*
* maybe_fail_all_tickets - we've exhausted our flushing, start failing tickets
* @fs_info - fs_info for this fs
@@ -888,6 +922,10 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
ticket = list_first_entry(&space_info->tickets,
struct reserve_ticket, list);
+ if (ticket->steal &&
+ steal_from_global_rsv(fs_info, space_info, ticket))
+ return true;
+
/*
* may_commit_transaction will avoid committing the transaction
* if it doesn't feel like the space reclaimed by the commit
@@ -1104,6 +1142,7 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
switch (flush) {
case BTRFS_RESERVE_FLUSH_ALL:
+ case BTRFS_RESERVE_FLUSH_ALL_STEAL:
wait_reserve_ticket(fs_info, space_info, ticket);
break;
case BTRFS_RESERVE_FLUSH_LIMIT:
@@ -1125,11 +1164,17 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
ret = ticket->error;
if (ticket->bytes || ticket->error) {
/*
- * Need to delete here for priority tickets. For regular tickets
- * either the async reclaim job deletes the ticket from the list
- * or we delete it ourselves at wait_reserve_ticket().
+ * We were a priority ticket, so we need to delete ourselves
+ * from the list. Because we could have other priority tickets
+ * behind us that require less space, run
+ * btrfs_try_granting_tickets() to see if their reservations can
+ * now be made.
*/
- remove_ticket(space_info, ticket);
+ if (!list_empty(&ticket->list)) {
+ remove_ticket(space_info, ticket);
+ btrfs_try_granting_tickets(fs_info, space_info);
+ }
+
if (!ret)
ret = -ENOSPC;
}
@@ -1145,6 +1190,16 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
return ret;
}
+/*
+ * This returns true if this flush state will go through the ordinary flushing
+ * code.
+ */
+static inline bool is_normal_flushing(enum btrfs_reserve_flush_enum flush)
+{
+ return (flush == BTRFS_RESERVE_FLUSH_ALL) ||
+ (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL);
+}
+
/**
* reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
* @root - the root we're allocating for
@@ -1175,8 +1230,17 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
spin_lock(&space_info->lock);
ret = -ENOSPC;
used = btrfs_space_info_used(space_info, true);
- pending_tickets = !list_empty(&space_info->tickets) ||
- !list_empty(&space_info->priority_tickets);
+
+ /*
+ * We don't want NO_FLUSH allocations to jump everybody, they can
+ * generally handle ENOSPC in a different way, so treat them the same as
+ * normal flushers when it comes to skipping pending tickets.
+ */
+ if (is_normal_flushing(flush) || (flush == BTRFS_RESERVE_NO_FLUSH))
+ pending_tickets = !list_empty(&space_info->tickets) ||
+ !list_empty(&space_info->priority_tickets);
+ else
+ pending_tickets = !list_empty(&space_info->priority_tickets);
/*
* Carry on if we have enough space (short-circuit) OR call
@@ -1198,12 +1262,13 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
* the list and we will do our own flushing further down.
*/
if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
- ASSERT(space_info->reclaim_size >= 0);
ticket.bytes = orig_bytes;
ticket.error = 0;
space_info->reclaim_size += ticket.bytes;
init_waitqueue_head(&ticket.wait);
- if (flush == BTRFS_RESERVE_FLUSH_ALL) {
+ ticket.steal = (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL);
+ if (flush == BTRFS_RESERVE_FLUSH_ALL ||
+ flush == BTRFS_RESERVE_FLUSH_ALL_STEAL) {
list_add_tail(&ticket.list, &space_info->tickets);
if (!space_info->flush) {
space_info->flush = 1;