diff options
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 424 |
1 files changed, 256 insertions, 168 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2d9074295d7f..a4cd0221bc8d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2374,7 +2374,7 @@ select_delayed_ref(struct btrfs_delayed_ref_head *head) { struct btrfs_delayed_ref_node *ref; - if (RB_EMPTY_ROOT(&head->ref_tree)) + if (RB_EMPTY_ROOT(&head->ref_tree.rb_root)) return NULL; /* @@ -2387,7 +2387,7 @@ select_delayed_ref(struct btrfs_delayed_ref_head *head) return list_first_entry(&head->ref_add_list, struct btrfs_delayed_ref_node, add_list); - ref = rb_entry(rb_first(&head->ref_tree), + ref = rb_entry(rb_first_cached(&head->ref_tree), struct btrfs_delayed_ref_node, ref_node); ASSERT(list_empty(&ref->add_list)); return ref; @@ -2448,13 +2448,13 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans, spin_unlock(&head->lock); spin_lock(&delayed_refs->lock); spin_lock(&head->lock); - if (!RB_EMPTY_ROOT(&head->ref_tree) || head->extent_op) { + if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root) || head->extent_op) { spin_unlock(&head->lock); spin_unlock(&delayed_refs->lock); return 1; } delayed_refs->num_heads--; - rb_erase(&head->href_node, &delayed_refs->href_root); + rb_erase_cached(&head->href_node, &delayed_refs->href_root); RB_CLEAR_NODE(&head->href_node); spin_unlock(&head->lock); spin_unlock(&delayed_refs->lock); @@ -2502,102 +2502,66 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans, return 0; } -/* - * Returns 0 on success or if called with an already aborted transaction. - * Returns -ENOMEM or -EIO on failure and will abort the transaction. - */ -static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, - unsigned long nr) +static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head( + struct btrfs_trans_handle *trans) +{ + struct btrfs_delayed_ref_root *delayed_refs = + &trans->transaction->delayed_refs; + struct btrfs_delayed_ref_head *head = NULL; + int ret; + + spin_lock(&delayed_refs->lock); + head = btrfs_select_ref_head(delayed_refs); + if (!head) { + spin_unlock(&delayed_refs->lock); + return head; + } + + /* + * Grab the lock that says we are going to process all the refs for + * this head + */ + ret = btrfs_delayed_ref_lock(delayed_refs, head); + spin_unlock(&delayed_refs->lock); + + /* + * We may have dropped the spin lock to get the head mutex lock, and + * that might have given someone else time to free the head. If that's + * true, it has been removed from our list and we can move on. + */ + if (ret == -EAGAIN) + head = ERR_PTR(-EAGAIN); + + return head; +} + +static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans, + struct btrfs_delayed_ref_head *locked_ref, + unsigned long *run_refs) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_delayed_ref_root *delayed_refs; - struct btrfs_delayed_ref_node *ref; - struct btrfs_delayed_ref_head *locked_ref = NULL; struct btrfs_delayed_extent_op *extent_op; - ktime_t start = ktime_get(); - int ret; - unsigned long count = 0; - unsigned long actual_count = 0; + struct btrfs_delayed_ref_node *ref; int must_insert_reserved = 0; + int ret; delayed_refs = &trans->transaction->delayed_refs; - while (1) { - if (!locked_ref) { - if (count >= nr) - break; - spin_lock(&delayed_refs->lock); - locked_ref = btrfs_select_ref_head(trans); - if (!locked_ref) { - spin_unlock(&delayed_refs->lock); - break; - } + lockdep_assert_held(&locked_ref->mutex); + lockdep_assert_held(&locked_ref->lock); - /* grab the lock that says we are going to process - * all the refs for this head */ - ret = btrfs_delayed_ref_lock(trans, locked_ref); - spin_unlock(&delayed_refs->lock); - /* - * we may have dropped the spin lock to get the head - * mutex lock, and that might have given someone else - * time to free the head. If that's true, it has been - * removed from our list and we can move on. - */ - if (ret == -EAGAIN) { - locked_ref = NULL; - count++; - continue; - } - } - - /* - * We need to try and merge add/drops of the same ref since we - * can run into issues with relocate dropping the implicit ref - * and then it being added back again before the drop can - * finish. If we merged anything we need to re-loop so we can - * get a good ref. - * Or we can get node references of the same type that weren't - * merged when created due to bumps in the tree mod seq, and - * we need to merge them to prevent adding an inline extent - * backref before dropping it (triggering a BUG_ON at - * insert_inline_extent_backref()). - */ - spin_lock(&locked_ref->lock); - btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref); - - ref = select_delayed_ref(locked_ref); - - if (ref && ref->seq && + while ((ref = select_delayed_ref(locked_ref))) { + if (ref->seq && btrfs_check_delayed_seq(fs_info, ref->seq)) { spin_unlock(&locked_ref->lock); unselect_delayed_ref_head(delayed_refs, locked_ref); - locked_ref = NULL; - cond_resched(); - count++; - continue; - } - - /* - * We're done processing refs in this ref_head, clean everything - * up and move on to the next ref_head. - */ - if (!ref) { - ret = cleanup_ref_head(trans, locked_ref); - if (ret > 0 ) { - /* We dropped our lock, we need to loop. */ - ret = 0; - continue; - } else if (ret) { - return ret; - } - locked_ref = NULL; - count++; - continue; + return -EAGAIN; } - actual_count++; + (*run_refs)++; ref->in_tree = 0; - rb_erase(&ref->ref_node, &locked_ref->ref_tree); + rb_erase_cached(&ref->ref_node, &locked_ref->ref_tree); RB_CLEAR_NODE(&ref->ref_node); if (!list_empty(&ref->add_list)) list_del(&ref->add_list); @@ -2619,8 +2583,8 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, atomic_dec(&delayed_refs->num_entries); /* - * Record the must-insert_reserved flag before we drop the spin - * lock. + * Record the must_insert_reserved flag before we drop the + * spin lock. */ must_insert_reserved = locked_ref->must_insert_reserved; locked_ref->must_insert_reserved = 0; @@ -2642,10 +2606,90 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, } btrfs_put_delayed_ref(ref); - count++; cond_resched(); + + spin_lock(&locked_ref->lock); + btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref); } + return 0; +} + +/* + * Returns 0 on success or if called with an already aborted transaction. + * Returns -ENOMEM or -EIO on failure and will abort the transaction. + */ +static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, + unsigned long nr) +{ + struct btrfs_fs_info *fs_info = trans->fs_info; + struct btrfs_delayed_ref_root *delayed_refs; + struct btrfs_delayed_ref_head *locked_ref = NULL; + ktime_t start = ktime_get(); + int ret; + unsigned long count = 0; + unsigned long actual_count = 0; + + delayed_refs = &trans->transaction->delayed_refs; + do { + if (!locked_ref) { + locked_ref = btrfs_obtain_ref_head(trans); + if (IS_ERR_OR_NULL(locked_ref)) { + if (PTR_ERR(locked_ref) == -EAGAIN) { + continue; + } else { + break; + } + } + count++; + } + /* + * We need to try and merge add/drops of the same ref since we + * can run into issues with relocate dropping the implicit ref + * and then it being added back again before the drop can + * finish. If we merged anything we need to re-loop so we can + * get a good ref. + * Or we can get node references of the same type that weren't + * merged when created due to bumps in the tree mod seq, and + * we need to merge them to prevent adding an inline extent + * backref before dropping it (triggering a BUG_ON at + * insert_inline_extent_backref()). + */ + spin_lock(&locked_ref->lock); + btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref); + + ret = btrfs_run_delayed_refs_for_head(trans, locked_ref, + &actual_count); + if (ret < 0 && ret != -EAGAIN) { + /* + * Error, btrfs_run_delayed_refs_for_head already + * unlocked everything so just bail out + */ + return ret; + } else if (!ret) { + /* + * Success, perform the usual cleanup of a processed + * head + */ + ret = cleanup_ref_head(trans, locked_ref); + if (ret > 0 ) { + /* We dropped our lock, we need to loop. */ + ret = 0; + continue; + } else if (ret) { + return ret; + } + } + + /* + * Either success case or btrfs_run_delayed_refs_for_head + * returned -EAGAIN, meaning we need to select another head + */ + + locked_ref = NULL; + cond_resched(); + } while ((nr != -1 && count < nr) || locked_ref); + /* * We don't want to include ref heads since we can have empty ref heads * and those will drastically skew our runtime down since we just do @@ -2745,9 +2789,9 @@ u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes) return num_csums; } -int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) +int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_block_rsv *global_rsv; u64 num_heads = trans->transaction->delayed_refs.num_heads_ready; u64 csum_bytes = trans->transaction->delayed_refs.pending_csums; @@ -2782,8 +2826,7 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans, return ret; } -int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) +int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans) { u64 num_entries = atomic_read(&trans->transaction->delayed_refs.num_entries); @@ -2791,14 +2834,14 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, u64 val; smp_mb(); - avg_runtime = fs_info->avg_delayed_ref_runtime; + avg_runtime = trans->fs_info->avg_delayed_ref_runtime; val = num_entries * avg_runtime; if (val >= NSEC_PER_SEC) return 1; if (val >= NSEC_PER_SEC / 2) return 2; - return btrfs_check_space_for_delayed_refs(trans, fs_info); + return btrfs_check_space_for_delayed_refs(trans); } struct async_delayed_refs { @@ -2940,7 +2983,7 @@ again: btrfs_create_pending_block_groups(trans); spin_lock(&delayed_refs->lock); - node = rb_first(&delayed_refs->href_root); + node = rb_first_cached(&delayed_refs->href_root); if (!node) { spin_unlock(&delayed_refs->lock); goto out; @@ -3040,7 +3083,8 @@ static noinline int check_delayed_ref(struct btrfs_root *root, * XXX: We should replace this with a proper search function in the * future. */ - for (node = rb_first(&head->ref_tree); node; node = rb_next(node)) { + for (node = rb_first_cached(&head->ref_tree); node; + node = rb_next(node)) { ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node); /* If it's a shared ref we know a cross reference exists */ if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) { @@ -3139,7 +3183,6 @@ int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset, { struct btrfs_path *path; int ret; - int ret2; path = btrfs_alloc_path(); if (!path) @@ -3151,17 +3194,9 @@ int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset, if (ret && ret != -ENOENT) goto out; - ret2 = check_delayed_ref(root, path, objectid, - offset, bytenr); - } while (ret2 == -EAGAIN); - - if (ret2 && ret2 != -ENOENT) { - ret = ret2; - goto out; - } + ret = check_delayed_ref(root, path, objectid, offset, bytenr); + } while (ret == -EAGAIN); - if (ret != -ENOENT || ret2 != -ENOENT) - ret = 0; out: btrfs_free_path(path); if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) @@ -5284,7 +5319,7 @@ static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, } static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv, - u64 num_bytes, int update_size) + u64 num_bytes, bool update_size) { spin_lock(&block_rsv->lock); block_rsv->reserved += num_bytes; @@ -5316,7 +5351,7 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info, global_rsv->full = 0; spin_unlock(&global_rsv->lock); - block_rsv_add_bytes(dest, num_bytes, 1); + block_rsv_add_bytes(dest, num_bytes, true); return 0; } @@ -5479,7 +5514,7 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info, int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src, struct btrfs_block_rsv *dst, u64 num_bytes, - int update_size) + bool update_size) { int ret; @@ -5539,10 +5574,8 @@ int btrfs_block_rsv_add(struct btrfs_root *root, return 0; ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); - if (!ret) { - block_rsv_add_bytes(block_rsv, num_bytes, 1); - return 0; - } + if (!ret) + block_rsv_add_bytes(block_rsv, num_bytes, true); return ret; } @@ -5587,7 +5620,7 @@ int btrfs_block_rsv_refill(struct btrfs_root *root, ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); if (!ret) { - block_rsv_add_bytes(block_rsv, num_bytes, 0); + block_rsv_add_bytes(block_rsv, num_bytes, false); return 0; } @@ -5629,7 +5662,7 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode, return ret; ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); if (!ret) { - block_rsv_add_bytes(block_rsv, num_bytes, 0); + block_rsv_add_bytes(block_rsv, num_bytes, false); trace_btrfs_space_reservation(root->fs_info, "delalloc", btrfs_ino(inode), num_bytes, 1); @@ -5835,7 +5868,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, BTRFS_RESERVE_FLUSH_ALL); if (ret == -ENOSPC && use_global_rsv) - ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, 1); + ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, true); if (ret && qgroup_num_bytes) btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes); @@ -6399,10 +6432,6 @@ static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache, } else { cache->reserved += num_bytes; space_info->bytes_reserved += num_bytes; - - trace_btrfs_space_reservation(cache->fs_info, - "space_info", space_info->flags, - ram_bytes, 0); space_info->bytes_may_use -= ram_bytes; if (delalloc) cache->delalloc_bytes += num_bytes; @@ -6424,11 +6453,10 @@ static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache, * reserve set to 0 in order to clear the reservation. */ -static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache, - u64 num_bytes, int delalloc) +static void btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache, + u64 num_bytes, int delalloc) { struct btrfs_space_info *space_info = cache->space_info; - int ret = 0; spin_lock(&space_info->lock); spin_lock(&cache->lock); @@ -6441,7 +6469,6 @@ static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache, cache->delalloc_bytes -= num_bytes; spin_unlock(&cache->lock); spin_unlock(&space_info->lock); - return ret; } void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info) { @@ -6925,7 +6952,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, goto out_delayed_unlock; spin_lock(&head->lock); - if (!RB_EMPTY_ROOT(&head->ref_tree)) + if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root)) goto out; if (head->extent_op) { @@ -6946,7 +6973,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, * at this point we have a head with no other entries. Go * ahead and process it. */ - rb_erase(&head->href_node, &delayed_refs->href_root); + rb_erase_cached(&head->href_node, &delayed_refs->href_root); RB_CLEAR_NODE(&head->href_node); atomic_dec(&delayed_refs->num_entries); @@ -8119,6 +8146,19 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (IS_ERR(buf)) return buf; + /* + * Extra safety check in case the extent tree is corrupted and extent + * allocator chooses to use a tree block which is already used and + * locked. + */ + if (buf->lock_owner == current->pid) { + btrfs_err_rl(fs_info, +"tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected", + buf->start, btrfs_header_owner(buf), current->pid); + free_extent_buffer(buf); + return ERR_PTR(-EUCLEAN); + } + btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); btrfs_tree_lock(buf); clean_tree_block(fs_info, buf); @@ -8215,7 +8255,7 @@ try_reserve: static void unuse_block_rsv(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *block_rsv, u32 blocksize) { - block_rsv_add_bytes(block_rsv, blocksize, 0); + block_rsv_add_bytes(block_rsv, blocksize, false); block_rsv_release_bytes(fs_info, block_rsv, NULL, 0, NULL); } @@ -8642,7 +8682,13 @@ skip: parent = 0; } - if (need_account) { + /* + * Reloc tree doesn't contribute to qgroup numbers, and we have + * already accounted them at merge time (replace_path), + * thus we could skip expensive subtree trace here. + */ + if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID && + need_account) { ret = btrfs_qgroup_trace_subtree(trans, next, generation, level - 1); if (ret) { @@ -8763,15 +8809,14 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, if (eb == root->node) { if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) parent = eb->start; - else - BUG_ON(root->root_key.objectid != - btrfs_header_owner(eb)); + else if (root->root_key.objectid != btrfs_header_owner(eb)) + goto owner_mismatch; } else { if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF) parent = path->nodes[level + 1]->start; - else - BUG_ON(root->root_key.objectid != - btrfs_header_owner(path->nodes[level + 1])); + else if (root->root_key.objectid != + btrfs_header_owner(path->nodes[level + 1])) + goto owner_mismatch; } btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); @@ -8779,6 +8824,11 @@ out: wc->refs[level] = 0; wc->flags[level] = 0; return 0; + +owner_mismatch: + btrfs_err_rl(fs_info, "unexpected tree owner, have %llu expect %llu", + btrfs_header_owner(eb), root->root_key.objectid); + return -EUCLEAN; } static noinline int walk_down_tree(struct btrfs_trans_handle *trans, @@ -8832,6 +8882,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, ret = walk_up_proc(trans, root, path, wc); if (ret > 0) return 0; + if (ret < 0) + return ret; if (path->locks[level]) { btrfs_tree_unlock_rw(path->nodes[level], @@ -8875,7 +8927,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int level; bool root_dropped = false; - btrfs_debug(fs_info, "Drop subvolume %llu", root->objectid); + btrfs_debug(fs_info, "Drop subvolume %llu", root->root_key.objectid); path = btrfs_alloc_path(); if (!path) { @@ -9613,6 +9665,7 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info) block_group = btrfs_lookup_first_block_group(info, last); while (block_group) { + wait_block_group_cache_done(block_group); spin_lock(&block_group->lock); if (block_group->iref) break; @@ -10074,7 +10127,7 @@ error: void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans) { struct btrfs_fs_info *fs_info = trans->fs_info; - struct btrfs_block_group_cache *block_group, *tmp; + struct btrfs_block_group_cache *block_group; struct btrfs_root *extent_root = fs_info->extent_root; struct btrfs_block_group_item item; struct btrfs_key key; @@ -10082,7 +10135,10 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans) bool can_flush_pending_bgs = trans->can_flush_pending_bgs; trans->can_flush_pending_bgs = false; - list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) { + while (!list_empty(&trans->new_bgs)) { + block_group = list_first_entry(&trans->new_bgs, + struct btrfs_block_group_cache, + bg_list); if (ret) goto next; @@ -10753,14 +10809,16 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, * We don't want a transaction for this since the discard may take a * substantial amount of time. We don't require that a transaction be * running, but we do need to take a running transaction into account - * to ensure that we're not discarding chunks that were released in - * the current transaction. + * to ensure that we're not discarding chunks that were released or + * allocated in the current transaction. * * Holding the chunks lock will prevent other threads from allocating * or releasing chunks, but it won't prevent a running transaction * from committing and releasing the memory that the pending chunks * list head uses. For that, we need to take a reference to the - * transaction. + * transaction and hold the commit root sem. We only need to hold + * it while performing the free space search since we have already + * held back allocations. */ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 minlen, u64 *trimmed) @@ -10770,6 +10828,10 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, *trimmed = 0; + /* Discard not supported = nothing to do. */ + if (!blk_queue_discard(bdev_get_queue(device->bdev))) + return 0; + /* Not writeable = nothing to do. */ if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) return 0; @@ -10787,9 +10849,13 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, ret = mutex_lock_interruptible(&fs_info->chunk_mutex); if (ret) - return ret; + break; - down_read(&fs_info->commit_root_sem); + ret = down_read_killable(&fs_info->commit_root_sem); + if (ret) { + mutex_unlock(&fs_info->chunk_mutex); + break; + } spin_lock(&fs_info->trans_lock); trans = fs_info->running_transaction; @@ -10797,13 +10863,17 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, refcount_inc(&trans->use_count); spin_unlock(&fs_info->trans_lock); + if (!trans) + up_read(&fs_info->commit_root_sem); + ret = find_free_dev_extent_start(trans, device, minlen, start, &start, &len); - if (trans) + if (trans) { + up_read(&fs_info->commit_root_sem); btrfs_put_transaction(trans); + } if (ret) { - up_read(&fs_info->commit_root_sem); mutex_unlock(&fs_info->chunk_mutex); if (ret == -ENOSPC) ret = 0; @@ -10811,7 +10881,6 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, } ret = btrfs_issue_discard(device->bdev, start, len, &bytes); - up_read(&fs_info->commit_root_sem); mutex_unlock(&fs_info->chunk_mutex); if (ret) @@ -10831,6 +10900,15 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, return ret; } +/* + * Trim the whole filesystem by: + * 1) trimming the free space in each block group + * 2) trimming the unallocated space on each device + * + * This will also continue trimming even if a block group or device encounters + * an error. The return value will be the last error, or 0 if nothing bad + * happens. + */ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) { struct btrfs_block_group_cache *cache = NULL; @@ -10840,18 +10918,14 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) u64 start; u64 end; u64 trimmed = 0; - u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy); + u64 bg_failed = 0; + u64 dev_failed = 0; + int bg_ret = 0; + int dev_ret = 0; int ret = 0; - /* - * try to trim all FS space, our block group may start from non-zero. - */ - if (range->len == total_bytes) - cache = btrfs_lookup_first_block_group(fs_info, range->start); - else - cache = btrfs_lookup_block_group(fs_info, range->start); - - while (cache) { + cache = btrfs_lookup_first_block_group(fs_info, range->start); + for (; cache; cache = next_block_group(fs_info, cache)) { if (cache->key.objectid >= (range->start + range->len)) { btrfs_put_block_group(cache); break; @@ -10865,13 +10939,15 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) if (!block_group_cache_done(cache)) { ret = cache_block_group(cache, 0); if (ret) { - btrfs_put_block_group(cache); - break; + bg_failed++; + bg_ret = ret; + continue; } ret = wait_block_group_cache_done(cache); if (ret) { - btrfs_put_block_group(cache); - break; + bg_failed++; + bg_ret = ret; + continue; } } ret = btrfs_trim_block_group(cache, @@ -10882,28 +10958,40 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) trimmed += group_trimmed; if (ret) { - btrfs_put_block_group(cache); - break; + bg_failed++; + bg_ret = ret; + continue; } } - - cache = next_block_group(fs_info, cache); } + if (bg_failed) + btrfs_warn(fs_info, + "failed to trim %llu block group(s), last error %d", + bg_failed, bg_ret); mutex_lock(&fs_info->fs_devices->device_list_mutex); - devices = &fs_info->fs_devices->alloc_list; - list_for_each_entry(device, devices, dev_alloc_list) { + devices = &fs_info->fs_devices->devices; + list_for_each_entry(device, devices, dev_list) { ret = btrfs_trim_free_extents(device, range->minlen, &group_trimmed); - if (ret) + if (ret) { + dev_failed++; + dev_ret = ret; break; + } trimmed += group_trimmed; } mutex_unlock(&fs_info->fs_devices->device_list_mutex); + if (dev_failed) + btrfs_warn(fs_info, + "failed to trim %llu device(s), last error %d", + dev_failed, dev_ret); range->len = trimmed; - return ret; + if (bg_ret) + return bg_ret; + return dev_ret; } /* |