diff options
Diffstat (limited to 'fs/btrfs/relocation.c')
-rw-r--r-- | fs/btrfs/relocation.c | 674 |
1 files changed, 315 insertions, 359 deletions
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 995d4b8b1cfd..d35936c934ab 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -9,6 +9,7 @@ #include <linux/blkdev.h> #include <linux/rbtree.h> #include <linux/slab.h> +#include <linux/error-injection.h> #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -22,6 +23,54 @@ #include "print-tree.h" #include "delalloc-space.h" #include "block-group.h" +#include "backref.h" + +/* + * Relocation overview + * + * [What does relocation do] + * + * The objective of relocation is to relocate all extents of the target block + * group to other block groups. + * This is utilized by resize (shrink only), profile converting, compacting + * space, or balance routine to spread chunks over devices. + * + * Before | After + * ------------------------------------------------------------------ + * BG A: 10 data extents | BG A: deleted + * BG B: 2 data extents | BG B: 10 data extents (2 old + 8 relocated) + * BG C: 1 extents | BG C: 3 data extents (1 old + 2 relocated) + * + * [How does relocation work] + * + * 1. Mark the target block group read-only + * New extents won't be allocated from the target block group. + * + * 2.1 Record each extent in the target block group + * To build a proper map of extents to be relocated. + * + * 2.2 Build data reloc tree and reloc trees + * Data reloc tree will contain an inode, recording all newly relocated + * data extents. + * There will be only one data reloc tree for one data block group. + * + * Reloc tree will be a special snapshot of its source tree, containing + * relocated tree blocks. + * Each tree referring to a tree block in target block group will get its + * reloc tree built. + * + * 2.3 Swap source tree with its corresponding reloc tree + * Each involved tree only refers to new extents after swap. + * + * 3. Cleanup reloc trees and data reloc tree. + * As old extents in the target block group are still referenced by reloc + * trees, we need to clean them up before really freeing the target block + * group. + * + * The main complexity is in steps 2.2 and 2.3. + * + * The entry point of relocation is relocate_block_group() function. + */ /* * backref_node, mapping_node and tree_block start with this @@ -256,6 +305,7 @@ static void free_backref_node(struct backref_cache *cache, { if (node) { cache->nr_nodes--; + btrfs_put_root(node->root); kfree(node); } } @@ -561,8 +611,8 @@ static int should_ignore_root(struct btrfs_root *root) if (!reloc_root) return 0; - if (btrfs_root_last_snapshot(&reloc_root->root_item) == - root->fs_info->running_transaction->transid - 1) + if (btrfs_header_generation(reloc_root->commit_root) == + root->fs_info->running_transaction->transid) return 0; /* * if there is reloc tree and it was created in previous @@ -589,22 +639,7 @@ static struct btrfs_root *find_reloc_root(struct reloc_control *rc, root = (struct btrfs_root *)node->data; } spin_unlock(&rc->reloc_root_tree.lock); - return root; -} - -static int is_cowonly_root(u64 root_objectid) -{ - if (root_objectid == BTRFS_ROOT_TREE_OBJECTID || - root_objectid == BTRFS_EXTENT_TREE_OBJECTID || - root_objectid == BTRFS_CHUNK_TREE_OBJECTID || - root_objectid == BTRFS_DEV_TREE_OBJECTID || - root_objectid == BTRFS_TREE_LOG_OBJECTID || - root_objectid == BTRFS_CSUM_TREE_OBJECTID || - root_objectid == BTRFS_UUID_TREE_OBJECTID || - root_objectid == BTRFS_QUOTA_TREE_OBJECTID || - root_objectid == BTRFS_FREE_SPACE_TREE_OBJECTID) - return 1; - return 0; + return btrfs_grab_root(root); } static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info, @@ -614,10 +649,7 @@ static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info, key.objectid = root_objectid; key.type = BTRFS_ROOT_ITEM_KEY; - if (is_cowonly_root(root_objectid)) - key.offset = 0; - else - key.offset = (u64)-1; + key.offset = (u64)-1; return btrfs_get_fs_root(fs_info, &key, false); } @@ -711,8 +743,6 @@ struct backref_node *build_backref_tree(struct reloc_control *rc, err = -ENOMEM; goto out; } - path1->reada = READA_FORWARD; - path2->reada = READA_FORWARD; node = alloc_backref_node(cache); if (!node) { @@ -899,10 +929,12 @@ again: /* tree root */ ASSERT(btrfs_root_bytenr(&root->root_item) == cur->bytenr); - if (should_ignore_root(root)) + if (should_ignore_root(root)) { + btrfs_put_root(root); list_add(&cur->list, &useless); - else + } else { cur->root = root; + } break; } @@ -915,6 +947,7 @@ again: ret = btrfs_search_slot(NULL, root, node_key, path2, 0, 0); path2->lowest_level = 0; if (ret < 0) { + btrfs_put_root(root); err = ret; goto out; } @@ -930,6 +963,7 @@ again: root->root_key.objectid, node_key->objectid, node_key->type, node_key->offset); + btrfs_put_root(root); err = -ENOENT; goto out; } @@ -941,15 +975,18 @@ again: if (!path2->nodes[level]) { ASSERT(btrfs_root_bytenr(&root->root_item) == lower->bytenr); - if (should_ignore_root(root)) + if (should_ignore_root(root)) { + btrfs_put_root(root); list_add(&lower->list, &useless); - else + } else { lower->root = root; + } break; } edge = alloc_backref_edge(cache); if (!edge) { + btrfs_put_root(root); err = -ENOMEM; goto out; } @@ -959,6 +996,7 @@ again: if (!rb_node) { upper = alloc_backref_node(cache); if (!upper) { + btrfs_put_root(root); free_backref_edge(cache, edge); err = -ENOMEM; goto out; @@ -1006,8 +1044,10 @@ again: edge->node[LOWER] = lower; edge->node[UPPER] = upper; - if (rb_node) + if (rb_node) { + btrfs_put_root(root); break; + } lower = upper; upper = NULL; } @@ -1186,7 +1226,7 @@ out: free_backref_node(cache, lower); } - free_backref_node(cache, node); + remove_backref_node(cache, node); return ERR_PTR(err); } ASSERT(!node || !node->detached); @@ -1244,7 +1284,8 @@ static int clone_backref_node(struct btrfs_trans_handle *trans, new_node->level = node->level; new_node->lowest = node->lowest; new_node->checked = 1; - new_node->root = dest; + new_node->root = btrfs_grab_root(dest); + ASSERT(new_node->root); if (!node->lowest) { list_for_each_entry(edge, &node->lower, list[UPPER]) { @@ -1298,7 +1339,7 @@ static int __must_check __add_reloc_root(struct btrfs_root *root) if (!node) return -ENOMEM; - node->bytenr = root->node->start; + node->bytenr = root->commit_root->start; node->data = root; spin_lock(&rc->reloc_root_tree.lock); @@ -1325,14 +1366,16 @@ static void __del_reloc_root(struct btrfs_root *root) struct rb_node *rb_node; struct mapping_node *node = NULL; struct reloc_control *rc = fs_info->reloc_ctl; + bool put_ref = false; if (rc && root->node) { spin_lock(&rc->reloc_root_tree.lock); rb_node = tree_search(&rc->reloc_root_tree.rb_root, - root->node->start); + root->commit_root->start); if (rb_node) { node = rb_entry(rb_node, struct mapping_node, rb_node); rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); + RB_CLEAR_NODE(&node->rb_node); } spin_unlock(&rc->reloc_root_tree.lock); if (!node) @@ -1340,9 +1383,22 @@ static void __del_reloc_root(struct btrfs_root *root) BUG_ON((struct btrfs_root *)node->data != root); } + /* + * We only put the reloc root here if it's on the list. There's a lot + * of places where the pattern is to splice the rc->reloc_roots, process + * the reloc roots, and then add the reloc root back onto + * rc->reloc_roots. If we call __del_reloc_root while it's off of the + * list we don't want the reference being dropped, because the guy + * messing with the list is in charge of the reference. + */ spin_lock(&fs_info->trans_lock); - list_del_init(&root->root_list); + if (!list_empty(&root->root_list)) { + put_ref = true; + list_del_init(&root->root_list); + } spin_unlock(&fs_info->trans_lock); + if (put_ref) + btrfs_put_root(root); kfree(node); } @@ -1350,7 +1406,7 @@ static void __del_reloc_root(struct btrfs_root *root) * helper to update the 'address of tree root -> reloc tree' * mapping */ -static int __update_reloc_root(struct btrfs_root *root, u64 new_bytenr) +static int __update_reloc_root(struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; struct rb_node *rb_node; @@ -1359,7 +1415,7 @@ static int __update_reloc_root(struct btrfs_root *root, u64 new_bytenr) spin_lock(&rc->reloc_root_tree.lock); rb_node = tree_search(&rc->reloc_root_tree.rb_root, - root->node->start); + root->commit_root->start); if (rb_node) { node = rb_entry(rb_node, struct mapping_node, rb_node); rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); @@ -1371,7 +1427,7 @@ static int __update_reloc_root(struct btrfs_root *root, u64 new_bytenr) BUG_ON((struct btrfs_root *)node->data != root); spin_lock(&rc->reloc_root_tree.lock); - node->bytenr = new_bytenr; + node->bytenr = root->node->start; rb_node = tree_insert(&rc->reloc_root_tree.rb_root, node->bytenr, &node->rb_node); spin_unlock(&rc->reloc_root_tree.lock); @@ -1447,8 +1503,9 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, BUG_ON(ret); kfree(root_item); - reloc_root = btrfs_read_fs_root(fs_info->tree_root, &root_key); + reloc_root = btrfs_read_tree_root(fs_info->tree_root, &root_key); BUG_ON(IS_ERR(reloc_root)); + set_bit(BTRFS_ROOT_REF_COWS, &reloc_root->state); reloc_root->last_trans = trans->transid; return reloc_root; } @@ -1456,6 +1513,9 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, /* * create reloc tree for a given fs tree. reloc tree is just a * snapshot of the fs tree with special root objectid. + * + * The reloc_root comes out of here with two references, one for + * root->reloc_root, and another for being on the rc->reloc_roots list. */ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, struct btrfs_root *root) @@ -1467,6 +1527,9 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, int clear_rsv = 0; int ret; + if (!rc) + return 0; + /* * The subvolume has reloc tree but the swap is finished, no need to * create/update the dead reloc tree @@ -1474,13 +1537,25 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, if (reloc_root_is_dead(root)) return 0; + /* + * This is subtle but important. We do not do + * record_root_in_transaction for reloc roots, instead we record their + * corresponding fs root, and then here we update the last trans for the + * reloc root. This means that we have to do this for the entire life + * of the reloc root, regardless of which stage of the relocation we are + * in. + */ if (root->reloc_root) { reloc_root = root->reloc_root; reloc_root->last_trans = trans->transid; return 0; } - if (!rc || !rc->create_reloc_tree || + /* + * We are merging reloc roots, we do not need new reloc trees. Also + * reloc trees never need their own reloc tree. + */ + if (!rc->create_reloc_tree || root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) return 0; @@ -1495,7 +1570,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, ret = __add_reloc_root(reloc_root); BUG_ON(ret < 0); - root->reloc_root = reloc_root; + root->reloc_root = btrfs_grab_root(reloc_root); return 0; } @@ -1516,6 +1591,13 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, reloc_root = root->reloc_root; root_item = &reloc_root->root_item; + /* + * We are probably ok here, but __del_reloc_root() will drop its ref of + * the root. We have the ref for root->reloc_root, but just in case + * hold it while we update the reloc root. + */ + btrfs_grab_root(reloc_root); + /* root->reloc_root will stay until current relocation finished */ if (fs_info->reloc_ctl->merge_reloc_tree && btrfs_root_refs(root_item) == 0) { @@ -1529,6 +1611,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, } if (reloc_root->commit_root != reloc_root->node) { + __update_reloc_root(reloc_root); btrfs_set_root_node(root_item, reloc_root->node); free_extent_buffer(reloc_root->commit_root); reloc_root->commit_root = btrfs_root_node(reloc_root); @@ -1537,7 +1620,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, ret = btrfs_update_root(trans, fs_info->tree_root, &reloc_root->root_key, root_item); BUG_ON(ret); - + btrfs_put_root(reloc_root); out: return 0; } @@ -2211,7 +2294,7 @@ static void insert_dirty_subvol(struct btrfs_trans_handle *trans, btrfs_update_reloc_root(trans, root); if (list_empty(&root->reloc_dirty_list)) { - btrfs_grab_fs_root(root); + btrfs_grab_root(root); list_add_tail(&root->reloc_dirty_list, &rc->dirty_subvol_roots); } } @@ -2231,24 +2314,34 @@ static int clean_dirty_subvols(struct reloc_control *rc) list_del_init(&root->reloc_dirty_list); root->reloc_root = NULL; - if (reloc_root) { - - ret2 = btrfs_drop_snapshot(reloc_root, NULL, 0, 1); - if (ret2 < 0 && !ret) - ret = ret2; - } /* * Need barrier to ensure clear_bit() only happens after * root->reloc_root = NULL. Pairs with have_reloc_root. */ smp_wmb(); clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state); - btrfs_put_fs_root(root); + if (reloc_root) { + /* + * btrfs_drop_snapshot drops our ref we hold for + * ->reloc_root. If it fails however we must + * drop the ref ourselves. + */ + ret2 = btrfs_drop_snapshot(reloc_root, 0, 1); + if (ret2 < 0) { + btrfs_put_root(reloc_root); + if (!ret) + ret = ret2; + } + } + btrfs_put_root(root); } else { /* Orphan reloc tree, just clean it up */ - ret2 = btrfs_drop_snapshot(root, NULL, 0, 1); - if (ret2 < 0 && !ret) - ret = ret2; + ret2 = btrfs_drop_snapshot(root, 0, 1); + if (ret2 < 0) { + btrfs_put_root(root); + if (!ret) + ret = ret2; + } } } return ret; @@ -2325,6 +2418,18 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, trans = NULL; goto out; } + + /* + * At this point we no longer have a reloc_control, so we can't + * depend on btrfs_init_reloc_root to update our last_trans. + * + * But that's ok, we started the trans handle on our + * corresponding fs_root, which means it's been added to the + * dirty list. At commit time we'll still call + * btrfs_update_reloc_root() and update our root item + * appropriately. + */ + reloc_root->last_trans = trans->transid; trans->block_rsv = rc->block_rsv; replaced = 0; @@ -2435,7 +2540,7 @@ again: if (IS_ERR(trans)) { if (!err) btrfs_block_rsv_release(fs_info, rc->block_rsv, - num_bytes); + num_bytes, NULL); return PTR_ERR(trans); } @@ -2443,7 +2548,7 @@ again: if (num_bytes != rc->merging_rsv_size) { btrfs_end_transaction(trans); btrfs_block_rsv_release(fs_info, rc->block_rsv, - num_bytes); + num_bytes, NULL); goto again; } } @@ -2468,6 +2573,7 @@ again: btrfs_update_reloc_root(trans, root); list_add(&reloc_root->root_list, &reloc_roots); + btrfs_put_root(root); } list_splice(&reloc_roots, &rc->reloc_roots); @@ -2488,10 +2594,6 @@ void free_reloc_roots(struct list_head *list) reloc_root = list_entry(list->next, struct btrfs_root, root_list); __del_reloc_root(reloc_root); - free_extent_buffer(reloc_root->node); - free_extent_buffer(reloc_root->commit_root); - reloc_root->node = NULL; - reloc_root->commit_root = NULL; } } @@ -2529,6 +2631,7 @@ again: BUG_ON(root->reloc_root != reloc_root); ret = merge_reloc_root(rc, root); + btrfs_put_root(root); if (ret) { if (list_empty(&reloc_root->root_list)) list_add_tail(&reloc_root->root_list, @@ -2561,7 +2664,21 @@ out: free_reloc_roots(&reloc_roots); } - BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); + /* + * We used to have + * + * BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); + * + * here, but it's wrong. If we fail to start the transaction in + * prepare_to_merge() we will have only 0 ref reloc roots, none of which + * have actually been removed from the reloc_root_tree rb tree. This is + * fine because we're bailing here, and we hold a reference on the root + * for the list that holds it, so these roots will be cleaned up when we + * do the reloc_dirty_list afterwards. Meanwhile the root->reloc_root + * will be cleaned up on unmount. + * + * The remaining nodes will be cleaned up by free_reloc_control. + */ } static void free_block_list(struct rb_root *blocks) @@ -2580,6 +2697,7 @@ static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans, { struct btrfs_fs_info *fs_info = reloc_root->fs_info; struct btrfs_root *root; + int ret; if (reloc_root->last_trans == trans->transid) return 0; @@ -2587,8 +2705,10 @@ static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans, root = read_fs_root(fs_info, reloc_root->root_key.offset); BUG_ON(IS_ERR(root)); BUG_ON(root->reloc_root != reloc_root); + ret = btrfs_record_root_in_trans(trans, root); + btrfs_put_root(root); - return btrfs_record_root_in_trans(trans, root); + return ret; } static noinline_for_stack @@ -2621,7 +2741,9 @@ struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, BUG_ON(next->new_bytenr); BUG_ON(!list_empty(&next->list)); next->new_bytenr = root->node->start; - next->root = root; + btrfs_put_root(next->root); + next->root = btrfs_grab_root(root); + ASSERT(next->root); list_add_tail(&next->list, &rc->backref_cache.changed); __mark_block_processed(rc, next); @@ -3040,7 +3162,6 @@ static int get_tree_block_key(struct btrfs_fs_info *fs_info, { struct extent_buffer *eb; - BUG_ON(block->key_ready); eb = read_tree_block(fs_info, block->bytenr, block->key.offset, block->level, NULL); if (IS_ERR(eb)) { @@ -3073,6 +3194,14 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans, if (!node) return 0; + /* + * If we fail here we want to drop our backref_node because we are going + * to start over and regenerate the tree for it. + */ + ret = reserve_metadata_space(trans, rc, node); + if (ret) + goto out; + BUG_ON(node->processed); root = select_one_root(node); if (root == ERR_PTR(-ENOENT)) { @@ -3080,12 +3209,6 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans, goto out; } - if (!root || test_bit(BTRFS_ROOT_REF_COWS, &root->state)) { - ret = reserve_metadata_space(trans, rc, node); - if (ret) - goto out; - } - if (root) { if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) { BUG_ON(node->new_bytenr); @@ -3093,7 +3216,9 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans, btrfs_record_root_in_trans(trans, root); root = root->reloc_root; node->new_bytenr = root->node->start; - node->root = root; + btrfs_put_root(node->root); + node->root = btrfs_grab_root(root); + ASSERT(node->root); list_add_tail(&node->list, &rc->backref_cache.changed); } else { path->lowest_level = node->level; @@ -3161,9 +3286,8 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, ret = relocate_tree_block(trans, rc, node, &block->key, path); if (ret < 0) { - if (ret != -EAGAIN || &block->rb_node == rb_first(blocks)) - err = ret; - goto out; + err = ret; + break; } } out: @@ -3264,6 +3388,15 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end, return ret; } +/* + * Allow error injection to test balance cancellation + */ +int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info) +{ + return atomic_read(&fs_info->balance_cancel_req); +} +ALLOW_ERROR_INJECTION(btrfs_should_cancel_balance, TRUE); + static int relocate_file_extent_cluster(struct inode *inode, struct file_extent_cluster *cluster) { @@ -3385,6 +3518,10 @@ static int relocate_file_extent_cluster(struct inode *inode, btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); balance_dirty_pages_ratelimited(inode->i_mapping); btrfs_throttle(fs_info); + if (btrfs_should_cancel_balance(fs_info)) { + ret = -ECANCELED; + goto out; + } } WARN_ON(nr != cluster->nr); out: @@ -3556,31 +3693,6 @@ out: return ret; } -/* - * helper to check if the block use full backrefs for pointers in it - */ -static int block_use_full_backref(struct reloc_control *rc, - struct extent_buffer *eb) -{ - u64 flags; - int ret; - - if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC) || - btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV) - return 1; - - ret = btrfs_lookup_extent_info(NULL, rc->extent_root->fs_info, - eb->start, btrfs_header_level(eb), 1, - NULL, &flags); - BUG_ON(ret); - - if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) - ret = 1; - else - ret = 0; - return ret; -} - static int delete_block_group_cache(struct btrfs_fs_info *fs_info, struct btrfs_block_group *block_group, struct inode *inode, @@ -3624,172 +3736,40 @@ out: } /* - * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY - * this function scans fs tree to find blocks reference the data extent + * Locate the free space cache EXTENT_DATA in root tree leaf and delete the + * cache inode, to avoid free space cache data extent blocking data relocation. */ -static int find_data_references(struct reloc_control *rc, - struct btrfs_key *extent_key, - struct extent_buffer *leaf, - struct btrfs_extent_data_ref *ref, - struct rb_root *blocks) +static int delete_v1_space_cache(struct extent_buffer *leaf, + struct btrfs_block_group *block_group, + u64 data_bytenr) { - struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; - struct btrfs_path *path; - struct tree_block *block; - struct btrfs_root *root; - struct btrfs_file_extent_item *fi; - struct rb_node *rb_node; + u64 space_cache_ino; + struct btrfs_file_extent_item *ei; struct btrfs_key key; - u64 ref_root; - u64 ref_objectid; - u64 ref_offset; - u32 ref_count; - u32 nritems; - int err = 0; - int added = 0; - int counted; + bool found = false; + int i; int ret; - ref_root = btrfs_extent_data_ref_root(leaf, ref); - ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref); - ref_offset = btrfs_extent_data_ref_offset(leaf, ref); - ref_count = btrfs_extent_data_ref_count(leaf, ref); - - /* - * This is an extent belonging to the free space cache, lets just delete - * it and redo the search. - */ - if (ref_root == BTRFS_ROOT_TREE_OBJECTID) { - ret = delete_block_group_cache(fs_info, rc->block_group, - NULL, ref_objectid); - if (ret != -ENOENT) - return ret; - ret = 0; - } - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - path->reada = READA_FORWARD; - - root = read_fs_root(fs_info, ref_root); - if (IS_ERR(root)) { - err = PTR_ERR(root); - goto out; - } - - key.objectid = ref_objectid; - key.type = BTRFS_EXTENT_DATA_KEY; - if (ref_offset > ((u64)-1 << 32)) - key.offset = 0; - else - key.offset = ref_offset; - - path->search_commit_root = 1; - path->skip_locking = 1; - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) { - err = ret; - goto out; - } - - leaf = path->nodes[0]; - nritems = btrfs_header_nritems(leaf); - /* - * the references in tree blocks that use full backrefs - * are not counted in - */ - if (block_use_full_backref(rc, leaf)) - counted = 0; - else - counted = 1; - rb_node = tree_search(blocks, leaf->start); - if (rb_node) { - if (counted) - added = 1; - else - path->slots[0] = nritems; - } - - while (ref_count > 0) { - while (path->slots[0] >= nritems) { - ret = btrfs_next_leaf(root, path); - if (ret < 0) { - err = ret; - goto out; - } - if (WARN_ON(ret > 0)) - goto out; - - leaf = path->nodes[0]; - nritems = btrfs_header_nritems(leaf); - added = 0; - - if (block_use_full_backref(rc, leaf)) - counted = 0; - else - counted = 1; - rb_node = tree_search(blocks, leaf->start); - if (rb_node) { - if (counted) - added = 1; - else - path->slots[0] = nritems; - } - } + if (btrfs_header_owner(leaf) != BTRFS_ROOT_TREE_OBJECTID) + return 0; - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - if (WARN_ON(key.objectid != ref_objectid || - key.type != BTRFS_EXTENT_DATA_KEY)) + for (i = 0; i < btrfs_header_nritems(leaf); i++) { + btrfs_item_key_to_cpu(leaf, &key, i); + if (key.type != BTRFS_EXTENT_DATA_KEY) + continue; + ei = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); + if (btrfs_file_extent_type(leaf, ei) == BTRFS_FILE_EXTENT_REG && + btrfs_file_extent_disk_bytenr(leaf, ei) == data_bytenr) { + found = true; + space_cache_ino = key.objectid; break; - - fi = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - - if (btrfs_file_extent_type(leaf, fi) == - BTRFS_FILE_EXTENT_INLINE) - goto next; - - if (btrfs_file_extent_disk_bytenr(leaf, fi) != - extent_key->objectid) - goto next; - - key.offset -= btrfs_file_extent_offset(leaf, fi); - if (key.offset != ref_offset) - goto next; - - if (counted) - ref_count--; - if (added) - goto next; - - if (!tree_block_processed(leaf->start, rc)) { - block = kmalloc(sizeof(*block), GFP_NOFS); - if (!block) { - err = -ENOMEM; - break; - } - block->bytenr = leaf->start; - btrfs_item_key_to_cpu(leaf, &block->key, 0); - block->level = 0; - block->key_ready = 1; - rb_node = tree_insert(blocks, block->bytenr, - &block->rb_node); - if (rb_node) - backref_tree_panic(rb_node, -EEXIST, - block->bytenr); } - if (counted) - added = 1; - else - path->slots[0] = nritems; -next: - path->slots[0]++; - } -out: - btrfs_free_path(path); - return err; + if (!found) + return -ENOENT; + ret = delete_block_group_cache(leaf->fs_info, block_group, NULL, + space_cache_ino); + return ret; } /* @@ -3801,91 +3781,41 @@ int add_data_references(struct reloc_control *rc, struct btrfs_path *path, struct rb_root *blocks) { - struct btrfs_key key; - struct extent_buffer *eb; - struct btrfs_extent_data_ref *dref; - struct btrfs_extent_inline_ref *iref; - unsigned long ptr; - unsigned long end; - u32 blocksize = rc->extent_root->fs_info->nodesize; + struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; + struct ulist *leaves = NULL; + struct ulist_iterator leaf_uiter; + struct ulist_node *ref_node = NULL; + const u32 blocksize = fs_info->nodesize; int ret = 0; - int err = 0; - - eb = path->nodes[0]; - ptr = btrfs_item_ptr_offset(eb, path->slots[0]); - end = ptr + btrfs_item_size_nr(eb, path->slots[0]); - ptr += sizeof(struct btrfs_extent_item); - while (ptr < end) { - iref = (struct btrfs_extent_inline_ref *)ptr; - key.type = btrfs_get_extent_inline_ref_type(eb, iref, - BTRFS_REF_TYPE_DATA); - if (key.type == BTRFS_SHARED_DATA_REF_KEY) { - key.offset = btrfs_extent_inline_ref_offset(eb, iref); - ret = __add_tree_block(rc, key.offset, blocksize, - blocks); - } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { - dref = (struct btrfs_extent_data_ref *)(&iref->offset); - ret = find_data_references(rc, extent_key, - eb, dref, blocks); - } else { - ret = -EUCLEAN; - btrfs_err(rc->extent_root->fs_info, - "extent %llu slot %d has an invalid inline ref type", - eb->start, path->slots[0]); - } - if (ret) { - err = ret; - goto out; - } - ptr += btrfs_extent_inline_ref_size(key.type); - } - WARN_ON(ptr > end); + btrfs_release_path(path); + ret = btrfs_find_all_leafs(NULL, fs_info, extent_key->objectid, + 0, &leaves, NULL, true); + if (ret < 0) + return ret; - while (1) { - cond_resched(); - eb = path->nodes[0]; - if (path->slots[0] >= btrfs_header_nritems(eb)) { - ret = btrfs_next_leaf(rc->extent_root, path); - if (ret < 0) { - err = ret; - break; - } - if (ret > 0) - break; - eb = path->nodes[0]; - } + ULIST_ITER_INIT(&leaf_uiter); + while ((ref_node = ulist_next(leaves, &leaf_uiter))) { + struct extent_buffer *eb; - btrfs_item_key_to_cpu(eb, &key, path->slots[0]); - if (key.objectid != extent_key->objectid) + eb = read_tree_block(fs_info, ref_node->val, 0, 0, NULL); + if (IS_ERR(eb)) { + ret = PTR_ERR(eb); break; - - if (key.type == BTRFS_SHARED_DATA_REF_KEY) { - ret = __add_tree_block(rc, key.offset, blocksize, - blocks); - } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { - dref = btrfs_item_ptr(eb, path->slots[0], - struct btrfs_extent_data_ref); - ret = find_data_references(rc, extent_key, - eb, dref, blocks); - } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) { - btrfs_print_v0_err(eb->fs_info); - btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL); - ret = -EINVAL; - } else { - ret = 0; } - if (ret) { - err = ret; + ret = delete_v1_space_cache(eb, rc->block_group, + extent_key->objectid); + free_extent_buffer(eb); + if (ret < 0) + break; + ret = __add_tree_block(rc, ref_node->val, blocksize, blocks); + if (ret < 0) break; - } - path->slots[0]++; } -out: - btrfs_release_path(path); - if (err) + if (ret < 0) free_block_list(blocks); - return err; + ulist_free(leaves); + return ret; } /* @@ -4137,12 +4067,6 @@ restart: if (!RB_EMPTY_ROOT(&blocks)) { ret = relocate_tree_blocks(trans, rc, &blocks); if (ret < 0) { - /* - * if we fail to relocate tree blocks, force to update - * backref cache when committing transaction. - */ - rc->backref_cache.last_trans = trans->transid - 1; - if (ret != -EAGAIN) { err = ret; break; @@ -4166,6 +4090,10 @@ restart: break; } } + if (btrfs_should_cancel_balance(fs_info)) { + err = -ECANCELED; + break; + } } if (trans && progress && err == -ENOSPC) { ret = btrfs_force_chunk_alloc(trans, rc->block_group->flags); @@ -4195,15 +4123,23 @@ restart: set_reloc_control(rc); backref_cache_cleanup(&rc->backref_cache); - btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1); + btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1, NULL); + /* + * Even in the case when the relocation is cancelled, we should all go + * through prepare_to_merge() and merge_reloc_roots(). + * + * For error (including cancelled balance), prepare_to_merge() will + * mark all reloc trees orphan, then queue them for cleanup in + * merge_reloc_roots() + */ err = prepare_to_merge(rc, err); merge_reloc_roots(rc); rc->merge_reloc_tree = 0; unset_reloc_control(rc); - btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1); + btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1, NULL); /* get rid of pinned extents */ trans = btrfs_join_transaction(rc->extent_root); @@ -4212,10 +4148,10 @@ restart: goto out_free; } btrfs_commit_transaction(trans); +out_free: ret = clean_dirty_subvols(rc); if (ret < 0 && !err) err = ret; -out_free: btrfs_free_block_rsv(fs_info, rc->block_rsv); btrfs_free_path(path); return err; @@ -4271,8 +4207,10 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, return ERR_CAST(root); trans = btrfs_start_transaction(root, 6); - if (IS_ERR(trans)) + if (IS_ERR(trans)) { + btrfs_put_root(root); return ERR_CAST(trans); + } err = btrfs_find_free_objectid(root, &objectid); if (err) @@ -4290,6 +4228,7 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, err = btrfs_orphan_add(trans, BTRFS_I(inode)); out: + btrfs_put_root(root); btrfs_end_transaction(trans); btrfs_btree_balance_dirty(fs_info); if (err) { @@ -4317,6 +4256,18 @@ static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info) return rc; } +static void free_reloc_control(struct reloc_control *rc) +{ + struct mapping_node *node, *tmp; + + free_reloc_roots(&rc->reloc_roots); + rbtree_postorder_for_each_entry_safe(node, tmp, + &rc->reloc_root_tree.rb_root, rb_node) + kfree(node); + + kfree(rc); +} + /* * Print the block group being relocated */ @@ -4461,7 +4412,7 @@ out: btrfs_dec_block_group_ro(rc->block_group); iput(rc->data_inode); btrfs_put_block_group(rc->block_group); - kfree(rc); + free_reloc_control(rc); return err; } @@ -4537,12 +4488,13 @@ int btrfs_recover_relocation(struct btrfs_root *root) key.type != BTRFS_ROOT_ITEM_KEY) break; - reloc_root = btrfs_read_fs_root(root, &key); + reloc_root = btrfs_read_tree_root(root, &key); if (IS_ERR(reloc_root)) { err = PTR_ERR(reloc_root); goto out; } + set_bit(BTRFS_ROOT_REF_COWS, &reloc_root->state); list_add(&reloc_root->root_list, &reloc_roots); if (btrfs_root_refs(&reloc_root->root_item) > 0) { @@ -4559,6 +4511,8 @@ int btrfs_recover_relocation(struct btrfs_root *root) err = ret; goto out; } + } else { + btrfs_put_root(fs_root); } } @@ -4584,9 +4538,8 @@ int btrfs_recover_relocation(struct btrfs_root *root) trans = btrfs_join_transaction(rc->extent_root); if (IS_ERR(trans)) { - unset_reloc_control(rc); err = PTR_ERR(trans); - goto out_free; + goto out_unset; } rc->merge_reloc_tree = 1; @@ -4606,17 +4559,18 @@ int btrfs_recover_relocation(struct btrfs_root *root) if (IS_ERR(fs_root)) { err = PTR_ERR(fs_root); list_add_tail(&reloc_root->root_list, &reloc_roots); - goto out_free; + goto out_unset; } err = __add_reloc_root(reloc_root); BUG_ON(err < 0); /* -ENOMEM or logic error */ - fs_root->reloc_root = reloc_root; + fs_root->reloc_root = btrfs_grab_root(reloc_root); + btrfs_put_root(fs_root); } err = btrfs_commit_transaction(trans); if (err) - goto out_free; + goto out_unset; merge_reloc_roots(rc); @@ -4625,15 +4579,16 @@ int btrfs_recover_relocation(struct btrfs_root *root) trans = btrfs_join_transaction(rc->extent_root); if (IS_ERR(trans)) { err = PTR_ERR(trans); - goto out_free; + goto out_clean; } err = btrfs_commit_transaction(trans); - +out_clean: ret = clean_dirty_subvols(rc); if (ret < 0 && !err) err = ret; -out_free: - kfree(rc); +out_unset: + unset_reloc_control(rc); + free_reloc_control(rc); out: if (!list_empty(&reloc_roots)) free_reloc_roots(&reloc_roots); @@ -4643,10 +4598,12 @@ out: if (err == 0) { /* cleanup orphan inode in data relocation tree */ fs_root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID); - if (IS_ERR(fs_root)) + if (IS_ERR(fs_root)) { err = PTR_ERR(fs_root); - else + } else { err = btrfs_orphan_cleanup(fs_root); + btrfs_put_root(fs_root); + } } return err; } @@ -4720,11 +4677,6 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, BUG_ON(rc->stage == UPDATE_DATA_PTRS && root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID); - if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { - if (buf == root->node) - __update_reloc_root(root, cow->start); - } - level = btrfs_header_level(buf); if (btrfs_header_generation(buf) <= btrfs_root_last_snapshot(&root->root_item)) @@ -4795,6 +4747,10 @@ void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending, /* * called after snapshot is created. migrate block reservation * and create reloc root for the newly created snapshot + * + * This is similar to btrfs_init_reloc_root(), we come out of here with two + * references held on the reloc_root, one for root->reloc_root and one for + * rc->reloc_roots. */ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, struct btrfs_pending_snapshot *pending) @@ -4827,7 +4783,7 @@ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, ret = __add_reloc_root(reloc_root); BUG_ON(ret < 0); - new_root->reloc_root = reloc_root; + new_root->reloc_root = btrfs_grab_root(reloc_root); if (rc->create_reloc_tree) ret = clone_backref_node(trans, rc, root, reloc_root); |