Diffstat (limited to 'fs/btrfs/backref.c')
-rw-r--r-- | fs/btrfs/backref.c | 1023
1 file changed, 946 insertions, 77 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index e5d85311d5d5..ea1c28ccb44f 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -13,6 +13,7 @@ #include "transaction.h" #include "delayed-ref.h" #include "locking.h" +#include "misc.h" /* Just an arbitrary number so we can be sure this happened */ #define BACKREF_FOUND_SHARED 6 @@ -347,33 +348,10 @@ static int add_prelim_ref(const struct btrfs_fs_info *fs_info, return -ENOMEM; ref->root_id = root_id; - if (key) { + if (key) ref->key_for_search = *key; - /* - * We can often find data backrefs with an offset that is too - * large (>= LLONG_MAX, maximum allowed file offset) due to - * underflows when subtracting a file's offset with the data - * offset of its corresponding extent data item. This can - * happen for example in the clone ioctl. - * So if we detect such case we set the search key's offset to - * zero to make sure we will find the matching file extent item - * at add_all_parents(), otherwise we will miss it because the - * offset taken form the backref is much larger then the offset - * of the file extent item. This can make us scan a very large - * number of file extent items, but at least it will not make - * us miss any. - * This is an ugly workaround for a behaviour that should have - * never existed, but it does and a fix for the clone ioctl - * would touch a lot of places, cause backwards incompatibility - * and would not fix the problem for extents cloned with older - * kernels. - */ - if (ref->key_for_search.type == BTRFS_EXTENT_DATA_KEY && - ref->key_for_search.offset >= LLONG_MAX) - ref->key_for_search.offset = 0; - } else { + else memset(&ref->key_for_search, 0, sizeof(ref->key_for_search)); - } ref->inode_list = NULL; ref->level = level; @@ -409,10 +387,36 @@ static int add_indirect_ref(const struct btrfs_fs_info *fs_info, wanted_disk_byte, count, sc, gfp_mask); } +static int is_shared_data_backref(struct preftrees *preftrees, u64 bytenr) +{ + struct rb_node **p = &preftrees->direct.root.rb_root.rb_node; + struct rb_node *parent = NULL; + struct prelim_ref *ref = NULL; + struct prelim_ref target = {}; + int result; + + target.parent = bytenr; + + while (*p) { + parent = *p; + ref = rb_entry(parent, struct prelim_ref, rbnode); + result = prelim_ref_compare(ref, &target); + + if (result < 0) + p = &(*p)->rb_left; + else if (result > 0) + p = &(*p)->rb_right; + else + return 1; + } + return 0; +} + static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, - struct ulist *parents, struct prelim_ref *ref, + struct ulist *parents, + struct preftrees *preftrees, struct prelim_ref *ref, int level, u64 time_seq, const u64 *extent_item_pos, - u64 total_refs, bool ignore_offset) + bool ignore_offset) { int ret = 0; int slot; @@ -424,6 +428,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, u64 disk_byte; u64 wanted_disk_byte = ref->wanted_disk_byte; u64 count = 0; + u64 data_offset; if (level != 0) { eb = path->nodes[level]; @@ -434,18 +439,26 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, } /* - * We normally enter this function with the path already pointing to - * the first item to check. But sometimes, we may enter it with - * slot==nritems. In that case, go to the next leaf before we continue. + * 1. We normally enter this function with the path already pointing to + * the first item to check. But sometimes, we may enter it with + * slot == nritems. + * 2. 
We are searching for normal backref but bytenr of this leaf + * matches shared data backref + * 3. The leaf owner is not equal to the root we are searching + * + * For these cases, go to the next leaf before we continue. */ - if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + eb = path->nodes[0]; + if (path->slots[0] >= btrfs_header_nritems(eb) || + is_shared_data_backref(preftrees, eb->start) || + ref->root_id != btrfs_header_owner(eb)) { if (time_seq == SEQ_LAST) ret = btrfs_next_leaf(root, path); else ret = btrfs_next_old_leaf(root, path, time_seq); } - while (!ret && count < total_refs) { + while (!ret && count < ref->count) { eb = path->nodes[0]; slot = path->slots[0]; @@ -455,13 +468,31 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, key.type != BTRFS_EXTENT_DATA_KEY) break; + /* + * We are searching for normal backref but bytenr of this leaf + * matches shared data backref, OR + * the leaf owner is not equal to the root we are searching for + */ + if (slot == 0 && + (is_shared_data_backref(preftrees, eb->start) || + ref->root_id != btrfs_header_owner(eb))) { + if (time_seq == SEQ_LAST) + ret = btrfs_next_leaf(root, path); + else + ret = btrfs_next_old_leaf(root, path, time_seq); + continue; + } fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); + data_offset = btrfs_file_extent_offset(eb, fi); if (disk_byte == wanted_disk_byte) { eie = NULL; old = NULL; - count++; + if (ref->key_for_search.offset == key.offset - data_offset) + count++; + else + goto next; if (extent_item_pos) { ret = check_extent_in_eb(&key, eb, fi, *extent_item_pos, @@ -502,33 +533,30 @@ next: */ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 time_seq, + struct preftrees *preftrees, struct prelim_ref *ref, struct ulist *parents, - const u64 *extent_item_pos, u64 total_refs, - bool ignore_offset) + const u64 *extent_item_pos, bool ignore_offset) { struct btrfs_root *root; - struct btrfs_key root_key; struct extent_buffer *eb; int ret = 0; int root_level; int level = ref->level; - int index; - - root_key.objectid = ref->root_id; - root_key.type = BTRFS_ROOT_ITEM_KEY; - root_key.offset = (u64)-1; - - index = srcu_read_lock(&fs_info->subvol_srcu); + struct btrfs_key search_key = ref->key_for_search; - root = btrfs_get_fs_root(fs_info, &root_key, false); + root = btrfs_get_fs_root(fs_info, ref->root_id, false); if (IS_ERR(root)) { - srcu_read_unlock(&fs_info->subvol_srcu, index); ret = PTR_ERR(root); + goto out_free; + } + + if (!path->search_commit_root && + test_bit(BTRFS_ROOT_DELETING, &root->state)) { + ret = -ENOENT; goto out; } if (btrfs_is_testing(fs_info)) { - srcu_read_unlock(&fs_info->subvol_srcu, index); ret = -ENOENT; goto out; } @@ -540,21 +568,36 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info, else root_level = btrfs_old_root_level(root, time_seq); - if (root_level + 1 == level) { - srcu_read_unlock(&fs_info->subvol_srcu, index); + if (root_level + 1 == level) goto out; - } + /* + * We can often find data backrefs with an offset that is too large + * (>= LLONG_MAX, maximum allowed file offset) due to underflows when + * subtracting a file's offset with the data offset of its + * corresponding extent data item. This can happen for example in the + * clone ioctl. 
+ * + * So if we detect such case we set the search key's offset to zero to + * make sure we will find the matching file extent item at + * add_all_parents(), otherwise we will miss it because the offset + * taken form the backref is much larger then the offset of the file + * extent item. This can make us scan a very large number of file + * extent items, but at least it will not make us miss any. + * + * This is an ugly workaround for a behaviour that should have never + * existed, but it does and a fix for the clone ioctl would touch a lot + * of places, cause backwards incompatibility and would not fix the + * problem for extents cloned with older kernels. + */ + if (search_key.type == BTRFS_EXTENT_DATA_KEY && + search_key.offset >= LLONG_MAX) + search_key.offset = 0; path->lowest_level = level; if (time_seq == SEQ_LAST) - ret = btrfs_search_slot(NULL, root, &ref->key_for_search, path, - 0, 0); + ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); else - ret = btrfs_search_old_slot(root, &ref->key_for_search, path, - time_seq); - - /* root node has been locked, we can release @subvol_srcu safely here */ - srcu_read_unlock(&fs_info->subvol_srcu, index); + ret = btrfs_search_old_slot(root, &search_key, path, time_seq); btrfs_debug(fs_info, "search slot in root %llu (level %d, ref count %d) returned %d for key (%llu %u %llu)", @@ -574,9 +617,11 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info, eb = path->nodes[level]; } - ret = add_all_parents(root, path, parents, ref, level, time_seq, - extent_item_pos, total_refs, ignore_offset); + ret = add_all_parents(root, path, parents, preftrees, ref, level, + time_seq, extent_item_pos, ignore_offset); out: + btrfs_put_root(root); +out_free: path->lowest_level = 0; btrfs_release_path(path); return ret; @@ -609,7 +654,7 @@ unode_aux_to_inode_list(struct ulist_node *node) static int resolve_indirect_refs(struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 time_seq, struct preftrees *preftrees, - const u64 *extent_item_pos, u64 total_refs, + const u64 *extent_item_pos, struct share_check *sc, bool ignore_offset) { int err; @@ -653,9 +698,9 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info, ret = BACKREF_FOUND_SHARED; goto out; } - err = resolve_indirect_ref(fs_info, path, time_seq, ref, - parents, extent_item_pos, - total_refs, ignore_offset); + err = resolve_indirect_ref(fs_info, path, time_seq, preftrees, + ref, parents, extent_item_pos, + ignore_offset); /* * we can only tolerate ENOENT,otherwise,we should catch error * and return directly. 
@@ -758,8 +803,7 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info, */ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_head *head, u64 seq, - struct preftrees *preftrees, u64 *total_refs, - struct share_check *sc) + struct preftrees *preftrees, struct share_check *sc) { struct btrfs_delayed_ref_node *node; struct btrfs_delayed_extent_op *extent_op = head->extent_op; @@ -793,7 +837,6 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, default: BUG(); } - *total_refs += count; switch (node->type) { case BTRFS_TREE_BLOCK_REF_KEY: { /* NORMAL INDIRECT METADATA backref */ @@ -876,7 +919,7 @@ out: static int add_inline_refs(const struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 bytenr, int *info_level, struct preftrees *preftrees, - u64 *total_refs, struct share_check *sc) + struct share_check *sc) { int ret = 0; int slot; @@ -900,7 +943,6 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info, ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); flags = btrfs_extent_flags(leaf, ei); - *total_refs += btrfs_extent_refs(leaf, ei); btrfs_item_key_to_cpu(leaf, &found_key, slot); ptr = (unsigned long)(ei + 1); @@ -1125,8 +1167,6 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, struct prelim_ref *ref; struct rb_node *node; struct extent_inode_elem *eie = NULL; - /* total of both direct AND indirect refs! */ - u64 total_refs = 0; struct preftrees preftrees = { .direct = PREFTREE_INIT, .indirect = PREFTREE_INIT, @@ -1195,7 +1235,7 @@ again: } spin_unlock(&delayed_refs->lock); ret = add_delayed_refs(fs_info, head, time_seq, - &preftrees, &total_refs, sc); + &preftrees, sc); mutex_unlock(&head->mutex); if (ret) goto out; @@ -1216,8 +1256,7 @@ again: (key.type == BTRFS_EXTENT_ITEM_KEY || key.type == BTRFS_METADATA_ITEM_KEY)) { ret = add_inline_refs(fs_info, path, bytenr, - &info_level, &preftrees, - &total_refs, sc); + &info_level, &preftrees, sc); if (ret) goto out; ret = add_keyed_refs(fs_info, path, bytenr, info_level, @@ -1236,7 +1275,7 @@ again: WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect_missing_keys.root.rb_root)); ret = resolve_indirect_refs(fs_info, path, time_seq, &preftrees, - extent_item_pos, total_refs, sc, ignore_offset); + extent_item_pos, sc, ignore_offset); if (ret) goto out; @@ -1362,10 +1401,10 @@ static void free_leaf_list(struct ulist *blocks) * * returns 0 on success, <0 on error */ -static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 bytenr, - u64 time_seq, struct ulist **leafs, - const u64 *extent_item_pos, bool ignore_offset) +int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 bytenr, + u64 time_seq, struct ulist **leafs, + const u64 *extent_item_pos, bool ignore_offset) { int ret; @@ -1422,6 +1461,7 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans, if (ret < 0 && ret != -ENOENT) { ulist_free(tmp); ulist_free(*roots); + *roots = NULL; return ret; } node = ulist_next(tmp, &uiter); @@ -2252,3 +2292,832 @@ void free_ipath(struct inode_fs_paths *ipath) kvfree(ipath->fspath); kfree(ipath); } + +struct btrfs_backref_iter *btrfs_backref_iter_alloc( + struct btrfs_fs_info *fs_info, gfp_t gfp_flag) +{ + struct btrfs_backref_iter *ret; + + ret = kzalloc(sizeof(*ret), gfp_flag); + if (!ret) + return NULL; + + ret->path = btrfs_alloc_path(); + if (!ret->path) { + kfree(ret); + return NULL; + } + + /* Current backref iterator only supports iteration in commit root 
*/ + ret->path->search_commit_root = 1; + ret->path->skip_locking = 1; + ret->fs_info = fs_info; + + return ret; +} + +int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr) +{ + struct btrfs_fs_info *fs_info = iter->fs_info; + struct btrfs_path *path = iter->path; + struct btrfs_extent_item *ei; + struct btrfs_key key; + int ret; + + key.objectid = bytenr; + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = (u64)-1; + iter->bytenr = bytenr; + + ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0); + if (ret < 0) + return ret; + if (ret == 0) { + ret = -EUCLEAN; + goto release; + } + if (path->slots[0] == 0) { + WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); + ret = -EUCLEAN; + goto release; + } + path->slots[0]--; + + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + if ((key.type != BTRFS_EXTENT_ITEM_KEY && + key.type != BTRFS_METADATA_ITEM_KEY) || key.objectid != bytenr) { + ret = -ENOENT; + goto release; + } + memcpy(&iter->cur_key, &key, sizeof(key)); + iter->item_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0], + path->slots[0]); + iter->end_ptr = (u32)(iter->item_ptr + + btrfs_item_size_nr(path->nodes[0], path->slots[0])); + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_extent_item); + + /* + * Only support iteration on tree backref yet. + * + * This is an extra precaution for non skinny-metadata, where + * EXTENT_ITEM is also used for tree blocks, that we can only use + * extent flags to determine if it's a tree block. + */ + if (btrfs_extent_flags(path->nodes[0], ei) & BTRFS_EXTENT_FLAG_DATA) { + ret = -ENOTSUPP; + goto release; + } + iter->cur_ptr = (u32)(iter->item_ptr + sizeof(*ei)); + + /* If there is no inline backref, go search for keyed backref */ + if (iter->cur_ptr >= iter->end_ptr) { + ret = btrfs_next_item(fs_info->extent_root, path); + + /* No inline nor keyed ref */ + if (ret > 0) { + ret = -ENOENT; + goto release; + } + if (ret < 0) + goto release; + + btrfs_item_key_to_cpu(path->nodes[0], &iter->cur_key, + path->slots[0]); + if (iter->cur_key.objectid != bytenr || + (iter->cur_key.type != BTRFS_SHARED_BLOCK_REF_KEY && + iter->cur_key.type != BTRFS_TREE_BLOCK_REF_KEY)) { + ret = -ENOENT; + goto release; + } + iter->cur_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0], + path->slots[0]); + iter->item_ptr = iter->cur_ptr; + iter->end_ptr = (u32)(iter->item_ptr + btrfs_item_size_nr( + path->nodes[0], path->slots[0])); + } + + return 0; +release: + btrfs_backref_iter_release(iter); + return ret; +} + +/* + * Go to the next backref item of current bytenr, can be either inlined or + * keyed. + * + * Caller needs to check whether it's inline ref or not by iter->cur_key. + * + * Return 0 if we get next backref without problem. + * Return >0 if there is no extra backref for this bytenr. + * Return <0 if there is something wrong happened. 
+ */ +int btrfs_backref_iter_next(struct btrfs_backref_iter *iter) +{ + struct extent_buffer *eb = btrfs_backref_get_eb(iter); + struct btrfs_path *path = iter->path; + struct btrfs_extent_inline_ref *iref; + int ret; + u32 size; + + if (btrfs_backref_iter_is_inline_ref(iter)) { + /* We're still inside the inline refs */ + ASSERT(iter->cur_ptr < iter->end_ptr); + + if (btrfs_backref_has_tree_block_info(iter)) { + /* First tree block info */ + size = sizeof(struct btrfs_tree_block_info); + } else { + /* Use inline ref type to determine the size */ + int type; + + iref = (struct btrfs_extent_inline_ref *) + ((unsigned long)iter->cur_ptr); + type = btrfs_extent_inline_ref_type(eb, iref); + + size = btrfs_extent_inline_ref_size(type); + } + iter->cur_ptr += size; + if (iter->cur_ptr < iter->end_ptr) + return 0; + + /* All inline items iterated, fall through */ + } + + /* We're at keyed items, there is no inline item, go to the next one */ + ret = btrfs_next_item(iter->fs_info->extent_root, iter->path); + if (ret) + return ret; + + btrfs_item_key_to_cpu(path->nodes[0], &iter->cur_key, path->slots[0]); + if (iter->cur_key.objectid != iter->bytenr || + (iter->cur_key.type != BTRFS_TREE_BLOCK_REF_KEY && + iter->cur_key.type != BTRFS_SHARED_BLOCK_REF_KEY)) + return 1; + iter->item_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0], + path->slots[0]); + iter->cur_ptr = iter->item_ptr; + iter->end_ptr = iter->item_ptr + (u32)btrfs_item_size_nr(path->nodes[0], + path->slots[0]); + return 0; +} + +void btrfs_backref_init_cache(struct btrfs_fs_info *fs_info, + struct btrfs_backref_cache *cache, int is_reloc) +{ + int i; + + cache->rb_root = RB_ROOT; + for (i = 0; i < BTRFS_MAX_LEVEL; i++) + INIT_LIST_HEAD(&cache->pending[i]); + INIT_LIST_HEAD(&cache->changed); + INIT_LIST_HEAD(&cache->detached); + INIT_LIST_HEAD(&cache->leaves); + INIT_LIST_HEAD(&cache->pending_edge); + INIT_LIST_HEAD(&cache->useless_node); + cache->fs_info = fs_info; + cache->is_reloc = is_reloc; +} + +struct btrfs_backref_node *btrfs_backref_alloc_node( + struct btrfs_backref_cache *cache, u64 bytenr, int level) +{ + struct btrfs_backref_node *node; + + ASSERT(level >= 0 && level < BTRFS_MAX_LEVEL); + node = kzalloc(sizeof(*node), GFP_NOFS); + if (!node) + return node; + + INIT_LIST_HEAD(&node->list); + INIT_LIST_HEAD(&node->upper); + INIT_LIST_HEAD(&node->lower); + RB_CLEAR_NODE(&node->rb_node); + cache->nr_nodes++; + node->level = level; + node->bytenr = bytenr; + + return node; +} + +struct btrfs_backref_edge *btrfs_backref_alloc_edge( + struct btrfs_backref_cache *cache) +{ + struct btrfs_backref_edge *edge; + + edge = kzalloc(sizeof(*edge), GFP_NOFS); + if (edge) + cache->nr_edges++; + return edge; +} + +/* + * Drop the backref node from cache, also cleaning up all its + * upper edges and any uncached nodes in the path. + * + * This cleanup happens bottom up, thus the node should either + * be the lowest node in the cache or a detached node. 
+ */ +void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache, + struct btrfs_backref_node *node) +{ + struct btrfs_backref_node *upper; + struct btrfs_backref_edge *edge; + + if (!node) + return; + + BUG_ON(!node->lowest && !node->detached); + while (!list_empty(&node->upper)) { + edge = list_entry(node->upper.next, struct btrfs_backref_edge, + list[LOWER]); + upper = edge->node[UPPER]; + list_del(&edge->list[LOWER]); + list_del(&edge->list[UPPER]); + btrfs_backref_free_edge(cache, edge); + + if (RB_EMPTY_NODE(&upper->rb_node)) { + BUG_ON(!list_empty(&node->upper)); + btrfs_backref_drop_node(cache, node); + node = upper; + node->lowest = 1; + continue; + } + /* + * Add the node to leaf node list if no other child block + * cached. + */ + if (list_empty(&upper->lower)) { + list_add_tail(&upper->lower, &cache->leaves); + upper->lowest = 1; + } + } + + btrfs_backref_drop_node(cache, node); +} + +/* + * Release all nodes/edges from current cache + */ +void btrfs_backref_release_cache(struct btrfs_backref_cache *cache) +{ + struct btrfs_backref_node *node; + int i; + + while (!list_empty(&cache->detached)) { + node = list_entry(cache->detached.next, + struct btrfs_backref_node, list); + btrfs_backref_cleanup_node(cache, node); + } + + while (!list_empty(&cache->leaves)) { + node = list_entry(cache->leaves.next, + struct btrfs_backref_node, lower); + btrfs_backref_cleanup_node(cache, node); + } + + cache->last_trans = 0; + + for (i = 0; i < BTRFS_MAX_LEVEL; i++) + ASSERT(list_empty(&cache->pending[i])); + ASSERT(list_empty(&cache->pending_edge)); + ASSERT(list_empty(&cache->useless_node)); + ASSERT(list_empty(&cache->changed)); + ASSERT(list_empty(&cache->detached)); + ASSERT(RB_EMPTY_ROOT(&cache->rb_root)); + ASSERT(!cache->nr_nodes); + ASSERT(!cache->nr_edges); +} + +/* + * Handle direct tree backref + * + * Direct tree backref means, the backref item shows its parent bytenr + * directly. This is for SHARED_BLOCK_REF backref (keyed or inlined). + * + * @ref_key: The converted backref key. + * For keyed backref, it's the item key. + * For inlined backref, objectid is the bytenr, + * type is btrfs_inline_ref_type, offset is + * btrfs_inline_ref_offset. + */ +static int handle_direct_tree_backref(struct btrfs_backref_cache *cache, + struct btrfs_key *ref_key, + struct btrfs_backref_node *cur) +{ + struct btrfs_backref_edge *edge; + struct btrfs_backref_node *upper; + struct rb_node *rb_node; + + ASSERT(ref_key->type == BTRFS_SHARED_BLOCK_REF_KEY); + + /* Only reloc root uses backref pointing to itself */ + if (ref_key->objectid == ref_key->offset) { + struct btrfs_root *root; + + cur->is_reloc_root = 1; + /* Only reloc backref cache cares about a specific root */ + if (cache->is_reloc) { + root = find_reloc_root(cache->fs_info, cur->bytenr); + if (WARN_ON(!root)) + return -ENOENT; + cur->root = root; + } else { + /* + * For generic purpose backref cache, reloc root node + * is useless. 
+ */ + list_add(&cur->list, &cache->useless_node); + } + return 0; + } + + edge = btrfs_backref_alloc_edge(cache); + if (!edge) + return -ENOMEM; + + rb_node = rb_simple_search(&cache->rb_root, ref_key->offset); + if (!rb_node) { + /* Parent node not yet cached */ + upper = btrfs_backref_alloc_node(cache, ref_key->offset, + cur->level + 1); + if (!upper) { + btrfs_backref_free_edge(cache, edge); + return -ENOMEM; + } + + /* + * Backrefs for the upper level block isn't cached, add the + * block to pending list + */ + list_add_tail(&edge->list[UPPER], &cache->pending_edge); + } else { + /* Parent node already cached */ + upper = rb_entry(rb_node, struct btrfs_backref_node, rb_node); + ASSERT(upper->checked); + INIT_LIST_HEAD(&edge->list[UPPER]); + } + btrfs_backref_link_edge(edge, cur, upper, LINK_LOWER); + return 0; +} + +/* + * Handle indirect tree backref + * + * Indirect tree backref means, we only know which tree the node belongs to. + * We still need to do a tree search to find out the parents. This is for + * TREE_BLOCK_REF backref (keyed or inlined). + * + * @ref_key: The same as @ref_key in handle_direct_tree_backref() + * @tree_key: The first key of this tree block. + * @path: A clean (released) path, to avoid allocating path everytime + * the function get called. + */ +static int handle_indirect_tree_backref(struct btrfs_backref_cache *cache, + struct btrfs_path *path, + struct btrfs_key *ref_key, + struct btrfs_key *tree_key, + struct btrfs_backref_node *cur) +{ + struct btrfs_fs_info *fs_info = cache->fs_info; + struct btrfs_backref_node *upper; + struct btrfs_backref_node *lower; + struct btrfs_backref_edge *edge; + struct extent_buffer *eb; + struct btrfs_root *root; + struct rb_node *rb_node; + int level; + bool need_check = true; + int ret; + + root = btrfs_get_fs_root(fs_info, ref_key->offset, false); + if (IS_ERR(root)) + return PTR_ERR(root); + if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) + cur->cowonly = 1; + + if (btrfs_root_level(&root->root_item) == cur->level) { + /* Tree root */ + ASSERT(btrfs_root_bytenr(&root->root_item) == cur->bytenr); + /* + * For reloc backref cache, we may ignore reloc root. But for + * general purpose backref cache, we can't rely on + * btrfs_should_ignore_reloc_root() as it may conflict with + * current running relocation and lead to missing root. + * + * For general purpose backref cache, reloc root detection is + * completely relying on direct backref (key->offset is parent + * bytenr), thus only do such check for reloc cache. 
+ */ + if (btrfs_should_ignore_reloc_root(root) && cache->is_reloc) { + btrfs_put_root(root); + list_add(&cur->list, &cache->useless_node); + } else { + cur->root = root; + } + return 0; + } + + level = cur->level + 1; + + /* Search the tree to find parent blocks referring to the block */ + path->search_commit_root = 1; + path->skip_locking = 1; + path->lowest_level = level; + ret = btrfs_search_slot(NULL, root, tree_key, path, 0, 0); + path->lowest_level = 0; + if (ret < 0) { + btrfs_put_root(root); + return ret; + } + if (ret > 0 && path->slots[level] > 0) + path->slots[level]--; + + eb = path->nodes[level]; + if (btrfs_node_blockptr(eb, path->slots[level]) != cur->bytenr) { + btrfs_err(fs_info, +"couldn't find block (%llu) (level %d) in tree (%llu) with key (%llu %u %llu)", + cur->bytenr, level - 1, root->root_key.objectid, + tree_key->objectid, tree_key->type, tree_key->offset); + btrfs_put_root(root); + ret = -ENOENT; + goto out; + } + lower = cur; + + /* Add all nodes and edges in the path */ + for (; level < BTRFS_MAX_LEVEL; level++) { + if (!path->nodes[level]) { + ASSERT(btrfs_root_bytenr(&root->root_item) == + lower->bytenr); + /* Same as previous should_ignore_reloc_root() call */ + if (btrfs_should_ignore_reloc_root(root) && + cache->is_reloc) { + btrfs_put_root(root); + list_add(&lower->list, &cache->useless_node); + } else { + lower->root = root; + } + break; + } + + edge = btrfs_backref_alloc_edge(cache); + if (!edge) { + btrfs_put_root(root); + ret = -ENOMEM; + goto out; + } + + eb = path->nodes[level]; + rb_node = rb_simple_search(&cache->rb_root, eb->start); + if (!rb_node) { + upper = btrfs_backref_alloc_node(cache, eb->start, + lower->level + 1); + if (!upper) { + btrfs_put_root(root); + btrfs_backref_free_edge(cache, edge); + ret = -ENOMEM; + goto out; + } + upper->owner = btrfs_header_owner(eb); + if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) + upper->cowonly = 1; + + /* + * If we know the block isn't shared we can avoid + * checking its backrefs. + */ + if (btrfs_block_can_be_shared(root, eb)) + upper->checked = 0; + else + upper->checked = 1; + + /* + * Add the block to pending list if we need to check its + * backrefs, we only do this once while walking up a + * tree as we will catch anything else later on. + */ + if (!upper->checked && need_check) { + need_check = false; + list_add_tail(&edge->list[UPPER], + &cache->pending_edge); + } else { + if (upper->checked) + need_check = true; + INIT_LIST_HEAD(&edge->list[UPPER]); + } + } else { + upper = rb_entry(rb_node, struct btrfs_backref_node, + rb_node); + ASSERT(upper->checked); + INIT_LIST_HEAD(&edge->list[UPPER]); + if (!upper->owner) + upper->owner = btrfs_header_owner(eb); + } + btrfs_backref_link_edge(edge, lower, upper, LINK_LOWER); + + if (rb_node) { + btrfs_put_root(root); + break; + } + lower = upper; + upper = NULL; + } +out: + btrfs_release_path(path); + return ret; +} + +/* + * Add backref node @cur into @cache. + * + * NOTE: Even if the function returned 0, @cur is not yet cached as its upper + * links aren't yet bi-directional. Needs to finish such links. + * Use btrfs_backref_finish_upper_links() to finish such linkage. 
+ * + * @path: Released path for indirect tree backref lookup + * @iter: Released backref iter for extent tree search + * @node_key: The first key of the tree block + */ +int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache, + struct btrfs_path *path, + struct btrfs_backref_iter *iter, + struct btrfs_key *node_key, + struct btrfs_backref_node *cur) +{ + struct btrfs_fs_info *fs_info = cache->fs_info; + struct btrfs_backref_edge *edge; + struct btrfs_backref_node *exist; + int ret; + + ret = btrfs_backref_iter_start(iter, cur->bytenr); + if (ret < 0) + return ret; + /* + * We skip the first btrfs_tree_block_info, as we don't use the key + * stored in it, but fetch it from the tree block + */ + if (btrfs_backref_has_tree_block_info(iter)) { + ret = btrfs_backref_iter_next(iter); + if (ret < 0) + goto out; + /* No extra backref? This means the tree block is corrupted */ + if (ret > 0) { + ret = -EUCLEAN; + goto out; + } + } + WARN_ON(cur->checked); + if (!list_empty(&cur->upper)) { + /* + * The backref was added previously when processing backref of + * type BTRFS_TREE_BLOCK_REF_KEY + */ + ASSERT(list_is_singular(&cur->upper)); + edge = list_entry(cur->upper.next, struct btrfs_backref_edge, + list[LOWER]); + ASSERT(list_empty(&edge->list[UPPER])); + exist = edge->node[UPPER]; + /* + * Add the upper level block to pending list if we need check + * its backrefs + */ + if (!exist->checked) + list_add_tail(&edge->list[UPPER], &cache->pending_edge); + } else { + exist = NULL; + } + + for (; ret == 0; ret = btrfs_backref_iter_next(iter)) { + struct extent_buffer *eb; + struct btrfs_key key; + int type; + + cond_resched(); + eb = btrfs_backref_get_eb(iter); + + key.objectid = iter->bytenr; + if (btrfs_backref_iter_is_inline_ref(iter)) { + struct btrfs_extent_inline_ref *iref; + + /* Update key for inline backref */ + iref = (struct btrfs_extent_inline_ref *) + ((unsigned long)iter->cur_ptr); + type = btrfs_get_extent_inline_ref_type(eb, iref, + BTRFS_REF_TYPE_BLOCK); + if (type == BTRFS_REF_TYPE_INVALID) { + ret = -EUCLEAN; + goto out; + } + key.type = type; + key.offset = btrfs_extent_inline_ref_offset(eb, iref); + } else { + key.type = iter->cur_key.type; + key.offset = iter->cur_key.offset; + } + + /* + * Parent node found and matches current inline ref, no need to + * rebuild this node for this inline ref + */ + if (exist && + ((key.type == BTRFS_TREE_BLOCK_REF_KEY && + exist->owner == key.offset) || + (key.type == BTRFS_SHARED_BLOCK_REF_KEY && + exist->bytenr == key.offset))) { + exist = NULL; + continue; + } + + /* SHARED_BLOCK_REF means key.offset is the parent bytenr */ + if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) { + ret = handle_direct_tree_backref(cache, &key, cur); + if (ret < 0) + goto out; + continue; + } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) { + ret = -EINVAL; + btrfs_print_v0_err(fs_info); + btrfs_handle_fs_error(fs_info, ret, NULL); + goto out; + } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) { + continue; + } + + /* + * key.type == BTRFS_TREE_BLOCK_REF_KEY, inline ref offset + * means the root objectid. We need to search the tree to get + * its parent bytenr. 
+ */ + ret = handle_indirect_tree_backref(cache, path, &key, node_key, + cur); + if (ret < 0) + goto out; + } + ret = 0; + cur->checked = 1; + WARN_ON(exist); +out: + btrfs_backref_iter_release(iter); + return ret; +} + +/* + * Finish the upwards linkage created by btrfs_backref_add_tree_node() + */ +int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache, + struct btrfs_backref_node *start) +{ + struct list_head *useless_node = &cache->useless_node; + struct btrfs_backref_edge *edge; + struct rb_node *rb_node; + LIST_HEAD(pending_edge); + + ASSERT(start->checked); + + /* Insert this node to cache if it's not COW-only */ + if (!start->cowonly) { + rb_node = rb_simple_insert(&cache->rb_root, start->bytenr, + &start->rb_node); + if (rb_node) + btrfs_backref_panic(cache->fs_info, start->bytenr, + -EEXIST); + list_add_tail(&start->lower, &cache->leaves); + } + + /* + * Use breadth first search to iterate all related edges. + * + * The starting points are all the edges of this node + */ + list_for_each_entry(edge, &start->upper, list[LOWER]) + list_add_tail(&edge->list[UPPER], &pending_edge); + + while (!list_empty(&pending_edge)) { + struct btrfs_backref_node *upper; + struct btrfs_backref_node *lower; + struct rb_node *rb_node; + + edge = list_first_entry(&pending_edge, + struct btrfs_backref_edge, list[UPPER]); + list_del_init(&edge->list[UPPER]); + upper = edge->node[UPPER]; + lower = edge->node[LOWER]; + + /* Parent is detached, no need to keep any edges */ + if (upper->detached) { + list_del(&edge->list[LOWER]); + btrfs_backref_free_edge(cache, edge); + + /* Lower node is orphan, queue for cleanup */ + if (list_empty(&lower->upper)) + list_add(&lower->list, useless_node); + continue; + } + + /* + * All new nodes added in current build_backref_tree() haven't + * been linked to the cache rb tree. + * So if we have upper->rb_node populated, this means a cache + * hit. We only need to link the edge, as @upper and all its + * parents have already been linked. 
+ */ + if (!RB_EMPTY_NODE(&upper->rb_node)) { + if (upper->lowest) { + list_del_init(&upper->lower); + upper->lowest = 0; + } + + list_add_tail(&edge->list[UPPER], &upper->lower); + continue; + } + + /* Sanity check, we shouldn't have any unchecked nodes */ + if (!upper->checked) { + ASSERT(0); + return -EUCLEAN; + } + + /* Sanity check, COW-only node has non-COW-only parent */ + if (start->cowonly != upper->cowonly) { + ASSERT(0); + return -EUCLEAN; + } + + /* Only cache non-COW-only (subvolume trees) tree blocks */ + if (!upper->cowonly) { + rb_node = rb_simple_insert(&cache->rb_root, upper->bytenr, + &upper->rb_node); + if (rb_node) { + btrfs_backref_panic(cache->fs_info, + upper->bytenr, -EEXIST); + return -EUCLEAN; + } + } + + list_add_tail(&edge->list[UPPER], &upper->lower); + + /* + * Also queue all the parent edges of this uncached node + * to finish the upper linkage + */ + list_for_each_entry(edge, &upper->upper, list[LOWER]) + list_add_tail(&edge->list[UPPER], &pending_edge); + } + return 0; +} + +void btrfs_backref_error_cleanup(struct btrfs_backref_cache *cache, + struct btrfs_backref_node *node) +{ + struct btrfs_backref_node *lower; + struct btrfs_backref_node *upper; + struct btrfs_backref_edge *edge; + + while (!list_empty(&cache->useless_node)) { + lower = list_first_entry(&cache->useless_node, + struct btrfs_backref_node, list); + list_del_init(&lower->list); + } + while (!list_empty(&cache->pending_edge)) { + edge = list_first_entry(&cache->pending_edge, + struct btrfs_backref_edge, list[UPPER]); + list_del(&edge->list[UPPER]); + list_del(&edge->list[LOWER]); + lower = edge->node[LOWER]; + upper = edge->node[UPPER]; + btrfs_backref_free_edge(cache, edge); + + /* + * Lower is no longer linked to any upper backref nodes and + * isn't in the cache, we can free it ourselves. + */ + if (list_empty(&lower->upper) && + RB_EMPTY_NODE(&lower->rb_node)) + list_add(&lower->list, &cache->useless_node); + + if (!RB_EMPTY_NODE(&upper->rb_node)) + continue; + + /* Add this guy's upper edges to the list to process */ + list_for_each_entry(edge, &upper->upper, list[LOWER]) + list_add_tail(&edge->list[UPPER], + &cache->pending_edge); + if (list_empty(&upper->upper)) + list_add(&upper->list, &cache->useless_node); + } + + while (!list_empty(&cache->useless_node)) { + lower = list_first_entry(&cache->useless_node, + struct btrfs_backref_node, list); + list_del_init(&lower->list); + if (lower == node) + node = NULL; + btrfs_backref_free_node(cache, lower); + } + + btrfs_backref_cleanup_node(cache, node); + ASSERT(list_empty(&cache->useless_node) && + list_empty(&cache->pending_edge)); +} |