Diffstat (limited to 'fs/btrfs/extent-io-tree.c')
-rw-r--r--  fs/btrfs/extent-io-tree.c | 194
1 file changed, 143 insertions(+), 51 deletions(-)
diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c
index 83cb0378096f..3c7766dfaa69 100644
--- a/fs/btrfs/extent-io-tree.c
+++ b/fs/btrfs/extent-io-tree.c
@@ -2,6 +2,7 @@
#include <linux/slab.h>
#include <trace/events/btrfs.h>
+#include "messages.h"
#include "ctree.h"
#include "extent-io-tree.h"
#include "btrfs_inode.h"
@@ -57,17 +58,17 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller,
struct extent_io_tree *tree,
u64 start, u64 end)
{
- struct inode *inode = tree->private_data;
+ struct btrfs_inode *inode = tree->inode;
u64 isize;
if (!inode)
return;
- isize = i_size_read(inode);
+ isize = i_size_read(&inode->vfs_inode);
if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
- btrfs_debug_rl(BTRFS_I(inode)->root->fs_info,
+ btrfs_debug_rl(inode->root->fs_info,
"%s: ino %llu isize %llu odd range [%llu,%llu]",
- caller, btrfs_ino(BTRFS_I(inode)), isize, start, end);
+ caller, btrfs_ino(inode), isize, start, end);
}
}
#else
@@ -93,13 +94,12 @@ struct tree_entry {
};
void extent_io_tree_init(struct btrfs_fs_info *fs_info,
- struct extent_io_tree *tree, unsigned int owner,
- void *private_data)
+ struct extent_io_tree *tree, unsigned int owner)
{
tree->fs_info = fs_info;
tree->state = RB_ROOT;
spin_lock_init(&tree->lock);
- tree->private_data = private_data;
+ tree->inode = NULL;
tree->owner = owner;
if (owner == IO_TREE_INODE_FILE_EXTENT)
lockdep_set_class(&tree->lock, &file_extent_tree_class);
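(Illustrative note, not part of the diff: with private_data gone, extent_io_tree_init() leaves tree->inode NULL, and the delalloc callbacks changed below only run when a tree sets it. A minimal sketch of how an inode-owned tree could be wired up under this API; the io_tree/file_extent_tree field names, the IO_TREE_INODE_IO owner value and the helper itself are assumptions, not taken from this patch.)

/* Hypothetical helper, for illustration only. */
static void example_init_inode_trees(struct btrfs_fs_info *fs_info,
				     struct btrfs_inode *inode)
{
	/* Assumed field and owner names for the inode's main io tree. */
	extent_io_tree_init(fs_info, &inode->io_tree, IO_TREE_INODE_IO);
	/* Trees owned by an inode now record the btrfs_inode directly. */
	inode->io_tree.inode = inode;

	/*
	 * A tree that must not trigger the delalloc callbacks simply keeps
	 * the NULL ->inode set by extent_io_tree_init().
	 */
	extent_io_tree_init(fs_info, &inode->file_extent_tree,
			    IO_TREE_INODE_FILE_EXTENT);
}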
@@ -346,9 +346,8 @@ static void merge_state(struct extent_io_tree *tree, struct extent_state *state)
other = prev_state(state);
if (other && other->end == state->start - 1 &&
other->state == state->state) {
- if (tree->private_data)
- btrfs_merge_delalloc_extent(tree->private_data,
- state, other);
+ if (tree->inode)
+ btrfs_merge_delalloc_extent(tree->inode, state, other);
state->start = other->start;
rb_erase(&other->rb_node, &tree->state);
RB_CLEAR_NODE(&other->rb_node);
@@ -357,9 +356,8 @@ static void merge_state(struct extent_io_tree *tree, struct extent_state *state)
other = next_state(state);
if (other && other->start == state->end + 1 &&
other->state == state->state) {
- if (tree->private_data)
- btrfs_merge_delalloc_extent(tree->private_data, state,
- other);
+ if (tree->inode)
+ btrfs_merge_delalloc_extent(tree->inode, state, other);
state->end = other->end;
rb_erase(&other->rb_node, &tree->state);
RB_CLEAR_NODE(&other->rb_node);
@@ -374,8 +372,8 @@ static void set_state_bits(struct extent_io_tree *tree,
u32 bits_to_set = bits & ~EXTENT_CTLBITS;
int ret;
- if (tree->private_data)
- btrfs_set_delalloc_extent(tree->private_data, state, bits);
+ if (tree->inode)
+ btrfs_set_delalloc_extent(tree->inode, state, bits);
ret = add_extent_changeset(state, bits_to_set, changeset, 1);
BUG_ON(ret < 0);
@@ -397,7 +395,7 @@ static int insert_state(struct extent_io_tree *tree,
u32 bits, struct extent_changeset *changeset)
{
struct rb_node **node;
- struct rb_node *parent;
+ struct rb_node *parent = NULL;
const u64 end = state->end;
set_state_bits(tree, state, bits, changeset);
@@ -462,8 +460,8 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
struct rb_node *parent = NULL;
struct rb_node **node;
- if (tree->private_data)
- btrfs_split_delalloc_extent(tree->private_data, orig, split);
+ if (tree->inode)
+ btrfs_split_delalloc_extent(tree->inode, orig, split);
prealloc->start = orig->start;
prealloc->end = split - 1;
@@ -510,8 +508,8 @@ static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
u32 bits_to_clear = bits & ~EXTENT_CTLBITS;
int ret;
- if (tree->private_data)
- btrfs_clear_delalloc_extent(tree->private_data, state, bits);
+ if (tree->inode)
+ btrfs_clear_delalloc_extent(tree->inode, state, bits);
ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
BUG_ON(ret < 0);
@@ -572,7 +570,7 @@ int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY))
clear = 1;
again:
- if (!prealloc && gfpflags_allow_blocking(mask)) {
+ if (!prealloc) {
/*
* Don't care for allocation failure here because we might end
* up not needing the pre-allocated extent state at all, which
@@ -636,7 +634,8 @@ hit_next:
if (state->start < start) {
prealloc = alloc_extent_state_atomic(prealloc);
- BUG_ON(!prealloc);
+ if (!prealloc)
+ goto search_again;
err = split_state(tree, state, prealloc, start);
if (err)
extent_io_tree_panic(tree, err);
@@ -657,7 +656,8 @@ hit_next:
*/
if (state->start <= end && state->end > end) {
prealloc = alloc_extent_state_atomic(prealloc);
- BUG_ON(!prealloc);
+ if (!prealloc)
+ goto search_again;
err = split_state(tree, state, prealloc, end + 1);
if (err)
extent_io_tree_panic(tree, err);
@@ -714,7 +714,8 @@ static void wait_on_state(struct extent_io_tree *tree,
* The range [start, end] is inclusive.
* The tree lock is taken by this function
*/
-void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits)
+void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
+ struct extent_state **cached_state)
{
struct extent_state *state;
@@ -722,6 +723,16 @@ void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits)
spin_lock(&tree->lock);
again:
+ /*
+ * Maintain cached_state, as we may not remove it from the tree if there
+ * are more bits than the bits we're waiting on set on this state.
+ */
+ if (cached_state && *cached_state) {
+ state = *cached_state;
+ if (extent_state_in_tree(state) &&
+ state->start <= start && start < state->end)
+ goto process_node;
+ }
while (1) {
/*
* This search will find all the extents that end after our
@@ -752,6 +763,12 @@ process_node:
}
}
out:
+ /* This state is no longer useful, clear it and free it up. */
+ if (cached_state && *cached_state) {
+ state = *cached_state;
+ *cached_state = NULL;
+ free_extent_state(state);
+ }
spin_unlock(&tree->lock);
}
@@ -939,13 +956,17 @@ out:
* sleeping, so the gfp mask is used to indicate what is allowed.
*
* If any of the exclusive bits are set, this will fail with -EEXIST if some
- * part of the range already has the desired bits set. The start of the
- * existing range is returned in failed_start in this case.
+ * part of the range already has the desired bits set. The extent_state of the
+ * existing range is returned in failed_state in this case, and the start of the
+ * existing range is returned in failed_start. failed_state is used as an
+ * optimization for wait_extent_bit; failed_start must be used as the source of
+ * truth as failed_state may have changed since we returned.
*
* [start, end] is inclusive. This takes the tree lock.
*/
static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, u64 *failed_start,
+ struct extent_state **failed_state,
struct extent_state **cached_state,
struct extent_changeset *changeset, gfp_t mask)
{
@@ -964,9 +985,9 @@ static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
if (exclusive_bits)
ASSERT(failed_start);
else
- ASSERT(failed_start == NULL);
+ ASSERT(failed_start == NULL && failed_state == NULL);
again:
- if (!prealloc && gfpflags_allow_blocking(mask)) {
+ if (!prealloc) {
/*
* Don't care for allocation failure here because we might end
* up not needing the pre-allocated extent state at all, which
@@ -991,7 +1012,8 @@ again:
state = tree_search_for_insert(tree, start, &p, &parent);
if (!state) {
prealloc = alloc_extent_state_atomic(prealloc);
- BUG_ON(!prealloc);
+ if (!prealloc)
+ goto search_again;
prealloc->start = start;
prealloc->end = end;
insert_state_fast(tree, prealloc, p, parent, bits, changeset);
@@ -1012,6 +1034,7 @@ hit_next:
if (state->start == start && state->end <= end) {
if (state->state & exclusive_bits) {
*failed_start = state->start;
+ cache_state(state, failed_state);
err = -EEXIST;
goto out;
}
@@ -1047,6 +1070,7 @@ hit_next:
if (state->start < start) {
if (state->state & exclusive_bits) {
*failed_start = start;
+ cache_state(state, failed_state);
err = -EEXIST;
goto out;
}
@@ -1062,7 +1086,8 @@ hit_next:
}
prealloc = alloc_extent_state_atomic(prealloc);
- BUG_ON(!prealloc);
+ if (!prealloc)
+ goto search_again;
err = split_state(tree, state, prealloc, start);
if (err)
extent_io_tree_panic(tree, err);
@@ -1099,7 +1124,8 @@ hit_next:
this_end = last_start - 1;
prealloc = alloc_extent_state_atomic(prealloc);
- BUG_ON(!prealloc);
+ if (!prealloc)
+ goto search_again;
/*
* Avoid freeing 'prealloc' if it can be merged with the later
@@ -1125,12 +1151,14 @@ hit_next:
if (state->start <= end && state->end > end) {
if (state->state & exclusive_bits) {
*failed_start = start;
+ cache_state(state, failed_state);
err = -EEXIST;
goto out;
}
prealloc = alloc_extent_state_atomic(prealloc);
- BUG_ON(!prealloc);
+ if (!prealloc)
+ goto search_again;
err = split_state(tree, state, prealloc, end + 1);
if (err)
extent_io_tree_panic(tree, err);
@@ -1162,8 +1190,8 @@ out:
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, struct extent_state **cached_state, gfp_t mask)
{
- return __set_extent_bit(tree, start, end, bits, NULL, cached_state,
- NULL, mask);
+ return __set_extent_bit(tree, start, end, bits, NULL, NULL,
+ cached_state, NULL, mask);
}
/*
@@ -1397,7 +1425,7 @@ void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, u32 bits)
{
struct extent_state *state;
- struct extent_state *prev = NULL, *next;
+ struct extent_state *prev = NULL, *next = NULL;
spin_lock(&tree->lock);
@@ -1487,30 +1515,82 @@ out:
}
/*
- * Count the number of bytes in the tree that have a given bit(s) set. This
- * can be fairly slow, except for EXTENT_DIRTY which is cached. The total
- * number found is returned.
+ * Count the number of bytes in the tree that have a given bit(s) set for a
+ * given range.
+ *
+ * @tree: The io tree to search.
+ * @start: The start offset of the range. This value is updated to the
+ * offset of the first byte found with the given bit(s), so it
+ * can end up being bigger than the initial value.
+ * @search_end: The end offset (inclusive value) of the search range.
+ * @max_bytes: The maximum byte count we are interested in. The search stops
+ * once it reaches this count.
+ * @bits: The bits the range must have in order to be accounted for.
+ * If multiple bits are set, then only subranges that have all
+ * the bits set are accounted for.
+ * @contig: Indicate if we should ignore holes in the range or not. If
+ * this is true, then stop once we find a hole.
+ * @cached_state: A cached state to be used across multiple calls to this
+ * function in order to speed up searches. Use NULL if this is
+ * called only once or if each call does not start where the
+ * previous one ended.
+ *
+ * Returns the total number of bytes found within the given range that have
+ * all given bits set. If the returned number of bytes is greater than zero
+ * then @start is updated with the offset of the first byte with the bits set.
*/
u64 count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end, u64 max_bytes,
- u32 bits, int contig)
+ u32 bits, int contig,
+ struct extent_state **cached_state)
{
- struct extent_state *state;
+ struct extent_state *state = NULL;
+ struct extent_state *cached;
u64 cur_start = *start;
u64 total_bytes = 0;
u64 last = 0;
int found = 0;
- if (WARN_ON(search_end <= cur_start))
+ if (WARN_ON(search_end < cur_start))
return 0;
spin_lock(&tree->lock);
+ if (!cached_state || !*cached_state)
+ goto search;
+
+ cached = *cached_state;
+
+ if (!extent_state_in_tree(cached))
+ goto search;
+
+ if (cached->start <= cur_start && cur_start <= cached->end) {
+ state = cached;
+ } else if (cached->start > cur_start) {
+ struct extent_state *prev;
+
+ /*
+ * The cached state starts after our search range's start. Check
+ * if the previous state record starts at or before the range we
+ * are looking for, and if so, use it - this is a common case
+ * when there are holes between records in the tree. If there is
+ * no previous state record, we can start from our cached state.
+ */
+ prev = prev_state(cached);
+ if (!prev)
+ state = cached;
+ else if (prev->start <= cur_start && cur_start <= prev->end)
+ state = prev;
+ }
+
/*
* This search will find all the extents that end after our range
* starts.
*/
- state = tree_search(tree, cur_start);
+search:
+ if (!state)
+ state = tree_search(tree, cur_start);
+
while (state) {
if (state->start > search_end)
break;
@@ -1531,7 +1611,16 @@ u64 count_range_bits(struct extent_io_tree *tree,
}
state = next_state(state);
}
+
+ if (cached_state) {
+ free_extent_state(*cached_state);
+ *cached_state = state;
+ if (state)
+ refcount_inc(&state->refs);
+ }
+
spin_unlock(&tree->lock);
+
return total_bytes;
}
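(Illustrative note, not part of the diff: the new cached_state argument is meant for repeated calls that resume where the previous one stopped, so each call can pick up from the cached record instead of searching the rbtree from the root. A rough caller sketch under that assumption; the helper name, the chunked walk and the EXTENT_DIRTY bit are illustrative only.)

/* Hypothetical caller, for illustration only. */
static u64 example_count_dirty(struct extent_io_tree *tree, u64 start,
			       u64 end, u64 step)
{
	struct extent_state *cached = NULL;
	u64 total = 0;

	while (start <= end) {
		u64 range_start = start;
		u64 range_end = min(start + step - 1, end);

		/* Consecutive calls reuse the state cached by the last one. */
		total += count_range_bits(tree, &range_start, range_end,
					  range_end - range_start + 1,
					  EXTENT_DIRTY, 0, &cached);
		start = range_end + 1;
	}
	/* Drop the reference count_range_bits() left in the cache. */
	free_extent_state(cached);

	return total;
}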
@@ -1598,8 +1687,8 @@ int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
*/
ASSERT(!(bits & EXTENT_LOCKED));
- return __set_extent_bit(tree, start, end, bits, NULL, NULL, changeset,
- GFP_NOFS);
+ return __set_extent_bit(tree, start, end, bits, NULL, NULL, NULL,
+ changeset, GFP_NOFS);
}
int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
@@ -1615,17 +1704,18 @@ int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
changeset);
}
-int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
+int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
+ struct extent_state **cached)
{
int err;
u64 failed_start;
err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
- NULL, NULL, GFP_NOFS);
+ NULL, cached, NULL, GFP_NOFS);
if (err == -EEXIST) {
if (failed_start > start)
clear_extent_bit(tree, start, failed_start - 1,
- EXTENT_LOCKED, NULL);
+ EXTENT_LOCKED, cached);
return 0;
}
return 1;
@@ -1638,20 +1728,22 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state)
{
+ struct extent_state *failed_state = NULL;
int err;
u64 failed_start;
err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
- cached_state, NULL, GFP_NOFS);
+ &failed_state, cached_state, NULL, GFP_NOFS);
while (err == -EEXIST) {
if (failed_start != start)
clear_extent_bit(tree, start, failed_start - 1,
EXTENT_LOCKED, cached_state);
- wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
+ wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED,
+ &failed_state);
err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
- &failed_start, cached_state, NULL,
- GFP_NOFS);
+ &failed_start, &failed_state,
+ cached_state, NULL, GFP_NOFS);
}
return err;
}
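(Illustrative note, not part of the diff: together these changes let a caller keep the extent_state cached by the lock and hand it back when clearing EXTENT_LOCKED, avoiding a second rbtree search; internally lock_extent() also reuses the failed_state described in the __set_extent_bit() comment while waiting. A minimal caller sketch; the helper name is hypothetical, and clear_extent_bit() is used for the unlock exactly as try_lock_extent() does above.)

/* Hypothetical caller, for illustration only. */
static void example_lock_range(struct extent_io_tree *tree, u64 start, u64 end)
{
	struct extent_state *cached = NULL;

	/* Waits on contention and caches the locked state on success. */
	lock_extent(tree, start, end, &cached);

	/* ... operate on the locked range ... */

	/*
	 * Clearing EXTENT_LOCKED through the cached state reuses it and
	 * drops the reference taken when the range was locked.
	 */
	clear_extent_bit(tree, start, end, EXTENT_LOCKED, &cached);
}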