summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/tree-log.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-03-02 16:41:54 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2013-03-02 16:41:54 -0800
commitb695188dd39162a1a6bff11fdbcc4c0b65b933ab (patch)
treea3df7c052d38b5bfaf335fbf3130abcc5c6ca577 /fs/btrfs/tree-log.c
parent48476df99894492a0f7239f2f3c9a2dde4ff38e2 (diff)
parent180e001cd5fc2950dc6a7997dde5b65c954d0e79 (diff)
downloadlinux-b695188dd39162a1a6bff11fdbcc4c0b65b933ab.tar.bz2
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs update from Chris Mason: "The biggest feature in the pull is the new (and still experimental) raid56 code that David Woodhouse started long ago. I'm still working on the parity logging setup that will avoid inconsistent parity after a crash, so this is only for testing right now. But, I'd really like to get it out to a broader audience to hammer out any performance issues or other problems. scrub does not yet correct errors on raid5/6 either. Josef has another pass at fsync performance. The big change here is to combine waiting for metadata with waiting for data, which is a big latency win. It is also step one toward using atomics from the hardware during a commit. Mark Fasheh has a new way to use btrfs send/receive to send only the metadata changes. SUSE is using this to make snapper more efficient at finding changes between snapshosts. Snapshot-aware defrag is also included. Otherwise we have a large number of fixes and cleanups. Eric Sandeen wins the award for removing the most lines, and I'm hoping we steal this idea from XFS over and over again." * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (118 commits) btrfs: fixup/remove module.h usage as required Btrfs: delete inline extents when we find them during logging btrfs: try harder to allocate raid56 stripe cache Btrfs: cleanup to make the function btrfs_delalloc_reserve_metadata more logic Btrfs: don't call btrfs_qgroup_free if just btrfs_qgroup_reserve fails Btrfs: remove reduplicate check about root in the function btrfs_clean_quota_tree Btrfs: return ENOMEM rather than use BUG_ON when btrfs_alloc_path fails Btrfs: fix missing deleted items in btrfs_clean_quota_tree btrfs: use only inline_pages from extent buffer Btrfs: fix wrong reserved space when deleting a snapshot/subvolume Btrfs: fix wrong reserved space in qgroup during snap/subv creation Btrfs: remove unnecessary dget_parent/dput when creating the pending snapshot btrfs: remove a printk from scan_one_device Btrfs: fix NULL pointer after aborting a transaction Btrfs: fix memory leak of log roots Btrfs: copy everything if we've created an inline extent btrfs: cleanup for open-coded alignment Btrfs: do not change inode flags in rename Btrfs: use reserved space for creating a snapshot clear chunk_alloc flag on retryable failure ...
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r--fs/btrfs/tree-log.c166
1 files changed, 154 insertions, 12 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9027bb1e7466..c7ef569eb22a 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -278,8 +278,7 @@ static int process_one_buffer(struct btrfs_root *log,
struct walk_control *wc, u64 gen)
{
if (wc->pin)
- btrfs_pin_extent_for_log_replay(wc->trans,
- log->fs_info->extent_root,
+ btrfs_pin_extent_for_log_replay(log->fs_info->extent_root,
eb->start, eb->len);
if (btrfs_buffer_uptodate(eb, gen, 0)) {
@@ -485,7 +484,6 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
struct btrfs_key *key)
{
int found_type;
- u64 mask = root->sectorsize - 1;
u64 extent_end;
u64 start = key->offset;
u64 saved_nbytes;
@@ -502,7 +500,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
extent_end = start + btrfs_file_extent_num_bytes(eb, item);
else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
size = btrfs_file_extent_inline_len(eb, item);
- extent_end = (start + size + mask) & ~mask;
+ extent_end = ALIGN(start + size, root->sectorsize);
} else {
ret = 0;
goto out;
@@ -2281,6 +2279,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
unsigned long log_transid = 0;
mutex_lock(&root->log_mutex);
+ log_transid = root->log_transid;
index1 = root->log_transid % 2;
if (atomic_read(&root->log_commit[index1])) {
wait_log_commit(trans, root, root->log_transid);
@@ -2308,11 +2307,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
/* bail out if we need to do a full commit */
if (root->fs_info->last_trans_log_full_commit == trans->transid) {
ret = -EAGAIN;
+ btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&root->log_mutex);
goto out;
}
- log_transid = root->log_transid;
if (log_transid % 2 == 0)
mark = EXTENT_DIRTY;
else
@@ -2324,6 +2323,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
+ btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&root->log_mutex);
goto out;
}
@@ -2363,6 +2363,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
}
root->fs_info->last_trans_log_full_commit = trans->transid;
btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
+ btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&log_root_tree->log_mutex);
ret = -EAGAIN;
goto out;
@@ -2373,6 +2374,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
wait_log_commit(trans, log_root_tree,
log_root_tree->log_transid);
+ btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&log_root_tree->log_mutex);
ret = 0;
goto out;
@@ -2392,6 +2394,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
*/
if (root->fs_info->last_trans_log_full_commit == trans->transid) {
btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
+ btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&log_root_tree->log_mutex);
ret = -EAGAIN;
goto out_wake_log_root;
@@ -2402,10 +2405,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
EXTENT_DIRTY | EXTENT_NEW);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
+ btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&log_root_tree->log_mutex);
goto out_wake_log_root;
}
btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
+ btrfs_wait_logged_extents(log, log_transid);
btrfs_set_super_log_root(root->fs_info->super_for_commit,
log_root_tree->node->start);
@@ -2461,8 +2466,10 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
.process_func = process_one_buffer
};
- ret = walk_log_tree(trans, log, &wc);
- BUG_ON(ret);
+ if (trans) {
+ ret = walk_log_tree(trans, log, &wc);
+ BUG_ON(ret);
+ }
while (1) {
ret = find_first_extent_bit(&log->dirty_log_pages,
@@ -2475,6 +2482,14 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
}
+ /*
+ * We may have short-circuited the log tree with the full commit logic
+ * and left ordered extents on our list, so clear these out to keep us
+ * from leaking inodes and memory.
+ */
+ btrfs_free_logged_extents(log, 0);
+ btrfs_free_logged_extents(log, 1);
+
free_extent_buffer(log->node);
kfree(log);
}
@@ -2724,7 +2739,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
path->keep_locks = 1;
ret = btrfs_search_forward(root, &min_key, &max_key,
- path, 0, trans->transid);
+ path, trans->transid);
/*
* we didn't find anything from this transaction, see if there
@@ -3271,16 +3286,21 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *log = root->log_root;
struct btrfs_file_extent_item *fi;
struct extent_buffer *leaf;
+ struct btrfs_ordered_extent *ordered;
struct list_head ordered_sums;
struct btrfs_map_token token;
struct btrfs_key key;
- u64 csum_offset = em->mod_start - em->start;
- u64 csum_len = em->mod_len;
+ u64 mod_start = em->mod_start;
+ u64 mod_len = em->mod_len;
+ u64 csum_offset;
+ u64 csum_len;
u64 extent_offset = em->start - em->orig_start;
u64 block_len;
int ret;
+ int index = log->log_transid % 2;
bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
+insert:
INIT_LIST_HEAD(&ordered_sums);
btrfs_init_map_token(&token);
key.objectid = btrfs_ino(inode);
@@ -3296,6 +3316,23 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
leaf = path->nodes[0];
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
+
+ /*
+ * If we are overwriting an inline extent with a real one then we need
+ * to just delete the inline extent as it may not be large enough to
+ * have the entire file_extent_item.
+ */
+ if (ret && btrfs_token_file_extent_type(leaf, fi, &token) ==
+ BTRFS_FILE_EXTENT_INLINE) {
+ ret = btrfs_del_item(trans, log, path);
+ btrfs_release_path(path);
+ if (ret) {
+ path->really_keep_locks = 0;
+ return ret;
+ }
+ goto insert;
+ }
+
btrfs_set_token_file_extent_generation(leaf, fi, em->generation,
&token);
if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
@@ -3362,6 +3399,92 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
csum_len = block_len;
}
+ /*
+ * First check and see if our csums are on our outstanding ordered
+ * extents.
+ */
+again:
+ spin_lock_irq(&log->log_extents_lock[index]);
+ list_for_each_entry(ordered, &log->logged_list[index], log_list) {
+ struct btrfs_ordered_sum *sum;
+
+ if (!mod_len)
+ break;
+
+ if (ordered->inode != inode)
+ continue;
+
+ if (ordered->file_offset + ordered->len <= mod_start ||
+ mod_start + mod_len <= ordered->file_offset)
+ continue;
+
+ /*
+ * We are going to copy all the csums on this ordered extent, so
+ * go ahead and adjust mod_start and mod_len in case this
+ * ordered extent has already been logged.
+ */
+ if (ordered->file_offset > mod_start) {
+ if (ordered->file_offset + ordered->len >=
+ mod_start + mod_len)
+ mod_len = ordered->file_offset - mod_start;
+ /*
+ * If we have this case
+ *
+ * |--------- logged extent ---------|
+ * |----- ordered extent ----|
+ *
+ * Just don't mess with mod_start and mod_len, we'll
+ * just end up logging more csums than we need and it
+ * will be ok.
+ */
+ } else {
+ if (ordered->file_offset + ordered->len <
+ mod_start + mod_len) {
+ mod_len = (mod_start + mod_len) -
+ (ordered->file_offset + ordered->len);
+ mod_start = ordered->file_offset +
+ ordered->len;
+ } else {
+ mod_len = 0;
+ }
+ }
+
+ /*
+ * To keep us from looping for the above case of an ordered
+ * extent that falls inside of the logged extent.
+ */
+ if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM,
+ &ordered->flags))
+ continue;
+ atomic_inc(&ordered->refs);
+ spin_unlock_irq(&log->log_extents_lock[index]);
+ /*
+ * we've dropped the lock, we must either break or
+ * start over after this.
+ */
+
+ wait_event(ordered->wait, ordered->csum_bytes_left == 0);
+
+ list_for_each_entry(sum, &ordered->list, list) {
+ ret = btrfs_csum_file_blocks(trans, log, sum);
+ if (ret) {
+ btrfs_put_ordered_extent(ordered);
+ goto unlocked;
+ }
+ }
+ btrfs_put_ordered_extent(ordered);
+ goto again;
+
+ }
+ spin_unlock_irq(&log->log_extents_lock[index]);
+unlocked:
+
+ if (!mod_len || ret)
+ return ret;
+
+ csum_offset = mod_start - em->start;
+ csum_len = mod_len;
+
/* block start is already adjusted for the file extent offset. */
ret = btrfs_lookup_csums_range(log->fs_info->csum_root,
em->block_start + csum_offset,
@@ -3393,6 +3516,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
u64 test_gen;
int ret = 0;
+ int num = 0;
INIT_LIST_HEAD(&extents);
@@ -3401,16 +3525,31 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
list_del_init(&em->list);
+
+ /*
+ * Just an arbitrary number, this can be really CPU intensive
+ * once we start getting a lot of extents, and really once we
+ * have a bunch of extents we just want to commit since it will
+ * be faster.
+ */
+ if (++num > 32768) {
+ list_del_init(&tree->modified_extents);
+ ret = -EFBIG;
+ goto process;
+ }
+
if (em->generation <= test_gen)
continue;
/* Need a ref to keep it from getting evicted from cache */
atomic_inc(&em->refs);
set_bit(EXTENT_FLAG_LOGGING, &em->flags);
list_add_tail(&em->list, &extents);
+ num++;
}
list_sort(NULL, &extents, extent_cmp);
+process:
while (!list_empty(&extents)) {
em = list_entry(extents.next, struct extent_map, list);
@@ -3513,6 +3652,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
mutex_lock(&BTRFS_I(inode)->log_mutex);
+ btrfs_get_logged_extents(log, inode);
+
/*
* a brute force approach to making sure we get the most uptodate
* copies of everything.
@@ -3558,7 +3699,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
while (1) {
ins_nr = 0;
ret = btrfs_search_forward(root, &min_key, &max_key,
- path, 0, trans->transid);
+ path, trans->transid);
if (ret != 0)
break;
again:
@@ -3656,6 +3797,8 @@ log_extents:
BTRFS_I(inode)->logged_trans = trans->transid;
BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans;
out_unlock:
+ if (err)
+ btrfs_free_logged_extents(log, log->log_transid);
mutex_unlock(&BTRFS_I(inode)->log_mutex);
btrfs_free_path(path);
@@ -3822,7 +3965,6 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
end_trans:
dput(old_parent);
if (ret < 0) {
- WARN_ON(ret != -ENOSPC);
root->fs_info->last_trans_log_full_commit = trans->transid;
ret = 1;
}