From c8cc6341653721b54760480b0d0d9b5f09b46741 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 1 Jul 2013 16:18:19 -0400 Subject: Btrfs: stop using GFP_ATOMIC for the tree mod log allocations Previously we held the tree mod lock when adding stuff because we use it to check and see if we truly do want to track tree modifications. This is admirable, but GFP_ATOMIC in a critical area that is going to get hit pretty hard and often is not nice. So instead do our basic checks to see if we don't need to track modifications, and if those pass then do our allocation, and then when we go to insert the new modification check if we still care, and if we don't just free up our mod and return. Otherwise we're good to go and we can carry on. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 161 +++++++++++++++++++------------------------------------ 1 file changed, 56 insertions(+), 105 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ed504607d8ec..0d5c686f2b98 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -484,8 +484,27 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) struct rb_node **new; struct rb_node *parent = NULL; struct tree_mod_elem *cur; + int ret = 0; + + BUG_ON(!tm); + + tree_mod_log_write_lock(fs_info); + if (list_empty(&fs_info->tree_mod_seq_list)) { + tree_mod_log_write_unlock(fs_info); + /* + * Ok we no longer care about logging modifications, free up tm + * and return 0. Any callers shouldn't be using tm after + * calling tree_mod_log_insert, but if they do we can just + * change this to return a special error code to let the callers + * do their own thing. + */ + kfree(tm); + return 0; + } - BUG_ON(!tm || !tm->seq); + spin_lock(&fs_info->tree_mod_seq_lock); + tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info); + spin_unlock(&fs_info->tree_mod_seq_lock); tm_root = &fs_info->tree_mod_log; new = &tm_root->rb_node; @@ -501,14 +520,17 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) else if (cur->seq > tm->seq) new = &((*new)->rb_right); else { + ret = -EEXIST; kfree(tm); - return -EEXIST; + goto out; } } rb_link_node(&tm->node, parent, new); rb_insert_color(&tm->node, tm_root); - return 0; +out: + tree_mod_log_write_unlock(fs_info); + return ret; } /* @@ -524,57 +546,19 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info, return 1; if (eb && btrfs_header_level(eb) == 0) return 1; - - tree_mod_log_write_lock(fs_info); - if (list_empty(&fs_info->tree_mod_seq_list)) { - /* - * someone emptied the list while we were waiting for the lock. - * we must not add to the list when no blocker exists. - */ - tree_mod_log_write_unlock(fs_info); - return 1; - } - return 0; } -/* - * This allocates memory and gets a tree modification sequence number. - * - * Returns <0 on error. - * Returns >0 (the added sequence number) on success. - */ -static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, - struct tree_mod_elem **tm_ret) -{ - struct tree_mod_elem *tm; - - /* - * once we switch from spin locks to something different, we should - * honor the flags parameter here. - */ - tm = *tm_ret = kzalloc(sizeof(*tm), GFP_ATOMIC); - if (!tm) - return -ENOMEM; - - spin_lock(&fs_info->tree_mod_seq_lock); - tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info); - spin_unlock(&fs_info->tree_mod_seq_lock); - - return tm->seq; -} - static inline int __tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, int slot, enum mod_log_op op, gfp_t flags) { - int ret; struct tree_mod_elem *tm; - ret = tree_mod_alloc(fs_info, flags, &tm); - if (ret < 0) - return ret; + tm = kzalloc(sizeof(*tm), flags); + if (!tm) + return -ENOMEM; tm->index = eb->start >> PAGE_CACHE_SHIFT; if (op != MOD_LOG_KEY_ADD) { @@ -589,34 +573,14 @@ __tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, } static noinline int -tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int slot, - enum mod_log_op op, gfp_t flags) +tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int slot, + enum mod_log_op op, gfp_t flags) { - int ret; - if (tree_mod_dont_log(fs_info, eb)) return 0; - ret = __tree_mod_log_insert_key(fs_info, eb, slot, op, flags); - - tree_mod_log_write_unlock(fs_info); - return ret; -} - -static noinline int -tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, - int slot, enum mod_log_op op) -{ - return tree_mod_log_insert_key_mask(fs_info, eb, slot, op, GFP_NOFS); -} - -static noinline int -tree_mod_log_insert_key_locked(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int slot, - enum mod_log_op op) -{ - return __tree_mod_log_insert_key(fs_info, eb, slot, op, GFP_NOFS); + return __tree_mod_log_insert_key(fs_info, eb, slot, op, flags); } static noinline int @@ -637,14 +601,14 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, * buffer, i.e. dst_slot < src_slot. */ for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { - ret = tree_mod_log_insert_key_locked(fs_info, eb, i + dst_slot, - MOD_LOG_KEY_REMOVE_WHILE_MOVING); + ret = __tree_mod_log_insert_key(fs_info, eb, i + dst_slot, + MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS); BUG_ON(ret < 0); } - ret = tree_mod_alloc(fs_info, flags, &tm); - if (ret < 0) - goto out; + tm = kzalloc(sizeof(*tm), flags); + if (!tm) + return -ENOMEM; tm->index = eb->start >> PAGE_CACHE_SHIFT; tm->slot = src_slot; @@ -652,10 +616,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, tm->move.nr_items = nr_items; tm->op = MOD_LOG_MOVE_KEYS; - ret = __tree_mod_log_insert(fs_info, tm); -out: - tree_mod_log_write_unlock(fs_info); - return ret; + return __tree_mod_log_insert(fs_info, tm); } static inline void @@ -670,8 +631,8 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb) nritems = btrfs_header_nritems(eb); for (i = nritems - 1; i >= 0; i--) { - ret = tree_mod_log_insert_key_locked(fs_info, eb, i, - MOD_LOG_KEY_REMOVE_WHILE_FREEING); + ret = __tree_mod_log_insert_key(fs_info, eb, i, + MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS); BUG_ON(ret < 0); } } @@ -683,7 +644,6 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, int log_removal) { struct tree_mod_elem *tm; - int ret; if (tree_mod_dont_log(fs_info, NULL)) return 0; @@ -691,9 +651,9 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, if (log_removal) __tree_mod_log_free_eb(fs_info, old_root); - ret = tree_mod_alloc(fs_info, flags, &tm); - if (ret < 0) - goto out; + tm = kzalloc(sizeof(*tm), flags); + if (!tm) + return -ENOMEM; tm->index = new_root->start >> PAGE_CACHE_SHIFT; tm->old_root.logical = old_root->start; @@ -701,10 +661,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, tm->generation = btrfs_header_generation(old_root); tm->op = MOD_LOG_ROOT_REPLACE; - ret = __tree_mod_log_insert(fs_info, tm); -out: - tree_mod_log_write_unlock(fs_info); - return ret; + return __tree_mod_log_insert(fs_info, tm); } static struct tree_mod_elem * @@ -784,23 +741,20 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, if (tree_mod_dont_log(fs_info, NULL)) return; - if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) { - tree_mod_log_write_unlock(fs_info); + if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) return; - } for (i = 0; i < nr_items; i++) { - ret = tree_mod_log_insert_key_locked(fs_info, src, + ret = __tree_mod_log_insert_key(fs_info, src, i + src_offset, - MOD_LOG_KEY_REMOVE); + MOD_LOG_KEY_REMOVE, GFP_NOFS); BUG_ON(ret < 0); - ret = tree_mod_log_insert_key_locked(fs_info, dst, + ret = __tree_mod_log_insert_key(fs_info, dst, i + dst_offset, - MOD_LOG_KEY_ADD); + MOD_LOG_KEY_ADD, + GFP_NOFS); BUG_ON(ret < 0); } - - tree_mod_log_write_unlock(fs_info); } static inline void @@ -819,9 +773,9 @@ tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info, { int ret; - ret = tree_mod_log_insert_key_mask(fs_info, eb, slot, - MOD_LOG_KEY_REPLACE, - atomic ? GFP_ATOMIC : GFP_NOFS); + ret = __tree_mod_log_insert_key(fs_info, eb, slot, + MOD_LOG_KEY_REPLACE, + atomic ? GFP_ATOMIC : GFP_NOFS); BUG_ON(ret < 0); } @@ -830,10 +784,7 @@ tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb) { if (tree_mod_dont_log(fs_info, eb)) return; - __tree_mod_log_free_eb(fs_info, eb); - - tree_mod_log_write_unlock(fs_info); } static noinline void @@ -1083,7 +1034,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, WARN_ON(trans->transid != btrfs_header_generation(parent)); tree_mod_log_insert_key(root->fs_info, parent, parent_slot, - MOD_LOG_KEY_REPLACE); + MOD_LOG_KEY_REPLACE, GFP_NOFS); btrfs_set_node_blockptr(parent, parent_slot, cow->start); btrfs_set_node_ptr_generation(parent, parent_slot, @@ -3208,7 +3159,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans, } if (level) { ret = tree_mod_log_insert_key(root->fs_info, lower, slot, - MOD_LOG_KEY_ADD); + MOD_LOG_KEY_ADD, GFP_NOFS); BUG_ON(ret < 0); } btrfs_set_node_key(lower, key, slot); @@ -4642,7 +4593,7 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path, (nritems - slot - 1)); } else if (level) { ret = tree_mod_log_insert_key(root->fs_info, parent, slot, - MOD_LOG_KEY_REMOVE); + MOD_LOG_KEY_REMOVE, GFP_NOFS); BUG_ON(ret < 0); } -- cgit v1.2.3 From 59516f6017c589e7316418fda6128ba8f829a77f Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 1 Jul 2013 20:33:39 +0200 Subject: Btrfs: return -1 when lzo compression makes data bigger With this fix the lzo code behaves like the zlib code by returning an error code when compression does not help reduce the size of the file. This is currently not a bug since the compressed size is checked again in the calling method compress_file_range. Signed-off-by: Stefan Agner Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/lzo.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index f93151a98886..b6a6f07c5ce2 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -207,8 +207,10 @@ static int lzo_compress_pages(struct list_head *ws, } /* we're making it bigger, give up */ - if (tot_in > 8192 && tot_in < tot_out) + if (tot_in > 8192 && tot_in < tot_out) { + ret = -1; goto out; + } /* we're all done */ if (tot_in >= len) -- cgit v1.2.3 From b2aaaa3b8c2153ef17f0e22287acbf8ee31a7c82 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 5 Jul 2013 17:05:38 -0400 Subject: Btrfs: set lockdep class before locking new extent buffer We've been seeing spurious complaints out of lockdep because the lock class name changes. This is happening because when we drop a snapshot we will lock a block before we've read it in, which sets the lockdep class to whatever the default is. Then once we read the thing in we reset the lockdep class to what it is supposed to be, which blows lockdeps' mind. This patch should fix the problem, it appears to be the only place where we do this sort of thing. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1204c8ef6f32..7e172c6dba68 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7173,6 +7173,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, next = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!next) return -ENOMEM; + btrfs_set_buffer_lockdep_class(root->root_key.objectid, next, + level - 1); reada = 1; } btrfs_tree_lock(next); -- cgit v1.2.3 From d790155457a830d064d57e742521f114d3c38108 Mon Sep 17 00:00:00 2001 From: Carey Underwood Date: Mon, 4 Mar 2013 16:37:06 -0600 Subject: Btrfs: Release uuid_mutex for shrink during device delete Device scanning waits on the uuid_mutex, which can result in a very long wait if dev delete is shrinking the device. Signed-off-by: Carey Underwood Reviewed-by: David Sterba Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 78b871753cb6..b5c2b6acbf60 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1562,7 +1562,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) clear_super = true; } + mutex_unlock(&uuid_mutex); ret = btrfs_shrink_device(device, 0); + mutex_lock(&uuid_mutex); if (ret) goto error_undo; -- cgit v1.2.3 From 74be9510876a66ad9826613ac8a526d26f9e7f01 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Fri, 5 Jul 2013 23:12:06 +0100 Subject: Btrfs: optimize btrfs_lookup_extent_info() If we're looking for a metadata item in the tree and the search fails with return value of 1, and the slot doesn't point to the first item in the leaf, check if the previous item in the leaf corresponds to an extent item for the same object id - if it does, then don't do another tree search to get it. This optimization is already done by btrfs-progs. V2: updated commit message. Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7e172c6dba68..3e7e05e6156f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -771,10 +771,23 @@ again: goto out_free; if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) { - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = root->leafsize; - btrfs_release_path(path); - goto again; + metadata = 0; + if (path->slots[0]) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid == bytenr && + key.type == BTRFS_EXTENT_ITEM_KEY && + key.offset == root->leafsize) + ret = 0; + } + if (ret) { + key.objectid = bytenr; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = root->leafsize; + btrfs_release_path(path); + goto again; + } } if (ret == 0) { -- cgit v1.2.3 From ee3441b49092000402748f5345ee0a3d4c8ac04e Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 9 Jul 2013 16:37:21 -0400 Subject: btrfs: fall back to global reservation when removing subvolumes I recently did some ENOSPC testing that involved filling the disk while create and removing snapshots in a loop. During the test cycle, I ran into an ENOSPC when trying to remove a snapshot, leaving the fs stuck in ENOSPC even after a umount/mount cycle. This patch allow subvolume removal to fall back onto the global block reservation in order to succeed when it would have failed otherwise. Signed-off-by: Jeff Mahoney Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 8 +++++++- fs/btrfs/ioctl.c | 7 ++++--- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e795bf135e80..8cc03321a368 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3175,7 +3175,7 @@ void btrfs_orphan_release_metadata(struct inode *inode); int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, struct btrfs_block_rsv *rsv, int nitems, - u64 *qgroup_reserved); + u64 *qgroup_reserved, bool use_global_rsv); void btrfs_subvolume_release_metadata(struct btrfs_root *root, struct btrfs_block_rsv *rsv, u64 qgroup_reserved); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3e7e05e6156f..7bea4d2f85dc 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4742,10 +4742,12 @@ void btrfs_orphan_release_metadata(struct inode *inode) int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, struct btrfs_block_rsv *rsv, int items, - u64 *qgroup_reserved) + u64 *qgroup_reserved, + bool use_global_rsv) { u64 num_bytes; int ret; + struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; if (root->fs_info->quota_enabled) { /* One for parent inode, two for dir entries */ @@ -4764,6 +4766,10 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, BTRFS_BLOCK_GROUP_METADATA); ret = btrfs_block_rsv_add(root, rsv, num_bytes, BTRFS_RESERVE_FLUSH_ALL); + + if (ret == -ENOSPC && use_global_rsv) + ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes); + if (ret) { if (*qgroup_reserved) btrfs_qgroup_free(root, *qgroup_reserved); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 238a05545ee2..841044b80658 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -396,7 +396,7 @@ static noinline int create_subvol(struct inode *dir, * of create_snapshot(). */ ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, - 7, &qgroup_reserved); + 7, &qgroup_reserved, false); if (ret) return ret; @@ -576,7 +576,8 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, */ ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root, &pending_snapshot->block_rsv, 7, - &pending_snapshot->qgroup_reserved); + &pending_snapshot->qgroup_reserved, + false); if (ret) goto out; @@ -2174,7 +2175,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, * ref/backref. */ err = btrfs_subvolume_reserve_metadata(root, &block_rsv, - 5, &qgroup_reserved); + 5, &qgroup_reserved, true); if (err) goto out_up_write; -- cgit v1.2.3 From 52ee28d249571616c80df7562bd4412ac253899a Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 11 Jul 2013 17:51:15 +0800 Subject: Btrfs: make free space caching faster with many non-inline extent references So to cache free space, we iterate every extent item to gather free space info. When we have say 10,000 non-inline extent refs(such as BTRFS_EXTENT_DATA_REF), it takes quite a long time, and since inline extent refs and non-inline ones have same objectid in their keys, we can just re-search the tree with the next address to skip non-inline references. (This is found by dedup feature because dedup extents can end up with many non-inline extent refs.) Signed-off-by: Liu Bo Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7bea4d2f85dc..99aa9b77e948 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -420,6 +420,7 @@ again: /* need to make sure the commit_root doesn't disappear */ down_read(&fs_info->extent_commit_sem); +next: ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); if (ret < 0) goto err; @@ -459,6 +460,16 @@ again: continue; } + if (key.objectid < last) { + key.objectid = last; + key.offset = 0; + key.type = BTRFS_EXTENT_ITEM_KEY; + + caching_ctl->progress = last; + btrfs_release_path(path); + goto next; + } + if (key.objectid < block_group->key.objectid) { path->slots[0]++; continue; -- cgit v1.2.3 From ccf39f92f37b0ee7d8de663fc438482f2019c4a6 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 11 Jul 2013 15:41:11 +0200 Subject: btrfs: make errors in btrfs_num_copies less noisy The log message level 'critical' is verbose enough, 'emergency' beeps on all terminals. Signed-off-by: David Sterba Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b5c2b6acbf60..4c1dd540631e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4196,13 +4196,13 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) * and exit, so return 1 so the callers don't try to use other copies. */ if (!em) { - btrfs_emerg(fs_info, "No mapping for %Lu-%Lu\n", logical, + btrfs_crit(fs_info, "No mapping for %Lu-%Lu\n", logical, logical+len); return 1; } if (em->start > logical || em->start + em->len < logical) { - btrfs_emerg(fs_info, "Invalid mapping for %Lu-%Lu, got " + btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got " "%Lu-%Lu\n", logical, logical+len, em->start, em->start + em->len); return 1; -- cgit v1.2.3 From 647f63bd363e5f4800bf3e85a613793e1eb4bf25 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Sat, 13 Jul 2013 12:25:15 +0100 Subject: Btrfs: add missing error checks to add_data_references The function relocation.c:add_data_references() was not checking if all calls to __add_tree_block() and find_data_references() were succeeding or not. Signed-off-by: Filipe David Borba Manana Reviewed-by: David Sterba Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/relocation.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 12096496cc99..295a6115c326 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3628,7 +3628,7 @@ int add_data_references(struct reloc_control *rc, unsigned long ptr; unsigned long end; u32 blocksize = btrfs_level_size(rc->extent_root, 0); - int ret; + int ret = 0; int err = 0; eb = path->nodes[0]; @@ -3655,6 +3655,10 @@ int add_data_references(struct reloc_control *rc, } else { BUG(); } + if (ret) { + err = ret; + goto out; + } ptr += btrfs_extent_inline_ref_size(key.type); } WARN_ON(ptr > end); @@ -3700,6 +3704,7 @@ int add_data_references(struct reloc_control *rc, } path->slots[0]++; } +out: btrfs_release_path(path); if (err) free_block_list(blocks); -- cgit v1.2.3 From 1e7bac1ef754b3112eb78c64a7382d286e454424 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Sat, 13 Jul 2013 21:02:54 +0800 Subject: Btrfs: set qgroup_ulist to be null after calling ulist_free() We call ulist_free(qgroup_ulist) in btrfs_free_qgroup_config(), and btrfs_free_qgroup_config() may be called in two cases: (1)umount filesystem (2)disabling quota However, if we firstly disable quota and then umount filesystem, a double free happens. Fix it. Signed-off-by: Wang Shilong Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/qgroup.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 1280eff8af56..64a9e3cb9453 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -460,7 +460,13 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) } kfree(qgroup); } + /* + * we call btrfs_free_qgroup_config() when umounting + * filesystem and disabling quota, so we set qgroup_ulit + * to be null here to avoid double free. + */ ulist_free(fs_info->qgroup_ulist); + fs_info->qgroup_ulist = NULL; } static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, -- cgit v1.2.3 From 3cae210fa529d69cb25c2a3c491f29dab687b245 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 16 Jul 2013 11:19:18 +0800 Subject: btrfs: Cleanup for using BTRFS_SETGET_STACK instead of raw convert Some codes still use the cpu_to_lexx instead of the BTRFS_SETGET_STACK_FUNCS declared in ctree.h. Also added some BTRFS_SETGET_STACK_FUNCS for btrfs_header btrfs_timespec and other structures. Signed-off-by: Qu Wenruo Reviewed-by: Miao Xie Reviewed-by: David Sterba Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/check-integrity.c | 110 ++++++++++++++++++++++++--------------------- fs/btrfs/ctree.h | 47 +++++++++++++++++++ fs/btrfs/delayed-inode.c | 32 +++---------- fs/btrfs/disk-io.c | 14 +++--- fs/btrfs/ioctl.c | 28 ++++++------ fs/btrfs/root-tree.c | 14 +++--- fs/btrfs/scrub.c | 12 ++--- fs/btrfs/transaction.c | 4 +- fs/btrfs/volumes.c | 2 +- 9 files changed, 150 insertions(+), 113 deletions(-) diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 1431a6965017..1f706e93e622 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -813,7 +813,7 @@ static int btrfsic_process_superblock_dev_mirror( (bh->b_data + (dev_bytenr & 4095)); if (btrfs_super_bytenr(super_tmp) != dev_bytenr || - super_tmp->magic != cpu_to_le64(BTRFS_MAGIC) || + btrfs_super_magic(super_tmp) != BTRFS_MAGIC || memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) || btrfs_super_nodesize(super_tmp) != state->metablock_size || btrfs_super_leafsize(super_tmp) != state->metablock_size || @@ -880,20 +880,20 @@ static int btrfsic_process_superblock_dev_mirror( tmp_disk_key.offset = 0; switch (pass) { case 0: - tmp_disk_key.objectid = - cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID); + btrfs_set_disk_key_objectid(&tmp_disk_key, + BTRFS_ROOT_TREE_OBJECTID); additional_string = "initial root "; next_bytenr = btrfs_super_root(super_tmp); break; case 1: - tmp_disk_key.objectid = - cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID); + btrfs_set_disk_key_objectid(&tmp_disk_key, + BTRFS_CHUNK_TREE_OBJECTID); additional_string = "initial chunk "; next_bytenr = btrfs_super_chunk_root(super_tmp); break; case 2: - tmp_disk_key.objectid = - cpu_to_le64(BTRFS_TREE_LOG_OBJECTID); + btrfs_set_disk_key_objectid(&tmp_disk_key, + BTRFS_TREE_LOG_OBJECTID); additional_string = "initial log "; next_bytenr = btrfs_super_log_root(super_tmp); if (0 == next_bytenr) @@ -1003,7 +1003,7 @@ continue_with_new_stack_frame: (struct btrfs_leaf *)sf->hdr; if (-1 == sf->i) { - sf->nr = le32_to_cpu(leafhdr->header.nritems); + sf->nr = btrfs_stack_header_nritems(&leafhdr->header); if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) printk(KERN_INFO @@ -1013,9 +1013,11 @@ continue_with_new_stack_frame: sf->block_ctx->start, sf->nr, (unsigned long long) - le64_to_cpu(leafhdr->header.generation), + btrfs_stack_header_generation( + &leafhdr->header), (unsigned long long) - le64_to_cpu(leafhdr->header.owner)); + btrfs_stack_header_owner( + &leafhdr->header)); } continue_with_current_leaf_stack_frame: @@ -1047,10 +1049,10 @@ leaf_item_out_of_bounce_error: &disk_item, disk_item_offset, sizeof(struct btrfs_item)); - item_offset = le32_to_cpu(disk_item.offset); - item_size = le32_to_cpu(disk_item.size); + item_offset = btrfs_stack_item_offset(&disk_item); + item_size = btrfs_stack_item_offset(&disk_item); disk_key = &disk_item.key; - type = disk_key->type; + type = btrfs_disk_key_type(disk_key); if (BTRFS_ROOT_ITEM_KEY == type) { struct btrfs_root_item root_item; @@ -1066,7 +1068,7 @@ leaf_item_out_of_bounce_error: sf->block_ctx, &root_item, root_item_offset, item_size); - next_bytenr = le64_to_cpu(root_item.bytenr); + next_bytenr = btrfs_root_bytenr(&root_item); sf->error = btrfsic_create_link_to_next_block( @@ -1081,8 +1083,8 @@ leaf_item_out_of_bounce_error: &sf->num_copies, &sf->mirror_num, disk_key, - le64_to_cpu(root_item. - generation)); + btrfs_root_generation( + &root_item)); if (sf->error) goto one_stack_frame_backwards; @@ -1130,7 +1132,7 @@ leaf_item_out_of_bounce_error: struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr; if (-1 == sf->i) { - sf->nr = le32_to_cpu(nodehdr->header.nritems); + sf->nr = btrfs_stack_header_nritems(&nodehdr->header); if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) printk(KERN_INFO "node %llu level %d items %d" @@ -1139,9 +1141,11 @@ leaf_item_out_of_bounce_error: sf->block_ctx->start, nodehdr->header.level, sf->nr, (unsigned long long) - le64_to_cpu(nodehdr->header.generation), + btrfs_stack_header_generation( + &nodehdr->header), (unsigned long long) - le64_to_cpu(nodehdr->header.owner)); + btrfs_stack_header_owner( + &nodehdr->header)); } continue_with_current_node_stack_frame: @@ -1168,7 +1172,7 @@ continue_with_current_node_stack_frame: btrfsic_read_from_block_data( sf->block_ctx, &key_ptr, key_ptr_offset, sizeof(struct btrfs_key_ptr)); - next_bytenr = le64_to_cpu(key_ptr.blockptr); + next_bytenr = btrfs_stack_key_blockptr(&key_ptr); sf->error = btrfsic_create_link_to_next_block( state, @@ -1182,7 +1186,7 @@ continue_with_current_node_stack_frame: &sf->num_copies, &sf->mirror_num, &key_ptr.key, - le64_to_cpu(key_ptr.generation)); + btrfs_stack_key_generation(&key_ptr)); if (sf->error) goto one_stack_frame_backwards; @@ -1444,12 +1448,13 @@ static int btrfsic_handle_extent_data( file_extent_item_offset, offsetof(struct btrfs_file_extent_item, disk_num_bytes)); if (BTRFS_FILE_EXTENT_REG != file_extent_item.type || - ((u64)0) == le64_to_cpu(file_extent_item.disk_bytenr)) { + btrfs_stack_file_extent_disk_bytenr(&file_extent_item) == 0) { if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n", file_extent_item.type, (unsigned long long) - le64_to_cpu(file_extent_item.disk_bytenr)); + btrfs_stack_file_extent_disk_bytenr( + &file_extent_item)); return 0; } @@ -1463,19 +1468,20 @@ static int btrfsic_handle_extent_data( btrfsic_read_from_block_data(block_ctx, &file_extent_item, file_extent_item_offset, sizeof(struct btrfs_file_extent_item)); - next_bytenr = le64_to_cpu(file_extent_item.disk_bytenr) + - le64_to_cpu(file_extent_item.offset); - generation = le64_to_cpu(file_extent_item.generation); - num_bytes = le64_to_cpu(file_extent_item.num_bytes); - generation = le64_to_cpu(file_extent_item.generation); + next_bytenr = btrfs_stack_file_extent_disk_bytenr(&file_extent_item) + + btrfs_stack_file_extent_offset(&file_extent_item); + generation = btrfs_stack_file_extent_generation(&file_extent_item); + num_bytes = btrfs_stack_file_extent_num_bytes(&file_extent_item); + generation = btrfs_stack_file_extent_generation(&file_extent_item); if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu," " offset = %llu, num_bytes = %llu\n", file_extent_item.type, (unsigned long long) - le64_to_cpu(file_extent_item.disk_bytenr), - (unsigned long long)le64_to_cpu(file_extent_item.offset), + btrfs_stack_file_extent_disk_bytenr(&file_extent_item), + (unsigned long long) + btrfs_stack_file_extent_offset(&file_extent_item), (unsigned long long)num_bytes); while (num_bytes > 0) { u32 chunk_len; @@ -1896,8 +1902,8 @@ again: struct list_head *tmp_ref_to; if (block->is_superblock) { - bytenr = le64_to_cpu(((struct btrfs_super_block *) - mapped_datav[0])->bytenr); + bytenr = btrfs_super_bytenr((struct btrfs_super_block *) + mapped_datav[0]); if (num_pages * PAGE_CACHE_SIZE < BTRFS_SUPER_INFO_SIZE) { printk(KERN_INFO @@ -1923,8 +1929,9 @@ again: return; } processed_len = state->metablock_size; - bytenr = le64_to_cpu(((struct btrfs_header *) - mapped_datav[0])->bytenr); + bytenr = btrfs_stack_header_bytenr( + (struct btrfs_header *) + mapped_datav[0]); btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state, dev_bytenr); @@ -1992,13 +1999,13 @@ again: block->mirror_num, (unsigned long long)block->generation, (unsigned long long) - le64_to_cpu(block->disk_key.objectid), + btrfs_disk_key_objectid(&block->disk_key), block->disk_key.type, (unsigned long long) - le64_to_cpu(block->disk_key.offset), + btrfs_disk_key_offset(&block->disk_key), (unsigned long long) - le64_to_cpu(((struct btrfs_header *) - mapped_datav[0])->generation), + btrfs_stack_header_generation( + (struct btrfs_header *) mapped_datav[0]), (unsigned long long) state->max_superblock_generation); btrfsic_dump_tree(state); @@ -2015,8 +2022,9 @@ again: block->mirror_num, (unsigned long long)block->generation, (unsigned long long) - le64_to_cpu(((struct btrfs_header *) - mapped_datav[0])->generation)); + btrfs_stack_header_generation( + (struct btrfs_header *) + mapped_datav[0])); /* it would not be safe to go on */ btrfsic_dump_tree(state); goto continue_loop; @@ -2184,8 +2192,9 @@ again: block_ctx.pagev = NULL; } else { processed_len = state->metablock_size; - bytenr = le64_to_cpu(((struct btrfs_header *) - mapped_datav[0])->bytenr); + bytenr = btrfs_stack_header_bytenr( + (struct btrfs_header *) + mapped_datav[0]); btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state, dev_bytenr); if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) @@ -2434,13 +2443,14 @@ static int btrfsic_process_written_superblock( const char *additional_string = NULL; struct btrfs_disk_key tmp_disk_key; - tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY; - tmp_disk_key.offset = 0; + btrfs_set_disk_key_objectid(&tmp_disk_key, + BTRFS_ROOT_ITEM_KEY); + btrfs_set_disk_key_objectid(&tmp_disk_key, 0); switch (pass) { case 0: - tmp_disk_key.objectid = - cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID); + btrfs_set_disk_key_objectid(&tmp_disk_key, + BTRFS_ROOT_TREE_OBJECTID); additional_string = "root "; next_bytenr = btrfs_super_root(super_hdr); if (state->print_mask & @@ -2449,8 +2459,8 @@ static int btrfsic_process_written_superblock( (unsigned long long)next_bytenr); break; case 1: - tmp_disk_key.objectid = - cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID); + btrfs_set_disk_key_objectid(&tmp_disk_key, + BTRFS_CHUNK_TREE_OBJECTID); additional_string = "chunk "; next_bytenr = btrfs_super_chunk_root(super_hdr); if (state->print_mask & @@ -2459,8 +2469,8 @@ static int btrfsic_process_written_superblock( (unsigned long long)next_bytenr); break; case 2: - tmp_disk_key.objectid = - cpu_to_le64(BTRFS_TREE_LOG_OBJECTID); + btrfs_set_disk_key_objectid(&tmp_disk_key, + BTRFS_TREE_LOG_OBJECTID); additional_string = "log "; next_bytenr = btrfs_super_log_root(super_hdr); if (0 == next_bytenr) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8cc03321a368..dda60e9f6b89 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2240,6 +2240,23 @@ BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32); BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32); BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64); BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item, + generation, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item, + sequence, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item, + transid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item, + nbytes, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item, + block_group, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32); +BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32); +BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32); +BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32); +BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64); static inline struct btrfs_timespec * btrfs_inode_atime(struct btrfs_inode_item *inode_item) @@ -2267,6 +2284,8 @@ btrfs_inode_ctime(struct btrfs_inode_item *inode_item) BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64); BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32); +BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64); +BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32); /* struct btrfs_dev_extent */ BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent, @@ -2348,6 +2367,10 @@ BTRFS_SETGET_FUNCS(ref_count_v0, struct btrfs_extent_ref_v0, count, 32); /* struct btrfs_node */ BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64); BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64); +BTRFS_SETGET_STACK_FUNCS(stack_key_blockptr, struct btrfs_key_ptr, + blockptr, 64); +BTRFS_SETGET_STACK_FUNCS(stack_key_generation, struct btrfs_key_ptr, + generation, 64); static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr) { @@ -2404,6 +2427,8 @@ static inline void btrfs_set_node_key(struct extent_buffer *eb, /* struct btrfs_item */ BTRFS_SETGET_FUNCS(item_offset, struct btrfs_item, offset, 32); BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32); +BTRFS_SETGET_STACK_FUNCS(stack_item_offset, struct btrfs_item, offset, 32); +BTRFS_SETGET_STACK_FUNCS(stack_item_size, struct btrfs_item, size, 32); static inline unsigned long btrfs_item_nr_offset(int nr) { @@ -2466,6 +2491,13 @@ BTRFS_SETGET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16); BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8); BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16); BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_dir_type, struct btrfs_dir_item, type, 8); +BTRFS_SETGET_STACK_FUNCS(stack_dir_data_len, struct btrfs_dir_item, + data_len, 16); +BTRFS_SETGET_STACK_FUNCS(stack_dir_name_len, struct btrfs_dir_item, + name_len, 16); +BTRFS_SETGET_STACK_FUNCS(stack_dir_transid, struct btrfs_dir_item, + transid, 64); static inline void btrfs_dir_item_key(struct extent_buffer *eb, struct btrfs_dir_item *item, @@ -2568,6 +2600,12 @@ BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64); BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32); BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 64); BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8); +BTRFS_SETGET_STACK_FUNCS(stack_header_generation, struct btrfs_header, + generation, 64); +BTRFS_SETGET_STACK_FUNCS(stack_header_owner, struct btrfs_header, owner, 64); +BTRFS_SETGET_STACK_FUNCS(stack_header_nritems, struct btrfs_header, + nritems, 32); +BTRFS_SETGET_STACK_FUNCS(stack_header_bytenr, struct btrfs_header, bytenr, 64); static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag) { @@ -2830,6 +2868,7 @@ BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block, csum_type, 16); BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block, cache_generation, 64); +BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64); static inline int btrfs_super_csum_size(struct btrfs_super_block *s) { @@ -2847,6 +2886,14 @@ static inline unsigned long btrfs_leaf_data(struct extent_buffer *l) /* struct btrfs_file_extent_item */ BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_bytenr, + struct btrfs_file_extent_item, disk_bytenr, 64); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_offset, + struct btrfs_file_extent_item, offset, 64); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_generation, + struct btrfs_file_extent_item, generation, 64); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_num_bytes, + struct btrfs_file_extent_item, num_bytes, 64); static inline unsigned long btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 375510913fe7..685957ac208e 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -21,6 +21,7 @@ #include "delayed-inode.h" #include "disk-io.h" #include "transaction.h" +#include "ctree.h" #define BTRFS_DELAYED_WRITEBACK 512 #define BTRFS_DELAYED_BACKGROUND 128 @@ -1453,10 +1454,10 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, dir_item = (struct btrfs_dir_item *)delayed_item->data; dir_item->location = *disk_key; - dir_item->transid = cpu_to_le64(trans->transid); - dir_item->data_len = 0; - dir_item->name_len = cpu_to_le16(name_len); - dir_item->type = type; + btrfs_set_stack_dir_transid(dir_item, trans->transid); + btrfs_set_stack_dir_data_len(dir_item, 0); + btrfs_set_stack_dir_name_len(dir_item, name_len); + btrfs_set_stack_dir_type(dir_item, type); memcpy((char *)(dir_item + 1), name, name_len); ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item); @@ -1699,7 +1700,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, di = (struct btrfs_dir_item *)curr->data; name = (char *)(di + 1); - name_len = le16_to_cpu(di->name_len); + name_len = btrfs_stack_dir_name_len(di); d_type = btrfs_filetype_table[di->type]; btrfs_disk_key_to_cpu(&location, &di->location); @@ -1716,27 +1717,6 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, return 0; } -BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item, - generation, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item, - sequence, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item, - transid, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item, - nbytes, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item, - block_group, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32); -BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32); -BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32); -BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32); -BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64); - -BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64); -BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32); - static void fill_stack_inode_item(struct btrfs_trans_handle *trans, struct btrfs_inode_item *inode_item, struct inode *inode) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6b092a1c4e37..56fcf8439f42 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1413,11 +1413,11 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, log_root->root_key.offset = root->root_key.objectid; inode_item = &log_root->root_item.inode; - inode_item->generation = cpu_to_le64(1); - inode_item->size = cpu_to_le64(3); - inode_item->nlink = cpu_to_le32(1); - inode_item->nbytes = cpu_to_le64(root->leafsize); - inode_item->mode = cpu_to_le32(S_IFDIR | 0755); + btrfs_set_stack_inode_generation(inode_item, 1); + btrfs_set_stack_inode_size(inode_item, 3); + btrfs_set_stack_inode_nlink(inode_item, 1); + btrfs_set_stack_inode_nbytes(inode_item, root->leafsize); + btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); btrfs_set_root_node(&log_root->root_item, log_root->node); @@ -2575,7 +2575,7 @@ int open_ctree(struct super_block *sb, sb->s_blocksize = sectorsize; sb->s_blocksize_bits = blksize_bits(sectorsize); - if (disk_super->magic != cpu_to_le64(BTRFS_MAGIC)) { + if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) { printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id); goto fail_sb_buffer; } @@ -2991,7 +2991,7 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev) super = (struct btrfs_super_block *)bh->b_data; if (btrfs_super_bytenr(super) != bytenr || - super->magic != cpu_to_le64(BTRFS_MAGIC)) { + btrfs_super_magic(super) != BTRFS_MAGIC) { brelse(bh); continue; } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 841044b80658..f856c97952db 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -436,15 +436,15 @@ static noinline int create_subvol(struct inode *dir, memset(&root_item, 0, sizeof(root_item)); inode_item = &root_item.inode; - inode_item->generation = cpu_to_le64(1); - inode_item->size = cpu_to_le64(3); - inode_item->nlink = cpu_to_le32(1); - inode_item->nbytes = cpu_to_le64(root->leafsize); - inode_item->mode = cpu_to_le32(S_IFDIR | 0755); + btrfs_set_stack_inode_generation(inode_item, 1); + btrfs_set_stack_inode_size(inode_item, 3); + btrfs_set_stack_inode_nlink(inode_item, 1); + btrfs_set_stack_inode_nbytes(inode_item, root->leafsize); + btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); - root_item.flags = 0; - root_item.byte_limit = 0; - inode_item->flags = cpu_to_le64(BTRFS_INODE_ROOT_ITEM_INIT); + btrfs_set_root_flags(&root_item, 0); + btrfs_set_root_limit(&root_item, 0); + btrfs_set_stack_inode_flags(inode_item, BTRFS_INODE_ROOT_ITEM_INIT); btrfs_set_root_bytenr(&root_item, leaf->start); btrfs_set_root_generation(&root_item, trans->transid); @@ -457,8 +457,8 @@ static noinline int create_subvol(struct inode *dir, btrfs_root_generation(&root_item)); uuid_le_gen(&new_uuid); memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE); - root_item.otime.sec = cpu_to_le64(cur_time.tv_sec); - root_item.otime.nsec = cpu_to_le32(cur_time.tv_nsec); + btrfs_set_stack_timespec_sec(&root_item.otime, cur_time.tv_sec); + btrfs_set_stack_timespec_nsec(&root_item.otime, cur_time.tv_nsec); root_item.ctime = root_item.otime; btrfs_set_root_ctransid(&root_item, trans->transid); btrfs_set_root_otransid(&root_item, trans->transid); @@ -4011,10 +4011,10 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE); btrfs_set_root_stransid(root_item, sa->stransid); btrfs_set_root_rtransid(root_item, sa->rtransid); - root_item->stime.sec = cpu_to_le64(sa->stime.sec); - root_item->stime.nsec = cpu_to_le32(sa->stime.nsec); - root_item->rtime.sec = cpu_to_le64(sa->rtime.sec); - root_item->rtime.nsec = cpu_to_le32(sa->rtime.nsec); + btrfs_set_stack_timespec_sec(&root_item->stime, sa->stime.sec); + btrfs_set_stack_timespec_nsec(&root_item->stime, sa->stime.nsec); + btrfs_set_stack_timespec_sec(&root_item->rtime, sa->rtime.sec); + btrfs_set_stack_timespec_nsec(&root_item->rtime, sa->rtime.nsec); ret = btrfs_update_root(trans, root->fs_info->tree_root, &root->root_key, &root->root_item); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index ffb1036ef10d..70193d0e7058 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -490,13 +490,13 @@ again: */ void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item) { - u64 inode_flags = le64_to_cpu(root_item->inode.flags); + u64 inode_flags = btrfs_stack_inode_flags(&root_item->inode); if (!(inode_flags & BTRFS_INODE_ROOT_ITEM_INIT)) { inode_flags |= BTRFS_INODE_ROOT_ITEM_INIT; - root_item->inode.flags = cpu_to_le64(inode_flags); - root_item->flags = 0; - root_item->byte_limit = 0; + btrfs_set_stack_inode_flags(&root_item->inode, inode_flags); + btrfs_set_root_flags(root_item, 0); + btrfs_set_root_limit(root_item, 0); } } @@ -507,8 +507,8 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans, struct timespec ct = CURRENT_TIME; spin_lock(&root->root_item_lock); - item->ctransid = cpu_to_le64(trans->transid); - item->ctime.sec = cpu_to_le64(ct.tv_sec); - item->ctime.nsec = cpu_to_le32(ct.tv_nsec); + btrfs_set_root_ctransid(item, trans->transid); + btrfs_set_stack_timespec_sec(&item->ctime, ct.tv_sec); + btrfs_set_stack_timespec_nsec(&item->ctime, ct.tv_nsec); spin_unlock(&root->root_item_lock); } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 64a157becbe5..266f24febce3 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1345,12 +1345,12 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, mapped_buffer = kmap_atomic(sblock->pagev[0]->page); h = (struct btrfs_header *)mapped_buffer; - if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr) || + if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h) || memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) || memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, BTRFS_UUID_SIZE)) { sblock->header_error = 1; - } else if (generation != le64_to_cpu(h->generation)) { + } else if (generation != btrfs_stack_header_generation(h)) { sblock->header_error = 1; sblock->generation_error = 1; } @@ -1720,10 +1720,10 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) * b) the page is already kmapped */ - if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr)) + if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h)) ++fail; - if (sblock->pagev[0]->generation != le64_to_cpu(h->generation)) + if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) ++fail; if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) @@ -1786,10 +1786,10 @@ static int scrub_checksum_super(struct scrub_block *sblock) s = (struct btrfs_super_block *)mapped_buffer; memcpy(on_disk_csum, s->csum, sctx->csum_size); - if (sblock->pagev[0]->logical != le64_to_cpu(s->bytenr)) + if (sblock->pagev[0]->logical != btrfs_super_bytenr(s)) ++fail_cor; - if (sblock->pagev[0]->generation != le64_to_cpu(s->generation)) + if (sblock->pagev[0]->generation != btrfs_super_generation(s)) ++fail_gen; if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index af1931a5960d..18f7e71d1593 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1225,8 +1225,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_set_root_stransid(new_root_item, 0); btrfs_set_root_rtransid(new_root_item, 0); } - new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec); - new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec); + btrfs_set_stack_timespec_sec(&new_root_item->otime, cur_time.tv_sec); + btrfs_set_stack_timespec_nsec(&new_root_item->otime, cur_time.tv_nsec); btrfs_set_root_otransid(new_root_item, trans->transid); old = btrfs_lock_root_node(root); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 4c1dd540631e..557a7438f929 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -865,7 +865,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, disk_super = p + (bytenr & ~PAGE_CACHE_MASK); if (btrfs_super_bytenr(disk_super) != bytenr || - disk_super->magic != cpu_to_le64(BTRFS_MAGIC)) + btrfs_super_magic(disk_super) != BTRFS_MAGIC) goto error_unmap; devid = btrfs_stack_device_id(&disk_super->dev_item); -- cgit v1.2.3 From 1095cc0d924e1c61fb34e0bfcec049d1fbcca77c Mon Sep 17 00:00:00 2001 From: chandan Date: Tue, 16 Jul 2013 12:28:56 +0530 Subject: btrfs_read_block_groups: Use enums to index btrfs_space_info->block_groups. The current code uses integer literals to index btrfs_space_info->block_groups[] array. Instead use corresponding enums from 'enum btrfs_raid_types'. Signed-off-by: chandan Reviewed-by: David Sterba Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 99aa9b77e948..a770a6318433 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8441,9 +8441,13 @@ int btrfs_read_block_groups(struct btrfs_root *root) * avoid allocating from un-mirrored block group if there are * mirrored block groups. */ - list_for_each_entry(cache, &space_info->block_groups[3], list) + list_for_each_entry(cache, + &space_info->block_groups[BTRFS_RAID_RAID0], + list) set_block_group_ro(cache, 1); - list_for_each_entry(cache, &space_info->block_groups[4], list) + list_for_each_entry(cache, + &space_info->block_groups[BTRFS_RAID_SINGLE], + list) set_block_group_ro(cache, 1); } -- cgit v1.2.3 From 599c75ec3f7f3b606e8a0a684c00f12190712de8 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Tue, 16 Jul 2013 19:03:36 +0800 Subject: Btrfs/tracepoint: update delayed ref tracepoints This shows exactly how btrfs processes the delayed refs onto disks, which is very helpful on understanding delayed ref mechanism and debugging related bugs. Signed-off-by: Liu Bo Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/delayed-ref.c | 6 ++--- fs/btrfs/extent-tree.c | 6 +++++ include/trace/events/btrfs.h | 60 +++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 66 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index c219463fb1fd..f7be9f76a12b 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -600,7 +600,7 @@ static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info, INIT_LIST_HEAD(&head_ref->cluster); mutex_init(&head_ref->mutex); - trace_btrfs_delayed_ref_head(ref, head_ref, action); + trace_add_delayed_ref_head(ref, head_ref, action); existing = tree_insert(&delayed_refs->root, &ref->rb_node); @@ -661,7 +661,7 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info, ref->type = BTRFS_TREE_BLOCK_REF_KEY; full_ref->level = level; - trace_btrfs_delayed_tree_ref(ref, full_ref, action); + trace_add_delayed_tree_ref(ref, full_ref, action); existing = tree_insert(&delayed_refs->root, &ref->rb_node); @@ -722,7 +722,7 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info, full_ref->objectid = owner; full_ref->offset = offset; - trace_btrfs_delayed_data_ref(ref, full_ref, action); + trace_add_delayed_data_ref(ref, full_ref, action); existing = tree_insert(&delayed_refs->root, &ref->rb_node); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a770a6318433..ca02d3c1724c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2035,6 +2035,8 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, ins.type = BTRFS_EXTENT_ITEM_KEY; ref = btrfs_delayed_node_to_data_ref(node); + trace_run_delayed_data_ref(node, ref, node->action); + if (node->type == BTRFS_SHARED_DATA_REF_KEY) parent = ref->parent; else @@ -2178,6 +2180,8 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, SKINNY_METADATA); ref = btrfs_delayed_node_to_tree_ref(node); + trace_run_delayed_tree_ref(node, ref, node->action); + if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) parent = ref->parent; else @@ -2236,6 +2240,8 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, */ BUG_ON(extent_op); head = btrfs_delayed_node_to_head(node); + trace_run_delayed_ref_head(node, head, node->action); + if (insert_reserved) { btrfs_pin_extent(root, node->bytenr, node->num_bytes, 1); diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 2902657ba766..45702c3c3837 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -439,7 +439,7 @@ TRACE_EVENT(btrfs_sync_fs, { BTRFS_UPDATE_DELAYED_HEAD, "UPDATE_DELAYED_HEAD" }) -TRACE_EVENT(btrfs_delayed_tree_ref, +DECLARE_EVENT_CLASS(btrfs_delayed_tree_ref, TP_PROTO(struct btrfs_delayed_ref_node *ref, struct btrfs_delayed_tree_ref *full_ref, @@ -481,7 +481,25 @@ TRACE_EVENT(btrfs_delayed_tree_ref, (unsigned long long)__entry->seq) ); -TRACE_EVENT(btrfs_delayed_data_ref, +DEFINE_EVENT(btrfs_delayed_tree_ref, add_delayed_tree_ref, + + TP_PROTO(struct btrfs_delayed_ref_node *ref, + struct btrfs_delayed_tree_ref *full_ref, + int action), + + TP_ARGS(ref, full_ref, action) +); + +DEFINE_EVENT(btrfs_delayed_tree_ref, run_delayed_tree_ref, + + TP_PROTO(struct btrfs_delayed_ref_node *ref, + struct btrfs_delayed_tree_ref *full_ref, + int action), + + TP_ARGS(ref, full_ref, action) +); + +DECLARE_EVENT_CLASS(btrfs_delayed_data_ref, TP_PROTO(struct btrfs_delayed_ref_node *ref, struct btrfs_delayed_data_ref *full_ref, @@ -527,7 +545,25 @@ TRACE_EVENT(btrfs_delayed_data_ref, (unsigned long long)__entry->seq) ); -TRACE_EVENT(btrfs_delayed_ref_head, +DEFINE_EVENT(btrfs_delayed_data_ref, add_delayed_data_ref, + + TP_PROTO(struct btrfs_delayed_ref_node *ref, + struct btrfs_delayed_data_ref *full_ref, + int action), + + TP_ARGS(ref, full_ref, action) +); + +DEFINE_EVENT(btrfs_delayed_data_ref, run_delayed_data_ref, + + TP_PROTO(struct btrfs_delayed_ref_node *ref, + struct btrfs_delayed_data_ref *full_ref, + int action), + + TP_ARGS(ref, full_ref, action) +); + +DECLARE_EVENT_CLASS(btrfs_delayed_ref_head, TP_PROTO(struct btrfs_delayed_ref_node *ref, struct btrfs_delayed_ref_head *head_ref, @@ -556,6 +592,24 @@ TRACE_EVENT(btrfs_delayed_ref_head, __entry->is_data) ); +DEFINE_EVENT(btrfs_delayed_ref_head, add_delayed_ref_head, + + TP_PROTO(struct btrfs_delayed_ref_node *ref, + struct btrfs_delayed_ref_head *head_ref, + int action), + + TP_ARGS(ref, head_ref, action) +); + +DEFINE_EVENT(btrfs_delayed_ref_head, run_delayed_ref_head, + + TP_PROTO(struct btrfs_delayed_ref_node *ref, + struct btrfs_delayed_ref_head *head_ref, + int action), + + TP_ARGS(ref, head_ref, action) +); + #define show_chunk_type(type) \ __print_flags(type, "|", \ { BTRFS_BLOCK_GROUP_DATA, "DATA" }, \ -- cgit v1.2.3 From d4c34f6bffe246bcf05a1471873667d678a47a54 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Tue, 16 Jul 2013 15:38:33 +0200 Subject: Btrfs: Print key type in decimal everywhere This is confusing, sometimes the key type is printed in hex (without a leading "0x" which makes things even more complicated), sometimes in decimal... Change it to be in decimal everywhere. Signed-off-by: Stefan Behrens Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/print-tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index dc0024f17c1f..fe7bf4a12568 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -95,7 +95,7 @@ static void print_extent_item(struct extent_buffer *eb, int slot) struct btrfs_tree_block_info *info; info = (struct btrfs_tree_block_info *)(ei + 1); btrfs_tree_block_key(eb, info, &key); - printk(KERN_INFO "\t\ttree block key (%llu %x %llu) " + printk(KERN_INFO "\t\ttree block key (%llu %u %llu) " "level %d\n", (unsigned long long)btrfs_disk_key_objectid(&key), key.type, @@ -183,7 +183,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) item = btrfs_item_nr(l, i); btrfs_item_key_to_cpu(l, &key, i); type = btrfs_key_type(&key); - printk(KERN_INFO "\titem %d key (%llu %x %llu) itemoff %d " + printk(KERN_INFO "\titem %d key (%llu %u %llu) itemoff %d " "itemsize %d\n", i, (unsigned long long)key.objectid, type, -- cgit v1.2.3 From a1b83ac52d23c85581b87836c346fb5cb90f6bfd Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 19 Jul 2013 17:39:32 +0800 Subject: btrfs: fix get set label blocking against balance btrfs_ioctl_get_fslabel() and btrfs_ioctl_set_fslabel() used root->fs_info->volume_mutex mutex which caused operations like balance to block set/get label operation until its completion and generally balance operation takes a long time to complete, so it will be annoying to the user when cli appears hung also this patch will add a bit of optimization within the btrfs_ioctl_get_falabel() function. v1->v2: use fs_info->super_lock instead of uuid_mutex Signed-off-by: Anand Jain Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f856c97952db..556b3d5b18da 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4042,18 +4042,22 @@ out: static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg) { struct btrfs_root *root = BTRFS_I(file_inode(file))->root; - const char *label = root->fs_info->super_copy->label; - size_t len = strnlen(label, BTRFS_LABEL_SIZE); + size_t len; int ret; + char label[BTRFS_LABEL_SIZE]; + + spin_lock(&root->fs_info->super_lock); + memcpy(label, root->fs_info->super_copy->label, BTRFS_LABEL_SIZE); + spin_unlock(&root->fs_info->super_lock); + + len = strnlen(label, BTRFS_LABEL_SIZE); if (len == BTRFS_LABEL_SIZE) { pr_warn("btrfs: label is too long, return the first %zu bytes\n", --len); } - mutex_lock(&root->fs_info->volume_mutex); ret = copy_to_user(arg, label, len); - mutex_unlock(&root->fs_info->volume_mutex); return ret ? -EFAULT : 0; } @@ -4082,18 +4086,18 @@ static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg) if (ret) return ret; - mutex_lock(&root->fs_info->volume_mutex); trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out_unlock; } + spin_lock(&root->fs_info->super_lock); strcpy(super_block->label, label); + spin_unlock(&root->fs_info->super_lock); ret = btrfs_end_transaction(trans, root); out_unlock: - mutex_unlock(&root->fs_info->volume_mutex); mnt_drop_write_file(file); return ret; } -- cgit v1.2.3 From 50f1319cb5f7690e4d9de18d1a75ea89296d0e53 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 22 Jul 2013 12:50:37 -0400 Subject: Btrfs: reset ret in record_one_backref I was getting warnings when running find ./ -type f -exec btrfs fi defrag -f {} \; from record_one_backref because ret was set. Turns out it was because it was set to 1 because the search slot didn't come out exact and we never reset it. So reset it to 0 right after the search so we don't leak this and get uneccessary warnings. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 021694c08181..d3280b2b7592 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2132,6 +2132,7 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id, WARN_ON(1); return ret; } + ret = 0; while (1) { cond_resched(); @@ -2181,8 +2182,6 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id, old->len || extent_offset + num_bytes <= old->extent_offset + old->offset) continue; - - ret = 0; break; } -- cgit v1.2.3 From b37b39cd6b8af7a7c39fc7340d71f3db492af9e2 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 23 Jul 2013 16:57:15 -0400 Subject: Btrfs: cleanup reloc roots properly on error I was hitting the BUG_ON() at the end of merge_reloc_roots() because we were aborting the transaction at some point previously and then getting an error when we tried to drop the reloc root. I fixed btrfs_drop_snapshot to re-add us to the dead roots list if we failed, but this isn't the right thing to do for reloc roots since it uses root->root_list for it's own stuff in order to know what needs to be cleaned up. So fix btrfs_drop_snapshot to only do the re-add if we aren't dropping for reloc, and handle errors from merge_reloc_root() by dropping the reloc root we are processing since it won't be on the list of roots to cleanup. With this patch my reproducer no longer panics. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/relocation.c | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ca02d3c1724c..e868c35f760c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7696,7 +7696,7 @@ out: * don't have it in the radix (like when we recover after a power fail * or unmount) so we don't leak memory. */ - if (root_dropped == false) + if (!for_reloc && root_dropped == false) btrfs_add_dead_root(root); if (err) btrfs_std_error(root->fs_info, err); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 295a6115c326..5a23d875ab40 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2314,8 +2314,13 @@ again: BUG_ON(root->reloc_root != reloc_root); ret = merge_reloc_root(rc, root); - if (ret) + if (ret) { + __update_reloc_root(reloc_root, 1); + free_extent_buffer(reloc_root->node); + free_extent_buffer(reloc_root->commit_root); + kfree(reloc_root); goto out; + } } else { list_del_init(&reloc_root->root_list); } -- cgit v1.2.3 From 2112ac800d43e795323bc18558e43fd3641da4ed Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 26 Jul 2013 09:13:15 -0400 Subject: Btrfs: don't bother autodefragging if our root is going away We can end up with inodes on the auto defrag list that exist on roots that are going to be deleted. This is extra work we don't need to do, so just bail if our root has 0 root refs. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/file.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8e686a427ce2..8cc941e5b3e1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -310,6 +310,11 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, goto cleanup; } + if (btrfs_root_refs(&inode_root->root_item) == 0) { + ret = -ENOENT; + goto cleanup; + } + key.objectid = defrag->ino; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); key.offset = 0; -- cgit v1.2.3 From 3dc0e818afa25a605091b6a16c2c4e5698c0a9c2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 22 Jul 2013 09:55:15 +0300 Subject: btrfs/raid56: fix and cleanup some error paths The alloc_rbio() frees "raid_map" and "bbio" on error, so there is a potential double free bug in raid56_parity_write(). The raid56_parity_write() and raid56_parity_recover() functions should still free "raid_map" and "bbio" on error if other errors occur though, so I have added some more calls to kfree(). Signed-off-by: Dan Carpenter Reviewed-by: Miao Xie Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/raid56.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 0525e1389f5b..0db856ceecc4 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1687,11 +1687,8 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio, struct blk_plug_cb *cb; rbio = alloc_rbio(root, bbio, raid_map, stripe_len); - if (IS_ERR(rbio)) { - kfree(raid_map); - kfree(bbio); + if (IS_ERR(rbio)) return PTR_ERR(rbio); - } bio_list_add(&rbio->bio_list, bio); rbio->bio_list_bytes = bio->bi_size; @@ -2041,9 +2038,8 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, int ret; rbio = alloc_rbio(root, bbio, raid_map, stripe_len); - if (IS_ERR(rbio)) { + if (IS_ERR(rbio)) return PTR_ERR(rbio); - } rbio->read_rebuild = 1; bio_list_add(&rbio->bio_list, bio); @@ -2052,6 +2048,8 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, rbio->faila = find_logical_bio_stripe(rbio, bio); if (rbio->faila == -1) { BUG(); + kfree(raid_map); + kfree(bbio); kfree(rbio); return -EIO; } -- cgit v1.2.3 From 3cd846d1d7640dd6b64d251cb9efd1a490d9a601 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Mon, 22 Jul 2013 16:36:57 +0800 Subject: Btrfs, raid56: fix memory leak when allocating pages for p/q stripes failed Signed-off-by: Miao Xie Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/raid56.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 0db856ceecc4..d0ecfbd9cc9f 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1540,8 +1540,10 @@ static int full_stripe_write(struct btrfs_raid_bio *rbio) int ret; ret = alloc_rbio_parity_pages(rbio); - if (ret) + if (ret) { + __free_raid_bio(rbio); return ret; + } ret = lock_stripe_add(rbio); if (ret == 0) -- cgit v1.2.3 From 2c334e87f31783ca80b7ce265b25ba5489bfad1a Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Wed, 24 Jul 2013 10:29:05 +0800 Subject: Btrfs: add sanity checks regarding to parsing mount options I just notice the following commands succeed: mount -o thread_pool=-1 This is ridiculous, only positive thread_pool makes sense,this patch adds sanity checks for them, and also catches the error of ENOMEM if allocating memory fails. Signed-off-by: Wang Shilong Reviewed-by: Miao Xie Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/super.c | 47 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8eb6191d86da..cc002959493c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -496,10 +496,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) btrfs_set_opt(info->mount_opt, NOBARRIER); break; case Opt_thread_pool: - intarg = 0; - match_int(&args[0], &intarg); - if (intarg) + ret = match_int(&args[0], &intarg); + if (ret) { + goto out; + } else if (intarg > 0) { info->thread_pool_size = intarg; + } else { + ret = -EINVAL; + goto out; + } break; case Opt_max_inline: num = match_strdup(&args[0]); @@ -514,6 +519,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) } printk(KERN_INFO "btrfs: max_inline at %llu\n", (unsigned long long)info->max_inline); + } else { + ret = -ENOMEM; + goto out; } break; case Opt_alloc_start: @@ -526,6 +534,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) printk(KERN_INFO "btrfs: allocations start at %llu\n", (unsigned long long)info->alloc_start); + } else { + ret = -ENOMEM; + goto out; } break; case Opt_noacl: @@ -540,12 +551,16 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT); break; case Opt_ratio: - intarg = 0; - match_int(&args[0], &intarg); - if (intarg) { + ret = match_int(&args[0], &intarg); + if (ret) { + goto out; + } else if (intarg >= 0) { info->metadata_ratio = intarg; printk(KERN_INFO "btrfs: metadata ratio %d\n", info->metadata_ratio); + } else { + ret = -EINVAL; + goto out; } break; case Opt_discard: @@ -596,13 +611,17 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY); break; case Opt_check_integrity_print_mask: - intarg = 0; - match_int(&args[0], &intarg); - if (intarg) { + ret = match_int(&args[0], &intarg); + if (ret) { + goto out; + } else if (intarg >= 0) { info->check_integrity_print_mask = intarg; printk(KERN_INFO "btrfs:" " check_integrity_print_mask 0x%x\n", info->check_integrity_print_mask); + } else { + ret = -EINVAL; + goto out; } break; #else @@ -679,17 +698,25 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, case Opt_subvol: kfree(*subvol_name); *subvol_name = match_strdup(&args[0]); + if (!*subvol_name) { + error = -ENOMEM; + goto out; + } break; case Opt_subvolid: - intarg = 0; error = match_int(&args[0], &intarg); if (!error) { + goto out; + } else if (intarg >= 0) { /* we want the original fs_tree */ if (!intarg) *subvol_objectid = BTRFS_FS_TREE_OBJECTID; else *subvol_objectid = intarg; + } else { + error = -EINVAL; + goto out; } break; case Opt_subvolrootid: -- cgit v1.2.3 From 1493381f2f95a7d7383eeeb490ce0da9d9a7368f Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Wed, 24 Jul 2013 10:29:20 +0800 Subject: Btrfs: use u64 for subvolid when parsing mount options Although for most time, int is enough for subvolid, we should ensure safety in theory. Signed-off-by: Wang Shilong Reviewed-by: Miao Xie Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/super.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index cc002959493c..d82391d48974 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -327,7 +327,7 @@ enum { static match_table_t tokens = { {Opt_degraded, "degraded"}, {Opt_subvol, "subvol=%s"}, - {Opt_subvolid, "subvolid=%d"}, + {Opt_subvolid, "subvolid=%s"}, {Opt_device, "device=%s"}, {Opt_nodatasum, "nodatasum"}, {Opt_nodatacow, "nodatacow"}, @@ -673,8 +673,8 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, { substring_t args[MAX_OPT_ARGS]; char *device_name, *opts, *orig, *p; + char *num = NULL; int error = 0; - int intarg; if (!options) return 0; @@ -704,16 +704,14 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, } break; case Opt_subvolid: - error = match_int(&args[0], &intarg); - if (!error) { - goto out; - } else if (intarg >= 0) { + num = match_strdup(&args[0]); + if (num) { + *subvol_objectid = memparse(num, NULL); + kfree(num); /* we want the original fs_tree */ - if (!intarg) + if (!*subvol_objectid) *subvol_objectid = BTRFS_FS_TREE_OBJECTID; - else - *subvol_objectid = intarg; } else { error = -EINVAL; goto out; -- cgit v1.2.3 From 8507d216a41f37ef466b90b5e53cdb741c6e1c17 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Wed, 24 Jul 2013 10:30:02 +0800 Subject: Btrfs: add missing mounting options in btrfs_show_options() Some options are missing in btrfs_show_options(), this patch adds them. Signed-off-by: Wang Shilong Reviewed-by: Miao Xie Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/super.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d82391d48974..2cc5b80eeae9 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -965,6 +965,20 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) seq_puts(seq, ",inode_cache"); if (btrfs_test_opt(root, SKIP_BALANCE)) seq_puts(seq, ",skip_balance"); + if (btrfs_test_opt(root, RECOVERY)) + seq_puts(seq, ",recovery"); +#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY + if (btrfs_test_opt(root, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA)) + seq_puts(seq, ",check_int_data"); + else if (btrfs_test_opt(root, CHECK_INTEGRITY)) + seq_puts(seq, ",check_int"); + if (info->check_integrity_print_mask) + seq_printf(seq, ",check_int_print_mask=%d", + info->check_integrity_print_mask); +#endif + if (info->metadata_ratio) + seq_printf(seq, ",metadata_ratio=%d", + info->metadata_ratio); if (btrfs_test_opt(root, PANIC_ON_FATAL_ERROR)) seq_puts(seq, ",fatal_errors=panic"); return 0; -- cgit v1.2.3 From 09a7f7a28939fe281b6f2fef0fb1a733007d91fe Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 25 Jul 2013 19:22:32 +0800 Subject: Btrfs: remove unnecessary argument of bio_readpage_error() Signed-off-by: Miao Xie Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index fe443fece851..5d31f80c0562 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2202,8 +2202,7 @@ out: */ static int bio_readpage_error(struct bio *failed_bio, struct page *page, - u64 start, u64 end, int failed_mirror, - struct extent_state *state) + u64 start, u64 end, int failed_mirror) { struct io_failure_record *failrec = NULL; u64 private; @@ -2212,6 +2211,7 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_state *state; struct bio *bio; int num_copies; int ret; @@ -2296,22 +2296,18 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, * all the retry and error correction code that follows. no * matter what the error is, it is very likely to persist. */ - pr_debug("bio_readpage_error: cannot repair, num_copies == 1. " - "state=%p, num_copies=%d, next_mirror %d, " - "failed_mirror %d\n", state, num_copies, - failrec->this_mirror, failed_mirror); + pr_debug("bio_readpage_error: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n", + num_copies, failrec->this_mirror, failed_mirror); free_io_failure(inode, failrec, 0); return -EIO; } - if (!state) { - spin_lock(&tree->lock); - state = find_first_extent_bit_state(tree, failrec->start, - EXTENT_LOCKED); - if (state && state->start != failrec->start) - state = NULL; - spin_unlock(&tree->lock); - } + spin_lock(&tree->lock); + state = find_first_extent_bit_state(tree, failrec->start, + EXTENT_LOCKED); + if (state && state->start != failrec->start) + state = NULL; + spin_unlock(&tree->lock); /* * there are two premises: @@ -2541,7 +2537,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) * can't handle the error it will return -EIO and we * remain responsible for that page. */ - ret = bio_readpage_error(bio, page, start, end, mirror, NULL); + ret = bio_readpage_error(bio, page, start, end, mirror); if (ret == 0) { uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); -- cgit v1.2.3 From f2a09da9d0cba17ad4041e7e54f1ca840b12d0be Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 25 Jul 2013 19:22:33 +0800 Subject: Btrfs: add branch prediction hints in the read page end IO function This patch add some branch prediction hints into the end IO function of the read page, it reduced the percentage of the branch misses from 5.5% to 4.9%. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 5d31f80c0562..0297f6f4d4c3 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2502,7 +2502,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) spin_lock(&tree->lock); state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED); - if (state && state->start == start) { + if (likely(state && state->start == start)) { /* * take a reference on the state, unlock will drop * the ref @@ -2512,7 +2512,8 @@ static void end_bio_extent_readpage(struct bio *bio, int err) spin_unlock(&tree->lock); mirror = io_bio->mirror_num; - if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { + if (likely(uptodate && tree->ops && + tree->ops->readpage_end_io_hook)) { ret = tree->ops->readpage_end_io_hook(page, start, end, state, mirror); if (ret) @@ -2521,12 +2522,15 @@ static void end_bio_extent_readpage(struct bio *bio, int err) clean_io_failure(start, page); } - if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { + if (likely(uptodate)) + goto readpage_ok; + + if (tree->ops && tree->ops->readpage_io_failed_hook) { ret = tree->ops->readpage_io_failed_hook(page, mirror); if (!ret && !err && test_bit(BIO_UPTODATE, &bio->bi_flags)) uptodate = 1; - } else if (!uptodate) { + } else { /* * The generic bio_readpage_error handles errors the * following way: If possible, new read requests are @@ -2547,7 +2551,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) continue; } } - +readpage_ok: if (uptodate && tree->track_uptodate) { set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC); -- cgit v1.2.3 From facc8a2247340a9735fe8cc123c5da2102f5ef1b Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 25 Jul 2013 19:22:34 +0800 Subject: Btrfs: don't cache the csum value into the extent state tree Before applying this patch, we cached the csum value into the extent state tree when reading some data from the disk, this operation increased the lock contention of the state tree. Now, we just store the csum value into the bio structure or other unshared structure, so we can reduce the lock contention. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 21 +++++++++ fs/btrfs/ctree.h | 4 +- fs/btrfs/disk-io.c | 5 +- fs/btrfs/extent_io.c | 125 +++++++++++++++++++------------------------------ fs/btrfs/extent_io.h | 10 ++-- fs/btrfs/file-item.c | 81 +++++++++++++++++++++----------- fs/btrfs/inode.c | 85 ++++++++++++++------------------- fs/btrfs/volumes.h | 7 +++ 8 files changed, 174 insertions(+), 164 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 08b286b2a2c5..d0ae226926ee 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -218,6 +218,27 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) return 0; } +struct btrfs_dio_private { + struct inode *inode; + u64 logical_offset; + u64 disk_bytenr; + u64 bytes; + void *private; + + /* number of bios pending for this dio */ + atomic_t pending_bios; + + /* IO errors */ + int errors; + + /* orig_bio is our btrfs_io_bio */ + struct bio *orig_bio; + + /* dio_bio came from fs/direct-io.c */ + struct bio *dio_bio; + u8 csum[0]; +}; + /* * Disable DIO read nolock optimization, so new dio readers will be forced * to grab i_mutex. It is used to avoid the endless truncate due to diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index dda60e9f6b89..cbb1263752f8 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3556,12 +3556,14 @@ int btrfs_find_name_in_ext_backref(struct btrfs_path *path, struct btrfs_inode_extref **extref_ret); /* file-item.c */ +struct btrfs_dio_private; int btrfs_del_csums(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 len); int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, struct bio *bio, u32 *dst); int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, - struct bio *bio, u64 logical_offset); + struct btrfs_dio_private *dip, struct bio *bio, + u64 logical_offset); int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 pos, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 56fcf8439f42..8e2eee67c3a8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -576,8 +576,9 @@ static noinline int check_leaf(struct btrfs_root *root, return 0; } -static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, - struct extent_state *state, int mirror) +static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, + u64 phy_offset, struct page *page, + u64 start, u64 end, int mirror) { struct extent_io_tree *tree; u64 found_start; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 0297f6f4d4c3..6fbacfabb660 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1837,64 +1837,6 @@ out: return ret; } -void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[], - int count) -{ - struct rb_node *node; - struct extent_state *state; - - spin_lock(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(tree, start); - BUG_ON(!node); - - state = rb_entry(node, struct extent_state, rb_node); - BUG_ON(state->start != start); - - while (count) { - state->private = *csums++; - count--; - state = next_state(state); - } - spin_unlock(&tree->lock); -} - -static inline u64 __btrfs_get_bio_offset(struct bio *bio, int bio_index) -{ - struct bio_vec *bvec = bio->bi_io_vec + bio_index; - - return page_offset(bvec->bv_page) + bvec->bv_offset; -} - -void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio, int bio_index, - u32 csums[], int count) -{ - struct rb_node *node; - struct extent_state *state = NULL; - u64 start; - - spin_lock(&tree->lock); - do { - start = __btrfs_get_bio_offset(bio, bio_index); - if (state == NULL || state->start != start) { - node = tree_search(tree, start); - BUG_ON(!node); - - state = rb_entry(node, struct extent_state, rb_node); - BUG_ON(state->start != start); - } - state->private = *csums++; - count--; - bio_index++; - - state = next_state(state); - } while (count); - spin_unlock(&tree->lock); -} - int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) { struct rb_node *node; @@ -2201,8 +2143,9 @@ out: * needed */ -static int bio_readpage_error(struct bio *failed_bio, struct page *page, - u64 start, u64 end, int failed_mirror) +static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, + struct page *page, u64 start, u64 end, + int failed_mirror) { struct io_failure_record *failrec = NULL; u64 private; @@ -2211,8 +2154,9 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - struct extent_state *state; struct bio *bio; + struct btrfs_io_bio *btrfs_failed_bio; + struct btrfs_io_bio *btrfs_bio; int num_copies; int ret; int read_mode; @@ -2302,13 +2246,6 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, return -EIO; } - spin_lock(&tree->lock); - state = find_first_extent_bit_state(tree, failrec->start, - EXTENT_LOCKED); - if (state && state->start != failrec->start) - state = NULL; - spin_unlock(&tree->lock); - /* * there are two premises: * a) deliver good data to the caller @@ -2345,9 +2282,8 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, read_mode = READ_SYNC; } - if (!state || failrec->this_mirror > num_copies) { - pr_debug("bio_readpage_error: (fail) state=%p, num_copies=%d, " - "next_mirror %d, failed_mirror %d\n", state, + if (failrec->this_mirror > num_copies) { + pr_debug("bio_readpage_error: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n", num_copies, failrec->this_mirror, failed_mirror); free_io_failure(inode, failrec, 0); return -EIO; @@ -2358,12 +2294,24 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, free_io_failure(inode, failrec, 0); return -EIO; } - bio->bi_private = state; bio->bi_end_io = failed_bio->bi_end_io; bio->bi_sector = failrec->logical >> 9; bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; bio->bi_size = 0; + btrfs_failed_bio = btrfs_io_bio(failed_bio); + if (btrfs_failed_bio->csum) { + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; + u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); + + btrfs_bio = btrfs_io_bio(bio); + btrfs_bio->csum = btrfs_bio->csum_inline; + phy_offset >>= inode->i_sb->s_blocksize_bits; + phy_offset *= csum_size; + memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + phy_offset, + csum_size); + } + bio_add_page(bio, page, failrec->len, start - page_offset(page)); pr_debug("bio_readpage_error: submitting new read[%#x] to " @@ -2462,9 +2410,12 @@ static void end_bio_extent_readpage(struct bio *bio, int err) int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; struct bio_vec *bvec = bio->bi_io_vec; + struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); struct extent_io_tree *tree; + u64 offset = 0; u64 start; u64 end; + u64 len; int mirror; int ret; @@ -2475,7 +2426,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err) struct page *page = bvec->bv_page; struct extent_state *cached = NULL; struct extent_state *state; - struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); struct inode *inode = page->mapping->host; pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " @@ -2496,6 +2446,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) start = page_offset(page); end = start + bvec->bv_offset + bvec->bv_len - 1; + len = bvec->bv_len; if (++bvec <= bvec_end) prefetchw(&bvec->bv_page->flags); @@ -2514,8 +2465,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err) mirror = io_bio->mirror_num; if (likely(uptodate && tree->ops && tree->ops->readpage_end_io_hook)) { - ret = tree->ops->readpage_end_io_hook(page, start, end, - state, mirror); + ret = tree->ops->readpage_end_io_hook(io_bio, offset, + page, start, end, + mirror); if (ret) uptodate = 0; else @@ -2541,7 +2493,8 @@ static void end_bio_extent_readpage(struct bio *bio, int err) * can't handle the error it will return -EIO and we * remain responsible for that page. */ - ret = bio_readpage_error(bio, page, start, end, mirror); + ret = bio_readpage_error(bio, offset, page, start, end, + mirror); if (ret == 0) { uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); @@ -2573,8 +2526,11 @@ readpage_ok: SetPageError(page); } unlock_page(page); + offset += len; } while (bvec <= bvec_end); + if (io_bio->end_io) + io_bio->end_io(io_bio, err); bio_put(bio); } @@ -2586,6 +2542,7 @@ struct bio * btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, gfp_t gfp_flags) { + struct btrfs_io_bio *btrfs_bio; struct bio *bio; bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset); @@ -2601,6 +2558,10 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, bio->bi_size = 0; bio->bi_bdev = bdev; bio->bi_sector = first_sector; + btrfs_bio = btrfs_io_bio(bio); + btrfs_bio->csum = NULL; + btrfs_bio->csum_allocated = NULL; + btrfs_bio->end_io = NULL; } return bio; } @@ -2614,7 +2575,17 @@ struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask) /* this also allocates from the btrfs_bioset */ struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) { - return bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset); + struct btrfs_io_bio *btrfs_bio; + struct bio *bio; + + bio = bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset); + if (bio) { + btrfs_bio = btrfs_io_bio(bio); + btrfs_bio->csum = NULL; + btrfs_bio->csum_allocated = NULL; + btrfs_bio->end_io = NULL; + } + return bio; } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 3b8c4e26e1da..f7544afefdb9 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -62,6 +62,7 @@ struct extent_state; struct btrfs_root; +struct btrfs_io_bio; typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, struct bio *bio, int mirror_num, @@ -77,8 +78,9 @@ struct extent_io_ops { size_t size, struct bio *bio, unsigned long bio_flags); int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); - int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, - struct extent_state *state, int mirror); + int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset, + struct page *page, u64 start, u64 end, + int mirror); int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state, int uptodate); void (*set_bit_hook)(struct inode *inode, struct extent_state *state, @@ -262,10 +264,6 @@ int extent_readpages(struct extent_io_tree *tree, int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len, get_extent_t *get_extent); int set_state_private(struct extent_io_tree *tree, u64 start, u64 private); -void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[], - int count); -void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio, - int bvec_index, u32 csums[], int count); int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private); void set_page_extent_mapped(struct page *page); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index a7bfc9541803..f87d09a0dfaa 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -23,6 +23,7 @@ #include "ctree.h" #include "disk-io.h" #include "transaction.h" +#include "volumes.h" #include "print-tree.h" #define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \ @@ -152,28 +153,54 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, return ret; } +static void btrfs_io_bio_endio_readpage(struct btrfs_io_bio *bio, int err) +{ + kfree(bio->csum_allocated); +} + static int __btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, struct bio *bio, u64 logical_offset, u32 *dst, int dio) { - u32 sum[16]; - int len; struct bio_vec *bvec = bio->bi_io_vec; - int bio_index = 0; + struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio); + struct btrfs_csum_item *item = NULL; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct btrfs_path *path; + u8 *csum; u64 offset = 0; u64 item_start_offset = 0; u64 item_last_offset = 0; u64 disk_bytenr; u32 diff; - u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); + int nblocks; + int bio_index = 0; int count; - struct btrfs_path *path; - struct btrfs_csum_item *item = NULL; - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); path = btrfs_alloc_path(); if (!path) return -ENOMEM; + + nblocks = bio->bi_size >> inode->i_sb->s_blocksize_bits; + if (!dst) { + if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) { + btrfs_bio->csum_allocated = kmalloc(nblocks * csum_size, + GFP_NOFS); + if (!btrfs_bio->csum_allocated) { + btrfs_free_path(path); + return -ENOMEM; + } + btrfs_bio->csum = btrfs_bio->csum_allocated; + btrfs_bio->end_io = btrfs_io_bio_endio_readpage; + } else { + btrfs_bio->csum = btrfs_bio->csum_inline; + } + csum = btrfs_bio->csum; + } else { + csum = (u8 *)dst; + } + if (bio->bi_size > PAGE_CACHE_SIZE * 8) path->reada = 2; @@ -194,11 +221,10 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, if (dio) offset = logical_offset; while (bio_index < bio->bi_vcnt) { - len = min_t(int, ARRAY_SIZE(sum), bio->bi_vcnt - bio_index); if (!dio) offset = page_offset(bvec->bv_page) + bvec->bv_offset; - count = btrfs_find_ordered_sum(inode, offset, disk_bytenr, sum, - len); + count = btrfs_find_ordered_sum(inode, offset, disk_bytenr, + (u32 *)csum, nblocks); if (count) goto found; @@ -213,7 +239,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, path, disk_bytenr, 0); if (IS_ERR(item)) { count = 1; - sum[0] = 0; + memset(csum, 0, csum_size); if (BTRFS_I(inode)->root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) { set_extent_bits(io_tree, offset, @@ -249,23 +275,14 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, diff = disk_bytenr - item_start_offset; diff = diff / root->sectorsize; diff = diff * csum_size; - count = min_t(int, len, (item_last_offset - disk_bytenr) >> - inode->i_sb->s_blocksize_bits); - read_extent_buffer(path->nodes[0], sum, + count = min_t(int, nblocks, (item_last_offset - disk_bytenr) >> + inode->i_sb->s_blocksize_bits); + read_extent_buffer(path->nodes[0], csum, ((unsigned long)item) + diff, csum_size * count); found: - if (dst) { - memcpy(dst, sum, count * csum_size); - dst += count; - } else { - if (dio) - extent_cache_csums_dio(io_tree, offset, sum, - count); - else - extent_cache_csums(io_tree, bio, bio_index, sum, - count); - } + csum += count * csum_size; + nblocks -= count; while (count--) { disk_bytenr += bvec->bv_len; offset += bvec->bv_len; @@ -284,9 +301,19 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, } int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, - struct bio *bio, u64 offset) + struct btrfs_dio_private *dip, struct bio *bio, + u64 offset) { - return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1); + int len = (bio->bi_sector << 9) - dip->disk_bytenr; + u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); + int ret; + + len >>= inode->i_sb->s_blocksize_bits; + len *= csum_size; + + ret = __btrfs_lookup_bio_sums(root, inode, bio, offset, + (u32 *)(dip->csum + len), 1); + return ret; } int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d3280b2b7592..b47330c8e02c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2826,16 +2826,16 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, * if there's a match, we allow the bio to finish. If not, the code in * extent_io.c will try to find good copies for us. */ -static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, - struct extent_state *state, int mirror) +static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio, + u64 phy_offset, struct page *page, + u64 start, u64 end, int mirror) { size_t offset = start - page_offset(page); struct inode *inode = page->mapping->host; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; char *kaddr; - u64 private = ~(u32)0; - int ret; struct btrfs_root *root = BTRFS_I(inode)->root; + u32 csum_expected; u32 csum = ~(u32)0; static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); @@ -2855,19 +2855,13 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, return 0; } - if (state && state->start == start) { - private = state->private; - ret = 0; - } else { - ret = get_state_private(io_tree, start, &private); - } - kaddr = kmap_atomic(page); - if (ret) - goto zeroit; + phy_offset >>= inode->i_sb->s_blocksize_bits; + csum_expected = *(((u32 *)io_bio->csum) + phy_offset); + kaddr = kmap_atomic(page); csum = btrfs_csum_data(kaddr + offset, csum, end - start + 1); btrfs_csum_final(csum, (char *)&csum); - if (csum != private) + if (csum != csum_expected) goto zeroit; kunmap_atomic(kaddr); @@ -2876,14 +2870,13 @@ good: zeroit: if (__ratelimit(&_rs)) - btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u private %llu", + btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u", (unsigned long long)btrfs_ino(page->mapping->host), - (unsigned long long)start, csum, - (unsigned long long)private); + (unsigned long long)start, csum, csum_expected); memset(kaddr + offset, 1, end - start + 1); flush_dcache_page(page); kunmap_atomic(kaddr); - if (private == 0) + if (csum_expected == 0) return 0; return -EIO; } @@ -6812,26 +6805,6 @@ unlock_err: return ret; } -struct btrfs_dio_private { - struct inode *inode; - u64 logical_offset; - u64 disk_bytenr; - u64 bytes; - void *private; - - /* number of bios pending for this dio */ - atomic_t pending_bios; - - /* IO errors */ - int errors; - - /* orig_bio is our btrfs_io_bio */ - struct bio *orig_bio; - - /* dio_bio came from fs/direct-io.c */ - struct bio *dio_bio; -}; - static void btrfs_endio_direct_read(struct bio *bio, int err) { struct btrfs_dio_private *dip = bio->bi_private; @@ -6840,6 +6813,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) struct inode *inode = dip->inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct bio *dio_bio; + u32 *csums = (u32 *)dip->csum; + int index = 0; u64 start; start = dip->logical_offset; @@ -6848,12 +6823,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) struct page *page = bvec->bv_page; char *kaddr; u32 csum = ~(u32)0; - u64 private = ~(u32)0; unsigned long flags; - if (get_state_private(&BTRFS_I(inode)->io_tree, - start, &private)) - goto failed; local_irq_save(flags); kaddr = kmap_atomic(page); csum = btrfs_csum_data(kaddr + bvec->bv_offset, @@ -6863,18 +6834,18 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) local_irq_restore(flags); flush_dcache_page(bvec->bv_page); - if (csum != private) { -failed: - btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u private %u", - (unsigned long long)btrfs_ino(inode), - (unsigned long long)start, - csum, (unsigned)private); + if (csum != csums[index]) { + btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u", + (unsigned long long)btrfs_ino(inode), + (unsigned long long)start, + csum, csums[index]); err = -EIO; } } start += bvec->bv_len; bvec++; + index++; } while (bvec <= bvec_end); unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, @@ -6991,6 +6962,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, int rw, u64 file_offset, int skip_sum, int async_submit) { + struct btrfs_dio_private *dip = bio->bi_private; int write = rw & REQ_WRITE; struct btrfs_root *root = BTRFS_I(inode)->root; int ret; @@ -7025,7 +6997,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, if (ret) goto err; } else if (!skip_sum) { - ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset); + ret = btrfs_lookup_bio_sums_dio(root, inode, dip, bio, + file_offset); if (ret) goto err; } @@ -7060,6 +7033,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, bio_put(orig_bio); return -EIO; } + if (map_length >= orig_bio->bi_size) { bio = orig_bio; goto submit; @@ -7155,19 +7129,28 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, struct btrfs_dio_private *dip; struct bio *io_bio; int skip_sum; + int sum_len; int write = rw & REQ_WRITE; int ret = 0; + u16 csum_size; skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS); - if (!io_bio) { ret = -ENOMEM; goto free_ordered; } - dip = kmalloc(sizeof(*dip), GFP_NOFS); + if (!skip_sum && !write) { + csum_size = btrfs_super_csum_size(root->fs_info->super_copy); + sum_len = dio_bio->bi_size >> inode->i_sb->s_blocksize_bits; + sum_len *= csum_size; + } else { + sum_len = 0; + } + + dip = kmalloc(sizeof(*dip) + sum_len, GFP_NOFS); if (!dip) { ret = -ENOMEM; goto free_io_bio; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 86705583480d..08c44d9059b1 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -152,6 +152,8 @@ struct btrfs_fs_devices { int rotating; }; +#define BTRFS_BIO_INLINE_CSUM_SIZE 64 + /* * we need the mirror number and stripe index to be passed around * the call chain while we are processing end_io (especially errors). @@ -161,9 +163,14 @@ struct btrfs_fs_devices { * we allocate are actually btrfs_io_bios. We'll cram as much of * struct btrfs_bio as we can into this over time. */ +typedef void (btrfs_io_bio_end_io_t) (struct btrfs_io_bio *bio, int err); struct btrfs_io_bio { unsigned long mirror_num; unsigned long stripe_index; + u8 *csum; + u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE]; + u8 *csum_allocated; + btrfs_io_bio_end_io_t *end_io; struct bio bio; }; -- cgit v1.2.3 From 883d0de485222715929f7b7e2a9a34dc9b9b2be2 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 25 Jul 2013 19:22:35 +0800 Subject: Btrfs: batch the extent state operation in the end io handle of the read page Before applying this patch, we set the uptodate flag and unlock the extent by the page size, it is unnecessary, we can do it in batches, it can reduce the lock contention of the extent state tree. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 73 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 31 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 6fbacfabb660..0a77b48c93e5 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -762,15 +762,6 @@ static void cache_state(struct extent_state *state, } } -static void uncache_state(struct extent_state **cached_ptr) -{ - if (cached_ptr && (*cached_ptr)) { - struct extent_state *state = *cached_ptr; - *cached_ptr = NULL; - free_extent_state(state); - } -} - /* * set some bits on a range in the tree. This may require allocations or * sleeping, so the gfp mask is used to indicate what is allowed. @@ -2115,7 +2106,8 @@ static int clean_io_failure(u64 start, struct page *page) EXTENT_LOCKED); spin_unlock(&BTRFS_I(inode)->io_tree.lock); - if (state && state->start == failrec->start) { + if (state && state->start <= failrec->start && + state->end >= failrec->start + failrec->len - 1) { fs_info = BTRFS_I(inode)->root->fs_info; num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len); @@ -2394,6 +2386,18 @@ static void end_bio_extent_writepage(struct bio *bio, int err) bio_put(bio); } +static void +endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len, + int uptodate) +{ + struct extent_state *cached = NULL; + u64 end = start + len - 1; + + if (uptodate && tree->track_uptodate) + set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC); + unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); +} + /* * after a readpage IO is done, we need to: * clear the uptodate bits on error @@ -2416,6 +2420,8 @@ static void end_bio_extent_readpage(struct bio *bio, int err) u64 start; u64 end; u64 len; + u64 extent_start = 0; + u64 extent_len = 0; int mirror; int ret; @@ -2424,8 +2430,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err) do { struct page *page = bvec->bv_page; - struct extent_state *cached = NULL; - struct extent_state *state; struct inode *inode = page->mapping->host; pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " @@ -2451,17 +2455,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err) if (++bvec <= bvec_end) prefetchw(&bvec->bv_page->flags); - spin_lock(&tree->lock); - state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED); - if (likely(state && state->start == start)) { - /* - * take a reference on the state, unlock will drop - * the ref - */ - cache_state(state, &cached); - } - spin_unlock(&tree->lock); - mirror = io_bio->mirror_num; if (likely(uptodate && tree->ops && tree->ops->readpage_end_io_hook)) { @@ -2500,18 +2493,11 @@ static void end_bio_extent_readpage(struct bio *bio, int err) test_bit(BIO_UPTODATE, &bio->bi_flags); if (err) uptodate = 0; - uncache_state(&cached); continue; } } readpage_ok: - if (uptodate && tree->track_uptodate) { - set_extent_uptodate(tree, start, end, &cached, - GFP_ATOMIC); - } - unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); - - if (uptodate) { + if (likely(uptodate)) { loff_t i_size = i_size_read(inode); pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; unsigned offset; @@ -2527,8 +2513,33 @@ readpage_ok: } unlock_page(page); offset += len; + + if (unlikely(!uptodate)) { + if (extent_len) { + endio_readpage_release_extent(tree, + extent_start, + extent_len, 1); + extent_start = 0; + extent_len = 0; + } + endio_readpage_release_extent(tree, start, + end - start + 1, 0); + } else if (!extent_len) { + extent_start = start; + extent_len = end + 1 - start; + } else if (extent_start + extent_len == start) { + extent_len += end + 1 - start; + } else { + endio_readpage_release_extent(tree, extent_start, + extent_len, uptodate); + extent_start = start; + extent_len = end + 1 - start; + } } while (bvec <= bvec_end); + if (extent_len) + endio_readpage_release_extent(tree, extent_start, extent_len, + uptodate); if (io_bio->end_io) io_bio->end_io(io_bio, err); bio_put(bio); -- cgit v1.2.3 From 9974090bdd7ac310d99a8ce6da7d6a19b3099ff9 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 25 Jul 2013 19:22:36 +0800 Subject: Btrfs: batch the extent state operation when reading pages In the past, we cached the checksum value in the extent state object, so we had to split the extent state object by the block size, or we had no space to keep this checksum value. But it increased the lock contention of the extent state tree. Now we removed this limit by caching the checksum into the bio object, so it is unnecessary to do the extent state operations by the block size, we can do it in batches, in this way, we can reduce the lock contention of the extent state tree. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 135 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 107 insertions(+), 28 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 0a77b48c93e5..0d40d082f0c7 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2726,11 +2726,11 @@ void set_page_extent_mapped(struct page *page) * handlers) * XXX JDM: This needs looking at to ensure proper page locking */ -static int __extent_read_full_page(struct extent_io_tree *tree, - struct page *page, - get_extent_t *get_extent, - struct bio **bio, int mirror_num, - unsigned long *bio_flags, int rw) +static int __do_readpage(struct extent_io_tree *tree, + struct page *page, + get_extent_t *get_extent, + struct bio **bio, int mirror_num, + unsigned long *bio_flags, int rw) { struct inode *inode = page->mapping->host; u64 start = page_offset(page); @@ -2744,7 +2744,6 @@ static int __extent_read_full_page(struct extent_io_tree *tree, sector_t sector; struct extent_map *em; struct block_device *bdev; - struct btrfs_ordered_extent *ordered; int ret; int nr = 0; size_t pg_offset = 0; @@ -2755,24 +2754,15 @@ static int __extent_read_full_page(struct extent_io_tree *tree, set_page_extent_mapped(page); + end = page_end; if (!PageUptodate(page)) { if (cleancache_get_page(page) == 0) { BUG_ON(blocksize != PAGE_SIZE); + unlock_extent(tree, start, end); goto out; } } - end = page_end; - while (1) { - lock_extent(tree, start, end); - ordered = btrfs_lookup_ordered_extent(inode, start); - if (!ordered) - break; - unlock_extent(tree, start, end); - btrfs_start_ordered_extent(inode, ordered, 1); - btrfs_put_ordered_extent(ordered); - } - if (page->index == last_byte >> PAGE_CACHE_SHIFT) { char *userpage; size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1); @@ -2901,6 +2891,101 @@ out: return 0; } +static inline void __do_contiguous_readpages(struct extent_io_tree *tree, + struct page *pages[], int nr_pages, + u64 start, u64 end, + get_extent_t *get_extent, + struct bio **bio, int mirror_num, + unsigned long *bio_flags, int rw) +{ + struct inode *inode; + struct btrfs_ordered_extent *ordered; + int index; + + inode = pages[0]->mapping->host; + while (1) { + lock_extent(tree, start, end); + ordered = btrfs_lookup_ordered_range(inode, start, + end - start + 1); + if (!ordered) + break; + unlock_extent(tree, start, end); + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + } + + for (index = 0; index < nr_pages; index++) { + __do_readpage(tree, pages[index], get_extent, bio, mirror_num, + bio_flags, rw); + page_cache_release(pages[index]); + } +} + +static void __extent_readpages(struct extent_io_tree *tree, + struct page *pages[], + int nr_pages, get_extent_t *get_extent, + struct bio **bio, int mirror_num, + unsigned long *bio_flags, int rw) +{ + u64 start; + u64 end = 0; + u64 page_start; + int index; + int first_index; + + for (index = 0; index < nr_pages; index++) { + page_start = page_offset(pages[index]); + if (!end) { + start = page_start; + end = start + PAGE_CACHE_SIZE - 1; + first_index = index; + } else if (end + 1 == page_start) { + end += PAGE_CACHE_SIZE; + } else { + __do_contiguous_readpages(tree, &pages[first_index], + index - first_index, start, + end, get_extent, bio, + mirror_num, bio_flags, rw); + start = page_start; + end = start + PAGE_CACHE_SIZE - 1; + first_index = index; + } + } + + if (end) + __do_contiguous_readpages(tree, &pages[first_index], + index - first_index, start, + end, get_extent, bio, + mirror_num, bio_flags, rw); +} + +static int __extent_read_full_page(struct extent_io_tree *tree, + struct page *page, + get_extent_t *get_extent, + struct bio **bio, int mirror_num, + unsigned long *bio_flags, int rw) +{ + struct inode *inode = page->mapping->host; + struct btrfs_ordered_extent *ordered; + u64 start = page_offset(page); + u64 end = start + PAGE_CACHE_SIZE - 1; + int ret; + + while (1) { + lock_extent(tree, start, end); + ordered = btrfs_lookup_ordered_extent(inode, start); + if (!ordered) + break; + unlock_extent(tree, start, end); + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + } + + ret = __do_readpage(tree, page, get_extent, bio, mirror_num, bio_flags, + rw); + return ret; +} + int extent_read_full_page(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent, int mirror_num) { @@ -3751,7 +3836,6 @@ int extent_readpages(struct extent_io_tree *tree, unsigned long bio_flags = 0; struct page *pagepool[16]; struct page *page; - int i = 0; int nr = 0; for (page_idx = 0; page_idx < nr_pages; page_idx++) { @@ -3768,18 +3852,13 @@ int extent_readpages(struct extent_io_tree *tree, pagepool[nr++] = page; if (nr < ARRAY_SIZE(pagepool)) continue; - for (i = 0; i < nr; i++) { - __extent_read_full_page(tree, pagepool[i], get_extent, - &bio, 0, &bio_flags, READ); - page_cache_release(pagepool[i]); - } + __extent_readpages(tree, pagepool, nr, get_extent, + &bio, 0, &bio_flags, READ); nr = 0; } - for (i = 0; i < nr; i++) { - __extent_read_full_page(tree, pagepool[i], get_extent, - &bio, 0, &bio_flags, READ); - page_cache_release(pagepool[i]); - } + if (nr) + __extent_readpages(tree, pagepool, nr, get_extent, + &bio, 0, &bio_flags, READ); BUG_ON(!list_empty(pages)); if (bio) -- cgit v1.2.3 From 125bac016d60e78120e92904a5b2fc3a5ebf0475 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 25 Jul 2013 19:22:37 +0800 Subject: Btrfs: cache the extent map struct when reading several pages When we read several pages at once, we needn't get the extent map object every time we deal with a page, and we can cache the extent map object. So, we can reduce the search time of the extent map, and besides that, we also can reduce the lock contention of the extent map tree. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 57 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 0d40d082f0c7..daf180dafe39 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2720,6 +2720,33 @@ void set_page_extent_mapped(struct page *page) } } +static struct extent_map * +__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset, + u64 start, u64 len, get_extent_t *get_extent, + struct extent_map **em_cached) +{ + struct extent_map *em; + + if (em_cached && *em_cached) { + em = *em_cached; + if (em->in_tree && start >= em->start && + start < extent_map_end(em)) { + atomic_inc(&em->refs); + return em; + } + + free_extent_map(em); + *em_cached = NULL; + } + + em = get_extent(inode, page, pg_offset, start, len, 0); + if (em_cached && !IS_ERR_OR_NULL(em)) { + BUG_ON(*em_cached); + atomic_inc(&em->refs); + *em_cached = em; + } + return em; +} /* * basic readpage implementation. Locked extent state structs are inserted * into the tree that are removed when the IO is done (by the end_io @@ -2729,6 +2756,7 @@ void set_page_extent_mapped(struct page *page) static int __do_readpage(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent, + struct extent_map **em_cached, struct bio **bio, int mirror_num, unsigned long *bio_flags, int rw) { @@ -2793,8 +2821,8 @@ static int __do_readpage(struct extent_io_tree *tree, &cached, GFP_NOFS); break; } - em = get_extent(inode, page, pg_offset, cur, - end - cur + 1, 0); + em = __get_extent_map(inode, page, pg_offset, cur, + end - cur + 1, get_extent, em_cached); if (IS_ERR_OR_NULL(em)) { SetPageError(page); unlock_extent(tree, cur, end); @@ -2895,6 +2923,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, struct page *pages[], int nr_pages, u64 start, u64 end, get_extent_t *get_extent, + struct extent_map **em_cached, struct bio **bio, int mirror_num, unsigned long *bio_flags, int rw) { @@ -2915,8 +2944,8 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, } for (index = 0; index < nr_pages; index++) { - __do_readpage(tree, pages[index], get_extent, bio, mirror_num, - bio_flags, rw); + __do_readpage(tree, pages[index], get_extent, em_cached, bio, + mirror_num, bio_flags, rw); page_cache_release(pages[index]); } } @@ -2924,6 +2953,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, static void __extent_readpages(struct extent_io_tree *tree, struct page *pages[], int nr_pages, get_extent_t *get_extent, + struct extent_map **em_cached, struct bio **bio, int mirror_num, unsigned long *bio_flags, int rw) { @@ -2944,8 +2974,9 @@ static void __extent_readpages(struct extent_io_tree *tree, } else { __do_contiguous_readpages(tree, &pages[first_index], index - first_index, start, - end, get_extent, bio, - mirror_num, bio_flags, rw); + end, get_extent, em_cached, + bio, mirror_num, bio_flags, + rw); start = page_start; end = start + PAGE_CACHE_SIZE - 1; first_index = index; @@ -2955,7 +2986,7 @@ static void __extent_readpages(struct extent_io_tree *tree, if (end) __do_contiguous_readpages(tree, &pages[first_index], index - first_index, start, - end, get_extent, bio, + end, get_extent, em_cached, bio, mirror_num, bio_flags, rw); } @@ -2981,8 +3012,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree, btrfs_put_ordered_extent(ordered); } - ret = __do_readpage(tree, page, get_extent, bio, mirror_num, bio_flags, - rw); + ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num, + bio_flags, rw); return ret; } @@ -3836,6 +3867,7 @@ int extent_readpages(struct extent_io_tree *tree, unsigned long bio_flags = 0; struct page *pagepool[16]; struct page *page; + struct extent_map *em_cached = NULL; int nr = 0; for (page_idx = 0; page_idx < nr_pages; page_idx++) { @@ -3852,14 +3884,17 @@ int extent_readpages(struct extent_io_tree *tree, pagepool[nr++] = page; if (nr < ARRAY_SIZE(pagepool)) continue; - __extent_readpages(tree, pagepool, nr, get_extent, + __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, &bio, 0, &bio_flags, READ); nr = 0; } if (nr) - __extent_readpages(tree, pagepool, nr, get_extent, + __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, &bio, 0, &bio_flags, READ); + if (em_cached) + free_extent_map(em_cached); + BUG_ON(!list_empty(pages)); if (bio) return submit_one_bio(READ, bio, 0, bio_flags); -- cgit v1.2.3 From 8068a47e2a77ecae27cfd0298e8732756f565ed1 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 26 Jul 2013 01:29:35 +0800 Subject: btrfs: use BTRFS_SUPER_INFO_SIZE macro at btrfs_read_dev_super() Signed-off-by: Anand Jain Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8e2eee67c3a8..13564354c927 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2984,9 +2984,11 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev) */ for (i = 0; i < 1; i++) { bytenr = btrfs_sb_offset(i); - if (bytenr + 4096 >= i_size_read(bdev->bd_inode)) + if (bytenr + BTRFS_SUPER_INFO_SIZE >= + i_size_read(bdev->bd_inode)) break; - bh = __bread(bdev, bytenr / 4096, 4096); + bh = __bread(bdev, bytenr / 4096, + BTRFS_SUPER_INFO_SIZE); if (!bh) continue; -- cgit v1.2.3 From c2790a2e2bc824084717fde031a8e0d370fc8650 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 29 Jul 2013 11:20:47 -0400 Subject: Btrfs: cleanup arguments to extent_clear_unlock_delalloc This patch removes the io_tree argument for extent_clear_unlock_delalloc since we always use &BTRFS_I(inode)->io_tree, and it separates out the extent tree operations from the page operations. This way we just pass in the extent bits we want to clear and then pass in the operations we want done to the pages. This is because I'm going to fix what extent bits we clear in some cases and rather than add a bunch of new flags we'll just use the actual extent bits we want to clear. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 32 ++++------ fs/btrfs/extent_io.h | 21 +++---- fs/btrfs/inode.c | 162 +++++++++++++++++++++------------------------------ 3 files changed, 85 insertions(+), 130 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index daf180dafe39..c137f98fbd24 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1678,31 +1678,21 @@ out_failed: return found; } -int extent_clear_unlock_delalloc(struct inode *inode, - struct extent_io_tree *tree, - u64 start, u64 end, struct page *locked_page, - unsigned long op) +int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, + struct page *locked_page, + unsigned long clear_bits, + unsigned long page_ops) { + struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; int ret; struct page *pages[16]; unsigned long index = start >> PAGE_CACHE_SHIFT; unsigned long end_index = end >> PAGE_CACHE_SHIFT; unsigned long nr_pages = end_index - index + 1; int i; - unsigned long clear_bits = 0; - - if (op & EXTENT_CLEAR_UNLOCK) - clear_bits |= EXTENT_LOCKED; - if (op & EXTENT_CLEAR_DIRTY) - clear_bits |= EXTENT_DIRTY; - - if (op & EXTENT_CLEAR_DELALLOC) - clear_bits |= EXTENT_DELALLOC; clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); - if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | - EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK | - EXTENT_SET_PRIVATE2))) + if (page_ops == 0) return 0; while (nr_pages > 0) { @@ -1711,20 +1701,20 @@ int extent_clear_unlock_delalloc(struct inode *inode, nr_pages, ARRAY_SIZE(pages)), pages); for (i = 0; i < ret; i++) { - if (op & EXTENT_SET_PRIVATE2) + if (page_ops & PAGE_SET_PRIVATE2) SetPagePrivate2(pages[i]); if (pages[i] == locked_page) { page_cache_release(pages[i]); continue; } - if (op & EXTENT_CLEAR_DIRTY) + if (page_ops & PAGE_CLEAR_DIRTY) clear_page_dirty_for_io(pages[i]); - if (op & EXTENT_SET_WRITEBACK) + if (page_ops & PAGE_SET_WRITEBACK) set_page_writeback(pages[i]); - if (op & EXTENT_END_WRITEBACK) + if (page_ops & PAGE_END_WRITEBACK) end_page_writeback(pages[i]); - if (op & EXTENT_CLEAR_UNLOCK_PAGE) + if (page_ops & PAGE_UNLOCK) unlock_page(pages[i]); page_cache_release(pages[i]); } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index f7544afefdb9..c450620a333f 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -44,14 +44,11 @@ #define EXTENT_BUFFER_DUMMY 9 /* these are flags for extent_clear_unlock_delalloc */ -#define EXTENT_CLEAR_UNLOCK_PAGE 0x1 -#define EXTENT_CLEAR_UNLOCK 0x2 -#define EXTENT_CLEAR_DELALLOC 0x4 -#define EXTENT_CLEAR_DIRTY 0x8 -#define EXTENT_SET_WRITEBACK 0x10 -#define EXTENT_END_WRITEBACK 0x20 -#define EXTENT_SET_PRIVATE2 0x40 -#define EXTENT_CLEAR_ACCOUNTING 0x80 +#define PAGE_UNLOCK (1 << 0) +#define PAGE_CLEAR_DIRTY (1 << 1) +#define PAGE_SET_WRITEBACK (1 << 2) +#define PAGE_END_WRITEBACK (1 << 3) +#define PAGE_SET_PRIVATE2 (1 << 4) /* * page->private values. Every page that is controlled by the extent @@ -328,10 +325,10 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, unsigned long *map_len); int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); -int extent_clear_unlock_delalloc(struct inode *inode, - struct extent_io_tree *tree, - u64 start, u64 end, struct page *locked_page, - unsigned long op); +int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, + struct page *locked_page, + unsigned long bits_to_clear, + unsigned long page_ops); struct bio * btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, gfp_t gfp_flags); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b47330c8e02c..d6c915d82b4a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -489,13 +489,13 @@ cont: * we don't need to create any more async work items. * Unlock and free up our temp pages. */ - extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - start, end, NULL, - EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | - EXTENT_CLEAR_DELALLOC | - EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); - + extent_clear_unlock_delalloc(inode, start, end, NULL, + EXTENT_DIRTY | + EXTENT_DELALLOC, + PAGE_UNLOCK | + PAGE_CLEAR_DIRTY | + PAGE_SET_WRITEBACK | + PAGE_END_WRITEBACK); btrfs_end_transaction(trans, root); goto free_pages_out; } @@ -592,13 +592,10 @@ free_pages_out: goto out; cleanup_and_out: - extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, - start, end, NULL, - EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_DIRTY | - EXTENT_CLEAR_DELALLOC | - EXTENT_SET_WRITEBACK | - EXTENT_END_WRITEBACK); + extent_clear_unlock_delalloc(inode, start, end, NULL, + EXTENT_DIRTY | EXTENT_DELALLOC, + PAGE_UNLOCK | PAGE_CLEAR_DIRTY | + PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK); if (!trans || IS_ERR(trans)) btrfs_error(root->fs_info, ret, "Failed to join transaction"); else @@ -770,16 +767,12 @@ retry: /* * clear dirty, set writeback and unlock the pages. */ - extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - async_extent->start, + extent_clear_unlock_delalloc(inode, async_extent->start, async_extent->start + async_extent->ram_size - 1, - NULL, EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | - EXTENT_CLEAR_DELALLOC | - EXTENT_CLEAR_DIRTY | EXTENT_SET_WRITEBACK); - + NULL, EXTENT_LOCKED | EXTENT_DELALLOC | + EXTENT_DIRTY, PAGE_UNLOCK | PAGE_CLEAR_DIRTY | + PAGE_SET_WRITEBACK); ret = btrfs_submit_compressed_write(inode, async_extent->start, async_extent->ram_size, @@ -798,16 +791,13 @@ out: out_free_reserve: btrfs_free_reserved_extent(root, ins.objectid, ins.offset); out_free: - extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, - async_extent->start, + extent_clear_unlock_delalloc(inode, async_extent->start, async_extent->start + async_extent->ram_size - 1, - NULL, EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | - EXTENT_CLEAR_DELALLOC | - EXTENT_CLEAR_DIRTY | - EXTENT_SET_WRITEBACK | - EXTENT_END_WRITEBACK); + NULL, EXTENT_LOCKED | EXTENT_DELALLOC | + EXTENT_DIRTY, PAGE_UNLOCK | + PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | + PAGE_END_WRITEBACK); kfree(async_extent); goto again; } @@ -892,15 +882,11 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans, ret = cow_file_range_inline(trans, root, inode, start, end, 0, 0, NULL); if (ret == 0) { - extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - start, end, NULL, - EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | - EXTENT_CLEAR_DELALLOC | - EXTENT_CLEAR_DIRTY | - EXTENT_SET_WRITEBACK | - EXTENT_END_WRITEBACK); + extent_clear_unlock_delalloc(inode, start, end, NULL, + EXTENT_LOCKED | EXTENT_DELALLOC | + EXTENT_DIRTY, PAGE_UNLOCK | + PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | + PAGE_END_WRITEBACK); *nr_written = *nr_written + (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; @@ -990,13 +976,13 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans, * Do set the Private2 bit so we know this page was properly * setup for writepage */ - op = unlock ? EXTENT_CLEAR_UNLOCK_PAGE : 0; - op |= EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | - EXTENT_SET_PRIVATE2; + op = unlock ? PAGE_UNLOCK : 0; + op |= PAGE_SET_PRIVATE2; - extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, - start, start + ram_size - 1, - locked_page, op); + extent_clear_unlock_delalloc(inode, start, + start + ram_size - 1, locked_page, + EXTENT_LOCKED | EXTENT_DELALLOC, + op); disk_num_bytes -= cur_alloc_size; num_bytes -= cur_alloc_size; alloc_hint = ins.objectid + ins.offset; @@ -1008,16 +994,11 @@ out: out_reserve: btrfs_free_reserved_extent(root, ins.objectid, ins.offset); out_unlock: - extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - start, end, locked_page, - EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | - EXTENT_CLEAR_DELALLOC | - EXTENT_CLEAR_DIRTY | - EXTENT_SET_WRITEBACK | - EXTENT_END_WRITEBACK); - + extent_clear_unlock_delalloc(inode, start, end, locked_page, + EXTENT_LOCKED | EXTENT_DIRTY | + EXTENT_DELALLOC, PAGE_UNLOCK | + PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | + PAGE_END_WRITEBACK); goto out; } @@ -1033,15 +1014,12 @@ static noinline int cow_file_range(struct inode *inode, trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { - extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - start, end, locked_page, - EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | - EXTENT_CLEAR_DELALLOC | - EXTENT_CLEAR_DIRTY | - EXTENT_SET_WRITEBACK | - EXTENT_END_WRITEBACK); + extent_clear_unlock_delalloc(inode, start, end, locked_page, + EXTENT_LOCKED | EXTENT_DELALLOC | + EXTENT_DIRTY, PAGE_UNLOCK | + PAGE_CLEAR_DIRTY | + PAGE_SET_WRITEBACK | + PAGE_END_WRITEBACK); return PTR_ERR(trans); } trans->block_rsv = &root->fs_info->delalloc_block_rsv; @@ -1221,15 +1199,12 @@ static noinline int run_delalloc_nocow(struct inode *inode, path = btrfs_alloc_path(); if (!path) { - extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - start, end, locked_page, - EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | - EXTENT_CLEAR_DELALLOC | - EXTENT_CLEAR_DIRTY | - EXTENT_SET_WRITEBACK | - EXTENT_END_WRITEBACK); + extent_clear_unlock_delalloc(inode, start, end, locked_page, + EXTENT_LOCKED | EXTENT_DELALLOC | + EXTENT_DIRTY, PAGE_UNLOCK | + PAGE_CLEAR_DIRTY | + PAGE_SET_WRITEBACK | + PAGE_END_WRITEBACK); return -ENOMEM; } @@ -1241,15 +1216,12 @@ static noinline int run_delalloc_nocow(struct inode *inode, trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { - extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - start, end, locked_page, - EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | - EXTENT_CLEAR_DELALLOC | - EXTENT_CLEAR_DIRTY | - EXTENT_SET_WRITEBACK | - EXTENT_END_WRITEBACK); + extent_clear_unlock_delalloc(inode, start, end, locked_page, + EXTENT_LOCKED | EXTENT_DELALLOC | + EXTENT_DIRTY, PAGE_UNLOCK | + PAGE_CLEAR_DIRTY | + PAGE_SET_WRITEBACK | + PAGE_END_WRITEBACK); btrfs_free_path(path); return PTR_ERR(trans); } @@ -1428,11 +1400,11 @@ out_check: } } - extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, - cur_offset, cur_offset + num_bytes - 1, - locked_page, EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | - EXTENT_SET_PRIVATE2); + extent_clear_unlock_delalloc(inode, cur_offset, + cur_offset + num_bytes - 1, + locked_page, EXTENT_LOCKED | + EXTENT_DELALLOC, PAGE_UNLOCK | + PAGE_SET_PRIVATE2); cur_offset = extent_end; if (cur_offset > end) break; @@ -1460,16 +1432,12 @@ error: ret = err; if (ret && cur_offset < end) - extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - cur_offset, end, locked_page, - EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | - EXTENT_CLEAR_DELALLOC | - EXTENT_CLEAR_DIRTY | - EXTENT_SET_WRITEBACK | - EXTENT_END_WRITEBACK); - + extent_clear_unlock_delalloc(inode, cur_offset, end, + locked_page, EXTENT_LOCKED | + EXTENT_DELALLOC | EXTENT_DIRTY, + PAGE_UNLOCK | PAGE_CLEAR_DIRTY | + PAGE_SET_WRITEBACK | + PAGE_END_WRITEBACK); btrfs_free_path(path); return ret; } -- cgit v1.2.3 From 151a41bc46df2a9cb5e05c3b3265cedc1f65a86f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 29 Jul 2013 13:22:24 -0400 Subject: Btrfs: fix what bits we clear when erroring out from delalloc First of all we no longer set EXTENT_DIRTY when we dirty an extent so this patch removes the clearing of EXTENT_DIRTY since it confuses me. This patch also adds clearing EXTENT_DEFRAG and also doing EXTENT_DO_ACCOUNTING when we have errors. This is because if we are clearing delalloc without adding an ordered extent then we need to make sure the enospc handling stuff is accounted for. Also if this range was DEFRAG we need to make sure that bit is cleared so we dont leak it. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 49 ++++++++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d6c915d82b4a..27fa81977f86 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -484,15 +484,17 @@ cont: compress_type, pages); } if (ret <= 0) { + unsigned long clear_flags = EXTENT_DELALLOC | + EXTENT_DEFRAG; + clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0; + /* * inline extent creation worked or returned error, * we don't need to create any more async work items. * Unlock and free up our temp pages. */ extent_clear_unlock_delalloc(inode, start, end, NULL, - EXTENT_DIRTY | - EXTENT_DELALLOC, - PAGE_UNLOCK | + clear_flags, PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK); @@ -593,9 +595,10 @@ free_pages_out: cleanup_and_out: extent_clear_unlock_delalloc(inode, start, end, NULL, - EXTENT_DIRTY | EXTENT_DELALLOC, - PAGE_UNLOCK | PAGE_CLEAR_DIRTY | - PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK); + EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | + EXTENT_DEFRAG, PAGE_UNLOCK | + PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | + PAGE_END_WRITEBACK); if (!trans || IS_ERR(trans)) btrfs_error(root->fs_info, ret, "Failed to join transaction"); else @@ -770,8 +773,8 @@ retry: extent_clear_unlock_delalloc(inode, async_extent->start, async_extent->start + async_extent->ram_size - 1, - NULL, EXTENT_LOCKED | EXTENT_DELALLOC | - EXTENT_DIRTY, PAGE_UNLOCK | PAGE_CLEAR_DIRTY | + NULL, EXTENT_LOCKED | EXTENT_DELALLOC, + PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK); ret = btrfs_submit_compressed_write(inode, async_extent->start, @@ -795,9 +798,9 @@ out_free: async_extent->start + async_extent->ram_size - 1, NULL, EXTENT_LOCKED | EXTENT_DELALLOC | - EXTENT_DIRTY, PAGE_UNLOCK | - PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | - PAGE_END_WRITEBACK); + EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING, + PAGE_UNLOCK | PAGE_CLEAR_DIRTY | + PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK); kfree(async_extent); goto again; } @@ -884,7 +887,7 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans, if (ret == 0) { extent_clear_unlock_delalloc(inode, start, end, NULL, EXTENT_LOCKED | EXTENT_DELALLOC | - EXTENT_DIRTY, PAGE_UNLOCK | + EXTENT_DEFRAG, PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK); @@ -995,10 +998,10 @@ out_reserve: btrfs_free_reserved_extent(root, ins.objectid, ins.offset); out_unlock: extent_clear_unlock_delalloc(inode, start, end, locked_page, - EXTENT_LOCKED | EXTENT_DIRTY | - EXTENT_DELALLOC, PAGE_UNLOCK | - PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | - PAGE_END_WRITEBACK); + EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | + EXTENT_DELALLOC | EXTENT_DEFRAG, + PAGE_UNLOCK | PAGE_CLEAR_DIRTY | + PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK); goto out; } @@ -1016,7 +1019,8 @@ static noinline int cow_file_range(struct inode *inode, if (IS_ERR(trans)) { extent_clear_unlock_delalloc(inode, start, end, locked_page, EXTENT_LOCKED | EXTENT_DELALLOC | - EXTENT_DIRTY, PAGE_UNLOCK | + EXTENT_DO_ACCOUNTING | + EXTENT_DEFRAG, PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK); @@ -1201,7 +1205,8 @@ static noinline int run_delalloc_nocow(struct inode *inode, if (!path) { extent_clear_unlock_delalloc(inode, start, end, locked_page, EXTENT_LOCKED | EXTENT_DELALLOC | - EXTENT_DIRTY, PAGE_UNLOCK | + EXTENT_DO_ACCOUNTING | + EXTENT_DEFRAG, PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK); @@ -1218,7 +1223,8 @@ static noinline int run_delalloc_nocow(struct inode *inode, if (IS_ERR(trans)) { extent_clear_unlock_delalloc(inode, start, end, locked_page, EXTENT_LOCKED | EXTENT_DELALLOC | - EXTENT_DIRTY, PAGE_UNLOCK | + EXTENT_DO_ACCOUNTING | + EXTENT_DEFRAG, PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK); @@ -1434,8 +1440,9 @@ error: if (ret && cur_offset < end) extent_clear_unlock_delalloc(inode, cur_offset, end, locked_page, EXTENT_LOCKED | - EXTENT_DELALLOC | EXTENT_DIRTY, - PAGE_UNLOCK | PAGE_CLEAR_DIRTY | + EXTENT_DELALLOC | EXTENT_DEFRAG | + EXTENT_DO_ACCOUNTING, PAGE_UNLOCK | + PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK); btrfs_free_path(path); -- cgit v1.2.3 From d062d13cf1a100d5632306a28f9a9f54aced1280 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 30 Jul 2013 15:44:09 -0400 Subject: Btrfs: check to see if we have an inline item properly If our item isn't big enough to have an actual inline item when we have skinny metadata enabled just return 1 in find_inline_backref so we can move on to the next item. This probably wasn't causing a problem since we check the values of ptr and end properly, but just in case this will keep us from doing extra work. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/relocation.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 5a23d875ab40..8168fb351adb 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -641,6 +641,11 @@ int find_inline_backref(struct extent_buffer *leaf, int slot, WARN_ON(item_size < sizeof(*ei) + sizeof(*bi)); return 1; } + if (key.type == BTRFS_METADATA_ITEM_KEY && + item_size <= sizeof(*ei)) { + WARN_ON(item_size < sizeof(*ei)); + return 1; + } if (key.type == BTRFS_EXTENT_ITEM_KEY) { bi = (struct btrfs_tree_block_info *)(ei + 1); -- cgit v1.2.3 From b6c60c8018c4e9beb2f83fc82c09f9d033766571 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 30 Jul 2013 16:30:30 -0400 Subject: Btrfs: change how we queue blocks for backref checking Previously we only added blocks to the list to have their backrefs checked if the level of the block is right above the one we are searching for. This is because we want to make sure we don't add the entire path up to the root to the lists to make sure we process things one at a time. This assumes that if any blocks in the path to the root are going to be not checked (shared in other words) then they will be in the level right above the current block on up. This isn't quite right though since we can have blocks higher up the list that are shared because they are attached to a reloc root. But we won't add this block to be checked and then later on we will BUG_ON(!upper->checked). So instead keep track of wether or not we've queued a block to be checked in this current search, and if we haven't go ahead and queue it to be checked. This patch fixed the panic I was seeing where we BUG_ON(!upper->checked). Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/relocation.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 8168fb351adb..2ec59c4c51b7 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -696,6 +696,7 @@ struct backref_node *build_backref_tree(struct reloc_control *rc, int cowonly; int ret; int err = 0; + bool need_check = true; path1 = btrfs_alloc_path(); path2 = btrfs_alloc_path(); @@ -919,6 +920,7 @@ again: cur->bytenr); lower = cur; + need_check = true; for (; level < BTRFS_MAX_LEVEL; level++) { if (!path2->nodes[level]) { BUG_ON(btrfs_root_bytenr(&root->root_item) != @@ -962,14 +964,12 @@ again: /* * add the block to pending list if we - * need check its backrefs. only block - * at 'cur->level + 1' is added to the - * tail of pending list. this guarantees - * we check backrefs from lower level - * blocks to upper level blocks. + * need check its backrefs, we only do this once + * while walking up a tree as we will catch + * anything else later on. */ - if (!upper->checked && - level == cur->level + 1) { + if (!upper->checked && need_check) { + need_check = false; list_add_tail(&edge->list[UPPER], &list); } else -- cgit v1.2.3 From 6596a9281995a3c7dee8ca6666bd169fffc928e1 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 31 Jul 2013 10:28:05 -0400 Subject: Btrfs: don't bug_on when we fail when cleaning up transactions There is no reason for this sort of jackassery. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/transaction.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 18f7e71d1593..ea8d52212d75 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1942,6 +1942,5 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root) * If we encounter a transaction abort during snapshot cleaning, we * don't want to crash here */ - BUG_ON(ret < 0 && ret != -EAGAIN && ret != -EROFS); - return 1; + return (ret < 0) ? 0 : 1; } -- cgit v1.2.3 From 395927a9d8e64af518c2ccdbfbbdf9184b558315 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Tue, 30 Jul 2013 12:03:04 +0100 Subject: Btrfs: optimize function btrfs_read_chunk_tree After reading all device items from the chunk tree, don't exit the loop and then navigate down the tree again to find the chunk items. Instead just read all device items and chunk items with a single tree search. This is possible because all device items are found before any chunk item in the chunks tree. Signed-off-by: Filipe David Borba Manana Reviewed-by: Miao Xie Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 557a7438f929..93f39b1e7173 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5646,14 +5646,15 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) mutex_lock(&uuid_mutex); lock_chunks(root); - /* first we search for all of the device items, and then we - * read in all of the chunk items. This way we can create chunk - * mappings that reference all of the devices that are afound + /* + * Read all device items, and then all the chunk items. All + * device items are found before any chunk item (their object id + * is smaller than the lowest possible object id for a chunk + * item - BTRFS_FIRST_CHUNK_TREE_OBJECTID). */ key.objectid = BTRFS_DEV_ITEMS_OBJECTID; key.offset = 0; key.type = 0; -again: ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto error; @@ -5669,17 +5670,13 @@ again: break; } btrfs_item_key_to_cpu(leaf, &found_key, slot); - if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) { - if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID) - break; - if (found_key.type == BTRFS_DEV_ITEM_KEY) { - struct btrfs_dev_item *dev_item; - dev_item = btrfs_item_ptr(leaf, slot, + if (found_key.type == BTRFS_DEV_ITEM_KEY) { + struct btrfs_dev_item *dev_item; + dev_item = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item); - ret = read_one_dev(root, leaf, dev_item); - if (ret) - goto error; - } + ret = read_one_dev(root, leaf, dev_item); + if (ret) + goto error; } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) { struct btrfs_chunk *chunk; chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); @@ -5689,11 +5686,6 @@ again: } path->slots[0]++; } - if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) { - key.objectid = 0; - btrfs_release_path(path); - goto again; - } ret = 0; error: unlock_chunks(root); -- cgit v1.2.3 From f5929cd814ca0a7012bf31d566c8bbd0cd469322 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Wed, 31 Jul 2013 00:26:35 +0100 Subject: Btrfs: add missing error check to find_parent_nodes Signed-off-by: Filipe David Borba Manana Reviewed-by: Jan Schmidt Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 8bc5e8ccb091..980e85a264c0 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -935,8 +935,10 @@ again: } ret = find_extent_in_eb(eb, bytenr, *extent_item_pos, &eie); - ref->inode_list = eie; free_extent_buffer(eb); + if (ret < 0) + goto out; + ref->inode_list = eie; } ret = ulist_add_merge(refs, ref->parent, (uintptr_t)ref->inode_list, -- cgit v1.2.3 From eb2067f713ff1c6d867020e85f95fba7d000ed1a Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 30 Jul 2013 13:42:17 -0400 Subject: Fix leak in __btrfs_map_block error path If we bail out when the stripe alloc fails, we need to undo the earlier allocation of raid_map. Signed-off-by: Dave Jones Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 93f39b1e7173..44abd151132a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4673,6 +4673,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, } bbio = kzalloc(btrfs_bio_size(num_alloc_stripes), GFP_NOFS); if (!bbio) { + kfree(raid_map); ret = -ENOMEM; goto out; } -- cgit v1.2.3 From 0f0fe8f710f29dbd4b2c915fc1c36962e4957b3b Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Wed, 31 Jul 2013 00:39:56 +0100 Subject: Btrfs: add missing error handling to read_tree_block Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 13564354c927..2a378abb03fb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1149,6 +1149,10 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, return NULL; ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); + if (ret) { + free_extent_buffer(buf); + return NULL; + } return buf; } -- cgit v1.2.3 From 36cce922875563a1e2a4b6a53fbe1147f652a51e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 5 Aug 2013 11:15:21 -0400 Subject: Btrfs: handle errors when doing slow caching Alex Lyakas reported a bug where wait_block_group_cache_progress() would wait forever if a drive failed. This is because we just bail out if there is an error while trying to cache a block group, we don't update anybody who may be waiting. So this introduces a new enum for the cache state in case of error and makes everybody bail out if we have an error. Alex tested and verified this patch fixed his problem. This fixes bz 59431. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/extent-tree.c | 32 +++++++++++++++++++++++--------- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index cbb1263752f8..c17acbce5bc3 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1188,6 +1188,7 @@ enum btrfs_caching_type { BTRFS_CACHE_STARTED = 1, BTRFS_CACHE_FAST = 2, BTRFS_CACHE_FINISHED = 3, + BTRFS_CACHE_ERROR = 4, }; enum btrfs_disk_cache_state { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e868c35f760c..a073f3ece43a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -113,7 +113,8 @@ static noinline int block_group_cache_done(struct btrfs_block_group_cache *cache) { smp_mb(); - return cache->cached == BTRFS_CACHE_FINISHED; + return cache->cached == BTRFS_CACHE_FINISHED || + cache->cached == BTRFS_CACHE_ERROR; } static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) @@ -389,7 +390,7 @@ static noinline void caching_thread(struct btrfs_work *work) u64 total_found = 0; u64 last = 0; u32 nritems; - int ret = 0; + int ret = -ENOMEM; caching_ctl = container_of(work, struct btrfs_caching_control, work); block_group = caching_ctl->block_group; @@ -517,6 +518,12 @@ err: mutex_unlock(&caching_ctl->mutex); out: + if (ret) { + spin_lock(&block_group->lock); + block_group->caching_ctl = NULL; + block_group->cached = BTRFS_CACHE_ERROR; + spin_unlock(&block_group->lock); + } wake_up(&caching_ctl->wait); put_caching_control(caching_ctl); @@ -6035,8 +6042,11 @@ static u64 stripe_align(struct btrfs_root *root, * for our min num_bytes. Another option is to have it go ahead * and look in the rbtree for a free extent of a given size, but this * is a good start. + * + * Callers of this must check if cache->cached == BTRFS_CACHE_ERROR before using + * any of the information in this block group. */ -static noinline int +static noinline void wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, u64 num_bytes) { @@ -6044,28 +6054,29 @@ wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, caching_ctl = get_caching_control(cache); if (!caching_ctl) - return 0; + return; wait_event(caching_ctl->wait, block_group_cache_done(cache) || (cache->free_space_ctl->free_space >= num_bytes)); put_caching_control(caching_ctl); - return 0; } static noinline int wait_block_group_cache_done(struct btrfs_block_group_cache *cache) { struct btrfs_caching_control *caching_ctl; + int ret = 0; caching_ctl = get_caching_control(cache); if (!caching_ctl) - return 0; + return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0; wait_event(caching_ctl->wait, block_group_cache_done(cache)); - + if (cache->cached == BTRFS_CACHE_ERROR) + ret = -EIO; put_caching_control(caching_ctl); - return 0; + return ret; } int __get_raid_index(u64 flags) @@ -6248,6 +6259,8 @@ have_block_group: ret = 0; } + if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) + goto loop; if (unlikely(block_group->ro)) goto loop; @@ -8230,7 +8243,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) * We haven't cached this block group, which means we could * possibly have excluded extents on this block group. */ - if (block_group->cached == BTRFS_CACHE_NO) + if (block_group->cached == BTRFS_CACHE_NO || + block_group->cached == BTRFS_CACHE_ERROR) free_excluded_extents(info->extent_root, block_group); btrfs_remove_free_space_cache(block_group); -- cgit v1.2.3 From ebdad913aa9c86a63d3be28b4610e143204c6f3c Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 6 Aug 2013 16:47:48 -0400 Subject: Btrfs: check our parent dir when doing a compare send When doing a send with a parent subvol we will check to see if the file we are acting on is being overwritten and move it if we think it may be needed further down the line during the send. We check this by checking its directory and making sure it existed in the parent and making sure the file existed in the parent. The problem with this check is that if we create a directory and a file in that directory, and then snapshot, and then remove and re-create that same directory and file with different inode numbers and then try to snapshot and send with the original parent we will try and save the original file inside of that directory. This is a problem because during the receive we move the directory out of the way because it is a completely new inode, which makes us unable to find the old file inside of the directory when we try to move that out of the way for the overwrite. We fix this by checking the parent directory of the inode we think we are overwriting. If the parent directory generation in the send root != the parent directory generation in the parent root then we know it is a completely new directory and we need not bother with moving the file out of the way because it would have been completely destroyed. This fixes bz 60673. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/send.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index d3f3b43cae0b..0efc2e2f253c 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1668,6 +1668,7 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen, u64 *who_ino, u64 *who_gen) { int ret = 0; + u64 gen; u64 other_inode = 0; u8 other_type = 0; @@ -1678,6 +1679,24 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen, if (ret <= 0) goto out; + /* + * If we have a parent root we need to verify that the parent dir was + * not delted and then re-created, if it was then we have no overwrite + * and we can just unlink this entry. + */ + if (sctx->parent_root) { + ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, + NULL, NULL, NULL); + if (ret < 0 && ret != -ENOENT) + goto out; + if (ret) { + ret = 0; + goto out; + } + if (gen != dir_gen) + goto out; + } + ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len, &other_inode, &other_type); if (ret < 0 && ret != -ENOENT) -- cgit v1.2.3 From db7f3436c1c186f8271018751fcb338cf3706e8d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 7 Aug 2013 14:54:37 -0400 Subject: Btrfs: deal with enomem in the rewind path We can get ENOMEM trying to allocate dummy bufs for the rewind operation of the tree mod log. Instead of BUG_ON()'ing in this case pass up ENOMEM. I looked back through the callers and I'm pretty sure I got everybody who did BUG_ON(ret) in this path. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 16 +++++- fs/btrfs/extent_io.c | 145 ++++++++++++++++++++++++++------------------------- 2 files changed, 88 insertions(+), 73 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 0d5c686f2b98..1dd8a71f567d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1211,7 +1211,11 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, BUG_ON(tm->slot != 0); eb_rewin = alloc_dummy_extent_buffer(eb->start, fs_info->tree_root->nodesize); - BUG_ON(!eb_rewin); + if (!eb_rewin) { + btrfs_tree_read_unlock(eb); + free_extent_buffer(eb); + return NULL; + } btrfs_set_header_bytenr(eb_rewin, eb->start); btrfs_set_header_backref_rev(eb_rewin, btrfs_header_backref_rev(eb)); @@ -1219,7 +1223,11 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, btrfs_set_header_level(eb_rewin, btrfs_header_level(eb)); } else { eb_rewin = btrfs_clone_extent_buffer(eb); - BUG_ON(!eb_rewin); + if (!eb_rewin) { + btrfs_tree_read_unlock(eb); + free_extent_buffer(eb); + return NULL; + } } btrfs_tree_read_unlock(eb); @@ -2772,6 +2780,10 @@ again: BTRFS_READ_LOCK); } b = tree_mod_log_rewind(root->fs_info, b, time_seq); + if (!b) { + ret = -ENOMEM; + goto done; + } p->locks[level] = BTRFS_READ_LOCK; p->nodes[level] = b; } else { diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c137f98fbd24..ef25c7d048e2 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4222,6 +4222,76 @@ static void __free_extent_buffer(struct extent_buffer *eb) kmem_cache_free(extent_buffer_cache, eb); } +static int extent_buffer_under_io(struct extent_buffer *eb) +{ + return (atomic_read(&eb->io_pages) || + test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) || + test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); +} + +/* + * Helper for releasing extent buffer page. + */ +static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, + unsigned long start_idx) +{ + unsigned long index; + unsigned long num_pages; + struct page *page; + int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); + + BUG_ON(extent_buffer_under_io(eb)); + + num_pages = num_extent_pages(eb->start, eb->len); + index = start_idx + num_pages; + if (start_idx >= index) + return; + + do { + index--; + page = extent_buffer_page(eb, index); + if (page && mapped) { + spin_lock(&page->mapping->private_lock); + /* + * We do this since we'll remove the pages after we've + * removed the eb from the radix tree, so we could race + * and have this page now attached to the new eb. So + * only clear page_private if it's still connected to + * this eb. + */ + if (PagePrivate(page) && + page->private == (unsigned long)eb) { + BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); + BUG_ON(PageDirty(page)); + BUG_ON(PageWriteback(page)); + /* + * We need to make sure we haven't be attached + * to a new eb. + */ + ClearPagePrivate(page); + set_page_private(page, 0); + /* One for the page private */ + page_cache_release(page); + } + spin_unlock(&page->mapping->private_lock); + + } + if (page) { + /* One for when we alloced the page */ + page_cache_release(page); + } + } while (index != start_idx); +} + +/* + * Helper for releasing the extent buffer. + */ +static inline void btrfs_release_extent_buffer(struct extent_buffer *eb) +{ + btrfs_release_extent_buffer_page(eb, 0); + __free_extent_buffer(eb); +} + static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, u64 start, unsigned long len, @@ -4276,7 +4346,10 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src) for (i = 0; i < num_pages; i++) { p = alloc_page(GFP_ATOMIC); - BUG_ON(!p); + if (!p) { + btrfs_release_extent_buffer(new); + return NULL; + } attach_extent_buffer_page(new, p); WARN_ON(PageDirty(p)); SetPageUptodate(p); @@ -4317,76 +4390,6 @@ err: return NULL; } -static int extent_buffer_under_io(struct extent_buffer *eb) -{ - return (atomic_read(&eb->io_pages) || - test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) || - test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); -} - -/* - * Helper for releasing extent buffer page. - */ -static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, - unsigned long start_idx) -{ - unsigned long index; - unsigned long num_pages; - struct page *page; - int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); - - BUG_ON(extent_buffer_under_io(eb)); - - num_pages = num_extent_pages(eb->start, eb->len); - index = start_idx + num_pages; - if (start_idx >= index) - return; - - do { - index--; - page = extent_buffer_page(eb, index); - if (page && mapped) { - spin_lock(&page->mapping->private_lock); - /* - * We do this since we'll remove the pages after we've - * removed the eb from the radix tree, so we could race - * and have this page now attached to the new eb. So - * only clear page_private if it's still connected to - * this eb. - */ - if (PagePrivate(page) && - page->private == (unsigned long)eb) { - BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); - BUG_ON(PageDirty(page)); - BUG_ON(PageWriteback(page)); - /* - * We need to make sure we haven't be attached - * to a new eb. - */ - ClearPagePrivate(page); - set_page_private(page, 0); - /* One for the page private */ - page_cache_release(page); - } - spin_unlock(&page->mapping->private_lock); - - } - if (page) { - /* One for when we alloced the page */ - page_cache_release(page); - } - } while (index != start_idx); -} - -/* - * Helper for releasing the extent buffer. - */ -static inline void btrfs_release_extent_buffer(struct extent_buffer *eb) -{ - btrfs_release_extent_buffer_page(eb, 0); - __free_extent_buffer(eb); -} - static void check_buffer_tree_ref(struct extent_buffer *eb) { int refs; -- cgit v1.2.3 From 9ec726775188906192f78ab9187640afd81ab996 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 7 Aug 2013 16:57:23 -0400 Subject: Btrfs: stop using GFP_ATOMIC when allocating rewind ebs There is no reason we can't just set the path to blocking and then do normal GFP_NOFS allocations for these extent buffers. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 19 ++++++++++++------- fs/btrfs/extent_io.c | 8 ++++---- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1dd8a71f567d..5f7a97556583 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1191,8 +1191,8 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, * is freed (its refcount is decremented). */ static struct extent_buffer * -tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, - u64 time_seq) +tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path, + struct extent_buffer *eb, u64 time_seq) { struct extent_buffer *eb_rewin; struct tree_mod_elem *tm; @@ -1207,12 +1207,15 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, if (!tm) return eb; + btrfs_set_path_blocking(path); + btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); + if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) { BUG_ON(tm->slot != 0); eb_rewin = alloc_dummy_extent_buffer(eb->start, fs_info->tree_root->nodesize); if (!eb_rewin) { - btrfs_tree_read_unlock(eb); + btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); return NULL; } @@ -1224,13 +1227,14 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, } else { eb_rewin = btrfs_clone_extent_buffer(eb); if (!eb_rewin) { - btrfs_tree_read_unlock(eb); + btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); return NULL; } } - btrfs_tree_read_unlock(eb); + btrfs_clear_path_blocking(path, NULL, BTRFS_READ_LOCK); + btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); extent_buffer_get(eb_rewin); @@ -1294,8 +1298,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq) free_extent_buffer(eb_root); eb = alloc_dummy_extent_buffer(logical, root->nodesize); } else { + btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK); eb = btrfs_clone_extent_buffer(eb_root); - btrfs_tree_read_unlock(eb_root); + btrfs_tree_read_unlock_blocking(eb_root); free_extent_buffer(eb_root); } @@ -2779,7 +2784,7 @@ again: btrfs_clear_path_blocking(p, b, BTRFS_READ_LOCK); } - b = tree_mod_log_rewind(root->fs_info, b, time_seq); + b = tree_mod_log_rewind(root->fs_info, p, b, time_seq); if (!b) { ret = -ENOMEM; goto done; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index ef25c7d048e2..a8d662b31830 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4340,12 +4340,12 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src) struct extent_buffer *new; unsigned long num_pages = num_extent_pages(src->start, src->len); - new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_ATOMIC); + new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_NOFS); if (new == NULL) return NULL; for (i = 0; i < num_pages; i++) { - p = alloc_page(GFP_ATOMIC); + p = alloc_page(GFP_NOFS); if (!p) { btrfs_release_extent_buffer(new); return NULL; @@ -4369,12 +4369,12 @@ struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len) unsigned long num_pages = num_extent_pages(0, len); unsigned long i; - eb = __alloc_extent_buffer(NULL, start, len, GFP_ATOMIC); + eb = __alloc_extent_buffer(NULL, start, len, GFP_NOFS); if (!eb) return NULL; for (i = 0; i < num_pages; i++) { - eb->pages[i] = alloc_page(GFP_ATOMIC); + eb->pages[i] = alloc_page(GFP_NOFS); if (!eb->pages[i]) goto err; } -- cgit v1.2.3 From 8b87dc17fbc7443bf4c6c096279c35e89fb51326 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 1 Aug 2013 18:14:52 +0200 Subject: btrfs: add mount option to set commit interval I'ts hardcoded to 30 seconds which is fine for most users. Higher values defer data being synced to permanent storage with obvious consequences when the system crashes. The upper bound is not forced, but a warning is printed if it's more than 300 seconds (5 minutes). Signed-off-by: David Sterba Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 +++ fs/btrfs/disk-io.c | 6 ++++-- fs/btrfs/super.c | 27 +++++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c17acbce5bc3..0632832d4446 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1351,6 +1351,7 @@ struct btrfs_fs_info { u64 last_trans_log_full_commit; unsigned long mount_opt; unsigned long compress_type:4; + int commit_interval; /* * It is a suggestive number, the read side is safe even it gets a * wrong number because we will write out the data into a regular @@ -1969,6 +1970,8 @@ struct btrfs_ioctl_defrag_range_args { #define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21) #define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR (1 << 22) +#define BTRFS_DEFAULT_COMMIT_INTERVAL (30) + #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) #define btrfs_raw_test_opt(o, opt) ((o) & BTRFS_MOUNT_##opt) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2a378abb03fb..572e21eb644c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1742,7 +1742,7 @@ static int transaction_kthread(void *arg) do { cannot_commit = false; - delay = HZ * 30; + delay = HZ * root->fs_info->commit_interval; mutex_lock(&root->fs_info->transaction_kthread_mutex); spin_lock(&root->fs_info->trans_lock); @@ -1754,7 +1754,8 @@ static int transaction_kthread(void *arg) now = get_seconds(); if (cur->state < TRANS_STATE_BLOCKED && - (now < cur->start_time || now - cur->start_time < 30)) { + (now < cur->start_time || + now - cur->start_time < root->fs_info->commit_interval)) { spin_unlock(&root->fs_info->trans_lock); delay = HZ * 5; goto sleep; @@ -2194,6 +2195,7 @@ int open_ctree(struct super_block *sb, fs_info->defrag_inodes = RB_ROOT; fs_info->free_chunk_space = 0; fs_info->tree_mod_log = RB_ROOT; + fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; /* readahead state */ INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2cc5b80eeae9..196790375bb3 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -321,6 +321,7 @@ enum { Opt_no_space_cache, Opt_recovery, Opt_skip_balance, Opt_check_integrity, Opt_check_integrity_including_extent_data, Opt_check_integrity_print_mask, Opt_fatal_errors, + Opt_commit_interval, Opt_err, }; @@ -361,6 +362,7 @@ static match_table_t tokens = { {Opt_check_integrity_including_extent_data, "check_int_data"}, {Opt_check_integrity_print_mask, "check_int_print_mask=%d"}, {Opt_fatal_errors, "fatal_errors=%s"}, + {Opt_commit_interval, "commit=%d"}, {Opt_err, NULL}, }; @@ -645,6 +647,29 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) goto out; } break; + case Opt_commit_interval: + intarg = 0; + ret = match_int(&args[0], &intarg); + if (ret < 0) { + printk(KERN_ERR + "btrfs: invalid commit interval\n"); + ret = -EINVAL; + goto out; + } + if (intarg > 0) { + if (intarg > 300) { + printk(KERN_WARNING + "btrfs: excessive commit interval %d\n", + intarg); + } + info->commit_interval = intarg; + } else { + printk(KERN_INFO + "btrfs: using default commit interval %ds\n", + BTRFS_DEFAULT_COMMIT_INTERVAL); + info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; + } + break; case Opt_err: printk(KERN_INFO "btrfs: unrecognized mount option " "'%s'\n", p); @@ -981,6 +1006,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) info->metadata_ratio); if (btrfs_test_opt(root, PANIC_ON_FATAL_ERROR)) seq_puts(seq, ",fatal_errors=panic"); + if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL) + seq_printf(seq, ",commit=%d", info->commit_interval); return 0; } -- cgit v1.2.3 From 116e0024c453dfdfacc9278f485dabc9c63a4ac4 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 2 Aug 2013 16:30:40 +0800 Subject: Btrfs: allow compressed extents to be merged during defragment The rule originally comes from nocow writing, but snapshot-aware defrag is a different case, the extent has been writen and we're not going to change the extent but add a reference on the data. So we're able to allow such compressed extents to be merged into one bigger extent if they're pointing to the same data. Reviewed-by: Miao Xie Signed-off-by: Liu Bo Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 27fa81977f86..66571dce88d1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2212,16 +2212,18 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path, static int relink_is_mergable(struct extent_buffer *leaf, struct btrfs_file_extent_item *fi, - u64 disk_bytenr) + struct new_sa_defrag_extent *new) { - if (btrfs_file_extent_disk_bytenr(leaf, fi) != disk_bytenr) + if (btrfs_file_extent_disk_bytenr(leaf, fi) != new->bytenr) return 0; if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG) return 0; - if (btrfs_file_extent_compression(leaf, fi) || - btrfs_file_extent_encryption(leaf, fi) || + if (btrfs_file_extent_compression(leaf, fi) != new->compress_type) + return 0; + + if (btrfs_file_extent_encryption(leaf, fi) || btrfs_file_extent_other_encoding(leaf, fi)) return 0; @@ -2365,8 +2367,8 @@ again: struct btrfs_file_extent_item); extent_len = btrfs_file_extent_num_bytes(leaf, fi); - if (relink_is_mergable(leaf, fi, new->bytenr) && - extent_len + found_key.offset == start) { + if (extent_len + found_key.offset == start && + relink_is_mergable(leaf, fi, new)) { btrfs_set_file_extent_num_bytes(leaf, fi, extent_len + len); btrfs_mark_buffer_dirty(leaf); -- cgit v1.2.3 From 2bac325ea8f117636e6601e66a67d2df58974610 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Sun, 4 Aug 2013 19:58:57 +0100 Subject: Btrfs: fix inode leak on kmalloc failure in tree-log.c In tree-log.c:replay_one_name(), if memory allocation for the name fails, ensure we iput the dir inode we got before before we return. Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index ff60d8978ae2..6797a931cf62 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1536,8 +1536,10 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, name_len = btrfs_dir_name_len(eb, di); name = kmalloc(name_len, GFP_NOFS); - if (!name) - return -ENOMEM; + if (!name) { + ret = -ENOMEM; + goto out; + } log_type = btrfs_dir_type(eb, di); read_extent_buffer(eb, name, (unsigned long)(di + 1), -- cgit v1.2.3 From ada9af215cf7ce663ff3bf9d255923d75b57a4ce Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Mon, 5 Aug 2013 09:25:47 +0100 Subject: Btrfs: don't ignore errors from btrfs_run_delayed_items tree-log.c was ignoring the return value from btrfs_run_delayed_items() in several places. Signed-off-by: Filipe David Borba Manana Reviewed-by: Miao Xie Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 6797a931cf62..0d9613c3f5e5 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -747,7 +747,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); if (ret) goto out; - btrfs_run_delayed_items(trans, root); + else + ret = btrfs_run_delayed_items(trans, root); out: kfree(name); iput(inode); @@ -923,7 +924,9 @@ again: kfree(victim_name); if (ret) return ret; - btrfs_run_delayed_items(trans, root); + ret = btrfs_run_delayed_items(trans, root); + if (ret) + return ret; *search_done = 1; goto again; } @@ -990,7 +993,9 @@ again: inode, victim_name, victim_name_len); - btrfs_run_delayed_items(trans, root); + if (!ret) + ret = btrfs_run_delayed_items( + trans, root); } iput(victim_parent); kfree(victim_name); @@ -1812,7 +1817,7 @@ again: ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); if (!ret) - btrfs_run_delayed_items(trans, root); + ret = btrfs_run_delayed_items(trans, root); kfree(name); iput(inode); if (ret) -- cgit v1.2.3 From 09fb99a696412ae0fceeafc06c987903416503b9 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Mon, 5 Aug 2013 16:25:12 +0100 Subject: Btrfs: return ENOSPC when target space is full In extent-tree.c:do_chunk_alloc(), early on we returned 0 (success) when the target space was full and when chunk allocation is needed. However, later on in that same function we return ENOSPC if btrfs_alloc_chunk() fails (and chunk allocation was needed) and set the space's full flag. This was inconsistent, as -ENOSPC should be returned if the space is full and a chunk allocation needs to performed. If the space is full but no chunk allocation is needed, just return 0 (success). Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a073f3ece43a..277d2c26b034 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3836,8 +3836,12 @@ again: if (force < space_info->force_alloc) force = space_info->force_alloc; if (space_info->full) { + if (should_alloc_chunk(extent_root, space_info, force)) + ret = -ENOSPC; + else + ret = 0; spin_unlock(&space_info->lock); - return 0; + return ret; } if (!should_alloc_chunk(extent_root, space_info, force)) { -- cgit v1.2.3 From a4fdb61e81e73991d919ff0396d256e9e67d2475 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Thu, 8 Aug 2013 13:04:17 +0800 Subject: Btrfs: fix possible memory leak in find_parent_nodes() Signed-off-by: Wang Shilong Reviewed-by: Miao Xie Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 980e85a264c0..ae798c1d088b 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -911,7 +911,6 @@ again: while (!list_empty(&prefs)) { ref = list_first_entry(&prefs, struct __prelim_ref, list); - list_del(&ref->list); WARN_ON(ref->count < 0); if (ref->count && ref->root_id && ref->parent == 0) { /* no parent == root of tree */ @@ -956,6 +955,7 @@ again: eie->next = ref->inode_list; } } + list_del(&ref->list); kfree(ref); } -- cgit v1.2.3 From 77fe20dc6291e10c3594124b4505161e701a3cc4 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 6 Aug 2013 11:42:48 -0700 Subject: btrfs: abtract out range locking in clone ioctl() The range locking in btrfs_ioctl_clone is trivially broken out into it's own function. This reduces the complexity of btrfs_ioctl_clone() by a small bit and makes that locking code available to future functions in fs/btrfs/ioctl.c Signed-off-by: Mark Fasheh Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 556b3d5b18da..76c7657a7f53 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2470,6 +2470,26 @@ out: return ret; } +static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) +{ + /* do any pending delalloc/csum calc on src, one way or + another, and lock file content */ + while (1) { + struct btrfs_ordered_extent *ordered; + lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); + ordered = btrfs_lookup_first_ordered_extent(inode, + off + len - 1); + if (!ordered && + !test_range_bit(&BTRFS_I(inode)->io_tree, off, + off + len - 1, EXTENT_DELALLOC, 0, NULL)) + break; + unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); + if (ordered) + btrfs_put_ordered_extent(ordered); + btrfs_wait_ordered_range(inode, off, len); + } +} + static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, u64 off, u64 olen, u64 destoff) { @@ -2598,21 +2618,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, truncate_inode_pages_range(&inode->i_data, destoff, PAGE_CACHE_ALIGN(destoff + len) - 1); - /* do any pending delalloc/csum calc on src, one way or - another, and lock file content */ - while (1) { - struct btrfs_ordered_extent *ordered; - lock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); - ordered = btrfs_lookup_first_ordered_extent(src, off + len - 1); - if (!ordered && - !test_range_bit(&BTRFS_I(src)->io_tree, off, off + len - 1, - EXTENT_DELALLOC, 0, NULL)) - break; - unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); - if (ordered) - btrfs_put_ordered_extent(ordered); - btrfs_wait_ordered_range(src, off, len); - } + lock_extent_range(src, off, len); /* clone data */ key.objectid = btrfs_ino(src); -- cgit v1.2.3 From 32b7c687c52a0b78a7d41ce6423c2fd8f5ca92da Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 6 Aug 2013 11:42:49 -0700 Subject: btrfs_ioctl_clone: Move clone code into it's own function There's some 250+ lines here that are easily encapsulated into their own function. I don't change how anything works here, just create and document the new btrfs_clone() function from btrfs_ioctl_clone() code. Signed-off-by: Mark Fasheh Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 254 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 139 insertions(+), 115 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 76c7657a7f53..5b5148a1b0d3 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2490,136 +2490,43 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) } } -static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, - u64 off, u64 olen, u64 destoff) +/** + * btrfs_clone() - clone a range from inode file to another + * + * @src: Inode to clone from + * @inode: Inode to clone to + * @off: Offset within source to start clone from + * @olen: Original length, passed by user, of range to clone + * @olen_aligned: Block-aligned value of olen, extent_same uses + * identical values here + * @destoff: Offset within @inode to start clone + */ +static int btrfs_clone(struct inode *src, struct inode *inode, + u64 off, u64 olen, u64 olen_aligned, u64 destoff) { - struct inode *inode = file_inode(file); struct btrfs_root *root = BTRFS_I(inode)->root; - struct fd src_file; - struct inode *src; - struct btrfs_trans_handle *trans; - struct btrfs_path *path; + struct btrfs_path *path = NULL; struct extent_buffer *leaf; - char *buf; + struct btrfs_trans_handle *trans; + char *buf = NULL; struct btrfs_key key; u32 nritems; int slot; int ret; - u64 len = olen; - u64 bs = root->fs_info->sb->s_blocksize; - int same_inode = 0; - - /* - * TODO: - * - split compressed inline extents. annoying: we need to - * decompress into destination's address_space (the file offset - * may change, so source mapping won't do), then recompress (or - * otherwise reinsert) a subrange. - * - allow ranges within the same file to be cloned (provided - * they don't overlap)? - */ - - /* the destination must be opened for writing */ - if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) - return -EINVAL; - - if (btrfs_root_readonly(root)) - return -EROFS; - - ret = mnt_want_write_file(file); - if (ret) - return ret; - - src_file = fdget(srcfd); - if (!src_file.file) { - ret = -EBADF; - goto out_drop_write; - } - - ret = -EXDEV; - if (src_file.file->f_path.mnt != file->f_path.mnt) - goto out_fput; - - src = file_inode(src_file.file); - - ret = -EINVAL; - if (src == inode) - same_inode = 1; - - /* the src must be open for reading */ - if (!(src_file.file->f_mode & FMODE_READ)) - goto out_fput; - - /* don't make the dst file partly checksummed */ - if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != - (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) - goto out_fput; - - ret = -EISDIR; - if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) - goto out_fput; - - ret = -EXDEV; - if (src->i_sb != inode->i_sb) - goto out_fput; + u64 len = olen_aligned; ret = -ENOMEM; buf = vmalloc(btrfs_level_size(root, 0)); if (!buf) - goto out_fput; + return ret; path = btrfs_alloc_path(); if (!path) { vfree(buf); - goto out_fput; - } - path->reada = 2; - - if (!same_inode) { - if (inode < src) { - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD); - } else { - mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); - } - } else { - mutex_lock(&src->i_mutex); - } - - /* determine range to clone */ - ret = -EINVAL; - if (off + len > src->i_size || off + len < off) - goto out_unlock; - if (len == 0) - olen = len = src->i_size - off; - /* if we extend to eof, continue to block boundary */ - if (off + len == src->i_size) - len = ALIGN(src->i_size, bs) - off; - - /* verify the end result is block aligned */ - if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || - !IS_ALIGNED(destoff, bs)) - goto out_unlock; - - /* verify if ranges are overlapped within the same file */ - if (same_inode) { - if (destoff + len > off && destoff < off + len) - goto out_unlock; - } - - if (destoff > inode->i_size) { - ret = btrfs_cont_expand(inode, inode->i_size, destoff); - if (ret) - goto out_unlock; + return ret; } - /* truncate page cache pages from target inode range */ - truncate_inode_pages_range(&inode->i_data, destoff, - PAGE_CACHE_ALIGN(destoff + len) - 1); - - lock_extent_range(src, off, len); - + path->reada = 2; /* clone data */ key.objectid = btrfs_ino(src); key.type = BTRFS_EXTENT_DATA_KEY; @@ -2865,15 +2772,132 @@ next: key.offset++; } ret = 0; + out: btrfs_release_path(path); + btrfs_free_path(path); + vfree(buf); + return ret; +} + +static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, + u64 off, u64 olen, u64 destoff) +{ + struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct fd src_file; + struct inode *src; + int ret; + u64 len = olen; + u64 bs = root->fs_info->sb->s_blocksize; + int same_inode = 0; + + /* + * TODO: + * - split compressed inline extents. annoying: we need to + * decompress into destination's address_space (the file offset + * may change, so source mapping won't do), then recompress (or + * otherwise reinsert) a subrange. + * - allow ranges within the same file to be cloned (provided + * they don't overlap)? + */ + + /* the destination must be opened for writing */ + if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) + return -EINVAL; + + if (btrfs_root_readonly(root)) + return -EROFS; + + ret = mnt_want_write_file(file); + if (ret) + return ret; + + src_file = fdget(srcfd); + if (!src_file.file) { + ret = -EBADF; + goto out_drop_write; + } + + ret = -EXDEV; + if (src_file.file->f_path.mnt != file->f_path.mnt) + goto out_fput; + + src = file_inode(src_file.file); + + ret = -EINVAL; + if (src == inode) + same_inode = 1; + + /* the src must be open for reading */ + if (!(src_file.file->f_mode & FMODE_READ)) + goto out_fput; + + /* don't make the dst file partly checksummed */ + if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != + (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) + goto out_fput; + + ret = -EISDIR; + if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) + goto out_fput; + + ret = -EXDEV; + if (src->i_sb != inode->i_sb) + goto out_fput; + + if (!same_inode) { + if (inode < src) { + mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD); + } else { + mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); + } + } else { + mutex_lock(&src->i_mutex); + } + + /* determine range to clone */ + ret = -EINVAL; + if (off + len > src->i_size || off + len < off) + goto out_unlock; + if (len == 0) + olen = len = src->i_size - off; + /* if we extend to eof, continue to block boundary */ + if (off + len == src->i_size) + len = ALIGN(src->i_size, bs) - off; + + /* verify the end result is block aligned */ + if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || + !IS_ALIGNED(destoff, bs)) + goto out_unlock; + + /* verify if ranges are overlapped within the same file */ + if (same_inode) { + if (destoff + len > off && destoff < off + len) + goto out_unlock; + } + + if (destoff > inode->i_size) { + ret = btrfs_cont_expand(inode, inode->i_size, destoff); + if (ret) + goto out_unlock; + } + + /* truncate page cache pages from target inode range */ + truncate_inode_pages_range(&inode->i_data, destoff, + PAGE_CACHE_ALIGN(destoff + len) - 1); + + lock_extent_range(src, off, len); + + ret = btrfs_clone(src, inode, off, olen, len, destoff); + unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); out_unlock: mutex_unlock(&src->i_mutex); if (!same_inode) mutex_unlock(&inode->i_mutex); - vfree(buf); - btrfs_free_path(path); out_fput: fdput(src_file); out_drop_write: -- cgit v1.2.3 From 4b384318a74e38eb248f74f9a92a700d2ce841f1 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 6 Aug 2013 11:42:50 -0700 Subject: btrfs: Introduce extent_read_full_page_nolock() We want this for btrfs_extent_same. Basically readpage and friends do their own extent locking but for the purposes of dedupe, we want to have both files locked down across a set of readpage operations (so that we can compare data). Introduce this variant and a flag which can be set for extent_read_full_page() to indicate that we are already locked. Partial credit for this patch goes to Gabriel de Perthuis as I have included a fix from him to the original patch which avoids a deadlock on compressed extents. Signed-off-by: Mark Fasheh Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/compression.c | 6 +++++- fs/btrfs/extent_io.c | 37 +++++++++++++++++++++++++++++-------- fs/btrfs/extent_io.h | 3 +++ 3 files changed, 37 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index b189bd1e7a3e..37dff5b2d56d 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -639,7 +639,11 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, faili = nr_pages - 1; cb->nr_pages = nr_pages; - add_ra_bio_pages(inode, em_start + em_len, cb); + /* In the parent-locked case, we only locked the range we are + * interested in. In all other cases, we can opportunistically + * cache decompressed data that goes beyond the requested range. */ + if (!(bio_flags & EXTENT_BIO_PARENT_LOCKED)) + add_ra_bio_pages(inode, em_start + em_len, cb); /* include any pages we added in add_ra-bio_pages */ uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a8d662b31830..df6e6c27782e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2764,11 +2764,12 @@ static int __do_readpage(struct extent_io_tree *tree, struct block_device *bdev; int ret; int nr = 0; + int parent_locked = *bio_flags & EXTENT_BIO_PARENT_LOCKED; size_t pg_offset = 0; size_t iosize; size_t disk_io_size; size_t blocksize = inode->i_sb->s_blocksize; - unsigned long this_bio_flag = 0; + unsigned long this_bio_flag = *bio_flags & EXTENT_BIO_PARENT_LOCKED; set_page_extent_mapped(page); @@ -2807,15 +2808,18 @@ static int __do_readpage(struct extent_io_tree *tree, kunmap_atomic(userpage); set_extent_uptodate(tree, cur, cur + iosize - 1, &cached, GFP_NOFS); - unlock_extent_cached(tree, cur, cur + iosize - 1, - &cached, GFP_NOFS); + if (!parent_locked) + unlock_extent_cached(tree, cur, + cur + iosize - 1, + &cached, GFP_NOFS); break; } em = __get_extent_map(inode, page, pg_offset, cur, end - cur + 1, get_extent, em_cached); if (IS_ERR_OR_NULL(em)) { SetPageError(page); - unlock_extent(tree, cur, end); + if (!parent_locked) + unlock_extent(tree, cur, end); break; } extent_offset = cur - em->start; @@ -2823,7 +2827,7 @@ static int __do_readpage(struct extent_io_tree *tree, BUG_ON(end < cur); if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { - this_bio_flag = EXTENT_BIO_COMPRESSED; + this_bio_flag |= EXTENT_BIO_COMPRESSED; extent_set_compress_type(&this_bio_flag, em->compress_type); } @@ -2867,7 +2871,8 @@ static int __do_readpage(struct extent_io_tree *tree, if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1, NULL)) { check_page_uptodate(tree, page); - unlock_extent(tree, cur, cur + iosize - 1); + if (!parent_locked) + unlock_extent(tree, cur, cur + iosize - 1); cur = cur + iosize; pg_offset += iosize; continue; @@ -2877,7 +2882,8 @@ static int __do_readpage(struct extent_io_tree *tree, */ if (block_start == EXTENT_MAP_INLINE) { SetPageError(page); - unlock_extent(tree, cur, cur + iosize - 1); + if (!parent_locked) + unlock_extent(tree, cur, cur + iosize - 1); cur = cur + iosize; pg_offset += iosize; continue; @@ -2895,7 +2901,8 @@ static int __do_readpage(struct extent_io_tree *tree, *bio_flags = this_bio_flag; } else { SetPageError(page); - unlock_extent(tree, cur, cur + iosize - 1); + if (!parent_locked) + unlock_extent(tree, cur, cur + iosize - 1); } cur = cur + iosize; pg_offset += iosize; @@ -3021,6 +3028,20 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page, return ret; } +int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page, + get_extent_t *get_extent, int mirror_num) +{ + struct bio *bio = NULL; + unsigned long bio_flags = EXTENT_BIO_PARENT_LOCKED; + int ret; + + ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num, + &bio_flags, READ); + if (bio) + ret = submit_one_bio(READ, bio, mirror_num, bio_flags); + return ret; +} + static noinline void update_nr_written(struct page *page, struct writeback_control *wbc, unsigned long nr_written) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index c450620a333f..3940a0386865 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -29,6 +29,7 @@ */ #define EXTENT_BIO_COMPRESSED 1 #define EXTENT_BIO_TREE_LOG 2 +#define EXTENT_BIO_PARENT_LOCKED 4 #define EXTENT_BIO_FLAG_SHIFT 16 /* these are bit numbers for test/set bit */ @@ -199,6 +200,8 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end); int extent_read_full_page(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent, int mirror_num); +int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page, + get_extent_t *get_extent, int mirror_num); int __init extent_io_init(void); void extent_io_exit(void); -- cgit v1.2.3 From 416161db9b63e353a8fb79d1369779175102fca1 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 6 Aug 2013 11:42:51 -0700 Subject: btrfs: offline dedupe This patch adds an ioctl, BTRFS_IOC_FILE_EXTENT_SAME which will try to de-duplicate a list of extents across a range of files. Internally, the ioctl re-uses code from the clone ioctl. This avoids rewriting a large chunk of extent handling code. Userspace passes in an array of file, offset pairs along with a length argument. The ioctl will then (for each dedupe) do a byte-by-byte comparison of the user data before deduping the extent. Status and number of bytes deduped are returned for each operation. Signed-off-by: Mark Fasheh Reviewed-by: Zach Brown Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 279 +++++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/btrfs.h | 28 +++++ 2 files changed, 307 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5b5148a1b0d3..022d8364e072 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -43,6 +43,7 @@ #include #include #include +#include #include "compat.h" #include "ctree.h" #include "disk-io.h" @@ -57,6 +58,9 @@ #include "send.h" #include "dev-replace.h" +static int btrfs_clone(struct inode *src, struct inode *inode, + u64 off, u64 olen, u64 olen_aligned, u64 destoff); + /* Mask out flags that are inappropriate for the given type of inode. */ static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) { @@ -2470,6 +2474,34 @@ out: return ret; } +static struct page *extent_same_get_page(struct inode *inode, u64 off) +{ + struct page *page; + pgoff_t index; + struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; + + index = off >> PAGE_CACHE_SHIFT; + + page = grab_cache_page(inode->i_mapping, index); + if (!page) + return NULL; + + if (!PageUptodate(page)) { + if (extent_read_full_page_nolock(tree, page, btrfs_get_extent, + 0)) + return NULL; + lock_page(page); + if (!PageUptodate(page)) { + unlock_page(page); + page_cache_release(page); + return NULL; + } + } + unlock_page(page); + + return page; +} + static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) { /* do any pending delalloc/csum calc on src, one way or @@ -2490,6 +2522,251 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) } } +static void btrfs_double_unlock(struct inode *inode1, u64 loff1, + struct inode *inode2, u64 loff2, u64 len) +{ + unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); + unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); + + mutex_unlock(&inode1->i_mutex); + mutex_unlock(&inode2->i_mutex); +} + +static void btrfs_double_lock(struct inode *inode1, u64 loff1, + struct inode *inode2, u64 loff2, u64 len) +{ + if (inode1 < inode2) { + swap(inode1, inode2); + swap(loff1, loff2); + } + + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); + lock_extent_range(inode1, loff1, len); + if (inode1 != inode2) { + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); + lock_extent_range(inode2, loff2, len); + } +} + +static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, + u64 dst_loff, u64 len) +{ + int ret = 0; + struct page *src_page, *dst_page; + unsigned int cmp_len = PAGE_CACHE_SIZE; + void *addr, *dst_addr; + + while (len) { + if (len < PAGE_CACHE_SIZE) + cmp_len = len; + + src_page = extent_same_get_page(src, loff); + if (!src_page) + return -EINVAL; + dst_page = extent_same_get_page(dst, dst_loff); + if (!dst_page) { + page_cache_release(src_page); + return -EINVAL; + } + addr = kmap_atomic(src_page); + dst_addr = kmap_atomic(dst_page); + + flush_dcache_page(src_page); + flush_dcache_page(dst_page); + + if (memcmp(addr, dst_addr, cmp_len)) + ret = BTRFS_SAME_DATA_DIFFERS; + + kunmap_atomic(addr); + kunmap_atomic(dst_addr); + page_cache_release(src_page); + page_cache_release(dst_page); + + if (ret) + break; + + loff += cmp_len; + dst_loff += cmp_len; + len -= cmp_len; + } + + return ret; +} + +static int extent_same_check_offsets(struct inode *inode, u64 off, u64 len) +{ + u64 bs = BTRFS_I(inode)->root->fs_info->sb->s_blocksize; + + if (off + len > inode->i_size || off + len < off) + return -EINVAL; + /* Check that we are block aligned - btrfs_clone() requires this */ + if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs)) + return -EINVAL; + + return 0; +} + +static int btrfs_extent_same(struct inode *src, u64 loff, u64 len, + struct inode *dst, u64 dst_loff) +{ + int ret; + + /* + * btrfs_clone() can't handle extents in the same file + * yet. Once that works, we can drop this check and replace it + * with a check for the same inode, but overlapping extents. + */ + if (src == dst) + return -EINVAL; + + btrfs_double_lock(src, loff, dst, dst_loff, len); + + ret = extent_same_check_offsets(src, loff, len); + if (ret) + goto out_unlock; + + ret = extent_same_check_offsets(dst, dst_loff, len); + if (ret) + goto out_unlock; + + /* don't make the dst file partly checksummed */ + if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != + (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) { + ret = -EINVAL; + goto out_unlock; + } + + ret = btrfs_cmp_data(src, loff, dst, dst_loff, len); + if (ret == 0) + ret = btrfs_clone(src, dst, loff, len, len, dst_loff); + +out_unlock: + btrfs_double_unlock(src, loff, dst, dst_loff, len); + + return ret; +} + +#define BTRFS_MAX_DEDUPE_LEN (16 * 1024 * 1024) + +static long btrfs_ioctl_file_extent_same(struct file *file, + void __user *argp) +{ + struct btrfs_ioctl_same_args *args = argp; + struct btrfs_ioctl_same_args same; + struct btrfs_ioctl_same_extent_info info; + struct inode *src = file->f_dentry->d_inode; + struct file *dst_file = NULL; + struct inode *dst; + u64 off; + u64 len; + int i; + int ret; + u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; + bool is_admin = capable(CAP_SYS_ADMIN); + + if (!(file->f_mode & FMODE_READ)) + return -EINVAL; + + ret = mnt_want_write_file(file); + if (ret) + return ret; + + if (copy_from_user(&same, + (struct btrfs_ioctl_same_args __user *)argp, + sizeof(same))) { + ret = -EFAULT; + goto out; + } + + off = same.logical_offset; + len = same.length; + + /* + * Limit the total length we will dedupe for each operation. + * This is intended to bound the total time spent in this + * ioctl to something sane. + */ + if (len > BTRFS_MAX_DEDUPE_LEN) + len = BTRFS_MAX_DEDUPE_LEN; + + if (WARN_ON_ONCE(bs < PAGE_CACHE_SIZE)) { + /* + * Btrfs does not support blocksize < page_size. As a + * result, btrfs_cmp_data() won't correctly handle + * this situation without an update. + */ + ret = -EINVAL; + goto out; + } + + ret = -EISDIR; + if (S_ISDIR(src->i_mode)) + goto out; + + ret = -EACCES; + if (!S_ISREG(src->i_mode)) + goto out; + + ret = 0; + for (i = 0; i < same.dest_count; i++) { + if (copy_from_user(&info, &args->info[i], sizeof(info))) { + ret = -EFAULT; + goto out; + } + + info.bytes_deduped = 0; + + dst_file = fget(info.fd); + if (!dst_file) { + info.status = -EBADF; + goto next; + } + + if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) { + info.status = -EINVAL; + goto next; + } + + info.status = -EXDEV; + if (file->f_path.mnt != dst_file->f_path.mnt) + goto next; + + dst = dst_file->f_dentry->d_inode; + if (src->i_sb != dst->i_sb) + goto next; + + if (S_ISDIR(dst->i_mode)) { + info.status = -EISDIR; + goto next; + } + + if (!S_ISREG(dst->i_mode)) { + info.status = -EACCES; + goto next; + } + + info.status = btrfs_extent_same(src, off, len, dst, + info.logical_offset); + if (info.status == 0) + info.bytes_deduped += len; + +next: + if (dst_file) + fput(dst_file); + + if (__put_user_unaligned(info.status, &args->info[i].status) || + __put_user_unaligned(info.bytes_deduped, + &args->info[i].bytes_deduped)) { + ret = -EFAULT; + goto out; + } + } + +out: + mnt_drop_write_file(file); + return ret; +} + /** * btrfs_clone() - clone a range from inode file to another * @@ -4242,6 +4519,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_get_fslabel(file, argp); case BTRFS_IOC_SET_FSLABEL: return btrfs_ioctl_set_fslabel(file, argp); + case BTRFS_IOC_FILE_EXTENT_SAME: + return btrfs_ioctl_file_extent_same(file, argp); } return -ENOTTY; diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 05aed70627e2..90d7bd9d839c 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -305,6 +305,31 @@ struct btrfs_ioctl_clone_range_args { #define BTRFS_DEFRAG_RANGE_COMPRESS 1 #define BTRFS_DEFRAG_RANGE_START_IO 2 +#define BTRFS_SAME_DATA_DIFFERS 1 +/* For extent-same ioctl */ +struct btrfs_ioctl_same_extent_info { + __s64 fd; /* in - destination file */ + __u64 logical_offset; /* in - start of extent in destination */ + __u64 bytes_deduped; /* out - total # of bytes we were able + * to dedupe from this file */ + /* status of this dedupe operation: + * 0 if dedup succeeds + * < 0 for error + * == BTRFS_SAME_DATA_DIFFERS if data differs + */ + __s32 status; /* out - see above description */ + __u32 reserved; +}; + +struct btrfs_ioctl_same_args { + __u64 logical_offset; /* in - start of extent in source */ + __u64 length; /* in - length of extent */ + __u16 dest_count; /* in - total elements in info array */ + __u16 reserved1; + __u32 reserved2; + struct btrfs_ioctl_same_extent_info info[0]; +}; + struct btrfs_ioctl_space_info { __u64 flags; __u64 total_bytes; @@ -579,4 +604,7 @@ static inline char *btrfs_err_str(enum btrfs_err_code err_code) struct btrfs_ioctl_get_dev_stats) #define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \ struct btrfs_ioctl_dev_replace_args) +#define BTRFS_IOC_FILE_EXTENT_SAME _IOWR(BTRFS_IOCTL_MAGIC, 54, \ + struct btrfs_ioctl_same_args) + #endif /* _UAPI_LINUX_BTRFS_H */ -- cgit v1.2.3 From a05254143cd183b18002cbba7759a1e4629aa762 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 12 Aug 2013 10:56:14 -0400 Subject: Btrfs: skip subvol entries when checking if we've created a dir already We have logic to see if we've already created a parent directory by check to see if an inode inside of that directory has a lower inode number than the one we are currently processing. The logic is that if there is a lower inode number then we would have had to made sure the directory was created at that previous point. The problem is that subvols inode numbers count from the lowest objectid in the root tree, which may be less than our current progress. So just skip if our dir item key is a root item. This fixes the original test and the xfstest version I made that added an extra subvol create. Thanks, Reported-by: Emil Karlson Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/send.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 0efc2e2f253c..f8f8b1f3bb2b 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -2538,7 +2538,8 @@ static int did_create_dir(struct send_ctx *sctx, u64 dir) di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); btrfs_dir_item_key_to_cpu(eb, di, &di_key); - if (di_key.objectid < sctx->send_progress) { + if (di_key.type != BTRFS_ROOT_ITEM_KEY && + di_key.objectid < sctx->send_progress) { ret = 1; goto out; } -- cgit v1.2.3 From 175a2b871f46272a2aedce5fb3222a72568b84c3 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 12 Aug 2013 15:36:44 -0400 Subject: Btrfs: don't allow a subvol to be deleted if it is the default subovl Eric pointed out that btrfs will happily allow you to delete the default subvol. This is a problem obviously since the next time you go to mount the file system it will freak out because it can't find the root. Fix this by adding a check to see if our default subvol points to the subvol we are trying to delete, and if it does not allowing it to happen. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 022d8364e072..317a984fe3c9 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1726,13 +1726,28 @@ out: static noinline int may_destroy_subvol(struct btrfs_root *root) { struct btrfs_path *path; + struct btrfs_dir_item *di; struct btrfs_key key; + u64 dir_id; int ret; path = btrfs_alloc_path(); if (!path) return -ENOMEM; + /* Make sure this root isn't set as the default subvol */ + dir_id = btrfs_super_root_dir(root->fs_info->super_copy); + di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, path, + dir_id, "default", 7, 0); + if (di && !IS_ERR(di)) { + btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); + if (key.objectid == root->root_key.objectid) { + ret = -ENOTEMPTY; + goto out; + } + btrfs_release_path(path); + } + key.objectid = root->root_key.objectid; key.type = BTRFS_ROOT_REF_KEY; key.offset = (u64)-1; -- cgit v1.2.3 From 4ef31a45a009a81b2f3c5aaf6f07d7147f80bc8c Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 13 Aug 2013 14:10:08 -0400 Subject: Btrfs: fix the error handling wrt orphan items There are several places where we BUG_ON() if we fail to remove the orphan items and such, which is not ok, so remove those and either abort or just carry on. This also fixes a problem where if we couldn't start a transaction we wouldn't actually remove the orphan item reserve for the inode. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 66571dce88d1..de7085bc28f6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2940,8 +2940,10 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, btrfs_root_refs(&root->root_item) > 0) { ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root, root->root_key.objectid); - BUG_ON(ret); - root->orphan_item_inserted = 0; + if (ret) + btrfs_abort_transaction(trans, root, ret); + else + root->orphan_item_inserted = 0; } if (block_rsv) { @@ -3010,11 +3012,18 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) /* insert an orphan item to track this unlinked/truncated file */ if (insert >= 1) { ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); - if (ret && ret != -EEXIST) { + if (ret) { clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, &BTRFS_I(inode)->runtime_flags); - btrfs_abort_transaction(trans, root, ret); - return ret; + if (reserve) { + clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, + &BTRFS_I(inode)->runtime_flags); + btrfs_orphan_release_metadata(inode); + } + if (ret != -EEXIST) { + btrfs_abort_transaction(trans, root, ret); + return ret; + } } ret = 0; } @@ -3053,17 +3062,15 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans, release_rsv = 1; spin_unlock(&root->orphan_lock); - if (trans && delete_item) { + if (trans && delete_item) ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode)); - BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ - } if (release_rsv) { btrfs_orphan_release_metadata(inode); atomic_dec(&root->orphan_inodes); } - return 0; + return ret; } /* @@ -3193,8 +3200,9 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) found_key.objectid); ret = btrfs_del_orphan_item(trans, root, found_key.objectid); - BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ btrfs_end_transaction(trans, root); + if (ret) + goto out; continue; } @@ -4570,10 +4578,15 @@ void btrfs_evict_inode(struct inode *inode) btrfs_free_block_rsv(root, rsv); + /* + * Errors here aren't a big deal, it just means we leave orphan items + * in the tree. They will be cleaned up on the next mount. + */ if (ret == 0) { trans->block_rsv = root->orphan_block_rsv; - ret = btrfs_orphan_del(trans, inode); - BUG_ON(ret); + btrfs_orphan_del(trans, inode); + } else { + btrfs_orphan_del(NULL, inode); } trans->block_rsv = &root->fs_info->trans_block_rsv; @@ -8120,10 +8133,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, new_dentry->d_name.name, new_dentry->d_name.len); } - if (!ret && new_inode->i_nlink == 0) { + if (!ret && new_inode->i_nlink == 0) ret = btrfs_orphan_add(trans, new_dentry->d_inode); - BUG_ON(ret); - } if (ret) { btrfs_abort_transaction(trans, root, ret); goto out_fail; -- cgit v1.2.3 From 9ffba8cda917c0158857426f0e74b64d0206aaa9 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 14 Aug 2013 11:33:56 -0400 Subject: Btrfs: fix heavy delalloc related deadlock I added a patch where we started taking the ordered operations mutex when we waited on ordered extents. We need this because we splice the list and process it, so if a flusher came in during this scenario it would think the list was empty and we'd usually get an early ENOSPC. The problem with this is that this lock is used in transaction committing. So we end up with something like this Transaction commit -> wait on writers Delalloc flusher -> run_ordered_operations (holds mutex) ->wait for filemap-flush to do its thing flush task -> cow_file_range ->wait on btrfs_join_transaction because we're commiting some other task -> commit_transaction because we notice trans->transaction->flush is set -> run_ordered_operations (hang on mutex) We need to disentangle the ordered operations flushing from the delalloc flushing, since they are separate things. This solves the deadlock issue I was seeing. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 7 +++++++ fs/btrfs/disk-io.c | 1 + fs/btrfs/ordered-data.c | 4 ++-- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0632832d4446..063e48587126 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1413,6 +1413,13 @@ struct btrfs_fs_info { * before jumping into the main commit. */ struct mutex ordered_operations_mutex; + + /* + * Same as ordered_operations_mutex except this is for ordered extents + * and not the operations. + */ + struct mutex ordered_extent_flush_mutex; + struct rw_semaphore extent_commit_sem; struct rw_semaphore cleanup_work_sem; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 572e21eb644c..68391ecf2c59 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2277,6 +2277,7 @@ int open_ctree(struct super_block *sb, mutex_init(&fs_info->ordered_operations_mutex); + mutex_init(&fs_info->ordered_extent_flush_mutex); mutex_init(&fs_info->tree_log_mutex); mutex_init(&fs_info->chunk_mutex); mutex_init(&fs_info->transaction_kthread_mutex); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 81369827e514..b52b2c4010c2 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -671,7 +671,7 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, INIT_LIST_HEAD(&splice); INIT_LIST_HEAD(&works); - mutex_lock(&root->fs_info->ordered_operations_mutex); + mutex_lock(&root->fs_info->ordered_extent_flush_mutex); spin_lock(&root->fs_info->ordered_root_lock); list_splice_init(&cur_trans->ordered_operations, &splice); while (!list_empty(&splice)) { @@ -718,7 +718,7 @@ out: list_del_init(&work->list); btrfs_wait_and_free_delalloc_work(work); } - mutex_unlock(&root->fs_info->ordered_operations_mutex); + mutex_unlock(&root->fs_info->ordered_extent_flush_mutex); return ret; } -- cgit v1.2.3 From 00361589d2eebd90fca022148c763e40d3e90871 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 14 Aug 2013 14:02:47 -0400 Subject: Btrfs: avoid starting a transaction in the write path I noticed while looking at a deadlock that we are always starting a transaction in cow_file_range(). This isn't really needed since we only need a transaction if we are doing an inline extent, or if the allocator needs to allocate a chunk. So push down all the transaction start stuff to be closer to where we actually need a transaction in all of these cases. This will hopefully reduce our write latency when we are committing often. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 11 +-- fs/btrfs/extent-tree.c | 35 ++++--- fs/btrfs/file.c | 11 +-- fs/btrfs/free-space-cache.c | 3 +- fs/btrfs/free-space-cache.h | 3 +- fs/btrfs/inode.c | 217 +++++++++++++++----------------------------- 6 files changed, 101 insertions(+), 179 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 063e48587126..7c93d9f13812 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3165,11 +3165,9 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 root_objectid, u64 owner, u64 offset, struct btrfs_key *ins); -int btrfs_reserve_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 num_bytes, u64 min_alloc_size, - u64 empty_size, u64 hint_byte, - struct btrfs_key *ins, int is_data); +int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes, + u64 min_alloc_size, u64 empty_size, u64 hint_byte, + struct btrfs_key *ins, int is_data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, int full_backref, int for_cow); int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -3612,8 +3610,7 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work); struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page, size_t pg_offset, u64 start, u64 len, int create); -noinline int can_nocow_extent(struct btrfs_trans_handle *trans, - struct inode *inode, u64 offset, u64 *len, +noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, u64 *orig_start, u64 *orig_block_len, u64 *ram_bytes); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 277d2c26b034..f1c1694d34b7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6121,8 +6121,7 @@ enum btrfs_loop_type { * ins->offset == number of blocks * Any available blocks before search_start are skipped. */ -static noinline int find_free_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *orig_root, +static noinline int find_free_extent(struct btrfs_root *orig_root, u64 num_bytes, u64 empty_size, u64 hint_byte, struct btrfs_key *ins, u64 flags) @@ -6345,10 +6344,10 @@ refill_cluster: block_group->full_stripe_len); /* allocate a cluster in this block group */ - ret = btrfs_find_space_cluster(trans, root, - block_group, last_ptr, - search_start, num_bytes, - aligned_cluster); + ret = btrfs_find_space_cluster(root, block_group, + last_ptr, search_start, + num_bytes, + aligned_cluster); if (ret == 0) { /* * now pull our allocation out of this @@ -6479,17 +6478,28 @@ loop: index = 0; loop++; if (loop == LOOP_ALLOC_CHUNK) { + struct btrfs_trans_handle *trans; + + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + ret = do_chunk_alloc(trans, root, flags, CHUNK_ALLOC_FORCE); /* * Do not bail out on ENOSPC since we * can do more things. */ - if (ret < 0 && ret != -ENOSPC) { + if (ret < 0 && ret != -ENOSPC) btrfs_abort_transaction(trans, root, ret); + else + ret = 0; + btrfs_end_transaction(trans, root); + if (ret) goto out; - } } if (loop == LOOP_NO_EMPTY_SIZE) { @@ -6553,8 +6563,7 @@ again: up_read(&info->groups_sem); } -int btrfs_reserve_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes, u64 min_alloc_size, u64 empty_size, u64 hint_byte, struct btrfs_key *ins, int is_data) @@ -6566,8 +6575,8 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, flags = btrfs_get_alloc_profile(root, is_data); again: WARN_ON(num_bytes < root->sectorsize); - ret = find_free_extent(trans, root, num_bytes, empty_size, - hint_byte, ins, flags); + ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins, + flags); if (ret == -ENOSPC) { if (!final_tried) { @@ -6955,7 +6964,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, if (IS_ERR(block_rsv)) return ERR_CAST(block_rsv); - ret = btrfs_reserve_extent(trans, root, blocksize, blocksize, + ret = btrfs_reserve_extent(root, blocksize, blocksize, empty_size, hint, &ins, 0); if (ret) { unuse_block_rsv(root->fs_info, block_rsv, blocksize); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8cc941e5b3e1..5e7ea996f105 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1339,7 +1339,6 @@ fail: static noinline int check_can_nocow(struct inode *inode, loff_t pos, size_t *write_bytes) { - struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ordered_extent *ordered; u64 lockstart, lockend; @@ -1361,16 +1360,8 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos, btrfs_put_ordered_extent(ordered); } - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); - return PTR_ERR(trans); - } - num_bytes = lockend - lockstart + 1; - ret = can_nocow_extent(trans, inode, lockstart, &num_bytes, NULL, NULL, - NULL); - btrfs_end_transaction(trans, root); + ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL); if (ret <= 0) { ret = 0; } else { diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index b21a3cd667d8..25f906fd0221 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2525,8 +2525,7 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, * returns zero and sets up cluster if things worked out, otherwise * it returns -enospc */ -int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +int btrfs_find_space_cluster(struct btrfs_root *root, struct btrfs_block_group_cache *block_group, struct btrfs_free_cluster *cluster, u64 offset, u64 bytes, u64 empty_size) diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index 894116b71304..4acfb19ba4ee 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h @@ -98,8 +98,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root); void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, u64 bytes); -int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +int btrfs_find_space_cluster(struct btrfs_root *root, struct btrfs_block_group_cache *block_group, struct btrfs_free_cluster *cluster, u64 offset, u64 bytes, u64 empty_size); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index de7085bc28f6..264feefa0290 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -230,12 +230,13 @@ fail: * does the checks required to make sure the data is small enough * to fit as an inline extent. */ -static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode, u64 start, u64 end, - size_t compressed_size, int compress_type, - struct page **compressed_pages) +static noinline int cow_file_range_inline(struct btrfs_root *root, + struct inode *inode, u64 start, + u64 end, size_t compressed_size, + int compress_type, + struct page **compressed_pages) { + struct btrfs_trans_handle *trans; u64 isize = i_size_read(inode); u64 actual_end = min(end + 1, isize); u64 inline_len = actual_end - start; @@ -256,9 +257,16 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, return 1; } + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) + return PTR_ERR(trans); + trans->block_rsv = &root->fs_info->delalloc_block_rsv; + ret = btrfs_drop_extents(trans, root, inode, start, aligned_end, 1); - if (ret) - return ret; + if (ret) { + btrfs_abort_transaction(trans, root, ret); + goto out; + } if (isize > actual_end) inline_len = min_t(u64, isize, actual_end); @@ -267,15 +275,18 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, compress_type, compressed_pages); if (ret && ret != -ENOSPC) { btrfs_abort_transaction(trans, root, ret); - return ret; + goto out; } else if (ret == -ENOSPC) { - return 1; + ret = 1; + goto out; } set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); btrfs_delalloc_release_metadata(inode, end + 1 - start); btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); - return 0; +out: + btrfs_end_transaction(trans, root); + return ret; } struct async_extent { @@ -343,7 +354,6 @@ static noinline int compress_file_range(struct inode *inode, int *num_added) { struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; u64 num_bytes; u64 blocksize = root->sectorsize; u64 actual_end; @@ -461,25 +471,16 @@ again: } cont: if (start == 0) { - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - trans = NULL; - goto cleanup_and_out; - } - trans->block_rsv = &root->fs_info->delalloc_block_rsv; - /* lets try to make an inline extent */ if (ret || total_in < (actual_end - start)) { /* we didn't compress the entire range, try * to make an uncompressed inline extent. */ - ret = cow_file_range_inline(trans, root, inode, - start, end, 0, 0, NULL); + ret = cow_file_range_inline(root, inode, start, end, + 0, 0, NULL); } else { /* try making a compressed inline extent */ - ret = cow_file_range_inline(trans, root, inode, - start, end, + ret = cow_file_range_inline(root, inode, start, end, total_compressed, compress_type, pages); } @@ -498,10 +499,8 @@ cont: PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK); - btrfs_end_transaction(trans, root); goto free_pages_out; } - btrfs_end_transaction(trans, root); } if (will_compress) { @@ -592,18 +591,6 @@ free_pages_out: kfree(pages); goto out; - -cleanup_and_out: - extent_clear_unlock_delalloc(inode, start, end, NULL, - EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | - EXTENT_DEFRAG, PAGE_UNLOCK | - PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | - PAGE_END_WRITEBACK); - if (!trans || IS_ERR(trans)) - btrfs_error(root->fs_info, ret, "Failed to join transaction"); - else - btrfs_abort_transaction(trans, root, ret); - goto free_pages_out; } /* @@ -617,7 +604,6 @@ static noinline int submit_compressed_extents(struct inode *inode, { struct async_extent *async_extent; u64 alloc_hint = 0; - struct btrfs_trans_handle *trans; struct btrfs_key ins; struct extent_map *em; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -678,20 +664,10 @@ retry: lock_extent(io_tree, async_extent->start, async_extent->start + async_extent->ram_size - 1); - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - } else { - trans->block_rsv = &root->fs_info->delalloc_block_rsv; - ret = btrfs_reserve_extent(trans, root, + ret = btrfs_reserve_extent(root, async_extent->compressed_size, async_extent->compressed_size, 0, alloc_hint, &ins, 1); - if (ret && ret != -ENOSPC) - btrfs_abort_transaction(trans, root, ret); - btrfs_end_transaction(trans, root); - } - if (ret) { int i; @@ -850,14 +826,13 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start, * required to start IO on it. It may be clean and already done with * IO when we return. */ -static noinline int __cow_file_range(struct btrfs_trans_handle *trans, - struct inode *inode, - struct btrfs_root *root, - struct page *locked_page, - u64 start, u64 end, int *page_started, - unsigned long *nr_written, - int unlock) +static noinline int cow_file_range(struct inode *inode, + struct page *locked_page, + u64 start, u64 end, int *page_started, + unsigned long *nr_written, + int unlock) { + struct btrfs_root *root = BTRFS_I(inode)->root; u64 alloc_hint = 0; u64 num_bytes; unsigned long ram_size; @@ -878,12 +853,12 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans, /* if this is a small write inside eof, kick off defrag */ if (num_bytes < 64 * 1024 && (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) - btrfs_add_inode_defrag(trans, inode); + btrfs_add_inode_defrag(NULL, inode); if (start == 0) { /* lets try to make an inline extent */ - ret = cow_file_range_inline(trans, root, inode, - start, end, 0, 0, NULL); + ret = cow_file_range_inline(root, inode, start, end, 0, 0, + NULL); if (ret == 0) { extent_clear_unlock_delalloc(inode, start, end, NULL, EXTENT_LOCKED | EXTENT_DELALLOC | @@ -896,7 +871,6 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans, *page_started = 1; goto out; } else if (ret < 0) { - btrfs_abort_transaction(trans, root, ret); goto out_unlock; } } @@ -911,13 +885,11 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans, unsigned long op; cur_alloc_size = disk_num_bytes; - ret = btrfs_reserve_extent(trans, root, cur_alloc_size, + ret = btrfs_reserve_extent(root, cur_alloc_size, root->sectorsize, 0, alloc_hint, &ins, 1); - if (ret < 0) { - btrfs_abort_transaction(trans, root, ret); + if (ret < 0) goto out_unlock; - } em = alloc_extent_map(); if (!em) { @@ -963,10 +935,8 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans, BTRFS_DATA_RELOC_TREE_OBJECTID) { ret = btrfs_reloc_clone_csums(inode, start, cur_alloc_size); - if (ret) { - btrfs_abort_transaction(trans, root, ret); + if (ret) goto out_reserve; - } } if (disk_num_bytes < cur_alloc_size) @@ -1005,37 +975,6 @@ out_unlock: goto out; } -static noinline int cow_file_range(struct inode *inode, - struct page *locked_page, - u64 start, u64 end, int *page_started, - unsigned long *nr_written, - int unlock) -{ - struct btrfs_trans_handle *trans; - struct btrfs_root *root = BTRFS_I(inode)->root; - int ret; - - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - extent_clear_unlock_delalloc(inode, start, end, locked_page, - EXTENT_LOCKED | EXTENT_DELALLOC | - EXTENT_DO_ACCOUNTING | - EXTENT_DEFRAG, PAGE_UNLOCK | - PAGE_CLEAR_DIRTY | - PAGE_SET_WRITEBACK | - PAGE_END_WRITEBACK); - return PTR_ERR(trans); - } - trans->block_rsv = &root->fs_info->delalloc_block_rsv; - - ret = __cow_file_range(trans, inode, root, locked_page, start, end, - page_started, nr_written, unlock); - - btrfs_end_transaction(trans, root); - - return ret; -} - /* * work queue call back to started compression on a file and pages */ @@ -1347,9 +1286,9 @@ out_check: btrfs_release_path(path); if (cow_start != (u64)-1) { - ret = __cow_file_range(trans, inode, root, locked_page, - cow_start, found_key.offset - 1, - page_started, nr_written, 1); + ret = cow_file_range(inode, locked_page, + cow_start, found_key.offset - 1, + page_started, nr_written, 1); if (ret) { btrfs_abort_transaction(trans, root, ret); goto error; @@ -1423,9 +1362,8 @@ out_check: } if (cow_start != (u64)-1) { - ret = __cow_file_range(trans, inode, root, locked_page, - cow_start, end, - page_started, nr_written, 1); + ret = cow_file_range(inode, locked_page, cow_start, end, + page_started, nr_written, 1); if (ret) { btrfs_abort_transaction(trans, root, ret); goto error; @@ -6344,39 +6282,32 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, u64 start, u64 len) { struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; struct extent_map *em; struct btrfs_key ins; u64 alloc_hint; int ret; - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) - return ERR_CAST(trans); - - trans->block_rsv = &root->fs_info->delalloc_block_rsv; - alloc_hint = get_extent_allocation_hint(inode, start, len); - ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0, + ret = btrfs_reserve_extent(root, len, root->sectorsize, 0, alloc_hint, &ins, 1); - if (ret) { - em = ERR_PTR(ret); - goto out; - } + if (ret) + return ERR_PTR(ret); em = create_pinned_em(inode, start, ins.offset, start, ins.objectid, ins.offset, ins.offset, ins.offset, 0); - if (IS_ERR(em)) - goto out; + if (IS_ERR(em)) { + btrfs_free_reserved_extent(root, ins.objectid, ins.offset); + return em; + } ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, ins.offset, ins.offset, 0); if (ret) { btrfs_free_reserved_extent(root, ins.objectid, ins.offset); - em = ERR_PTR(ret); + free_extent_map(em); + return ERR_PTR(ret); } -out: - btrfs_end_transaction(trans, root); + return em; } @@ -6384,11 +6315,11 @@ out: * returns 1 when the nocow is safe, < 1 on error, 0 if the * block must be cow'd */ -noinline int can_nocow_extent(struct btrfs_trans_handle *trans, - struct inode *inode, u64 offset, u64 *len, +noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, u64 *orig_start, u64 *orig_block_len, u64 *ram_bytes) { + struct btrfs_trans_handle *trans; struct btrfs_path *path; int ret; struct extent_buffer *leaf; @@ -6406,7 +6337,7 @@ noinline int can_nocow_extent(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode), + ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode), offset, 0); if (ret < 0) goto out; @@ -6471,9 +6402,19 @@ noinline int can_nocow_extent(struct btrfs_trans_handle *trans, * look for other files referencing this extent, if we * find any we must cow */ - if (btrfs_cross_ref_exist(trans, root, btrfs_ino(inode), - key.offset - backref_offset, disk_bytenr)) + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) { + ret = 0; goto out; + } + + ret = btrfs_cross_ref_exist(trans, root, btrfs_ino(inode), + key.offset - backref_offset, disk_bytenr); + btrfs_end_transaction(trans, root); + if (ret) { + ret = 0; + goto out; + } /* * adjust disk_bytenr and num_bytes to cover just the bytes @@ -6615,7 +6556,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, u64 start = iblock << inode->i_blkbits; u64 lockstart, lockend; u64 len = bh_result->b_size; - struct btrfs_trans_handle *trans; int unlock_bits = EXTENT_LOCKED; int ret = 0; @@ -6697,16 +6637,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, len = min(len, em->len - (start - em->start)); block_start = em->block_start + (start - em->start); - /* - * we're not going to log anything, but we do need - * to make sure the current transaction stays open - * while we look for nocow cross refs - */ - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) - goto must_cow; - - if (can_nocow_extent(trans, inode, start, &len, &orig_start, + if (can_nocow_extent(inode, start, &len, &orig_start, &orig_block_len, &ram_bytes) == 1) { if (type == BTRFS_ORDERED_PREALLOC) { free_extent_map(em); @@ -6715,24 +6646,20 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, block_start, len, orig_block_len, ram_bytes, type); - if (IS_ERR(em)) { - btrfs_end_transaction(trans, root); + if (IS_ERR(em)) goto unlock_err; - } } ret = btrfs_add_ordered_extent_dio(inode, start, block_start, len, len, type); - btrfs_end_transaction(trans, root); if (ret) { free_extent_map(em); goto unlock_err; } goto unlock; } - btrfs_end_transaction(trans, root); } -must_cow: + /* * this will cow the extent, reset the len in case we changed * it above @@ -8495,8 +8422,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, cur_bytes = min(num_bytes, 256ULL * 1024 * 1024); cur_bytes = max(cur_bytes, min_size); - ret = btrfs_reserve_extent(trans, root, cur_bytes, - min_size, 0, *alloc_hint, &ins, 1); + ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0, + *alloc_hint, &ins, 1); if (ret) { if (own_trans) btrfs_end_transaction(trans, root); -- cgit v1.2.3 From dc11dd5d707a4157882f281c96055d6894d10c8c Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 14 Aug 2013 15:05:12 -0400 Subject: Btrfs: separate out tests into their own directory The plan is to have a bunch of unit tests that run when btrfs is loaded when you build with the appropriate config option. My ultimate goal is to have a test for every non-static function we have, but at first I'm going to focus on the things that cause us the most problems. To start out with this just adds a tests/ directory and moves the existing free space cache tests into that directory and sets up all of the infrastructure. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 + fs/btrfs/free-space-cache.c | 488 +++++--------------------------------- fs/btrfs/free-space-cache.h | 8 +- fs/btrfs/super.c | 19 +- fs/btrfs/tests/btrfs-tests.h | 34 +++ fs/btrfs/tests/free-space-tests.c | 395 ++++++++++++++++++++++++++++++ 6 files changed, 510 insertions(+), 436 deletions(-) create mode 100644 fs/btrfs/tests/btrfs-tests.h create mode 100644 fs/btrfs/tests/free-space-tests.c diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 3932224f99e9..47553d553206 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -12,3 +12,5 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o + +btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 25f906fd0221..34ecc651c9ab 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2972,33 +2972,68 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root, } #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS -static struct btrfs_block_group_cache *init_test_block_group(void) +/* + * Use this if you need to make a bitmap or extent entry specifically, it + * doesn't do any of the merging that add_free_space does, this acts a lot like + * how the free space cache loading stuff works, so you can get really weird + * configurations. + */ +int test_add_free_space_entry(struct btrfs_block_group_cache *cache, + u64 offset, u64 bytes, bool bitmap) { - struct btrfs_block_group_cache *cache; + struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; + struct btrfs_free_space *info = NULL, *bitmap_info; + void *map = NULL; + u64 bytes_added; + int ret; - cache = kzalloc(sizeof(*cache), GFP_NOFS); - if (!cache) - return NULL; - cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), - GFP_NOFS); - if (!cache->free_space_ctl) { - kfree(cache); - return NULL; +again: + if (!info) { + info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); + if (!info) + return -ENOMEM; } - cache->key.objectid = 0; - cache->key.offset = 1024 * 1024 * 1024; - cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; - cache->sectorsize = 4096; + if (!bitmap) { + spin_lock(&ctl->tree_lock); + info->offset = offset; + info->bytes = bytes; + ret = link_free_space(ctl, info); + spin_unlock(&ctl->tree_lock); + if (ret) + kmem_cache_free(btrfs_free_space_cachep, info); + return ret; + } + + if (!map) { + map = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); + if (!map) { + kmem_cache_free(btrfs_free_space_cachep, info); + return -ENOMEM; + } + } + + spin_lock(&ctl->tree_lock); + bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), + 1, 0); + if (!bitmap_info) { + info->bitmap = map; + map = NULL; + add_new_bitmap(ctl, info, offset); + bitmap_info = info; + } - spin_lock_init(&cache->lock); - INIT_LIST_HEAD(&cache->list); - INIT_LIST_HEAD(&cache->cluster_list); - INIT_LIST_HEAD(&cache->new_bg_list); + bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); + bytes -= bytes_added; + offset += bytes_added; + spin_unlock(&ctl->tree_lock); - btrfs_init_free_space_ctl(cache); + if (bytes) + goto again; - return cache; + if (map) + kfree(map); + return 0; } /* @@ -3006,8 +3041,8 @@ static struct btrfs_block_group_cache *init_test_block_group(void) * just used to check the absence of space, so if there is free space in the * range at all we will return 1. */ -static int check_exists(struct btrfs_block_group_cache *cache, u64 offset, - u64 bytes) +int test_check_exists(struct btrfs_block_group_cache *cache, + u64 offset, u64 bytes) { struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; struct btrfs_free_space *info; @@ -3084,411 +3119,4 @@ out: spin_unlock(&ctl->tree_lock); return ret; } - -/* - * Use this if you need to make a bitmap or extent entry specifically, it - * doesn't do any of the merging that add_free_space does, this acts a lot like - * how the free space cache loading stuff works, so you can get really weird - * configurations. - */ -static int add_free_space_entry(struct btrfs_block_group_cache *cache, - u64 offset, u64 bytes, bool bitmap) -{ - struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; - struct btrfs_free_space *info = NULL, *bitmap_info; - void *map = NULL; - u64 bytes_added; - int ret; - -again: - if (!info) { - info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); - if (!info) - return -ENOMEM; - } - - if (!bitmap) { - spin_lock(&ctl->tree_lock); - info->offset = offset; - info->bytes = bytes; - ret = link_free_space(ctl, info); - spin_unlock(&ctl->tree_lock); - if (ret) - kmem_cache_free(btrfs_free_space_cachep, info); - return ret; - } - - if (!map) { - map = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); - if (!map) { - kmem_cache_free(btrfs_free_space_cachep, info); - return -ENOMEM; - } - } - - spin_lock(&ctl->tree_lock); - bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), - 1, 0); - if (!bitmap_info) { - info->bitmap = map; - map = NULL; - add_new_bitmap(ctl, info, offset); - bitmap_info = info; - } - - bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); - bytes -= bytes_added; - offset += bytes_added; - spin_unlock(&ctl->tree_lock); - - if (bytes) - goto again; - - if (map) - kfree(map); - return 0; -} - -#define test_msg(fmt, ...) printk(KERN_INFO "btrfs: selftest: " fmt, ##__VA_ARGS__) - -/* - * This test just does basic sanity checking, making sure we can add an exten - * entry and remove space from either end and the middle, and make sure we can - * remove space that covers adjacent extent entries. - */ -static int test_extents(struct btrfs_block_group_cache *cache) -{ - int ret = 0; - - test_msg("Running extent only tests\n"); - - /* First just make sure we can remove an entire entry */ - ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024); - if (ret) { - test_msg("Error adding initial extents %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024); - if (ret) { - test_msg("Error removing extent %d\n", ret); - return ret; - } - - if (check_exists(cache, 0, 4 * 1024 * 1024)) { - test_msg("Full remove left some lingering space\n"); - return -1; - } - - /* Ok edge and middle cases now */ - ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024); - if (ret) { - test_msg("Error adding half extent %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 1 * 1024 * 1024); - if (ret) { - test_msg("Error removing tail end %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024); - if (ret) { - test_msg("Error removing front end %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096); - if (ret) { - test_msg("Error removing middle piece %d\n", ret); - return ret; - } - - if (check_exists(cache, 0, 1 * 1024 * 1024)) { - test_msg("Still have space at the front\n"); - return -1; - } - - if (check_exists(cache, 2 * 1024 * 1024, 4096)) { - test_msg("Still have space in the middle\n"); - return -1; - } - - if (check_exists(cache, 3 * 1024 * 1024, 1 * 1024 * 1024)) { - test_msg("Still have space at the end\n"); - return -1; - } - - /* Cleanup */ - __btrfs_remove_free_space_cache(cache->free_space_ctl); - - return 0; -} - -static int test_bitmaps(struct btrfs_block_group_cache *cache) -{ - u64 next_bitmap_offset; - int ret; - - test_msg("Running bitmap only tests\n"); - - ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1); - if (ret) { - test_msg("Couldn't create a bitmap entry %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024); - if (ret) { - test_msg("Error removing bitmap full range %d\n", ret); - return ret; - } - - if (check_exists(cache, 0, 4 * 1024 * 1024)) { - test_msg("Left some space in bitmap\n"); - return -1; - } - - ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1); - if (ret) { - test_msg("Couldn't add to our bitmap entry %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 2 * 1024 * 1024); - if (ret) { - test_msg("Couldn't remove middle chunk %d\n", ret); - return ret; - } - - /* - * The first bitmap we have starts at offset 0 so the next one is just - * at the end of the first bitmap. - */ - next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096); - - /* Test a bit straddling two bitmaps */ - ret = add_free_space_entry(cache, next_bitmap_offset - - (2 * 1024 * 1024), 4 * 1024 * 1024, 1); - if (ret) { - test_msg("Couldn't add space that straddles two bitmaps %d\n", - ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, next_bitmap_offset - - (1 * 1024 * 1024), 2 * 1024 * 1024); - if (ret) { - test_msg("Couldn't remove overlapping space %d\n", ret); - return ret; - } - - if (check_exists(cache, next_bitmap_offset - (1 * 1024 * 1024), - 2 * 1024 * 1024)) { - test_msg("Left some space when removing overlapping\n"); - return -1; - } - - __btrfs_remove_free_space_cache(cache->free_space_ctl); - - return 0; -} - -/* This is the high grade jackassery */ -static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) -{ - u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096); - int ret; - - test_msg("Running bitmap and extent tests\n"); - - /* - * First let's do something simple, an extent at the same offset as the - * bitmap, but the free space completely in the extent and then - * completely in the bitmap. - */ - ret = add_free_space_entry(cache, 4 * 1024 * 1024, 1 * 1024 * 1024, 1); - if (ret) { - test_msg("Couldn't create bitmap entry %d\n", ret); - return ret; - } - - ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0); - if (ret) { - test_msg("Couldn't add extent entry %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024); - if (ret) { - test_msg("Couldn't remove extent entry %d\n", ret); - return ret; - } - - if (check_exists(cache, 0, 1 * 1024 * 1024)) { - test_msg("Left remnants after our remove\n"); - return -1; - } - - /* Now to add back the extent entry and remove from the bitmap */ - ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0); - if (ret) { - test_msg("Couldn't re-add extent entry %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 4 * 1024 * 1024, 1 * 1024 * 1024); - if (ret) { - test_msg("Couldn't remove from bitmap %d\n", ret); - return ret; - } - - if (check_exists(cache, 4 * 1024 * 1024, 1 * 1024 * 1024)) { - test_msg("Left remnants in the bitmap\n"); - return -1; - } - - /* - * Ok so a little more evil, extent entry and bitmap at the same offset, - * removing an overlapping chunk. - */ - ret = add_free_space_entry(cache, 1 * 1024 * 1024, 4 * 1024 * 1024, 1); - if (ret) { - test_msg("Couldn't add to a bitmap %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 512 * 1024, 3 * 1024 * 1024); - if (ret) { - test_msg("Couldn't remove overlapping space %d\n", ret); - return ret; - } - - if (check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) { - test_msg("Left over peices after removing overlapping\n"); - return -1; - } - - __btrfs_remove_free_space_cache(cache->free_space_ctl); - - /* Now with the extent entry offset into the bitmap */ - ret = add_free_space_entry(cache, 4 * 1024 * 1024, 4 * 1024 * 1024, 1); - if (ret) { - test_msg("Couldn't add space to the bitmap %d\n", ret); - return ret; - } - - ret = add_free_space_entry(cache, 2 * 1024 * 1024, 2 * 1024 * 1024, 0); - if (ret) { - test_msg("Couldn't add extent to the cache %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 4 * 1024 * 1024); - if (ret) { - test_msg("Problem removing overlapping space %d\n", ret); - return ret; - } - - if (check_exists(cache, 3 * 1024 * 1024, 4 * 1024 * 1024)) { - test_msg("Left something behind when removing space"); - return -1; - } - - /* - * This has blown up in the past, the extent entry starts before the - * bitmap entry, but we're trying to remove an offset that falls - * completely within the bitmap range and is in both the extent entry - * and the bitmap entry, looks like this - * - * [ extent ] - * [ bitmap ] - * [ del ] - */ - __btrfs_remove_free_space_cache(cache->free_space_ctl); - ret = add_free_space_entry(cache, bitmap_offset + 4 * 1024 * 1024, - 4 * 1024 * 1024, 1); - if (ret) { - test_msg("Couldn't add bitmap %d\n", ret); - return ret; - } - - ret = add_free_space_entry(cache, bitmap_offset - 1 * 1024 * 1024, - 5 * 1024 * 1024, 0); - if (ret) { - test_msg("Couldn't add extent entry %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, bitmap_offset + 1 * 1024 * 1024, - 5 * 1024 * 1024); - if (ret) { - test_msg("Failed to free our space %d\n", ret); - return ret; - } - - if (check_exists(cache, bitmap_offset + 1 * 1024 * 1024, - 5 * 1024 * 1024)) { - test_msg("Left stuff over\n"); - return -1; - } - - __btrfs_remove_free_space_cache(cache->free_space_ctl); - - /* - * This blew up before, we have part of the free space in a bitmap and - * then the entirety of the rest of the space in an extent. This used - * to return -EAGAIN back from btrfs_remove_extent, make sure this - * doesn't happen. - */ - ret = add_free_space_entry(cache, 1 * 1024 * 1024, 2 * 1024 * 1024, 1); - if (ret) { - test_msg("Couldn't add bitmap entry %d\n", ret); - return ret; - } - - ret = add_free_space_entry(cache, 3 * 1024 * 1024, 1 * 1024 * 1024, 0); - if (ret) { - test_msg("Couldn't add extent entry %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 3 * 1024 * 1024); - if (ret) { - test_msg("Error removing bitmap and extent overlapping %d\n", ret); - return ret; - } - - __btrfs_remove_free_space_cache(cache->free_space_ctl); - return 0; -} - -void btrfs_test_free_space_cache(void) -{ - struct btrfs_block_group_cache *cache; - - test_msg("Running btrfs free space cache tests\n"); - - cache = init_test_block_group(); - if (!cache) { - test_msg("Couldn't run the tests\n"); - return; - } - - if (test_extents(cache)) - goto out; - if (test_bitmaps(cache)) - goto out; - if (test_bitmaps_and_extents(cache)) - goto out; -out: - __btrfs_remove_free_space_cache(cache->free_space_ctl); - kfree(cache->free_space_ctl); - kfree(cache); - test_msg("Free space cache tests finished\n"); -} -#undef test_msg -#else /* !CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ -void btrfs_test_free_space_cache(void) {} -#endif /* !CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ +#endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index 4acfb19ba4ee..c74904167476 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h @@ -112,6 +112,12 @@ int btrfs_return_cluster_to_free_space( int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, u64 *trimmed, u64 start, u64 end, u64 minlen); -void btrfs_test_free_space_cache(void); +/* Support functions for runnint our sanity tests */ +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS +int test_add_free_space_entry(struct btrfs_block_group_cache *cache, + u64 offset, u64 bytes, bool bitmap); +int test_check_exists(struct btrfs_block_group_cache *cache, + u64 offset, u64 bytes); +#endif #endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 196790375bb3..1bd0bb523660 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -56,6 +56,7 @@ #include "rcu-string.h" #include "dev-replace.h" #include "free-space-cache.h" +#include "tests/btrfs-tests.h" #define CREATE_TRACE_POINTS #include @@ -1762,6 +1763,11 @@ static void btrfs_print_info(void) "\n"); } +static int btrfs_run_sanity_tests(void) +{ + return btrfs_test_free_space_cache(); +} + static int __init init_btrfs_fs(void) { int err; @@ -1804,14 +1810,17 @@ static int __init init_btrfs_fs(void) if (err) goto free_delayed_ref; - err = register_filesystem(&btrfs_fs_type); - if (err) - goto unregister_ioctl; - btrfs_init_lockdep(); btrfs_print_info(); - btrfs_test_free_space_cache(); + + err = btrfs_run_sanity_tests(); + if (err) + goto unregister_ioctl; + + err = register_filesystem(&btrfs_fs_type); + if (err) + goto unregister_ioctl; return 0; diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h new file mode 100644 index 000000000000..580877625776 --- /dev/null +++ b/fs/btrfs/tests/btrfs-tests.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2013 Fusion IO. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __BTRFS_TESTS +#define __BTRFS_TESTS + +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS + +#define test_msg(fmt, ...) pr_info("btrfs: selftest: " fmt, ##__VA_ARGS__) + +int btrfs_test_free_space_cache(void); +#else +static inline int btrfs_test_free_space_cache(void) +{ + return 0; +} +#endif + +#endif diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c new file mode 100644 index 000000000000..6fc82010dc15 --- /dev/null +++ b/fs/btrfs/tests/free-space-tests.c @@ -0,0 +1,395 @@ +/* + * Copyright (C) 2013 Fusion IO. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include +#include "btrfs-tests.h" +#include "../ctree.h" +#include "../free-space-cache.h" + +#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) +static struct btrfs_block_group_cache *init_test_block_group(void) +{ + struct btrfs_block_group_cache *cache; + + cache = kzalloc(sizeof(*cache), GFP_NOFS); + if (!cache) + return NULL; + cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), + GFP_NOFS); + if (!cache->free_space_ctl) { + kfree(cache); + return NULL; + } + + cache->key.objectid = 0; + cache->key.offset = 1024 * 1024 * 1024; + cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; + cache->sectorsize = 4096; + + spin_lock_init(&cache->lock); + INIT_LIST_HEAD(&cache->list); + INIT_LIST_HEAD(&cache->cluster_list); + INIT_LIST_HEAD(&cache->new_bg_list); + + btrfs_init_free_space_ctl(cache); + + return cache; +} + +/* + * This test just does basic sanity checking, making sure we can add an exten + * entry and remove space from either end and the middle, and make sure we can + * remove space that covers adjacent extent entries. + */ +static int test_extents(struct btrfs_block_group_cache *cache) +{ + int ret = 0; + + test_msg("Running extent only tests\n"); + + /* First just make sure we can remove an entire entry */ + ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024); + if (ret) { + test_msg("Error adding initial extents %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024); + if (ret) { + test_msg("Error removing extent %d\n", ret); + return ret; + } + + if (test_check_exists(cache, 0, 4 * 1024 * 1024)) { + test_msg("Full remove left some lingering space\n"); + return -1; + } + + /* Ok edge and middle cases now */ + ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024); + if (ret) { + test_msg("Error adding half extent %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 1 * 1024 * 1024); + if (ret) { + test_msg("Error removing tail end %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024); + if (ret) { + test_msg("Error removing front end %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096); + if (ret) { + test_msg("Error removing middle peice %d\n", ret); + return ret; + } + + if (test_check_exists(cache, 0, 1 * 1024 * 1024)) { + test_msg("Still have space at the front\n"); + return -1; + } + + if (test_check_exists(cache, 2 * 1024 * 1024, 4096)) { + test_msg("Still have space in the middle\n"); + return -1; + } + + if (test_check_exists(cache, 3 * 1024 * 1024, 1 * 1024 * 1024)) { + test_msg("Still have space at the end\n"); + return -1; + } + + /* Cleanup */ + __btrfs_remove_free_space_cache(cache->free_space_ctl); + + return 0; +} + +static int test_bitmaps(struct btrfs_block_group_cache *cache) +{ + u64 next_bitmap_offset; + int ret; + + test_msg("Running bitmap only tests\n"); + + ret = test_add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1); + if (ret) { + test_msg("Couldn't create a bitmap entry %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024); + if (ret) { + test_msg("Error removing bitmap full range %d\n", ret); + return ret; + } + + if (test_check_exists(cache, 0, 4 * 1024 * 1024)) { + test_msg("Left some space in bitmap\n"); + return -1; + } + + ret = test_add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1); + if (ret) { + test_msg("Couldn't add to our bitmap entry %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 2 * 1024 * 1024); + if (ret) { + test_msg("Couldn't remove middle chunk %d\n", ret); + return ret; + } + + /* + * The first bitmap we have starts at offset 0 so the next one is just + * at the end of the first bitmap. + */ + next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096); + + /* Test a bit straddling two bitmaps */ + ret = test_add_free_space_entry(cache, next_bitmap_offset - + (2 * 1024 * 1024), 4 * 1024 * 1024, 1); + if (ret) { + test_msg("Couldn't add space that straddles two bitmaps %d\n", + ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, next_bitmap_offset - + (1 * 1024 * 1024), 2 * 1024 * 1024); + if (ret) { + test_msg("Couldn't remove overlapping space %d\n", ret); + return ret; + } + + if (test_check_exists(cache, next_bitmap_offset - (1 * 1024 * 1024), + 2 * 1024 * 1024)) { + test_msg("Left some space when removing overlapping\n"); + return -1; + } + + __btrfs_remove_free_space_cache(cache->free_space_ctl); + + return 0; +} + +/* This is the high grade jackassery */ +static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) +{ + u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096); + int ret; + + test_msg("Running bitmap and extent tests\n"); + + /* + * First let's do something simple, an extent at the same offset as the + * bitmap, but the free space completely in the extent and then + * completely in the bitmap. + */ + ret = test_add_free_space_entry(cache, 4 * 1024 * 1024, 1 * 1024 * 1024, 1); + if (ret) { + test_msg("Couldn't create bitmap entry %d\n", ret); + return ret; + } + + ret = test_add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0); + if (ret) { + test_msg("Couldn't add extent entry %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024); + if (ret) { + test_msg("Couldn't remove extent entry %d\n", ret); + return ret; + } + + if (test_check_exists(cache, 0, 1 * 1024 * 1024)) { + test_msg("Left remnants after our remove\n"); + return -1; + } + + /* Now to add back the extent entry and remove from the bitmap */ + ret = test_add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0); + if (ret) { + test_msg("Couldn't re-add extent entry %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 4 * 1024 * 1024, 1 * 1024 * 1024); + if (ret) { + test_msg("Couldn't remove from bitmap %d\n", ret); + return ret; + } + + if (test_check_exists(cache, 4 * 1024 * 1024, 1 * 1024 * 1024)) { + test_msg("Left remnants in the bitmap\n"); + return -1; + } + + /* + * Ok so a little more evil, extent entry and bitmap at the same offset, + * removing an overlapping chunk. + */ + ret = test_add_free_space_entry(cache, 1 * 1024 * 1024, 4 * 1024 * 1024, 1); + if (ret) { + test_msg("Couldn't add to a bitmap %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 512 * 1024, 3 * 1024 * 1024); + if (ret) { + test_msg("Couldn't remove overlapping space %d\n", ret); + return ret; + } + + if (test_check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) { + test_msg("Left over peices after removing overlapping\n"); + return -1; + } + + __btrfs_remove_free_space_cache(cache->free_space_ctl); + + /* Now with the extent entry offset into the bitmap */ + ret = test_add_free_space_entry(cache, 4 * 1024 * 1024, 4 * 1024 * 1024, 1); + if (ret) { + test_msg("Couldn't add space to the bitmap %d\n", ret); + return ret; + } + + ret = test_add_free_space_entry(cache, 2 * 1024 * 1024, 2 * 1024 * 1024, 0); + if (ret) { + test_msg("Couldn't add extent to the cache %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 4 * 1024 * 1024); + if (ret) { + test_msg("Problem removing overlapping space %d\n", ret); + return ret; + } + + if (test_check_exists(cache, 3 * 1024 * 1024, 4 * 1024 * 1024)) { + test_msg("Left something behind when removing space"); + return -1; + } + + /* + * This has blown up in the past, the extent entry starts before the + * bitmap entry, but we're trying to remove an offset that falls + * completely within the bitmap range and is in both the extent entry + * and the bitmap entry, looks like this + * + * [ extent ] + * [ bitmap ] + * [ del ] + */ + __btrfs_remove_free_space_cache(cache->free_space_ctl); + ret = test_add_free_space_entry(cache, bitmap_offset + 4 * 1024 * 1024, + 4 * 1024 * 1024, 1); + if (ret) { + test_msg("Couldn't add bitmap %d\n", ret); + return ret; + } + + ret = test_add_free_space_entry(cache, bitmap_offset - 1 * 1024 * 1024, + 5 * 1024 * 1024, 0); + if (ret) { + test_msg("Couldn't add extent entry %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, bitmap_offset + 1 * 1024 * 1024, + 5 * 1024 * 1024); + if (ret) { + test_msg("Failed to free our space %d\n", ret); + return ret; + } + + if (test_check_exists(cache, bitmap_offset + 1 * 1024 * 1024, + 5 * 1024 * 1024)) { + test_msg("Left stuff over\n"); + return -1; + } + + __btrfs_remove_free_space_cache(cache->free_space_ctl); + + /* + * This blew up before, we have part of the free space in a bitmap and + * then the entirety of the rest of the space in an extent. This used + * to return -EAGAIN back from btrfs_remove_extent, make sure this + * doesn't happen. + */ + ret = test_add_free_space_entry(cache, 1 * 1024 * 1024, 2 * 1024 * 1024, 1); + if (ret) { + test_msg("Couldn't add bitmap entry %d\n", ret); + return ret; + } + + ret = test_add_free_space_entry(cache, 3 * 1024 * 1024, 1 * 1024 * 1024, 0); + if (ret) { + test_msg("Couldn't add extent entry %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 3 * 1024 * 1024); + if (ret) { + test_msg("Error removing bitmap and extent overlapping %d\n", ret); + return ret; + } + + __btrfs_remove_free_space_cache(cache->free_space_ctl); + return 0; +} + +int btrfs_test_free_space_cache(void) +{ + struct btrfs_block_group_cache *cache; + int ret; + + test_msg("Running btrfs free space cache tests\n"); + + cache = init_test_block_group(); + if (!cache) { + test_msg("Couldn't run the tests\n"); + return 0; + } + + ret = test_extents(cache); + if (ret) + goto out; + ret = test_bitmaps(cache); + if (ret) + goto out; + ret = test_bitmaps_and_extents(cache); + if (ret) + goto out; +out: + __btrfs_remove_free_space_cache(cache->free_space_ctl); + kfree(cache->free_space_ctl); + kfree(cache); + test_msg("Free space cache tests finished\n"); + return ret; +} -- cgit v1.2.3 From ba5e8f2e2d3074bf151dd222dae9bb400e621b82 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 16 Aug 2013 16:52:55 -0400 Subject: Btrfs: fix send issues related to inode number reuse If you are sending a snapshot and specifying a parent snapshot we will walk the trees and figure out where they differ and send the differences only. The way we check for differences are if the leaves aren't the same and if the keys are not the same within the leaves. So if neither leaf is the same (ie the leaf has been cow'ed from the parent snapshot) we walk each item in the send root and check it against the parent root. If the items match exactly then we don't do anything. This doesn't quite work for inode refs, since they will just have the name and the parent objectid. If you move the file from a directory and then remove that directory and re-create a directory with the same inode number as the old directory and then move that file back into that directory we will assume that nothing changed and you will get errors when you try to receive. In order to fix this we need to do extra checking to see if the inode ref really is the same or not. So do this by passing down BTRFS_COMPARE_TREE_SAME if the items match. Then if the key type is an inode ref we can do some extra checking, otherwise we just keep processing. The extra checking is to look up the generation of the directory in the parent volume and compare it to the generation of the send volume. If they match then they are the same directory and we are good to go. If they don't we have to add them to the changed refs list. This means we have to track the generation of the ref we're trying to lookup when we iterate all the refs for a particular inode. So in the case of looking for new refs we have to get the generation from the parent volume, and in the case of looking for deleted refs we have to get the generation from the send volume to compare with. There was also the issue of using a ulist to keep track of the directories we needed to check. Because we can get a deleted ref and a new ref for the same inode number the ulist won't work since it indexes based on the value. So instead just dup any directory ref we find and add it to a local list, and then process that list as normal and do away with using a ulist for this altogether. Before we would fail all of the tests in the far-progs that related to moving directories (test group 32). With this patch we now pass these tests, and all of the tests in the far-progs send testing suite. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 21 ++++---- fs/btrfs/ctree.h | 1 + fs/btrfs/send.c | 162 ++++++++++++++++++++++++++++++++++++++++++++----------- 3 files changed, 144 insertions(+), 40 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 5f7a97556583..0708ebed2df7 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -5297,19 +5297,20 @@ int btrfs_compare_trees(struct btrfs_root *left_root, goto out; advance_right = ADVANCE; } else { + enum btrfs_compare_tree_result cmp; + WARN_ON(!extent_buffer_uptodate(left_path->nodes[0])); ret = tree_compare_item(left_root, left_path, right_path, tmp_buf); - if (ret) { - WARN_ON(!extent_buffer_uptodate(left_path->nodes[0])); - ret = changed_cb(left_root, right_root, - left_path, right_path, - &left_key, - BTRFS_COMPARE_TREE_CHANGED, - ctx); - if (ret < 0) - goto out; - } + if (ret) + cmp = BTRFS_COMPARE_TREE_CHANGED; + else + cmp = BTRFS_COMPARE_TREE_SAME; + ret = changed_cb(left_root, right_root, + left_path, right_path, + &left_key, cmp, ctx); + if (ret < 0) + goto out; advance_left = ADVANCE; advance_right = ADVANCE; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7c93d9f13812..9766e9f04024 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3301,6 +3301,7 @@ enum btrfs_compare_tree_result { BTRFS_COMPARE_TREE_NEW, BTRFS_COMPARE_TREE_DELETED, BTRFS_COMPARE_TREE_CHANGED, + BTRFS_COMPARE_TREE_SAME, }; typedef int (*btrfs_changed_cb_t)(struct btrfs_root *left_root, struct btrfs_root *right_root, diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index f8f8b1f3bb2b..fc03a5755a6b 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -2630,6 +2630,22 @@ static int record_ref(struct list_head *head, u64 dir, return 0; } +static int dup_ref(struct recorded_ref *ref, struct list_head *list) +{ + struct recorded_ref *new; + + new = kmalloc(sizeof(*ref), GFP_NOFS); + if (!new) + return -ENOMEM; + + new->dir = ref->dir; + new->dir_gen = ref->dir_gen; + new->full_path = NULL; + INIT_LIST_HEAD(&new->list); + list_add_tail(&new->list, list); + return 0; +} + static void __free_recorded_refs(struct list_head *head) { struct recorded_ref *cur; @@ -2744,9 +2760,7 @@ static int process_recorded_refs(struct send_ctx *sctx) int ret = 0; struct recorded_ref *cur; struct recorded_ref *cur2; - struct ulist *check_dirs = NULL; - struct ulist_iterator uit; - struct ulist_node *un; + struct list_head check_dirs; struct fs_path *valid_path = NULL; u64 ow_inode = 0; u64 ow_gen; @@ -2760,6 +2774,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); * which is always '..' */ BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID); + INIT_LIST_HEAD(&check_dirs); valid_path = fs_path_alloc(); if (!valid_path) { @@ -2767,12 +2782,6 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); goto out; } - check_dirs = ulist_alloc(GFP_NOFS); - if (!check_dirs) { - ret = -ENOMEM; - goto out; - } - /* * First, check if the first ref of the current inode was overwritten * before. If yes, we know that the current inode was already orphanized @@ -2909,8 +2918,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); goto out; } } - ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, - GFP_NOFS); + ret = dup_ref(cur, &check_dirs); if (ret < 0) goto out; } @@ -2938,8 +2946,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); } list_for_each_entry(cur, &sctx->deleted_refs, list) { - ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, - GFP_NOFS); + ret = dup_ref(cur, &check_dirs); if (ret < 0) goto out; } @@ -2950,8 +2957,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); */ cur = list_entry(sctx->deleted_refs.next, struct recorded_ref, list); - ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, - GFP_NOFS); + ret = dup_ref(cur, &check_dirs); if (ret < 0) goto out; } else if (!S_ISDIR(sctx->cur_inode_mode)) { @@ -2971,12 +2977,10 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); if (ret < 0) goto out; } - ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, - GFP_NOFS); + ret = dup_ref(cur, &check_dirs); if (ret < 0) goto out; } - /* * If the inode is still orphan, unlink the orphan. This may * happen when a previous inode did overwrite the first ref @@ -2998,33 +3002,32 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); * deletion and if it's finally possible to perform the rmdir now. * We also update the inode stats of the parent dirs here. */ - ULIST_ITER_INIT(&uit); - while ((un = ulist_next(check_dirs, &uit))) { + list_for_each_entry(cur, &check_dirs, list) { /* * In case we had refs into dirs that were not processed yet, * we don't need to do the utime and rmdir logic for these dirs. * The dir will be processed later. */ - if (un->val > sctx->cur_ino) + if (cur->dir > sctx->cur_ino) continue; - ret = get_cur_inode_state(sctx, un->val, un->aux); + ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen); if (ret < 0) goto out; if (ret == inode_state_did_create || ret == inode_state_no_change) { /* TODO delayed utimes */ - ret = send_utimes(sctx, un->val, un->aux); + ret = send_utimes(sctx, cur->dir, cur->dir_gen); if (ret < 0) goto out; } else if (ret == inode_state_did_delete) { - ret = can_rmdir(sctx, un->val, sctx->cur_ino); + ret = can_rmdir(sctx, cur->dir, sctx->cur_ino); if (ret < 0) goto out; if (ret) { - ret = get_cur_path(sctx, un->val, un->aux, - valid_path); + ret = get_cur_path(sctx, cur->dir, + cur->dir_gen, valid_path); if (ret < 0) goto out; ret = send_rmdir(sctx, valid_path); @@ -3037,8 +3040,8 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); ret = 0; out: + __free_recorded_refs(&check_dirs); free_recorded_refs(sctx); - ulist_free(check_dirs); fs_path_free(valid_path); return ret; } @@ -3139,6 +3142,8 @@ out: struct find_ref_ctx { u64 dir; + u64 dir_gen; + struct btrfs_root *root; struct fs_path *name; int found_idx; }; @@ -3148,9 +3153,21 @@ static int __find_iref(int num, u64 dir, int index, void *ctx_) { struct find_ref_ctx *ctx = ctx_; + u64 dir_gen; + int ret; if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) && strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) { + /* + * To avoid doing extra lookups we'll only do this if everything + * else matches. + */ + ret = get_inode_info(ctx->root, dir, NULL, &dir_gen, NULL, + NULL, NULL, NULL); + if (ret) + return ret; + if (dir_gen != ctx->dir_gen) + return 0; ctx->found_idx = num; return 1; } @@ -3160,14 +3177,16 @@ static int __find_iref(int num, u64 dir, int index, static int find_iref(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *key, - u64 dir, struct fs_path *name) + u64 dir, u64 dir_gen, struct fs_path *name) { int ret; struct find_ref_ctx ctx; ctx.dir = dir; ctx.name = name; + ctx.dir_gen = dir_gen; ctx.found_idx = -1; + ctx.root = root; ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx); if (ret < 0) @@ -3183,11 +3202,17 @@ static int __record_changed_new_ref(int num, u64 dir, int index, struct fs_path *name, void *ctx) { + u64 dir_gen; int ret; struct send_ctx *sctx = ctx; + ret = get_inode_info(sctx->send_root, dir, NULL, &dir_gen, NULL, + NULL, NULL, NULL); + if (ret) + return ret; + ret = find_iref(sctx->parent_root, sctx->right_path, - sctx->cmp_key, dir, name); + sctx->cmp_key, dir, dir_gen, name); if (ret == -ENOENT) ret = __record_new_ref(num, dir, index, name, sctx); else if (ret > 0) @@ -3200,11 +3225,17 @@ static int __record_changed_deleted_ref(int num, u64 dir, int index, struct fs_path *name, void *ctx) { + u64 dir_gen; int ret; struct send_ctx *sctx = ctx; + ret = get_inode_info(sctx->parent_root, dir, NULL, &dir_gen, NULL, + NULL, NULL, NULL); + if (ret) + return ret; + ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key, - dir, name); + dir, dir_gen, name); if (ret == -ENOENT) ret = __record_deleted_ref(num, dir, index, name, sctx); else if (ret > 0) @@ -4381,6 +4412,64 @@ static int changed_extent(struct send_ctx *sctx, return ret; } +static int dir_changed(struct send_ctx *sctx, u64 dir) +{ + u64 orig_gen, new_gen; + int ret; + + ret = get_inode_info(sctx->send_root, dir, NULL, &new_gen, NULL, NULL, + NULL, NULL); + if (ret) + return ret; + + ret = get_inode_info(sctx->parent_root, dir, NULL, &orig_gen, NULL, + NULL, NULL, NULL); + if (ret) + return ret; + + return (orig_gen != new_gen) ? 1 : 0; +} + +static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path, + struct btrfs_key *key) +{ + struct btrfs_inode_extref *extref; + struct extent_buffer *leaf; + u64 dirid = 0, last_dirid = 0; + unsigned long ptr; + u32 item_size; + u32 cur_offset = 0; + int ref_name_len; + int ret = 0; + + /* Easy case, just check this one dirid */ + if (key->type == BTRFS_INODE_REF_KEY) { + dirid = key->offset; + + ret = dir_changed(sctx, dirid); + goto out; + } + + leaf = path->nodes[0]; + item_size = btrfs_item_size_nr(leaf, path->slots[0]); + ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); + while (cur_offset < item_size) { + extref = (struct btrfs_inode_extref *)(ptr + + cur_offset); + dirid = btrfs_inode_extref_parent(leaf, extref); + ref_name_len = btrfs_inode_extref_name_len(leaf, extref); + cur_offset += ref_name_len + sizeof(*extref); + if (dirid == last_dirid) + continue; + ret = dir_changed(sctx, dirid); + if (ret) + break; + last_dirid = dirid; + } +out: + return ret; +} + /* * Updates compare related fields in sctx and simply forwards to the actual * changed_xxx functions. @@ -4396,6 +4485,19 @@ static int changed_cb(struct btrfs_root *left_root, int ret = 0; struct send_ctx *sctx = ctx; + if (result == BTRFS_COMPARE_TREE_SAME) { + if (key->type != BTRFS_INODE_REF_KEY && + key->type != BTRFS_INODE_EXTREF_KEY) + return 0; + ret = compare_refs(sctx, left_path, key); + if (!ret) + return 0; + if (ret < 0) + return ret; + result = BTRFS_COMPARE_TREE_CHANGED; + ret = 0; + } + sctx->left_path = left_path; sctx->right_path = right_path; sctx->cmp_key = key; -- cgit v1.2.3 From 4082bd3d734c41b122a91d2a5aded4b76baa84b2 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Wed, 14 Aug 2013 09:13:36 +0800 Subject: Btrfs: fix oops when writing dirty qgroups to disk When disabling quota, we should clear out list 'dirty_qgroups',otherwise, we will get oops if enabling quota again. Fix this by abstracting similar code from del_qgroup_rb(). Signed-off-by: Wang Shilong Reviewed-by: Miao Xie Reviewed-by: Arne Jansen Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/qgroup.c | 41 +++++++++++++---------------------------- 1 file changed, 13 insertions(+), 28 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 64a9e3cb9453..57b9805873f7 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -157,18 +157,11 @@ static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info, return qgroup; } -/* must be called with qgroup_lock held */ -static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) +static void __del_qgroup_rb(struct btrfs_qgroup *qgroup) { - struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid); struct btrfs_qgroup_list *list; - if (!qgroup) - return -ENOENT; - - rb_erase(&qgroup->node, &fs_info->qgroup_tree); list_del(&qgroup->dirty); - while (!list_empty(&qgroup->groups)) { list = list_first_entry(&qgroup->groups, struct btrfs_qgroup_list, next_group); @@ -185,7 +178,18 @@ static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) kfree(list); } kfree(qgroup); +} +/* must be called with qgroup_lock held */ +static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) +{ + struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid); + + if (!qgroup) + return -ENOENT; + + rb_erase(&qgroup->node, &fs_info->qgroup_tree); + __del_qgroup_rb(qgroup); return 0; } @@ -435,30 +439,11 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) { struct rb_node *n; struct btrfs_qgroup *qgroup; - struct btrfs_qgroup_list *list; while ((n = rb_first(&fs_info->qgroup_tree))) { qgroup = rb_entry(n, struct btrfs_qgroup, node); rb_erase(n, &fs_info->qgroup_tree); - - while (!list_empty(&qgroup->groups)) { - list = list_first_entry(&qgroup->groups, - struct btrfs_qgroup_list, - next_group); - list_del(&list->next_group); - list_del(&list->next_member); - kfree(list); - } - - while (!list_empty(&qgroup->members)) { - list = list_first_entry(&qgroup->members, - struct btrfs_qgroup_list, - next_member); - list_del(&list->next_group); - list_del(&list->next_member); - kfree(list); - } - kfree(qgroup); + __del_qgroup_rb(qgroup); } /* * we call btrfs_free_qgroup_config() when umounting -- cgit v1.2.3 From e685da14af6b31e4b336a110cb1bae1afc268be8 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Wed, 14 Aug 2013 09:13:37 +0800 Subject: Btrfs: move btrfs_free_qgroup_config() out of spin_lock and fix comments btrfs_free_qgroup_config() is not only called by open/close_ctree(),but also btrfs_disable_quota().And for btrfs_disable_quota(),we have set 'quota_root' to be null before calling btrfs_free_qgroup_config(),so it is safe to cleanup in-memory structures without lock held. Signed-off-by: Wang Shilong Reviewed-by: Miao Xie Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/qgroup.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 57b9805873f7..01eeffe9ae53 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -432,8 +432,10 @@ out: } /* - * This is only called from close_ctree() or open_ctree(), both in single- - * treaded paths. Clean up the in-memory structures. No locking needed. + * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(), + * first two are in single-threaded paths.And for the third one, we have set + * quota_root to be null with qgroup_lock held before, so it is safe to clean + * up the in-memory structures without qgroup_lock held. */ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) { @@ -937,9 +939,10 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, fs_info->pending_quota_state = 0; quota_root = fs_info->quota_root; fs_info->quota_root = NULL; - btrfs_free_qgroup_config(fs_info); spin_unlock(&fs_info->qgroup_lock); + btrfs_free_qgroup_config(fs_info); + if (!quota_root) { ret = -EINVAL; goto out; -- cgit v1.2.3 From b006b2e4f96a21958164ebf37e53c555d9e4fb2f Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Wed, 14 Aug 2013 09:13:38 +0800 Subject: Btrfs: remove reduplicate check when disabling quota We have checked 'quota_root' with qgroup_ioctl_lock held before,So here the check is reduplicate, remove it. Signed-off-by: Wang Shilong Reviewed-by: Miao Xie Reviewed-by: Arne Jansen Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/qgroup.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 01eeffe9ae53..66c58e120b2c 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -943,11 +943,6 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, btrfs_free_qgroup_config(fs_info); - if (!quota_root) { - ret = -EINVAL; - goto out; - } - ret = btrfs_clean_quota_tree(trans, quota_root); if (ret) goto out; -- cgit v1.2.3 From a696cf3529cecd261d6534fec242e39177b57e19 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Wed, 14 Aug 2013 03:00:20 +0100 Subject: Btrfs: add missing error code to BTRFS_IOC_INO_LOOKUP handler If the path doesn't fit in the input buffer, return ENAMETOOLONG instead of returning with a success code (0) and a partially filled and right justified buffer. Also removed useless buffer pointer check outside the while loop. Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 317a984fe3c9..f88eeea365f3 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2030,8 +2030,10 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, len = btrfs_inode_ref_name_len(l, iref); ptr -= len + 1; total_len += len + 1; - if (ptr < name) + if (ptr < name) { + ret = -ENAMETOOLONG; goto out; + } *(ptr + len) = '/'; read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len); @@ -2044,8 +2046,6 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, key.offset = (u64)-1; dirid = key.objectid; } - if (ptr < name) - goto out; memmove(name, ptr, total_len); name[total_len]='\0'; ret = 0; -- cgit v1.2.3 From 18674c6cc10e78745f7af7faf26583ea9fe78c38 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Wed, 14 Aug 2013 03:00:21 +0100 Subject: Btrfs: don't miss inode ref items in BTRFS_IOC_INO_LOOKUP If the inode ref key was not found and the current leaf slot was 0 (first item in the leaf) the code would always return -ENOENT. This was not correct because the desired inode ref item might be the last item in the previous leaf. Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f88eeea365f3..82fb11a361c4 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2013,19 +2013,21 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto out; + else if (ret > 0) { + ret = btrfs_previous_item(root, path, dirid, + BTRFS_INODE_REF_KEY); + if (ret < 0) + goto out; + else if (ret > 0) { + ret = -ENOENT; + goto out; + } + } l = path->nodes[0]; slot = path->slots[0]; - if (ret > 0 && slot > 0) - slot--; btrfs_item_key_to_cpu(l, &key, slot); - if (ret > 0 && (key.objectid != dirid || - key.type != BTRFS_INODE_REF_KEY)) { - ret = -ENOENT; - goto out; - } - iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref); len = btrfs_inode_ref_name_len(l, iref); ptr -= len + 1; -- cgit v1.2.3 From 35a3621beb3e2face3e7954eaee20a8fa0043fac Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Wed, 14 Aug 2013 18:12:25 +0200 Subject: Btrfs: get rid of sparse warnings make C=2 fs/btrfs/ CF=-D__CHECK_ENDIAN__ I tried to filter out the warnings for which patches have already been sent to the mailing list, pending for inclusion in btrfs-next. All these changes should be obviously safe. Signed-off-by: Stefan Behrens Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 2 +- fs/btrfs/check-integrity.c | 2 +- fs/btrfs/ctree.c | 6 +++--- fs/btrfs/ctree.h | 3 +++ fs/btrfs/delayed-ref.c | 2 +- fs/btrfs/disk-io.c | 8 ++++---- fs/btrfs/extent_io.c | 4 ++-- fs/btrfs/send.c | 4 ++-- fs/btrfs/volumes.c | 3 --- 9 files changed, 17 insertions(+), 17 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index ae798c1d088b..977e9c66f2e3 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -365,7 +365,7 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, node = ulist_next(parents, &uiter); ref->parent = node ? node->val : 0; ref->inode_list = node ? - (struct extent_inode_elem *)(uintptr_t)node->aux : 0; + (struct extent_inode_elem *)(uintptr_t)node->aux : NULL; /* additional parents require new refs being added here */ while ((node = ulist_next(parents, &uiter))) { diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 1f706e93e622..07f39a3dedb0 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -2441,7 +2441,7 @@ static int btrfsic_process_written_superblock( int num_copies; int mirror_num; const char *additional_string = NULL; - struct btrfs_disk_key tmp_disk_key; + struct btrfs_disk_key tmp_disk_key = {0}; btrfs_set_disk_key_objectid(&tmp_disk_key, BTRFS_ROOT_ITEM_KEY); diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 0708ebed2df7..09b3870c2729 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1067,7 +1067,7 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, int looped = 0; if (!time_seq) - return 0; + return NULL; /* * the very last operation that's logged for a root is the replacement @@ -1078,7 +1078,7 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, tm = tree_mod_log_search_oldest(fs_info, root_logical, time_seq); if (!looped && !tm) - return 0; + return NULL; /* * if there are no tree operation for the oldest root, we simply * return it. this should only happen if that (old) root is at @@ -4782,7 +4782,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, * This may release the path, and so you may lose any locks held at the * time you call it. */ -int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) +static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) { struct btrfs_key key; struct btrfs_disk_key found_key; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9766e9f04024..56f8c1b1016a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3706,6 +3706,9 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, u64 newer_than, unsigned long max_pages); void btrfs_get_block_group_info(struct list_head *groups_list, struct btrfs_ioctl_space_info *space); +void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, + struct btrfs_ioctl_balance_args *bargs); + /* file.c */ int btrfs_auto_defrag_init(void); diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index f7be9f76a12b..e4d467be2dd4 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -241,7 +241,7 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, return 0; } -static void inline drop_delayed_ref(struct btrfs_trans_handle *trans, +static inline void drop_delayed_ref(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_root *delayed_refs, struct btrfs_delayed_ref_node *ref) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 68391ecf2c59..77a9d46b5cdb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1433,8 +1433,8 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, return 0; } -struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, - struct btrfs_key *key) +static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, + struct btrfs_key *key) { struct btrfs_root *root; struct btrfs_fs_info *fs_info = tree_root->fs_info; @@ -3754,8 +3754,8 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info) spin_unlock(&fs_info->ordered_root_lock); } -int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, - struct btrfs_root *root) +static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, + struct btrfs_root *root) { struct rb_node *node; struct btrfs_delayed_ref_root *delayed_refs; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index df6e6c27782e..94362ad5dc49 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2954,11 +2954,11 @@ static void __extent_readpages(struct extent_io_tree *tree, struct bio **bio, int mirror_num, unsigned long *bio_flags, int rw) { - u64 start; + u64 start = 0; u64 end = 0; u64 page_start; int index; - int first_index; + int first_index = 0; for (index = 0; index < nr_pages; index++) { page_start = page_offset(pages[index]); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index fc03a5755a6b..db7da682f0d2 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -54,8 +54,8 @@ struct fs_path { char *buf; int buf_len; - int reversed:1; - int virtual_mem:1; + unsigned int reversed:1; + unsigned int virtual_mem:1; char inline_buf[]; }; char pad[PAGE_SIZE]; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 44abd151132a..306547b51a13 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3076,9 +3076,6 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info) atomic_set(&fs_info->mutually_exclusive_operation_running, 0); } -void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, - struct btrfs_ioctl_balance_args *bargs); - /* * Should be called with both balance and volume mutexes held */ -- cgit v1.2.3 From 171170c1c5625cab9687ecf6714e09e0c8a6ed3c Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Wed, 14 Aug 2013 23:27:46 +0300 Subject: btrfs: mark some local function as 'static' Cc: Josef Bacik Cc: Chris Mason Signed-off-by: Sergei Trofimovich Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 -- fs/btrfs/disk-io.c | 4 ++-- fs/btrfs/extent_io.c | 2 +- fs/btrfs/extent_io.h | 1 - fs/btrfs/root-tree.c | 4 ++-- fs/btrfs/transaction.c | 2 +- fs/btrfs/transaction.h | 2 -- 7 files changed, 6 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 56f8c1b1016a..f697d00819b8 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3471,8 +3471,6 @@ int __must_check btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_root_item *item); -void btrfs_read_root_item(struct extent_buffer *eb, int slot, - struct btrfs_root_item *item); int btrfs_find_root(struct btrfs_root *root, struct btrfs_key *search_key, struct btrfs_path *path, struct btrfs_root_item *root_item, struct btrfs_key *root_key); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 77a9d46b5cdb..f96a237bb40a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1534,8 +1534,8 @@ fail: return ret; } -struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, - u64 root_id) +static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, + u64 root_id) { struct btrfs_root *root; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 94362ad5dc49..3e7d604004f9 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1791,7 +1791,7 @@ out: * set the private field for a given byte offset in the tree. If there isn't * an extent_state there already, this does nothing. */ -int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) +static int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) { struct rb_node *node; struct extent_state *state; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 3940a0386865..6dbc645f1f3d 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -263,7 +263,6 @@ int extent_readpages(struct extent_io_tree *tree, get_extent_t get_extent); int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len, get_extent_t *get_extent); -int set_state_private(struct extent_io_tree *tree, u64 start, u64 private); int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private); void set_page_extent_mapped(struct page *page); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 70193d0e7058..c67b432d867d 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -29,8 +29,8 @@ * generation numbers as then we know the root was once mounted with an older * kernel that was not aware of the root item structure change. */ -void btrfs_read_root_item(struct extent_buffer *eb, int slot, - struct btrfs_root_item *item) +static void btrfs_read_root_item(struct extent_buffer *eb, int slot, + struct btrfs_root_item *item) { uuid_le uuid; int len; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index ea8d52212d75..579efcc1de16 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -837,7 +837,7 @@ int btrfs_wait_marked_extents(struct btrfs_root *root, * them in one of two extent_io trees. This is used to make sure all of * those extents are on disk for transaction or log commit */ -int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, +static int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, struct extent_io_tree *dirty_pages, int mark) { int ret; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index defbc4269897..5c2af8491621 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -160,8 +160,6 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, void btrfs_throttle(struct btrfs_root *root); int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, - struct extent_io_tree *dirty_pages, int mark); int btrfs_write_marked_extents(struct btrfs_root *root, struct extent_io_tree *dirty_pages, int mark); int btrfs_wait_marked_extents(struct btrfs_root *root, -- cgit v1.2.3 From 07b30a49dac4f60a6c4b0b3938bd6f45affb9455 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Thu, 15 Aug 2013 17:11:17 +0200 Subject: Btrfs: introduce a tree for items that map UUIDs to something Mapping UUIDs to subvolume IDs is an operation with a high effort today. Today, the algorithm even has quadratic effort (based on the number of existing subvolumes), which means, that it takes minutes to send/receive a single subvolume if 10,000 subvolumes exist. But even linear effort would be too much since it is a waste. And these data structures to allow mapping UUIDs to subvolume IDs are created every time a btrfs send/receive instance is started. It is much more efficient to maintain a searchable persistent data structure in the filesystem, one that is updated whenever a subvolume/snapshot is created and deleted, and when the received subvolume UUID is set by the btrfs-receive tool. Therefore kernel code is added with this commit that is able to maintain data structures in the filesystem that allow to quickly search for a given UUID and to retrieve data that is assigned to this UUID, like which subvolume ID is related to this UUID. This commit adds a new tree to hold UUID-to-data mapping items. The key of the items is the full UUID plus the key type BTRFS_UUID_KEY. Multiple data blocks can be stored for a given UUID, a type/length/ value scheme is used. Now follows the lengthy justification, why a new tree was added instead of using the existing root tree: The first approach was to not create another tree that holds UUID items. Instead, the items should just go into the top root tree. Unfortunately this confused the algorithm to assign the objectid of subvolumes and snapshots. The reason is that btrfs_find_free_objectid() calls btrfs_find_highest_objectid() for the first created subvol or snapshot after mounting a filesystem, and this function simply searches for the largest used objectid in the root tree keys to pick the next objectid to assign. Of course, the UUID keys have always been the ones with the highest offset value, and the next assigned subvol ID was wastefully huge. To use any other existing tree did not look proper. To apply a workaround such as setting the objectid to zero in the UUID item key and to implement collision handling would either add limitations (in case of a btrfs_extend_item() approach to handle the collisions) or a lot of complexity and source code (in case a key would be looked up that is free of collisions). Adding new code that introduces limitations is not good, and adding code that is complex and lengthy for no good reason is also not good. That's the justification why a completely new tree was introduced. Signed-off-by: Stefan Behrens Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 3 +- fs/btrfs/ctree.h | 24 ++++++ fs/btrfs/uuid-tree.c | 235 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 261 insertions(+), 1 deletion(-) create mode 100644 fs/btrfs/uuid-tree.c diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 47553d553206..a91a6a355cc5 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -8,7 +8,8 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ export.o tree-log.o free-space-cache.o zlib.o lzo.o \ compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ - reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o + reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \ + uuid-tree.o btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f697d00819b8..429c54ac1a65 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -91,6 +91,9 @@ struct btrfs_ordered_sum; /* holds quota configuration and tracking */ #define BTRFS_QUOTA_TREE_OBJECTID 8ULL +/* for storing items that use the BTRFS_UUID_KEY* types */ +#define BTRFS_UUID_TREE_OBJECTID 9ULL + /* for storing balance parameters in the root tree */ #define BTRFS_BALANCE_OBJECTID -4ULL @@ -1942,6 +1945,19 @@ struct btrfs_ioctl_defrag_range_args { */ #define BTRFS_DEV_REPLACE_KEY 250 +/* + * Stores items that allow to quickly map UUIDs to something else. + * These items are part of the filesystem UUID tree. + * The key is built like this: + * (UUID_upper_64_bits, BTRFS_UUID_KEY*, UUID_lower_64_bits). + */ +#if BTRFS_UUID_SIZE != 16 +#error "UUID items require BTRFS_UUID_SIZE == 16!" +#endif +#define BTRFS_UUID_KEY_SUBVOL 251 /* for UUIDs assigned to subvols */ +#define BTRFS_UUID_KEY_RECEIVED_SUBVOL 252 /* for UUIDs assigned to + * received subvols */ + /* * string items are for debugging. They just store a short string of * data in the FS @@ -3481,6 +3497,14 @@ void btrfs_check_and_init_root_item(struct btrfs_root_item *item); void btrfs_update_root_times(struct btrfs_trans_handle *trans, struct btrfs_root *root); +/* uuid-tree.c */ +int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, + struct btrfs_root *uuid_root, u8 *uuid, u8 type, + u64 subid); +int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans, + struct btrfs_root *uuid_root, u8 *uuid, u8 type, + u64 subid); + /* dir-item.c */ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, const char *name, int name_len); diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c new file mode 100644 index 000000000000..04a04fa0f7c4 --- /dev/null +++ b/fs/btrfs/uuid-tree.c @@ -0,0 +1,235 @@ +/* + * Copyright (C) STRATO AG 2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ +#include +#include +#include "ctree.h" +#include "transaction.h" +#include "disk-io.h" +#include "print-tree.h" + + +static void btrfs_uuid_to_key(u8 *uuid, u8 type, struct btrfs_key *key) +{ + key->type = type; + key->objectid = get_unaligned_le64(uuid); + key->offset = get_unaligned_le64(uuid + sizeof(u64)); +} + +/* return -ENOENT for !found, < 0 for errors, or 0 if an item was found */ +static int btrfs_uuid_tree_lookup(struct btrfs_root *uuid_root, u8 *uuid, + u8 type, u64 subid) +{ + int ret; + struct btrfs_path *path = NULL; + struct extent_buffer *eb; + int slot; + u32 item_size; + unsigned long offset; + struct btrfs_key key; + + if (WARN_ON_ONCE(!uuid_root)) { + ret = -ENOENT; + goto out; + } + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + btrfs_uuid_to_key(uuid, type, &key); + ret = btrfs_search_slot(NULL, uuid_root, &key, path, 0, 0); + if (ret < 0) { + goto out; + } else if (ret > 0) { + ret = -ENOENT; + goto out; + } + + eb = path->nodes[0]; + slot = path->slots[0]; + item_size = btrfs_item_size_nr(eb, slot); + offset = btrfs_item_ptr_offset(eb, slot); + ret = -ENOENT; + + if (!IS_ALIGNED(item_size, sizeof(u64))) { + pr_warn("btrfs: uuid item with illegal size %lu!\n", + (unsigned long)item_size); + goto out; + } + while (item_size) { + __le64 data; + + read_extent_buffer(eb, &data, offset, sizeof(data)); + if (le64_to_cpu(data) == subid) { + ret = 0; + break; + } + offset += sizeof(data); + item_size -= sizeof(data); + } + +out: + btrfs_free_path(path); + return ret; +} + +int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, + struct btrfs_root *uuid_root, u8 *uuid, u8 type, + u64 subid_cpu) +{ + int ret; + struct btrfs_path *path = NULL; + struct btrfs_key key; + struct extent_buffer *eb; + int slot; + unsigned long offset; + __le64 subid_le; + + ret = btrfs_uuid_tree_lookup(uuid_root, uuid, type, subid_cpu); + if (ret != -ENOENT) + return ret; + + if (WARN_ON_ONCE(!uuid_root)) { + ret = -EINVAL; + goto out; + } + + btrfs_uuid_to_key(uuid, type, &key); + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + ret = btrfs_insert_empty_item(trans, uuid_root, path, &key, + sizeof(subid_le)); + if (ret >= 0) { + /* Add an item for the type for the first time */ + eb = path->nodes[0]; + slot = path->slots[0]; + offset = btrfs_item_ptr_offset(eb, slot); + } else if (ret == -EEXIST) { + /* + * An item with that type already exists. + * Extend the item and store the new subid at the end. + */ + btrfs_extend_item(uuid_root, path, sizeof(subid_le)); + eb = path->nodes[0]; + slot = path->slots[0]; + offset = btrfs_item_ptr_offset(eb, slot); + offset += btrfs_item_size_nr(eb, slot) - sizeof(subid_le); + } else if (ret < 0) { + pr_warn("btrfs: insert uuid item failed %d (0x%016llx, 0x%016llx) type %u!\n", + ret, (unsigned long long)key.objectid, + (unsigned long long)key.offset, type); + goto out; + } + + ret = 0; + subid_le = cpu_to_le64(subid_cpu); + write_extent_buffer(eb, &subid_le, offset, sizeof(subid_le)); + btrfs_mark_buffer_dirty(eb); + +out: + btrfs_free_path(path); + return ret; +} + +int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans, + struct btrfs_root *uuid_root, u8 *uuid, u8 type, + u64 subid) +{ + int ret; + struct btrfs_path *path = NULL; + struct btrfs_key key; + struct extent_buffer *eb; + int slot; + unsigned long offset; + u32 item_size; + unsigned long move_dst; + unsigned long move_src; + unsigned long move_len; + + if (WARN_ON_ONCE(!uuid_root)) { + ret = -EINVAL; + goto out; + } + + btrfs_uuid_to_key(uuid, type, &key); + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + ret = btrfs_search_slot(trans, uuid_root, &key, path, -1, 1); + if (ret < 0) { + pr_warn("btrfs: error %d while searching for uuid item!\n", + ret); + goto out; + } + if (ret > 0) { + ret = -ENOENT; + goto out; + } + + eb = path->nodes[0]; + slot = path->slots[0]; + offset = btrfs_item_ptr_offset(eb, slot); + item_size = btrfs_item_size_nr(eb, slot); + if (!IS_ALIGNED(item_size, sizeof(u64))) { + pr_warn("btrfs: uuid item with illegal size %lu!\n", + (unsigned long)item_size); + ret = -ENOENT; + goto out; + } + while (item_size) { + __le64 read_subid; + + read_extent_buffer(eb, &read_subid, offset, sizeof(read_subid)); + if (le64_to_cpu(read_subid) == subid) + break; + offset += sizeof(read_subid); + item_size -= sizeof(read_subid); + } + + if (!item_size) { + ret = -ENOENT; + goto out; + } + + item_size = btrfs_item_size_nr(eb, slot); + if (item_size == sizeof(subid)) { + ret = btrfs_del_item(trans, uuid_root, path); + goto out; + } + + move_dst = offset; + move_src = offset + sizeof(subid); + move_len = item_size - (move_src - btrfs_item_ptr_offset(eb, slot)); + memmove_extent_buffer(eb, move_dst, move_src, move_len); + btrfs_truncate_item(uuid_root, path, item_size - sizeof(subid), 1); + +out: + btrfs_free_path(path); + return ret; +} -- cgit v1.2.3 From 8f8ae8e213300378d2506b68d581c80d9dfd2faf Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Thu, 15 Aug 2013 17:11:18 +0200 Subject: Btrfs: support printing UUID tree elements This commit adds support to print UUID tree elements to print-tree.c. Signed-off-by: Stefan Behrens Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/print-tree.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index fe7bf4a12568..32022f3f169b 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -155,6 +155,25 @@ static void print_extent_ref_v0(struct extent_buffer *eb, int slot) } #endif +static void print_uuid_item(struct extent_buffer *l, unsigned long offset, + u32 item_size) +{ + if (!IS_ALIGNED(item_size, sizeof(u64))) { + pr_warn("btrfs: uuid item with illegal size %lu!\n", + (unsigned long)item_size); + return; + } + while (item_size) { + __le64 subvol_id; + + read_extent_buffer(l, &subvol_id, offset, sizeof(subvol_id)); + printk(KERN_INFO "\t\tsubvol_id %llu\n", + (unsigned long long)le64_to_cpu(subvol_id)); + item_size -= sizeof(u64); + offset += sizeof(u64); + } +} + void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) { int i; @@ -301,6 +320,11 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) case BTRFS_DEV_REPLACE_KEY: printk(KERN_INFO "\t\tdev replace\n"); break; + case BTRFS_UUID_KEY_SUBVOL: + case BTRFS_UUID_KEY_RECEIVED_SUBVOL: + print_uuid_item(l, btrfs_item_ptr_offset(l, i), + btrfs_item_size_nr(l, i)); + break; }; } } -- cgit v1.2.3 From f7a81ea4cc6bdb51d8267d2f3ff485f0b4070074 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Thu, 15 Aug 2013 17:11:19 +0200 Subject: Btrfs: create UUID tree if required This tree is not created by mkfs.btrfs. Therefore when a filesystem is mounted writable and the UUID tree does not exist, this tree is created if required. The tree is also added to the fs_info structure and initialized, but this commit does not yet read or write UUID tree elements. Signed-off-by: Stefan Behrens Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 34 ++++++++++++++++++++++++++++++++++ fs/btrfs/extent-tree.c | 3 +++ fs/btrfs/volumes.c | 26 ++++++++++++++++++++++++++ fs/btrfs/volumes.h | 1 + 5 files changed, 65 insertions(+) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 429c54ac1a65..c03e61ef7230 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1306,6 +1306,7 @@ struct btrfs_fs_info { struct btrfs_root *fs_root; struct btrfs_root *csum_root; struct btrfs_root *quota_root; + struct btrfs_root *uuid_root; /* the log root tree is a directory of all the other log roots */ struct btrfs_root *log_root_tree; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f96a237bb40a..80bd63738c6d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1586,6 +1586,9 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, if (location->objectid == BTRFS_QUOTA_TREE_OBJECTID) return fs_info->quota_root ? fs_info->quota_root : ERR_PTR(-ENOENT); + if (location->objectid == BTRFS_UUID_TREE_OBJECTID) + return fs_info->uuid_root ? fs_info->uuid_root : + ERR_PTR(-ENOENT); again: root = btrfs_lookup_fs_root(fs_info, location->objectid); if (root) @@ -2044,6 +2047,12 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) info->quota_root->node = NULL; info->quota_root->commit_root = NULL; } + if (info->uuid_root) { + free_extent_buffer(info->uuid_root->node); + free_extent_buffer(info->uuid_root->commit_root); + info->uuid_root->node = NULL; + info->uuid_root->commit_root = NULL; + } if (chunk_root) { free_extent_buffer(info->chunk_root->node); free_extent_buffer(info->chunk_root->commit_root); @@ -2104,11 +2113,13 @@ int open_ctree(struct super_block *sb, struct btrfs_root *chunk_root; struct btrfs_root *dev_root; struct btrfs_root *quota_root; + struct btrfs_root *uuid_root; struct btrfs_root *log_tree_root; int ret; int err = -EINVAL; int num_backups_tried = 0; int backup_index = 0; + bool create_uuid_tree = false; tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info); chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); @@ -2704,6 +2715,18 @@ retry_root_backup: fs_info->quota_root = quota_root; } + location.objectid = BTRFS_UUID_TREE_OBJECTID; + uuid_root = btrfs_read_tree_root(tree_root, &location); + if (IS_ERR(uuid_root)) { + ret = PTR_ERR(uuid_root); + if (ret != -ENOENT) + goto recovery_tree_root; + create_uuid_tree = true; + } else { + uuid_root->track_dirty = 1; + fs_info->uuid_root = uuid_root; + } + fs_info->generation = generation; fs_info->last_trans_committed = generation; @@ -2890,6 +2913,17 @@ retry_root_backup: btrfs_qgroup_rescan_resume(fs_info); + if (create_uuid_tree) { + pr_info("btrfs: creating UUID tree\n"); + ret = btrfs_create_uuid_tree(fs_info); + if (ret) { + pr_warn("btrfs: failed to create the UUID tree %d\n", + ret); + close_ctree(tree_root); + return ret; + } + } + return 0; fail_qgroup: diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f1c1694d34b7..8cb1d41ba501 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4361,6 +4361,9 @@ static struct btrfs_block_rsv *get_block_rsv( if (root == root->fs_info->csum_root && trans->adding_csums) block_rsv = trans->block_rsv; + if (root == root->fs_info->uuid_root) + block_rsv = trans->block_rsv; + if (!block_rsv) block_rsv = root->block_rsv; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 306547b51a13..e084218c09d2 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3429,6 +3429,32 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info) return 0; } +int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_root *uuid_root; + + /* + * 1 - root node + * 1 - root item + */ + trans = btrfs_start_transaction(tree_root, 2); + if (IS_ERR(trans)) + return PTR_ERR(trans); + + uuid_root = btrfs_create_tree(trans, fs_info, + BTRFS_UUID_TREE_OBJECTID); + if (IS_ERR(uuid_root)) { + btrfs_abort_transaction(trans, tree_root, + PTR_ERR(uuid_root)); + return PTR_ERR(uuid_root); + } + + fs_info->uuid_root = uuid_root; + + return btrfs_commit_transaction(trans, tree_root); +} /* * shrinking a device means finding all of the device extents past * the new size, and then following the back refs to the chunks. diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 08c44d9059b1..7071b2988305 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -322,6 +322,7 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info); int btrfs_recover_balance(struct btrfs_fs_info *fs_info); int btrfs_pause_balance(struct btrfs_fs_info *fs_info); int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); +int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info); int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); int find_free_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 num_bytes, -- cgit v1.2.3 From dd5f9615fc5c5e8d3751aab3a17b92768fb1ce70 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Thu, 15 Aug 2013 17:11:20 +0200 Subject: Btrfs: maintain subvolume items in the UUID tree When a new subvolume or snapshot is created, a new UUID item is added to the UUID tree. Such items are removed when the subvolume is deleted. The ioctl to set the received subvolume UUID is also touched and will now also add this received UUID into the UUID tree together with the ID of the subvolume. The latter is also done when read-only snapshots are created which inherit all the send/receive information from the parent subvolume. User mode programs use the BTRFS_IOC_TREE_SEARCH ioctl to search and read in the UUID tree. Signed-off-by: Stefan Behrens Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/ioctl.c | 76 +++++++++++++++++++++++++++++++++++++++++++------- fs/btrfs/transaction.c | 20 ++++++++++++- 3 files changed, 86 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c03e61ef7230..bdc8498d6f03 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3724,6 +3724,7 @@ extern const struct dentry_operations btrfs_dentry_operations; long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); void btrfs_update_iflags(struct inode *inode); void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); +int btrfs_is_empty_uuid(u8 *uuid); int btrfs_defrag_file(struct inode *inode, struct file *file, struct btrfs_ioctl_defrag_range_args *range, u64 newer_than, unsigned long max_pages); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 82fb11a361c4..8e75249282c2 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -367,6 +367,13 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) return 0; } +int btrfs_is_empty_uuid(u8 *uuid) +{ + static char empty_uuid[BTRFS_UUID_SIZE] = {0}; + + return !memcmp(uuid, empty_uuid, BTRFS_UUID_SIZE); +} + static noinline int create_subvol(struct inode *dir, struct dentry *dentry, char *name, int namelen, @@ -400,7 +407,7 @@ static noinline int create_subvol(struct inode *dir, * of create_snapshot(). */ ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, - 7, &qgroup_reserved, false); + 8, &qgroup_reserved, false); if (ret) return ret; @@ -522,9 +529,14 @@ static noinline int create_subvol(struct inode *dir, ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, objectid, root->root_key.objectid, btrfs_ino(dir), index, name, namelen); - BUG_ON(ret); + ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root, + root_item.uuid, BTRFS_UUID_KEY_SUBVOL, + objectid); + if (ret) + btrfs_abort_transaction(trans, root, ret); + fail: trans->block_rsv = NULL; trans->bytes_reserved = 0; @@ -577,9 +589,10 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, * 1 - root item * 2 - root ref/backref * 1 - root of snapshot + * 1 - UUID item */ ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root, - &pending_snapshot->block_rsv, 7, + &pending_snapshot->block_rsv, 8, &pending_snapshot->qgroup_reserved, false); if (ret) @@ -2235,6 +2248,27 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, goto out_end_trans; } } + + ret = btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root, + dest->root_item.uuid, BTRFS_UUID_KEY_SUBVOL, + dest->root_key.objectid); + if (ret && ret != -ENOENT) { + btrfs_abort_transaction(trans, root, ret); + err = ret; + goto out_end_trans; + } + if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) { + ret = btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root, + dest->root_item.received_uuid, + BTRFS_UUID_KEY_RECEIVED_SUBVOL, + dest->root_key.objectid); + if (ret && ret != -ENOENT) { + btrfs_abort_transaction(trans, root, ret); + err = ret; + goto out_end_trans; + } + } + out_end_trans: trans->block_rsv = NULL; trans->bytes_reserved = 0; @@ -2446,7 +2480,6 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; int ret = 0; char *s_uuid = NULL; - char empty_uuid[BTRFS_UUID_SIZE] = {0}; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -2455,7 +2488,7 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) if (IS_ERR(di_args)) return PTR_ERR(di_args); - if (memcmp(empty_uuid, di_args->uuid, BTRFS_UUID_SIZE) != 0) + if (!btrfs_is_empty_uuid(di_args->uuid)) s_uuid = di_args->uuid; mutex_lock(&fs_devices->device_list_mutex); @@ -4292,6 +4325,7 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, struct btrfs_trans_handle *trans; struct timespec ct = CURRENT_TIME; int ret = 0; + int received_uuid_changed; ret = mnt_want_write_file(file); if (ret < 0) @@ -4321,7 +4355,11 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, goto out; } - trans = btrfs_start_transaction(root, 1); + /* + * 1 - root item + * 2 - uuid items (received uuid + subvol uuid) + */ + trans = btrfs_start_transaction(root, 3); if (IS_ERR(trans)) { ret = PTR_ERR(trans); trans = NULL; @@ -4332,6 +4370,14 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, sa->rtime.sec = ct.tv_sec; sa->rtime.nsec = ct.tv_nsec; + received_uuid_changed = memcmp(root_item->received_uuid, sa->uuid, + BTRFS_UUID_SIZE); + if (received_uuid_changed && + !btrfs_is_empty_uuid(root_item->received_uuid)) + btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root, + root_item->received_uuid, + BTRFS_UUID_KEY_RECEIVED_SUBVOL, + root->root_key.objectid); memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE); btrfs_set_root_stransid(root_item, sa->stransid); btrfs_set_root_rtransid(root_item, sa->rtransid); @@ -4344,12 +4390,22 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, &root->root_key, &root->root_item); if (ret < 0) { btrfs_end_transaction(trans, root); - trans = NULL; goto out; - } else { - ret = btrfs_commit_transaction(trans, root); - if (ret < 0) + } + if (received_uuid_changed && !btrfs_is_empty_uuid(sa->uuid)) { + ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root, + sa->uuid, + BTRFS_UUID_KEY_RECEIVED_SUBVOL, + root->root_key.objectid); + if (ret < 0 && ret != -EEXIST) { + btrfs_abort_transaction(trans, root, ret); goto out; + } + } + ret = btrfs_commit_transaction(trans, root); + if (ret < 0) { + btrfs_abort_transaction(trans, root, ret); + goto out; } ret = copy_to_user(arg, sa, sizeof(*sa)); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 579efcc1de16..72ab0dd31937 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1311,8 +1311,26 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, dentry->d_name.len * 2); parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode); - if (ret) + if (ret) { + btrfs_abort_transaction(trans, root, ret); + goto fail; + } + ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, new_uuid.b, + BTRFS_UUID_KEY_SUBVOL, objectid); + if (ret) { btrfs_abort_transaction(trans, root, ret); + goto fail; + } + if (!btrfs_is_empty_uuid(new_root_item->received_uuid)) { + ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, + new_root_item->received_uuid, + BTRFS_UUID_KEY_RECEIVED_SUBVOL, + objectid); + if (ret && ret != -EEXIST) { + btrfs_abort_transaction(trans, root, ret); + goto fail; + } + } fail: pending->error = ret; dir_item_existed: -- cgit v1.2.3 From 803b2f54fb4faf6d76fca43e59bcc555d9713cd4 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Thu, 15 Aug 2013 17:11:21 +0200 Subject: Btrfs: fill UUID tree initially When the UUID tree is initially created, a task is spawned that walks through the root tree. For each found subvolume root_item, the uuid and received_uuid entries in the UUID tree are added. This is such a quick operation so that in case somebody wants to unmount the filesystem while the task is still running, the unmount is delayed until the UUID tree building task is finished. Signed-off-by: Stefan Behrens Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 ++ fs/btrfs/disk-io.c | 7 +++ fs/btrfs/volumes.c | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 160 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index bdc8498d6f03..df2b9c22b4cf 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -1654,6 +1655,8 @@ struct btrfs_fs_info { struct btrfs_dev_replace dev_replace; atomic_t mutually_exclusive_operation_running; + + struct semaphore uuid_tree_rescan_sem; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 80bd63738c6d..fa49e900216a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include "compat.h" #include "ctree.h" @@ -2297,6 +2298,7 @@ int open_ctree(struct super_block *sb, init_rwsem(&fs_info->extent_commit_sem); init_rwsem(&fs_info->cleanup_work_sem); init_rwsem(&fs_info->subvol_sem); + sema_init(&fs_info->uuid_tree_rescan_sem, 1); fs_info->dev_replace.lock_owner = 0; atomic_set(&fs_info->dev_replace.nesting_level, 0); mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount); @@ -3554,6 +3556,11 @@ int close_ctree(struct btrfs_root *root) fs_info->closing = 1; smp_mb(); + /* wait for the uuid_scan task to finish */ + down(&fs_info->uuid_tree_rescan_sem); + /* avoid complains from lockdep et al., set sem back to initial state */ + up(&fs_info->uuid_tree_rescan_sem); + /* pause restriper - we want to resume on mount */ btrfs_pause_balance(fs_info); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e084218c09d2..4066803fe765 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "compat.h" #include "ctree.h" @@ -3429,11 +3430,146 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info) return 0; } +static int btrfs_uuid_scan_kthread(void *data) +{ + struct btrfs_fs_info *fs_info = data; + struct btrfs_root *root = fs_info->tree_root; + struct btrfs_key key; + struct btrfs_key max_key; + struct btrfs_path *path = NULL; + int ret = 0; + struct extent_buffer *eb; + int slot; + struct btrfs_root_item root_item; + u32 item_size; + struct btrfs_trans_handle *trans; + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + key.objectid = 0; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = 0; + + max_key.objectid = (u64)-1; + max_key.type = BTRFS_ROOT_ITEM_KEY; + max_key.offset = (u64)-1; + + path->keep_locks = 1; + + while (1) { + ret = btrfs_search_forward(root, &key, &max_key, path, 0); + if (ret) { + if (ret > 0) + ret = 0; + break; + } + + if (key.type != BTRFS_ROOT_ITEM_KEY || + (key.objectid < BTRFS_FIRST_FREE_OBJECTID && + key.objectid != BTRFS_FS_TREE_OBJECTID) || + key.objectid > BTRFS_LAST_FREE_OBJECTID) + goto skip; + + eb = path->nodes[0]; + slot = path->slots[0]; + item_size = btrfs_item_size_nr(eb, slot); + if (item_size < sizeof(root_item)) + goto skip; + + trans = NULL; + read_extent_buffer(eb, &root_item, + btrfs_item_ptr_offset(eb, slot), + (int)sizeof(root_item)); + if (btrfs_root_refs(&root_item) == 0) + goto skip; + if (!btrfs_is_empty_uuid(root_item.uuid)) { + /* + * 1 - subvol uuid item + * 1 - received_subvol uuid item + */ + trans = btrfs_start_transaction(fs_info->uuid_root, 2); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + break; + } + ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, + root_item.uuid, + BTRFS_UUID_KEY_SUBVOL, + key.objectid); + if (ret < 0) { + pr_warn("btrfs: uuid_tree_add failed %d\n", + ret); + btrfs_end_transaction(trans, + fs_info->uuid_root); + break; + } + } + + if (!btrfs_is_empty_uuid(root_item.received_uuid)) { + if (!trans) { + /* 1 - received_subvol uuid item */ + trans = btrfs_start_transaction( + fs_info->uuid_root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + break; + } + } + ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, + root_item.received_uuid, + BTRFS_UUID_KEY_RECEIVED_SUBVOL, + key.objectid); + if (ret < 0) { + pr_warn("btrfs: uuid_tree_add failed %d\n", + ret); + btrfs_end_transaction(trans, + fs_info->uuid_root); + break; + } + } + + if (trans) { + ret = btrfs_end_transaction(trans, fs_info->uuid_root); + if (ret) + break; + } + +skip: + btrfs_release_path(path); + if (key.offset < (u64)-1) { + key.offset++; + } else if (key.type < BTRFS_ROOT_ITEM_KEY) { + key.offset = 0; + key.type = BTRFS_ROOT_ITEM_KEY; + } else if (key.objectid < (u64)-1) { + key.offset = 0; + key.type = BTRFS_ROOT_ITEM_KEY; + key.objectid++; + } else { + break; + } + cond_resched(); + } + +out: + btrfs_free_path(path); + if (ret) + pr_warn("btrfs: btrfs_uuid_scan_kthread failed %d\n", ret); + up(&fs_info->uuid_tree_rescan_sem); + return 0; +} + int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info) { struct btrfs_trans_handle *trans; struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *uuid_root; + struct task_struct *task; + int ret; /* * 1 - root node @@ -3453,8 +3589,21 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info) fs_info->uuid_root = uuid_root; - return btrfs_commit_transaction(trans, tree_root); + ret = btrfs_commit_transaction(trans, tree_root); + if (ret) + return ret; + + down(&fs_info->uuid_tree_rescan_sem); + task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid"); + if (IS_ERR(task)) { + pr_warn("btrfs: failed to start uuid_scan task\n"); + up(&fs_info->uuid_tree_rescan_sem); + return PTR_ERR(task); + } + + return 0; } + /* * shrinking a device means finding all of the device extents past * the new size, and then following the back refs to the chunks. -- cgit v1.2.3 From 26432799c902b76e87f68f5c88f2146a78ba84af Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Thu, 15 Aug 2013 17:11:22 +0200 Subject: Btrfs: introduce uuid-tree-gen field In order to be able to detect the case that a filesystem is mounted with an old kernel, add a uuid-tree-gen field like the free space cache is doing it. It is part of the super block and written with each commit. Old kernels do not know this field and don't update it. Signed-off-by: Stefan Behrens Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 5 ++++- fs/btrfs/transaction.c | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index df2b9c22b4cf..02b1cefbc308 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -482,9 +482,10 @@ struct btrfs_super_block { char label[BTRFS_LABEL_SIZE]; __le64 cache_generation; + __le64 uuid_tree_generation; /* future expansion */ - __le64 reserved[31]; + __le64 reserved[30]; u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS]; } __attribute__ ((__packed__)); @@ -2900,6 +2901,8 @@ BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block, BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block, cache_generation, 64); BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64); +BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block, + uuid_tree_generation, 64); static inline int btrfs_super_csum_size(struct btrfs_super_block *s) { diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 72ab0dd31937..e76237c8802a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1380,6 +1380,7 @@ static void update_super_roots(struct btrfs_root *root) super->root_level = root_item->level; if (btrfs_test_opt(root, SPACE_CACHE)) super->cache_generation = root_item->generation; + super->uuid_tree_generation = root_item->generation; } int btrfs_transaction_in_commit(struct btrfs_fs_info *info) -- cgit v1.2.3 From 70f801754728017ebc909d603c69255dc1e6f06f Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Thu, 15 Aug 2013 17:11:23 +0200 Subject: Btrfs: check UUID tree during mount if required If the filesystem was mounted with an old kernel that was not aware of the UUID tree, this is detected by looking at the uuid_tree_generation field of the superblock (similar to how the free space cache is doing it). If a mismatch is detected at mount time, a thread is started that does two things: 1. Iterate through the UUID tree, check each entry, delete those entries that are not valid anymore (i.e., the subvol does not exist anymore or the value changed). 2. Iterate through the root tree, for each found subvolume, add the UUID tree entries for the subvolume (if they are not already there). This mechanism is also used to handle and repair errors that happened during the initial creation and filling of the tree. The update of the uuid_tree_generation field (which indicates that the state of the UUID tree is up to date) is blocked until all create and repair operations are successfully completed. Signed-off-by: Stefan Behrens Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 ++ fs/btrfs/disk-io.c | 18 +++++++- fs/btrfs/transaction.c | 3 +- fs/btrfs/uuid-tree.c | 123 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/volumes.c | 83 +++++++++++++++++++++++++++++++++ fs/btrfs/volumes.h | 1 + 6 files changed, 230 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 02b1cefbc308..8c954766db93 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1658,6 +1658,7 @@ struct btrfs_fs_info { atomic_t mutually_exclusive_operation_running; struct semaphore uuid_tree_rescan_sem; + unsigned int update_uuid_tree_gen:1; }; /* @@ -3511,6 +3512,9 @@ int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans, struct btrfs_root *uuid_root, u8 *uuid, u8 type, u64 subid); +int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info, + int (*check_func)(struct btrfs_fs_info *, u8 *, u8, + u64)); /* dir-item.c */ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fa49e900216a..e7ef82ad0d26 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2120,7 +2120,8 @@ int open_ctree(struct super_block *sb, int err = -EINVAL; int num_backups_tried = 0; int backup_index = 0; - bool create_uuid_tree = false; + bool create_uuid_tree; + bool check_uuid_tree; tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info); chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); @@ -2724,9 +2725,13 @@ retry_root_backup: if (ret != -ENOENT) goto recovery_tree_root; create_uuid_tree = true; + check_uuid_tree = false; } else { uuid_root->track_dirty = 1; fs_info->uuid_root = uuid_root; + create_uuid_tree = false; + check_uuid_tree = + generation != btrfs_super_uuid_tree_generation(disk_super); } fs_info->generation = generation; @@ -2924,6 +2929,17 @@ retry_root_backup: close_ctree(tree_root); return ret; } + } else if (check_uuid_tree) { + pr_info("btrfs: checking UUID tree\n"); + ret = btrfs_check_uuid_tree(fs_info); + if (ret) { + pr_warn("btrfs: failed to check the UUID tree %d\n", + ret); + close_ctree(tree_root); + return ret; + } + } else { + fs_info->update_uuid_tree_gen = 1; } return 0; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index e76237c8802a..e0336b9a4385 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1380,7 +1380,8 @@ static void update_super_roots(struct btrfs_root *root) super->root_level = root_item->level; if (btrfs_test_opt(root, SPACE_CACHE)) super->cache_generation = root_item->generation; - super->uuid_tree_generation = root_item->generation; + if (root->fs_info->update_uuid_tree_gen) + super->uuid_tree_generation = root_item->generation; } int btrfs_transaction_in_commit(struct btrfs_fs_info *info) diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c index 04a04fa0f7c4..dd0dea3766f7 100644 --- a/fs/btrfs/uuid-tree.c +++ b/fs/btrfs/uuid-tree.c @@ -233,3 +233,126 @@ out: btrfs_free_path(path); return ret; } + +static int btrfs_uuid_iter_rem(struct btrfs_root *uuid_root, u8 *uuid, u8 type, + u64 subid) +{ + struct btrfs_trans_handle *trans; + int ret; + + /* 1 - for the uuid item */ + trans = btrfs_start_transaction(uuid_root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + + ret = btrfs_uuid_tree_rem(trans, uuid_root, uuid, type, subid); + btrfs_end_transaction(trans, uuid_root); + +out: + return ret; +} + +int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info, + int (*check_func)(struct btrfs_fs_info *, u8 *, u8, + u64)) +{ + struct btrfs_root *root = fs_info->uuid_root; + struct btrfs_key key; + struct btrfs_key max_key; + struct btrfs_path *path; + int ret = 0; + struct extent_buffer *leaf; + int slot; + u32 item_size; + unsigned long offset; + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + key.objectid = 0; + key.type = 0; + key.offset = 0; + max_key.objectid = (u64)-1; + max_key.type = (u8)-1; + max_key.offset = (u64)-1; + +again_search_slot: + path->keep_locks = 1; + ret = btrfs_search_forward(root, &key, &max_key, path, 0); + if (ret) { + if (ret > 0) + ret = 0; + goto out; + } + + while (1) { + cond_resched(); + leaf = path->nodes[0]; + slot = path->slots[0]; + btrfs_item_key_to_cpu(leaf, &key, slot); + + if (key.type != BTRFS_UUID_KEY_SUBVOL && + key.type != BTRFS_UUID_KEY_RECEIVED_SUBVOL) + goto skip; + + offset = btrfs_item_ptr_offset(leaf, slot); + item_size = btrfs_item_size_nr(leaf, slot); + if (!IS_ALIGNED(item_size, sizeof(u64))) { + pr_warn("btrfs: uuid item with illegal size %lu!\n", + (unsigned long)item_size); + goto skip; + } + while (item_size) { + u8 uuid[BTRFS_UUID_SIZE]; + __le64 subid_le; + u64 subid_cpu; + + put_unaligned_le64(key.objectid, uuid); + put_unaligned_le64(key.offset, uuid + sizeof(u64)); + read_extent_buffer(leaf, &subid_le, offset, + sizeof(subid_le)); + subid_cpu = le64_to_cpu(subid_le); + ret = check_func(fs_info, uuid, key.type, subid_cpu); + if (ret < 0) + goto out; + if (ret > 0) { + btrfs_release_path(path); + ret = btrfs_uuid_iter_rem(root, uuid, key.type, + subid_cpu); + if (ret == 0) { + /* + * this might look inefficient, but the + * justification is that it is an + * exception that check_func returns 1, + * and that in the regular case only one + * entry per UUID exists. + */ + goto again_search_slot; + } + if (ret < 0 && ret != -ENOENT) + goto out; + } + item_size -= sizeof(subid_le); + offset += sizeof(subid_le); + } + +skip: + ret = btrfs_next_item(root, path); + if (ret == 0) + continue; + else if (ret > 0) + ret = 0; + break; + } + +out: + btrfs_free_path(path); + if (ret) + pr_warn("btrfs: btrfs_uuid_tree_iterate failed %d\n", ret); + return 0; +} diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 4066803fe765..75bdea6bf188 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3559,10 +3559,76 @@ out: btrfs_free_path(path); if (ret) pr_warn("btrfs: btrfs_uuid_scan_kthread failed %d\n", ret); + else + fs_info->update_uuid_tree_gen = 1; up(&fs_info->uuid_tree_rescan_sem); return 0; } +/* + * Callback for btrfs_uuid_tree_iterate(). + * returns: + * 0 check succeeded, the entry is not outdated. + * < 0 if an error occured. + * > 0 if the check failed, which means the caller shall remove the entry. + */ +static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info, + u8 *uuid, u8 type, u64 subid) +{ + struct btrfs_key key; + int ret = 0; + struct btrfs_root *subvol_root; + + if (type != BTRFS_UUID_KEY_SUBVOL && + type != BTRFS_UUID_KEY_RECEIVED_SUBVOL) + goto out; + + key.objectid = subid; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + subvol_root = btrfs_read_fs_root_no_name(fs_info, &key); + if (IS_ERR(subvol_root)) { + ret = PTR_ERR(subvol_root); + if (ret == -ENOENT) + ret = 1; + goto out; + } + + switch (type) { + case BTRFS_UUID_KEY_SUBVOL: + if (memcmp(uuid, subvol_root->root_item.uuid, BTRFS_UUID_SIZE)) + ret = 1; + break; + case BTRFS_UUID_KEY_RECEIVED_SUBVOL: + if (memcmp(uuid, subvol_root->root_item.received_uuid, + BTRFS_UUID_SIZE)) + ret = 1; + break; + } + +out: + return ret; +} + +static int btrfs_uuid_rescan_kthread(void *data) +{ + struct btrfs_fs_info *fs_info = (struct btrfs_fs_info *)data; + int ret; + + /* + * 1st step is to iterate through the existing UUID tree and + * to delete all entries that contain outdated data. + * 2nd step is to add all missing entries to the UUID tree. + */ + ret = btrfs_uuid_tree_iterate(fs_info, btrfs_check_uuid_tree_entry); + if (ret < 0) { + pr_warn("btrfs: iterating uuid_tree failed %d\n", ret); + up(&fs_info->uuid_tree_rescan_sem); + return ret; + } + return btrfs_uuid_scan_kthread(data); +} + int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info) { struct btrfs_trans_handle *trans; @@ -3596,6 +3662,7 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info) down(&fs_info->uuid_tree_rescan_sem); task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid"); if (IS_ERR(task)) { + /* fs_info->update_uuid_tree_gen remains 0 in all error case */ pr_warn("btrfs: failed to start uuid_scan task\n"); up(&fs_info->uuid_tree_rescan_sem); return PTR_ERR(task); @@ -3604,6 +3671,22 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info) return 0; } +int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info) +{ + struct task_struct *task; + + down(&fs_info->uuid_tree_rescan_sem); + task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid"); + if (IS_ERR(task)) { + /* fs_info->update_uuid_tree_gen remains 0 in all error case */ + pr_warn("btrfs: failed to start uuid_rescan task\n"); + up(&fs_info->uuid_tree_rescan_sem); + return PTR_ERR(task); + } + + return 0; +} + /* * shrinking a device means finding all of the device extents past * the new size, and then following the back refs to the chunks. diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 7071b2988305..d98b942c3896 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -323,6 +323,7 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info); int btrfs_pause_balance(struct btrfs_fs_info *fs_info); int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info); +int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info); int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); int find_free_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 num_bytes, -- cgit v1.2.3 From f420ee1e923b931eeef5e2928791e84b1197cab3 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Thu, 15 Aug 2013 17:11:24 +0200 Subject: Btrfs: add mount option to force UUID tree checking This should never be needed, but since all functions are there to check and rebuild the UUID tree, a mount option is added that allows to force this check and rebuild procedure. Signed-off-by: Stefan Behrens Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 3 ++- fs/btrfs/super.c | 8 +++++++- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8c954766db93..0ef9d866952f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1998,6 +1998,7 @@ struct btrfs_ioctl_defrag_range_args { #define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20) #define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21) #define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR (1 << 22) +#define BTRFS_MOUNT_RESCAN_UUID_TREE (1 << 23) #define BTRFS_DEFAULT_COMMIT_INTERVAL (30) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e7ef82ad0d26..00914a62abec 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2929,7 +2929,8 @@ retry_root_backup: close_ctree(tree_root); return ret; } - } else if (check_uuid_tree) { + } else if (check_uuid_tree || + btrfs_test_opt(tree_root, RESCAN_UUID_TREE)) { pr_info("btrfs: checking UUID tree\n"); ret = btrfs_check_uuid_tree(fs_info); if (ret) { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 1bd0bb523660..57090b4284fe 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -321,7 +321,7 @@ enum { Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache, Opt_no_space_cache, Opt_recovery, Opt_skip_balance, Opt_check_integrity, Opt_check_integrity_including_extent_data, - Opt_check_integrity_print_mask, Opt_fatal_errors, + Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree, Opt_commit_interval, Opt_err, }; @@ -362,6 +362,7 @@ static match_table_t tokens = { {Opt_check_integrity, "check_int"}, {Opt_check_integrity_including_extent_data, "check_int_data"}, {Opt_check_integrity_print_mask, "check_int_print_mask=%d"}, + {Opt_rescan_uuid_tree, "rescan_uuid_tree"}, {Opt_fatal_errors, "fatal_errors=%s"}, {Opt_commit_interval, "commit=%d"}, {Opt_err, NULL}, @@ -572,6 +573,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) case Opt_space_cache: btrfs_set_opt(info->mount_opt, SPACE_CACHE); break; + case Opt_rescan_uuid_tree: + btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE); + break; case Opt_no_space_cache: printk(KERN_INFO "btrfs: disabling disk space caching\n"); btrfs_clear_opt(info->mount_opt, SPACE_CACHE); @@ -979,6 +983,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) seq_puts(seq, ",space_cache"); else seq_puts(seq, ",nospace_cache"); + if (btrfs_test_opt(root, RESCAN_UUID_TREE)) + seq_puts(seq, ",rescan_uuid_tree"); if (btrfs_test_opt(root, CLEAR_CACHE)) seq_puts(seq, ",clear_cache"); if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) -- cgit v1.2.3 From 633085c79c84c391732caa40f5b3a3d2b299f7be Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Fri, 16 Aug 2013 15:23:33 +0100 Subject: Btrfs: reset force_compress on btrfs_file_defrag failure After we set force_compress with a new value (which was not being done while holding the inode mutex), if an error happens and we jump to the label out_ra, the force_compress property of the inode is not set to BTRFS_COMPRESS_NONE (unlike in the case where no errors happen). Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8e75249282c2..076cc7fff813 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1285,9 +1285,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, cluster = max_cluster; } - if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) - BTRFS_I(inode)->force_compress = compress_type; - if (i + cluster > ra_index) { ra_index = max(i, ra_index); btrfs_force_ra(inode->i_mapping, ra, file, ra_index, @@ -1296,6 +1293,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, } mutex_lock(&inode->i_mutex); + if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) + BTRFS_I(inode)->force_compress = compress_type; ret = cluster_pages_for_defrag(inode, pages, i, cluster); if (ret < 0) { mutex_unlock(&inode->i_mutex); @@ -1352,10 +1351,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, atomic_read(&root->fs_info->async_delalloc_pages) == 0)); } atomic_dec(&root->fs_info->async_submit_draining); - - mutex_lock(&inode->i_mutex); - BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE; - mutex_unlock(&inode->i_mutex); } if (range->compress_type == BTRFS_COMPRESS_LZO) { @@ -1365,6 +1360,11 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, ret = defrag_count; out_ra: + if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { + mutex_lock(&inode->i_mutex); + BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE; + mutex_unlock(&inode->i_mutex); + } if (!file) kfree(ra); kfree(pages); -- cgit v1.2.3 From 68b823ef41f17b05ce8d04e10f0635e068451f6c Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Fri, 16 Aug 2013 14:02:33 -0400 Subject: Btrfs: use __u64 in exported user headers Signed-off-by: Mike Frysinger Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- include/uapi/linux/btrfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 90d7bd9d839c..45e618921c61 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -549,7 +549,7 @@ static inline char *btrfs_err_str(enum btrfs_err_code err_code) struct btrfs_ioctl_search_args) #define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \ struct btrfs_ioctl_ino_lookup_args) -#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64) +#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, __u64) #define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \ struct btrfs_ioctl_space_args) #define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64) -- cgit v1.2.3 From bbb651e469d99f0088e286fdeb54acca7bb4ad4e Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Mon, 19 Aug 2013 18:51:13 +0200 Subject: Btrfs: don't allow the replace procedure on read only filesystems If you start the replace procedure on a read only filesystem, at the end the procedure fails to write the updated dev_items to the chunk tree. The problem is that this error is not indicated except for a WARN_ON(). If the user now thinks that everything was done as expected and destroys the source device (with mkfs or with a hammer). The next mount fails with "failed to read chunk root" and the filesystem is gone. This commit adds code to fail the attempt to start the replace procedure if the filesystem is mounted read-only. Signed-off-by: Stefan Behrens Cc: # 3.10+ Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 076cc7fff813..559cb161a60a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3670,6 +3670,9 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg) switch (p->cmd) { case BTRFS_IOCTL_DEV_REPLACE_CMD_START: + if (root->fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; + if (atomic_xchg( &root->fs_info->mutually_exclusive_operation_running, 1)) { -- cgit v1.2.3 From 53f10659f9994df8efe788f82d3da78d48e650c5 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 12 Aug 2013 14:33:01 +0300 Subject: Btrfs: find_next_devid: root -> fs_info find_next_devid() knows which root to search, so it should take an fs_info instead of an arbitrary root. Signed-off-by: Ilya Dryomov Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 75bdea6bf188..9b3595e370f8 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1308,15 +1308,14 @@ static u64 find_next_chunk(struct btrfs_fs_info *fs_info) return ret; } -static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid) +static noinline int find_next_devid(struct btrfs_fs_info *fs_info, + u64 *devid_ret) { int ret; struct btrfs_key key; struct btrfs_key found_key; struct btrfs_path *path; - root = root->fs_info->chunk_root; - path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -1325,20 +1324,21 @@ static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid) key.type = BTRFS_DEV_ITEM_KEY; key.offset = (u64)-1; - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0); if (ret < 0) goto error; BUG_ON(ret == 0); /* Corruption */ - ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID, + ret = btrfs_previous_item(fs_info->chunk_root, path, + BTRFS_DEV_ITEMS_OBJECTID, BTRFS_DEV_ITEM_KEY); if (ret) { - *objectid = 1; + *devid_ret = 1; } else { btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); - *objectid = found_key.offset + 1; + *devid_ret = found_key.offset + 1; } ret = 0; error: @@ -1974,7 +1974,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) } rcu_assign_pointer(device->name, name); - ret = find_next_devid(root, &device->devid); + ret = find_next_devid(root->fs_info, &device->devid); if (ret) { rcu_string_free(device->name); kfree(device); -- cgit v1.2.3 From 12bd2fc0d2f589f9605b8f497eee2e7724f3af24 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 23 Aug 2013 13:20:17 +0300 Subject: Btrfs: add btrfs_alloc_device and switch to it Currently btrfs_device is allocated ad-hoc in a few different places, and as a result not all fields are initialized properly. In particular, readahead state is only initialized in device_list_add (at scan time), and not in btrfs_init_new_device (when the new device is added with 'btrfs dev add'). Fix this by adding an allocation helper and switch everybody but __btrfs_close_devices to it. (__btrfs_close_devices is dealt with in a later commit.) Signed-off-by: Ilya Dryomov Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 151 +++++++++++++++++++++++++++++++++-------------------- fs/btrfs/volumes.h | 3 ++ 2 files changed, 97 insertions(+), 57 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9b3595e370f8..4bc07d910e54 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -102,6 +102,27 @@ void btrfs_cleanup_fs_uuids(void) } } +static struct btrfs_device *__alloc_device(void) +{ + struct btrfs_device *dev; + + dev = kzalloc(sizeof(*dev), GFP_NOFS); + if (!dev) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&dev->dev_list); + INIT_LIST_HEAD(&dev->dev_alloc_list); + + spin_lock_init(&dev->io_lock); + + spin_lock_init(&dev->reada_lock); + atomic_set(&dev->reada_in_flight, 0); + INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT); + INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT); + + return dev; +} + static noinline struct btrfs_device *__find_device(struct list_head *head, u64 devid, u8 *uuid) { @@ -415,17 +436,12 @@ static noinline int device_list_add(const char *path, if (fs_devices->opened) return -EBUSY; - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) { + device = btrfs_alloc_device(NULL, &devid, + disk_super->dev_item.uuid); + if (IS_ERR(device)) { /* we can safely leave the fs_devices entry around */ - return -ENOMEM; + return PTR_ERR(device); } - device->devid = devid; - device->dev_stats_valid = 0; - device->work.func = pending_bios_fn; - memcpy(device->uuid, disk_super->dev_item.uuid, - BTRFS_UUID_SIZE); - spin_lock_init(&device->io_lock); name = rcu_string_strdup(path, GFP_NOFS); if (!name) { @@ -433,15 +449,6 @@ static noinline int device_list_add(const char *path, return -ENOMEM; } rcu_assign_pointer(device->name, name); - INIT_LIST_HEAD(&device->dev_alloc_list); - - /* init readahead state */ - spin_lock_init(&device->reada_lock); - device->reada_curr_zone = NULL; - atomic_set(&device->reada_in_flight, 0); - device->reada_next = 0; - INIT_RADIX_TREE(&device->reada_zones, GFP_NOFS & ~__GFP_WAIT); - INIT_RADIX_TREE(&device->reada_extents, GFP_NOFS & ~__GFP_WAIT); mutex_lock(&fs_devices->device_list_mutex); list_add_rcu(&device->dev_list, &fs_devices->devices); @@ -492,8 +499,9 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) list_for_each_entry(orig_dev, &orig->devices, dev_list) { struct rcu_string *name; - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) + device = btrfs_alloc_device(NULL, &orig_dev->devid, + orig_dev->uuid); + if (IS_ERR(device)) goto error; /* @@ -507,13 +515,6 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) } rcu_assign_pointer(device->name, name); - device->devid = orig_dev->devid; - device->work.func = pending_bios_fn; - memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid)); - spin_lock_init(&device->io_lock); - INIT_LIST_HEAD(&device->dev_list); - INIT_LIST_HEAD(&device->dev_alloc_list); - list_add(&device->dev_list, &fs_devices->devices); device->fs_devices = fs_devices; fs_devices->num_devices++; @@ -1959,10 +1960,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) } mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) { + device = btrfs_alloc_device(root->fs_info, NULL, NULL); + if (IS_ERR(device)) { /* we can safely leave the fs_devices entry around */ - ret = -ENOMEM; + ret = PTR_ERR(device); goto error; } @@ -1974,13 +1975,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) } rcu_assign_pointer(device->name, name); - ret = find_next_devid(root->fs_info, &device->devid); - if (ret) { - rcu_string_free(device->name); - kfree(device); - goto error; - } - trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { rcu_string_free(device->name); @@ -1995,9 +1989,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) if (blk_queue_discard(q)) device->can_discard = 1; device->writeable = 1; - device->work.func = pending_bios_fn; - generate_random_uuid(device->uuid); - spin_lock_init(&device->io_lock); device->generation = trans->transid; device->io_width = root->sectorsize; device->io_align = root->sectorsize; @@ -2124,6 +2115,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, struct btrfs_fs_info *fs_info = root->fs_info; struct list_head *devices; struct rcu_string *name; + u64 devid = BTRFS_DEV_REPLACE_DEVID; int ret = 0; *device_out = NULL; @@ -2145,9 +2137,9 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, } } - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) { - ret = -ENOMEM; + device = btrfs_alloc_device(NULL, &devid, NULL); + if (IS_ERR(device)) { + ret = PTR_ERR(device); goto error; } @@ -2164,10 +2156,6 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, device->can_discard = 1; mutex_lock(&root->fs_info->fs_devices->device_list_mutex); device->writeable = 1; - device->work.func = pending_bios_fn; - generate_random_uuid(device->uuid); - device->devid = BTRFS_DEV_REPLACE_DEVID; - spin_lock_init(&device->io_lock); device->generation = 0; device->io_width = root->sectorsize; device->io_align = root->sectorsize; @@ -5572,23 +5560,72 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root, struct btrfs_device *device; struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) + device = btrfs_alloc_device(NULL, &devid, dev_uuid); + if (IS_ERR(device)) return NULL; - list_add(&device->dev_list, - &fs_devices->devices); - device->devid = devid; - device->work.func = pending_bios_fn; + + list_add(&device->dev_list, &fs_devices->devices); device->fs_devices = fs_devices; - device->missing = 1; fs_devices->num_devices++; + + device->missing = 1; fs_devices->missing_devices++; - spin_lock_init(&device->io_lock); - INIT_LIST_HEAD(&device->dev_alloc_list); - memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE); + return device; } +/** + * btrfs_alloc_device - allocate struct btrfs_device + * @fs_info: used only for generating a new devid, can be NULL if + * devid is provided (i.e. @devid != NULL). + * @devid: a pointer to devid for this device. If NULL a new devid + * is generated. + * @uuid: a pointer to UUID for this device. If NULL a new UUID + * is generated. + * + * Return: a pointer to a new &struct btrfs_device on success; ERR_PTR() + * on error. Returned struct is not linked onto any lists and can be + * destroyed with kfree() right away. + */ +struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, + const u64 *devid, + const u8 *uuid) +{ + struct btrfs_device *dev; + u64 tmp; + + if (!devid && !fs_info) { + WARN_ON(1); + return ERR_PTR(-EINVAL); + } + + dev = __alloc_device(); + if (IS_ERR(dev)) + return dev; + + if (devid) + tmp = *devid; + else { + int ret; + + ret = find_next_devid(fs_info, &tmp); + if (ret) { + kfree(dev); + return ERR_PTR(ret); + } + } + dev->devid = tmp; + + if (uuid) + memcpy(dev->uuid, uuid, BTRFS_UUID_SIZE); + else + generate_random_uuid(dev->uuid); + + dev->work.func = pending_bios_fn; + + return dev; +} + static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, struct extent_buffer *leaf, struct btrfs_chunk *chunk) diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index d98b942c3896..b72f540c8b29 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -305,6 +305,9 @@ void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info, int btrfs_find_device_missing_or_by_path(struct btrfs_root *root, char *device_path, struct btrfs_device **device); +struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, + const u64 *devid, + const u8 *uuid); int btrfs_rm_device(struct btrfs_root *root, char *device_path); void btrfs_cleanup_fs_uuids(void); int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len); -- cgit v1.2.3 From 2208a378f35fea7a1b778bf856edb971fb7ea9e8 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 12 Aug 2013 14:33:03 +0300 Subject: Btrfs: add alloc_fs_devices and switch to it In the spirit of btrfs_alloc_device, add a helper for allocating and doing some common initialization of btrfs_fs_devices struct. Signed-off-by: Ilya Dryomov Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 71 ++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 53 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 4bc07d910e54..d38065eca7c9 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -63,6 +63,48 @@ static void unlock_chunks(struct btrfs_root *root) mutex_unlock(&root->fs_info->chunk_mutex); } +static struct btrfs_fs_devices *__alloc_fs_devices(void) +{ + struct btrfs_fs_devices *fs_devs; + + fs_devs = kzalloc(sizeof(*fs_devs), GFP_NOFS); + if (!fs_devs) + return ERR_PTR(-ENOMEM); + + mutex_init(&fs_devs->device_list_mutex); + + INIT_LIST_HEAD(&fs_devs->devices); + INIT_LIST_HEAD(&fs_devs->alloc_list); + INIT_LIST_HEAD(&fs_devs->list); + + return fs_devs; +} + +/** + * alloc_fs_devices - allocate struct btrfs_fs_devices + * @fsid: a pointer to UUID for this FS. If NULL a new UUID is + * generated. + * + * Return: a pointer to a new &struct btrfs_fs_devices on success; + * ERR_PTR() on error. Returned struct is not linked onto any lists and + * can be destroyed with kfree() right away. + */ +static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid) +{ + struct btrfs_fs_devices *fs_devs; + + fs_devs = __alloc_fs_devices(); + if (IS_ERR(fs_devs)) + return fs_devs; + + if (fsid) + memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE); + else + generate_random_uuid(fs_devs->fsid); + + return fs_devs; +} + static void free_fs_devices(struct btrfs_fs_devices *fs_devices) { struct btrfs_device *device; @@ -417,16 +459,14 @@ static noinline int device_list_add(const char *path, fs_devices = find_fsid(disk_super->fsid); if (!fs_devices) { - fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); - if (!fs_devices) - return -ENOMEM; - INIT_LIST_HEAD(&fs_devices->devices); - INIT_LIST_HEAD(&fs_devices->alloc_list); + fs_devices = alloc_fs_devices(disk_super->fsid); + if (IS_ERR(fs_devices)) + return PTR_ERR(fs_devices); + list_add(&fs_devices->list, &fs_uuids); - memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); fs_devices->latest_devid = devid; fs_devices->latest_trans = found_transid; - mutex_init(&fs_devices->device_list_mutex); + device = NULL; } else { device = __find_device(&fs_devices->devices, devid, @@ -482,18 +522,13 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) struct btrfs_device *device; struct btrfs_device *orig_dev; - fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); - if (!fs_devices) - return ERR_PTR(-ENOMEM); + fs_devices = alloc_fs_devices(orig->fsid); + if (IS_ERR(fs_devices)) + return fs_devices; - INIT_LIST_HEAD(&fs_devices->devices); - INIT_LIST_HEAD(&fs_devices->alloc_list); - INIT_LIST_HEAD(&fs_devices->list); - mutex_init(&fs_devices->device_list_mutex); fs_devices->latest_devid = orig->latest_devid; fs_devices->latest_trans = orig->latest_trans; fs_devices->total_devices = orig->total_devices; - memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid)); /* We have held the volume lock, it is safe to get the devices. */ list_for_each_entry(orig_dev, &orig->devices, dev_list) { @@ -1797,9 +1832,9 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) if (!fs_devices->seeding) return -EINVAL; - seed_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); - if (!seed_devices) - return -ENOMEM; + seed_devices = __alloc_fs_devices(); + if (IS_ERR(seed_devices)) + return PTR_ERR(seed_devices); old_devices = clone_fs_devices(fs_devices); if (IS_ERR(old_devices)) { -- cgit v1.2.3 From a1e8780a89ec13217fa1c8f86faf5546110e1402 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 12 Aug 2013 14:33:04 +0300 Subject: Btrfs: rollback btrfs_device fields on umount It turns out we don't properly rollback in-core btrfs_device state on umount. We zero out ->bdev, ->in_fs_metadata and that's about it. In particular, we don't zero out ->generation, and this can lead to us refusing a mount -- a non-NULL fs_devices->latest_bdev is essential, but btrfs_close_extra_devices will happily assign NULL to ->latest_bdev if the first device on the dev_list happens to be missing and consequently has no bdev attached. This happens because since commit a6b0d5c8 btrfs_close_extra_devices adjusts ->latest_bdev, and in doing that, relies on the ->generation. Fix this, and possibly other problems, by zeroing out everything except for what device_list_add sets, so that a mount right after insmod and 'btrfs dev scan' is no different from any later mount in this respect. Signed-off-by: Ilya Dryomov Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d38065eca7c9..ed685991b2c1 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -674,22 +674,19 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) if (device->can_discard) fs_devices->num_can_discard--; - new_device = kmalloc(sizeof(*new_device), GFP_NOFS); - BUG_ON(!new_device); /* -ENOMEM */ - memcpy(new_device, device, sizeof(*new_device)); + new_device = btrfs_alloc_device(NULL, &device->devid, + device->uuid); + BUG_ON(IS_ERR(new_device)); /* -ENOMEM */ /* Safe because we are under uuid_mutex */ if (device->name) { name = rcu_string_strdup(device->name->str, GFP_NOFS); - BUG_ON(device->name && !name); /* -ENOMEM */ + BUG_ON(!name); /* -ENOMEM */ rcu_assign_pointer(new_device->name, name); } - new_device->bdev = NULL; - new_device->writeable = 0; - new_device->in_fs_metadata = 0; - new_device->can_discard = 0; - spin_lock_init(&new_device->io_lock); + list_replace_rcu(&device->dev_list, &new_device->dev_list); + new_device->fs_devices = device->fs_devices; call_rcu(&device->rcu, free_device); } -- cgit v1.2.3 From 1cb048f596cc7b4405d695d65ca70098a7ab9065 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Tue, 20 Aug 2013 00:52:51 +0100 Subject: Btrfs: fix memory leak of orphan block rsv This issue is simple to reproduce and observe if kmemleak is enabled. Two simple ways to reproduce it: ** 1 $ mkfs.btrfs -f /dev/loop0 $ mount /dev/loop0 /mnt/btrfs $ btrfs balance start /mnt/btrfs $ umount /mnt/btrfs ** 2 $ mkfs.btrfs -f /dev/loop0 $ mount /dev/loop0 /mnt/btrfs $ touch /mnt/btrfs/foobar $ rm -f /mnt/btrfs/foobar $ umount /mnt/btrfs After a while, kmemleak reports the leak: $ cat /sys/kernel/debug/kmemleak unreferenced object 0xffff880402b13e00 (size 128): comm "btrfs", pid 19621, jiffies 4341648183 (age 70057.844s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 fc c6 b1 04 88 ff ff 04 00 04 00 ad 4e ad de .............N.. backtrace: [] kmemleak_alloc+0x26/0x50 [] kmem_cache_alloc_trace+0xeb/0x1d0 [] btrfs_alloc_block_rsv+0x39/0x70 [btrfs] [] btrfs_orphan_add+0x13d/0x1b0 [btrfs] [] btrfs_remove_block_group+0x143/0x500 [btrfs] [] btrfs_relocate_chunk.isra.63+0x618/0x790 [btrfs] [] btrfs_balance+0x8f7/0xe90 [btrfs] [] btrfs_ioctl_balance+0x250/0x550 [btrfs] [] btrfs_ioctl+0xdfa/0x25f0 [btrfs] [] do_vfs_ioctl+0x96/0x570 [] SyS_ioctl+0x91/0xb0 [] system_call_fastpath+0x16/0x1b [] 0xffffffffffffffff This affects btrfs-next, revision be8e3cd00d7293dd177e3f8a4a1645ce09ca3acb (Btrfs: separate out tests into their own directory). Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 00914a62abec..c60372de6647 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3484,6 +3484,8 @@ static void free_fs_root(struct btrfs_root *root) { iput(root->cache_inode); WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); + btrfs_free_block_rsv(root, root->orphan_block_rsv); + root->orphan_block_rsv = NULL; if (root->anon_dev) free_anon_bdev(root->anon_dev); free_extent_buffer(root->node); @@ -3641,6 +3643,9 @@ int close_ctree(struct btrfs_root *root) btrfs_free_stripe_hash_table(fs_info); + btrfs_free_block_rsv(root, root->orphan_block_rsv); + root->orphan_block_rsv = NULL; + return 0; } -- cgit v1.2.3 From c1c9ff7c94e83fae89a742df74db51156869bad5 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 20 Aug 2013 13:20:07 +0200 Subject: Btrfs: Remove superfluous casts from u64 to unsigned long long u64 is "unsigned long long" on all architectures now, so there's no need to cast it when formatting it using the "ll" length modifier. Signed-off-by: Geert Uytterhoeven Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 24 ++-- fs/btrfs/check-integrity.c | 293 ++++++++++++++------------------------------ fs/btrfs/compression.c | 5 +- fs/btrfs/ctree.c | 6 +- fs/btrfs/delayed-inode.c | 10 +- fs/btrfs/dev-replace.c | 2 +- fs/btrfs/disk-io.c | 25 ++-- fs/btrfs/extent-tree.c | 47 +++---- fs/btrfs/extent_io.c | 30 ++--- fs/btrfs/file-item.c | 4 +- fs/btrfs/free-space-cache.c | 6 +- fs/btrfs/inode.c | 23 ++-- fs/btrfs/ioctl.c | 10 +- fs/btrfs/ordered-data.c | 11 +- fs/btrfs/print-tree.c | 79 +++++------- fs/btrfs/qgroup.c | 10 +- fs/btrfs/relocation.c | 7 +- fs/btrfs/root-tree.c | 3 +- fs/btrfs/scrub.c | 16 +-- fs/btrfs/super.c | 10 +- fs/btrfs/transaction.c | 3 +- fs/btrfs/volumes.c | 19 ++- 22 files changed, 221 insertions(+), 422 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 977e9c66f2e3..3da8f42e08a9 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -295,10 +295,9 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq); pr_debug("search slot in root %llu (level %d, ref count %d) returned " "%d for key (%llu %u %llu)\n", - (unsigned long long)ref->root_id, level, ref->count, ret, - (unsigned long long)ref->key_for_search.objectid, - ref->key_for_search.type, - (unsigned long long)ref->key_for_search.offset); + ref->root_id, level, ref->count, ret, + ref->key_for_search.objectid, ref->key_for_search.type, + ref->key_for_search.offset); if (ret < 0) goto out; @@ -1328,8 +1327,7 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, found_key->type != BTRFS_METADATA_ITEM_KEY) || found_key->objectid > logical || found_key->objectid + size <= logical) { - pr_debug("logical %llu is not within any extent\n", - (unsigned long long)logical); + pr_debug("logical %llu is not within any extent\n", logical); return -ENOENT; } @@ -1342,11 +1340,8 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, pr_debug("logical %llu is at position %llu within the extent (%llu " "EXTENT_ITEM %llu) flags %#llx size %u\n", - (unsigned long long)logical, - (unsigned long long)(logical - found_key->objectid), - (unsigned long long)found_key->objectid, - (unsigned long long)found_key->offset, - (unsigned long long)flags, item_size); + logical, logical - found_key->objectid, found_key->objectid, + found_key->offset, flags, item_size); WARN_ON(!flags_ret); if (flags_ret) { @@ -1518,7 +1513,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, while (!ret && (root_node = ulist_next(roots, &root_uiter))) { pr_debug("root %llu references leaf %llu, data list " "%#llx\n", root_node->val, ref_node->val, - (long long)ref_node->aux); + ref_node->aux); ret = iterate_leaf_refs((struct extent_inode_elem *) (uintptr_t)ref_node->aux, root_node->val, @@ -1610,9 +1605,8 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root, name_len = btrfs_inode_ref_name_len(eb, iref); /* path must be released before calling iterate()! */ pr_debug("following ref at offset %u for inode %llu in " - "tree %llu\n", cur, - (unsigned long long)found_key.objectid, - (unsigned long long)fs_root->objectid); + "tree %llu\n", cur, found_key.objectid, + fs_root->objectid); ret = iterate(parent, name_len, (unsigned long)(iref + 1), eb, ctx); if (ret) diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 07f39a3dedb0..fa68c8ab9a15 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -701,15 +701,13 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, next_bytenr = btrfs_super_root(selected_super); if (state->print_mask & BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) - printk(KERN_INFO "root@%llu\n", - (unsigned long long)next_bytenr); + printk(KERN_INFO "root@%llu\n", next_bytenr); break; case 1: next_bytenr = btrfs_super_chunk_root(selected_super); if (state->print_mask & BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) - printk(KERN_INFO "chunk@%llu\n", - (unsigned long long)next_bytenr); + printk(KERN_INFO "chunk@%llu\n", next_bytenr); break; case 2: next_bytenr = btrfs_super_log_root(selected_super); @@ -717,8 +715,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, continue; if (state->print_mask & BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) - printk(KERN_INFO "log@%llu\n", - (unsigned long long)next_bytenr); + printk(KERN_INFO "log@%llu\n", next_bytenr); break; } @@ -727,7 +724,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, next_bytenr, state->metablock_size); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", - (unsigned long long)next_bytenr, num_copies); + next_bytenr, num_copies); for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { struct btrfsic_block *next_block; @@ -742,8 +739,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, printk(KERN_INFO "btrfsic:" " btrfsic_map_block(root @%llu," " mirror %d) failed!\n", - (unsigned long long)next_bytenr, - mirror_num); + next_bytenr, mirror_num); kfree(selected_super); return -1; } @@ -767,7 +763,6 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, if (ret < (int)PAGE_CACHE_SIZE) { printk(KERN_INFO "btrfsic: read @logical %llu failed!\n", - (unsigned long long) tmp_next_block_ctx.start); btrfsic_release_block_ctx(&tmp_next_block_ctx); kfree(selected_super); @@ -847,10 +842,8 @@ static int btrfsic_process_superblock_dev_mirror( printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)" " @%llu (%s/%llu/%d)\n", superblock_bdev, - rcu_str_deref(device->name), - (unsigned long long)dev_bytenr, - dev_state->name, - (unsigned long long)dev_bytenr, + rcu_str_deref(device->name), dev_bytenr, + dev_state->name, dev_bytenr, superblock_mirror_num); list_add(&superblock_tmp->all_blocks_node, &state->all_blocks_list); @@ -906,7 +899,7 @@ static int btrfsic_process_superblock_dev_mirror( next_bytenr, state->metablock_size); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", - (unsigned long long)next_bytenr, num_copies); + next_bytenr, num_copies); for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { struct btrfsic_block *next_block; struct btrfsic_block_data_ctx tmp_next_block_ctx; @@ -918,8 +911,7 @@ static int btrfsic_process_superblock_dev_mirror( mirror_num)) { printk(KERN_INFO "btrfsic: btrfsic_map_block(" "bytenr @%llu, mirror %d) failed!\n", - (unsigned long long)next_bytenr, - mirror_num); + next_bytenr, mirror_num); brelse(bh); return -1; } @@ -1009,13 +1001,9 @@ continue_with_new_stack_frame: printk(KERN_INFO "leaf %llu items %d generation %llu" " owner %llu\n", - (unsigned long long) - sf->block_ctx->start, - sf->nr, - (unsigned long long) + sf->block_ctx->start, sf->nr, btrfs_stack_header_generation( &leafhdr->header), - (unsigned long long) btrfs_stack_header_owner( &leafhdr->header)); } @@ -1137,13 +1125,10 @@ leaf_item_out_of_bounce_error: if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) printk(KERN_INFO "node %llu level %d items %d" " generation %llu owner %llu\n", - (unsigned long long) sf->block_ctx->start, nodehdr->header.level, sf->nr, - (unsigned long long) btrfs_stack_header_generation( &nodehdr->header), - (unsigned long long) btrfs_stack_header_owner( &nodehdr->header)); } @@ -1294,7 +1279,7 @@ static int btrfsic_create_link_to_next_block( next_bytenr, state->metablock_size); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", - (unsigned long long)next_bytenr, *num_copiesp); + next_bytenr, *num_copiesp); *mirror_nump = 1; } @@ -1311,7 +1296,7 @@ static int btrfsic_create_link_to_next_block( if (ret) { printk(KERN_INFO "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n", - (unsigned long long)next_bytenr, *mirror_nump); + next_bytenr, *mirror_nump); btrfsic_release_block_ctx(next_block_ctx); *next_blockp = NULL; return -1; @@ -1339,20 +1324,16 @@ static int btrfsic_create_link_to_next_block( "Referenced block @%llu (%s/%llu/%d)" " found in hash table, %c," " bytenr mismatch (!= stored %llu).\n", - (unsigned long long)next_bytenr, - next_block_ctx->dev->name, - (unsigned long long)next_block_ctx->dev_bytenr, - *mirror_nump, + next_bytenr, next_block_ctx->dev->name, + next_block_ctx->dev_bytenr, *mirror_nump, btrfsic_get_block_type(state, next_block), - (unsigned long long)next_block->logical_bytenr); + next_block->logical_bytenr); } else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) printk(KERN_INFO "Referenced block @%llu (%s/%llu/%d)" " found in hash table, %c.\n", - (unsigned long long)next_bytenr, - next_block_ctx->dev->name, - (unsigned long long)next_block_ctx->dev_bytenr, - *mirror_nump, + next_bytenr, next_block_ctx->dev->name, + next_block_ctx->dev_bytenr, *mirror_nump, btrfsic_get_block_type(state, next_block)); next_block->logical_bytenr = next_bytenr; @@ -1404,7 +1385,7 @@ static int btrfsic_create_link_to_next_block( if (ret < (int)next_block_ctx->len) { printk(KERN_INFO "btrfsic: read block @logical %llu failed!\n", - (unsigned long long)next_bytenr); + next_bytenr); btrfsic_release_block_ctx(next_block_ctx); *next_blockp = NULL; return -1; @@ -1452,7 +1433,6 @@ static int btrfsic_handle_extent_data( if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n", file_extent_item.type, - (unsigned long long) btrfs_stack_file_extent_disk_bytenr( &file_extent_item)); return 0; @@ -1478,11 +1458,9 @@ static int btrfsic_handle_extent_data( printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu," " offset = %llu, num_bytes = %llu\n", file_extent_item.type, - (unsigned long long) btrfs_stack_file_extent_disk_bytenr(&file_extent_item), - (unsigned long long) btrfs_stack_file_extent_offset(&file_extent_item), - (unsigned long long)num_bytes); + num_bytes); while (num_bytes > 0) { u32 chunk_len; int num_copies; @@ -1498,7 +1476,7 @@ static int btrfsic_handle_extent_data( next_bytenr, state->datablock_size); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", - (unsigned long long)next_bytenr, num_copies); + next_bytenr, num_copies); for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { struct btrfsic_block_data_ctx next_block_ctx; struct btrfsic_block *next_block; @@ -1510,8 +1488,7 @@ static int btrfsic_handle_extent_data( if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) printk(KERN_INFO "\tdisk_bytenr = %llu, num_bytes %u\n", - (unsigned long long)next_bytenr, - chunk_len); + next_bytenr, chunk_len); ret = btrfsic_map_block(state, next_bytenr, chunk_len, &next_block_ctx, mirror_num); @@ -1519,8 +1496,7 @@ static int btrfsic_handle_extent_data( printk(KERN_INFO "btrfsic: btrfsic_map_block(@%llu," " mirror=%d) failed!\n", - (unsigned long long)next_bytenr, - mirror_num); + next_bytenr, mirror_num); return -1; } @@ -1549,12 +1525,10 @@ static int btrfsic_handle_extent_data( " found in hash table, D," " bytenr mismatch" " (!= stored %llu).\n", - (unsigned long long)next_bytenr, + next_bytenr, next_block_ctx.dev->name, - (unsigned long long) next_block_ctx.dev_bytenr, mirror_num, - (unsigned long long) next_block->logical_bytenr); } next_block->logical_bytenr = next_bytenr; @@ -1681,7 +1655,7 @@ static int btrfsic_read_block(struct btrfsic_state *state, if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) { printk(KERN_INFO "btrfsic: read_block() with unaligned bytenr %llu\n", - (unsigned long long)block_ctx->dev_bytenr); + block_ctx->dev_bytenr); return -1; } @@ -1778,10 +1752,8 @@ static void btrfsic_dump_database(struct btrfsic_state *state) printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n", btrfsic_get_block_type(state, b_all), - (unsigned long long)b_all->logical_bytenr, - b_all->dev_state->name, - (unsigned long long)b_all->dev_bytenr, - b_all->mirror_num); + b_all->logical_bytenr, b_all->dev_state->name, + b_all->dev_bytenr, b_all->mirror_num); list_for_each(elem_ref_to, &b_all->ref_to_list) { const struct btrfsic_block_link *const l = @@ -1793,16 +1765,13 @@ static void btrfsic_dump_database(struct btrfsic_state *state) " refers %u* to" " %c @%llu (%s/%llu/%d)\n", btrfsic_get_block_type(state, b_all), - (unsigned long long)b_all->logical_bytenr, - b_all->dev_state->name, - (unsigned long long)b_all->dev_bytenr, - b_all->mirror_num, + b_all->logical_bytenr, b_all->dev_state->name, + b_all->dev_bytenr, b_all->mirror_num, l->ref_cnt, btrfsic_get_block_type(state, l->block_ref_to), - (unsigned long long) l->block_ref_to->logical_bytenr, l->block_ref_to->dev_state->name, - (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num); } @@ -1816,16 +1785,12 @@ static void btrfsic_dump_database(struct btrfsic_state *state) " is ref %u* from" " %c @%llu (%s/%llu/%d)\n", btrfsic_get_block_type(state, b_all), - (unsigned long long)b_all->logical_bytenr, - b_all->dev_state->name, - (unsigned long long)b_all->dev_bytenr, - b_all->mirror_num, + b_all->logical_bytenr, b_all->dev_state->name, + b_all->dev_bytenr, b_all->mirror_num, l->ref_cnt, btrfsic_get_block_type(state, l->block_ref_from), - (unsigned long long) l->block_ref_from->logical_bytenr, l->block_ref_from->dev_state->name, - (unsigned long long) l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num); } @@ -1942,12 +1907,9 @@ again: " found in hash table, %c," " bytenr mismatch" " (!= stored %llu).\n", - (unsigned long long)bytenr, - dev_state->name, - (unsigned long long)dev_bytenr, + bytenr, dev_state->name, dev_bytenr, block->mirror_num, btrfsic_get_block_type(state, block), - (unsigned long long) block->logical_bytenr); block->logical_bytenr = bytenr; } else if (state->print_mask & @@ -1955,9 +1917,7 @@ again: printk(KERN_INFO "Written block @%llu (%s/%llu/%d)" " found in hash table, %c.\n", - (unsigned long long)bytenr, - dev_state->name, - (unsigned long long)dev_bytenr, + bytenr, dev_state->name, dev_bytenr, block->mirror_num, btrfsic_get_block_type(state, block)); } else { @@ -1973,9 +1933,7 @@ again: printk(KERN_INFO "Written block @%llu (%s/%llu/%d)" " found in hash table, %c.\n", - (unsigned long long)bytenr, - dev_state->name, - (unsigned long long)dev_bytenr, + bytenr, dev_state->name, dev_bytenr, block->mirror_num, btrfsic_get_block_type(state, block)); } @@ -1992,21 +1950,14 @@ again: " new(gen=%llu)," " which is referenced by most recent superblock" " (superblockgen=%llu)!\n", - btrfsic_get_block_type(state, block), - (unsigned long long)bytenr, - dev_state->name, - (unsigned long long)dev_bytenr, - block->mirror_num, - (unsigned long long)block->generation, - (unsigned long long) + btrfsic_get_block_type(state, block), bytenr, + dev_state->name, dev_bytenr, block->mirror_num, + block->generation, btrfs_disk_key_objectid(&block->disk_key), block->disk_key.type, - (unsigned long long) btrfs_disk_key_offset(&block->disk_key), - (unsigned long long) btrfs_stack_header_generation( (struct btrfs_header *) mapped_datav[0]), - (unsigned long long) state->max_superblock_generation); btrfsic_dump_tree(state); } @@ -2015,13 +1966,9 @@ again: printk(KERN_INFO "btrfs: attempt to overwrite %c-block" " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu," " which is not yet iodone!\n", - btrfsic_get_block_type(state, block), - (unsigned long long)bytenr, - dev_state->name, - (unsigned long long)dev_bytenr, - block->mirror_num, - (unsigned long long)block->generation, - (unsigned long long) + btrfsic_get_block_type(state, block), bytenr, + dev_state->name, dev_bytenr, block->mirror_num, + block->generation, btrfs_stack_header_generation( (struct btrfs_header *) mapped_datav[0])); @@ -2064,7 +2011,7 @@ again: if (ret) { printk(KERN_INFO "btrfsic: btrfsic_map_block(root @%llu)" - " failed!\n", (unsigned long long)bytenr); + " failed!\n", bytenr); goto continue_loop; } block_ctx.datav = mapped_datav; @@ -2148,7 +2095,7 @@ again: printk(KERN_INFO "btrfsic: btrfsic_process_metablock" "(root @%llu) failed!\n", - (unsigned long long)dev_bytenr); + dev_bytenr); } else { block->is_metadata = 0; block->mirror_num = 0; /* unknown */ @@ -2176,8 +2123,7 @@ again: if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) printk(KERN_INFO "Written block (%s/%llu/?)" " !found in hash table, D.\n", - dev_state->name, - (unsigned long long)dev_bytenr); + dev_state->name, dev_bytenr); if (!state->include_extent_data) { /* ignore that written D block */ goto continue_loop; @@ -2201,9 +2147,7 @@ again: printk(KERN_INFO "Written block @%llu (%s/%llu/?)" " !found in hash table, M.\n", - (unsigned long long)bytenr, - dev_state->name, - (unsigned long long)dev_bytenr); + bytenr, dev_state->name, dev_bytenr); ret = btrfsic_map_block(state, bytenr, processed_len, &block_ctx, 0); @@ -2211,7 +2155,7 @@ again: printk(KERN_INFO "btrfsic: btrfsic_map_block(root @%llu)" " failed!\n", - (unsigned long long)dev_bytenr); + dev_bytenr); goto continue_loop; } } @@ -2276,10 +2220,8 @@ again: printk(KERN_INFO "New written %c-block @%llu (%s/%llu/%d)\n", is_metadata ? 'M' : 'D', - (unsigned long long)block->logical_bytenr, - block->dev_state->name, - (unsigned long long)block->dev_bytenr, - block->mirror_num); + block->logical_bytenr, block->dev_state->name, + block->dev_bytenr, block->mirror_num); list_add(&block->all_blocks_node, &state->all_blocks_list); btrfsic_block_hashtable_add(block, &state->block_hashtable); @@ -2290,7 +2232,7 @@ again: printk(KERN_INFO "btrfsic: process_metablock(root @%llu)" " failed!\n", - (unsigned long long)dev_bytenr); + dev_bytenr); } btrfsic_release_block_ctx(&block_ctx); } @@ -2328,10 +2270,8 @@ static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status) "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n", bio_error_status, btrfsic_get_block_type(dev_state->state, block), - (unsigned long long)block->logical_bytenr, - dev_state->name, - (unsigned long long)block->dev_bytenr, - block->mirror_num); + block->logical_bytenr, dev_state->name, + block->dev_bytenr, block->mirror_num); next_block = block->next_in_same_bio; block->iodone_w_error = iodone_w_error; if (block->submit_bio_bh_rw & REQ_FLUSH) { @@ -2341,7 +2281,6 @@ static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status) printk(KERN_INFO "bio_end_io() new %s flush_gen=%llu\n", dev_state->name, - (unsigned long long) dev_state->last_flush_gen); } if (block->submit_bio_bh_rw & REQ_FUA) @@ -2367,10 +2306,8 @@ static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate) "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n", iodone_w_error, btrfsic_get_block_type(dev_state->state, block), - (unsigned long long)block->logical_bytenr, - block->dev_state->name, - (unsigned long long)block->dev_bytenr, - block->mirror_num); + block->logical_bytenr, block->dev_state->name, + block->dev_bytenr, block->mirror_num); block->iodone_w_error = iodone_w_error; if (block->submit_bio_bh_rw & REQ_FLUSH) { @@ -2379,8 +2316,7 @@ static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate) BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) printk(KERN_INFO "bh_end_io() new %s flush_gen=%llu\n", - dev_state->name, - (unsigned long long)dev_state->last_flush_gen); + dev_state->name, dev_state->last_flush_gen); } if (block->submit_bio_bh_rw & REQ_FUA) block->flush_gen = 0; /* FUA completed means block is on disk */ @@ -2405,26 +2341,20 @@ static int btrfsic_process_written_superblock( printk(KERN_INFO "btrfsic: superblock @%llu (%s/%llu/%d)" " with old gen %llu <= %llu\n", - (unsigned long long)superblock->logical_bytenr, + superblock->logical_bytenr, superblock->dev_state->name, - (unsigned long long)superblock->dev_bytenr, - superblock->mirror_num, - (unsigned long long) + superblock->dev_bytenr, superblock->mirror_num, btrfs_super_generation(super_hdr), - (unsigned long long) state->max_superblock_generation); } else { if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) printk(KERN_INFO "btrfsic: got new superblock @%llu (%s/%llu/%d)" " with new gen %llu > %llu\n", - (unsigned long long)superblock->logical_bytenr, + superblock->logical_bytenr, superblock->dev_state->name, - (unsigned long long)superblock->dev_bytenr, - superblock->mirror_num, - (unsigned long long) + superblock->dev_bytenr, superblock->mirror_num, btrfs_super_generation(super_hdr), - (unsigned long long) state->max_superblock_generation); state->max_superblock_generation = @@ -2455,8 +2385,7 @@ static int btrfsic_process_written_superblock( next_bytenr = btrfs_super_root(super_hdr); if (state->print_mask & BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) - printk(KERN_INFO "root@%llu\n", - (unsigned long long)next_bytenr); + printk(KERN_INFO "root@%llu\n", next_bytenr); break; case 1: btrfs_set_disk_key_objectid(&tmp_disk_key, @@ -2465,8 +2394,7 @@ static int btrfsic_process_written_superblock( next_bytenr = btrfs_super_chunk_root(super_hdr); if (state->print_mask & BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) - printk(KERN_INFO "chunk@%llu\n", - (unsigned long long)next_bytenr); + printk(KERN_INFO "chunk@%llu\n", next_bytenr); break; case 2: btrfs_set_disk_key_objectid(&tmp_disk_key, @@ -2477,8 +2405,7 @@ static int btrfsic_process_written_superblock( continue; if (state->print_mask & BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) - printk(KERN_INFO "log@%llu\n", - (unsigned long long)next_bytenr); + printk(KERN_INFO "log@%llu\n", next_bytenr); break; } @@ -2487,7 +2414,7 @@ static int btrfsic_process_written_superblock( next_bytenr, BTRFS_SUPER_INFO_SIZE); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", - (unsigned long long)next_bytenr, num_copies); + next_bytenr, num_copies); for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { int was_created; @@ -2503,8 +2430,7 @@ static int btrfsic_process_written_superblock( printk(KERN_INFO "btrfsic: btrfsic_map_block(@%llu," " mirror=%d) failed!\n", - (unsigned long long)next_bytenr, - mirror_num); + next_bytenr, mirror_num); return -1; } @@ -2589,26 +2515,22 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, " %u* refers to %c @%llu (%s/%llu/%d)\n", recursion_level, btrfsic_get_block_type(state, block), - (unsigned long long)block->logical_bytenr, - block->dev_state->name, - (unsigned long long)block->dev_bytenr, - block->mirror_num, + block->logical_bytenr, block->dev_state->name, + block->dev_bytenr, block->mirror_num, l->ref_cnt, btrfsic_get_block_type(state, l->block_ref_to), - (unsigned long long) l->block_ref_to->logical_bytenr, l->block_ref_to->dev_state->name, - (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num); if (l->block_ref_to->never_written) { printk(KERN_INFO "btrfs: attempt to write superblock" " which references block %c @%llu (%s/%llu/%d)" " which is never written!\n", btrfsic_get_block_type(state, l->block_ref_to), - (unsigned long long) l->block_ref_to->logical_bytenr, l->block_ref_to->dev_state->name, - (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num); ret = -1; } else if (!l->block_ref_to->is_iodone) { @@ -2616,10 +2538,9 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, " which references block %c @%llu (%s/%llu/%d)" " which is not yet iodone!\n", btrfsic_get_block_type(state, l->block_ref_to), - (unsigned long long) l->block_ref_to->logical_bytenr, l->block_ref_to->dev_state->name, - (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num); ret = -1; } else if (l->block_ref_to->iodone_w_error) { @@ -2627,10 +2548,9 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, " which references block %c @%llu (%s/%llu/%d)" " which has write error!\n", btrfsic_get_block_type(state, l->block_ref_to), - (unsigned long long) l->block_ref_to->logical_bytenr, l->block_ref_to->dev_state->name, - (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num); ret = -1; } else if (l->parent_generation != @@ -2644,13 +2564,12 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, " with generation %llu !=" " parent generation %llu!\n", btrfsic_get_block_type(state, l->block_ref_to), - (unsigned long long) l->block_ref_to->logical_bytenr, l->block_ref_to->dev_state->name, - (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num, - (unsigned long long)l->block_ref_to->generation, - (unsigned long long)l->parent_generation); + l->block_ref_to->generation, + l->parent_generation); ret = -1; } else if (l->block_ref_to->flush_gen > l->block_ref_to->dev_state->last_flush_gen) { @@ -2660,13 +2579,10 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, " (block flush_gen=%llu," " dev->flush_gen=%llu)!\n", btrfsic_get_block_type(state, l->block_ref_to), - (unsigned long long) l->block_ref_to->logical_bytenr, l->block_ref_to->dev_state->name, - (unsigned long long)l->block_ref_to->dev_bytenr, - l->block_ref_to->mirror_num, - (unsigned long long)block->flush_gen, - (unsigned long long) + l->block_ref_to->dev_bytenr, + l->block_ref_to->mirror_num, block->flush_gen, l->block_ref_to->dev_state->last_flush_gen); ret = -1; } else if (-1 == btrfsic_check_all_ref_blocks(state, @@ -2711,16 +2627,12 @@ static int btrfsic_is_block_ref_by_superblock( " is ref %u* from %c @%llu (%s/%llu/%d)\n", recursion_level, btrfsic_get_block_type(state, block), - (unsigned long long)block->logical_bytenr, - block->dev_state->name, - (unsigned long long)block->dev_bytenr, - block->mirror_num, + block->logical_bytenr, block->dev_state->name, + block->dev_bytenr, block->mirror_num, l->ref_cnt, btrfsic_get_block_type(state, l->block_ref_from), - (unsigned long long) l->block_ref_from->logical_bytenr, l->block_ref_from->dev_state->name, - (unsigned long long) l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num); if (l->block_ref_from->is_superblock && @@ -2747,14 +2659,12 @@ static void btrfsic_print_add_link(const struct btrfsic_state *state, " to %c @%llu (%s/%llu/%d).\n", l->ref_cnt, btrfsic_get_block_type(state, l->block_ref_from), - (unsigned long long)l->block_ref_from->logical_bytenr, + l->block_ref_from->logical_bytenr, l->block_ref_from->dev_state->name, - (unsigned long long)l->block_ref_from->dev_bytenr, - l->block_ref_from->mirror_num, + l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num, btrfsic_get_block_type(state, l->block_ref_to), - (unsigned long long)l->block_ref_to->logical_bytenr, - l->block_ref_to->dev_state->name, - (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->logical_bytenr, + l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num); } @@ -2766,14 +2676,12 @@ static void btrfsic_print_rem_link(const struct btrfsic_state *state, " to %c @%llu (%s/%llu/%d).\n", l->ref_cnt, btrfsic_get_block_type(state, l->block_ref_from), - (unsigned long long)l->block_ref_from->logical_bytenr, + l->block_ref_from->logical_bytenr, l->block_ref_from->dev_state->name, - (unsigned long long)l->block_ref_from->dev_bytenr, - l->block_ref_from->mirror_num, + l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num, btrfsic_get_block_type(state, l->block_ref_to), - (unsigned long long)l->block_ref_to->logical_bytenr, - l->block_ref_to->dev_state->name, - (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->logical_bytenr, + l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num); } @@ -2817,10 +2725,8 @@ static void btrfsic_dump_tree_sub(const struct btrfsic_state *state, */ indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)", btrfsic_get_block_type(state, block), - (unsigned long long)block->logical_bytenr, - block->dev_state->name, - (unsigned long long)block->dev_bytenr, - block->mirror_num); + block->logical_bytenr, block->dev_state->name, + block->dev_bytenr, block->mirror_num); if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) { printk("[...]\n"); return; @@ -2953,10 +2859,8 @@ static struct btrfsic_block *btrfsic_block_lookup_or_add( "New %s%c-block @%llu (%s/%llu/%d)\n", additional_string, btrfsic_get_block_type(state, block), - (unsigned long long)block->logical_bytenr, - dev_state->name, - (unsigned long long)block->dev_bytenr, - mirror_num); + block->logical_bytenr, dev_state->name, + block->dev_bytenr, mirror_num); list_add(&block->all_blocks_node, &state->all_blocks_list); btrfsic_block_hashtable_add(block, &state->block_hashtable); if (NULL != was_created) @@ -2990,7 +2894,7 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, printk(KERN_INFO "btrfsic:" " btrfsic_map_block(logical @%llu," " mirror %d) failed!\n", - (unsigned long long)bytenr, mirror_num); + bytenr, mirror_num); continue; } @@ -3007,8 +2911,7 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio," " buffer->log_bytenr=%llu, submit_bio(bdev=%s," " phys_bytenr=%llu)!\n", - (unsigned long long)bytenr, dev_state->name, - (unsigned long long)dev_bytenr); + bytenr, dev_state->name, dev_bytenr); for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { ret = btrfsic_map_block(state, bytenr, state->metablock_size, @@ -3018,10 +2921,8 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, printk(KERN_INFO "Read logical bytenr @%llu maps to" " (%s/%llu/%d)\n", - (unsigned long long)bytenr, - block_ctx.dev->name, - (unsigned long long)block_ctx.dev_bytenr, - mirror_num); + bytenr, block_ctx.dev->name, + block_ctx.dev_bytenr, mirror_num); } WARN_ON(1); } @@ -3060,8 +2961,7 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh) printk(KERN_INFO "submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu)," " size=%lu, data=%p, bdev=%p)\n", - rw, (unsigned long)bh->b_blocknr, - (unsigned long long)dev_bytenr, + rw, (unsigned long)bh->b_blocknr, dev_bytenr, (unsigned long)bh->b_size, bh->b_data, bh->b_bdev); btrfsic_process_written_block(dev_state, dev_bytenr, @@ -3130,8 +3030,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio) "submit_bio(rw=0x%x, bi_vcnt=%u," " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n", rw, bio->bi_vcnt, (unsigned long)bio->bi_sector, - (unsigned long long)dev_bytenr, - bio->bi_bdev); + dev_bytenr, bio->bi_bdev); mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt, GFP_NOFS); @@ -3379,10 +3278,8 @@ void btrfsic_unmount(struct btrfs_root *root, " @%llu (%s/%llu/%d) on umount which is" " not yet iodone!\n", btrfsic_get_block_type(state, b_all), - (unsigned long long)b_all->logical_bytenr, - b_all->dev_state->name, - (unsigned long long)b_all->dev_bytenr, - b_all->mirror_num); + b_all->logical_bytenr, b_all->dev_state->name, + b_all->dev_bytenr, b_all->mirror_num); } mutex_unlock(&btrfsic_mutex); diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 37dff5b2d56d..6aad98cb343f 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -132,9 +132,8 @@ static int check_compressed_csum(struct inode *inode, printk(KERN_INFO "btrfs csum failed ino %llu " "extent %llu csum %u " "wanted %u mirror %d\n", - (unsigned long long)btrfs_ino(inode), - (unsigned long long)disk_start, - csum, *cb_sum, cb->mirror_num); + btrfs_ino(inode), disk_start, csum, *cb_sum, + cb->mirror_num); ret = -EIO; goto fail; } diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 09b3870c2729..1d94242ec002 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1383,14 +1383,12 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, if (trans->transaction != root->fs_info->running_transaction) WARN(1, KERN_CRIT "trans %llu running %llu\n", - (unsigned long long)trans->transid, - (unsigned long long) + trans->transid, root->fs_info->running_transaction->transid); if (trans->transid != root->fs_info->generation) WARN(1, KERN_CRIT "trans %llu running %llu\n", - (unsigned long long)trans->transid, - (unsigned long long)root->fs_info->generation); + trans->transid, root->fs_info->generation); if (!should_cow_block(trans, root, buf)) { *cow_ret = buf; diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 685957ac208e..33dd4a5349fb 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1474,10 +1474,8 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, printk(KERN_ERR "err add delayed dir index item(name: %s) into " "the insertion tree of the delayed node" "(root id: %llu, inode id: %llu, errno: %d)\n", - name, - (unsigned long long)delayed_node->root->objectid, - (unsigned long long)delayed_node->inode_id, - ret); + name, delayed_node->root->objectid, + delayed_node->inode_id, ret); BUG(); } mutex_unlock(&delayed_node->mutex); @@ -1548,9 +1546,7 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, printk(KERN_ERR "err add delayed dir index item(index: %llu) " "into the deletion tree of the delayed node" "(root id: %llu, inode id: %llu, errno: %d)\n", - (unsigned long long)index, - (unsigned long long)node->root->objectid, - (unsigned long long)node->inode_id, + index, node->root->objectid, node->inode_id, ret); BUG(); } diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 4253ad580e39..e34e72f3e1aa 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -148,7 +148,7 @@ no_valid_dev_replace_entry_found: !btrfs_test_opt(dev_root, DEGRADED)) { ret = -EIO; pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?\n", - (unsigned long long)src_devid); + src_devid); } if (!dev_replace->tgtdev && !btrfs_test_opt(dev_root, DEGRADED)) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c60372de6647..5003466b00d2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -303,9 +303,8 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, printk_ratelimited(KERN_INFO "btrfs: %s checksum verify " "failed on %llu wanted %X found %X " "level %d\n", - root->fs_info->sb->s_id, - (unsigned long long)buf->start, val, found, - btrfs_header_level(buf)); + root->fs_info->sb->s_id, buf->start, + val, found, btrfs_header_level(buf)); if (result != (char *)&inline_result) kfree(result); return 1; @@ -346,9 +345,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, } printk_ratelimited("parent transid verify failed on %llu wanted %llu " "found %llu\n", - (unsigned long long)eb->start, - (unsigned long long)parent_transid, - (unsigned long long)btrfs_header_generation(eb)); + eb->start, parent_transid, btrfs_header_generation(eb)); ret = 1; clear_extent_buffer_uptodate(eb); out: @@ -513,8 +510,7 @@ static int check_tree_block_fsid(struct btrfs_root *root, #define CORRUPT(reason, eb, root, slot) \ printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \ "root=%llu, slot=%d\n", reason, \ - (unsigned long long)btrfs_header_bytenr(eb), \ - (unsigned long long)root->objectid, slot) + btrfs_header_bytenr(eb), root->objectid, slot) static noinline int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf) @@ -614,14 +610,13 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, if (found_start != eb->start) { printk_ratelimited(KERN_INFO "btrfs bad tree block start " "%llu %llu\n", - (unsigned long long)found_start, - (unsigned long long)eb->start); + found_start, eb->start); ret = -EIO; goto err; } if (check_tree_block_fsid(root, eb)) { printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n", - (unsigned long long)eb->start); + eb->start); ret = -EIO; goto err; } @@ -2405,7 +2400,7 @@ int open_ctree(struct super_block *sb, if (features) { printk(KERN_ERR "BTRFS: couldn't mount because of " "unsupported optional features (%Lx).\n", - (unsigned long long)features); + features); err = -EINVAL; goto fail_alloc; } @@ -2475,7 +2470,7 @@ int open_ctree(struct super_block *sb, if (!(sb->s_flags & MS_RDONLY) && features) { printk(KERN_ERR "BTRFS: couldn't mount RDWR because of " "unsupported option features (%Lx).\n", - (unsigned long long)features); + features); err = -EINVAL; goto fail_alloc; } @@ -3681,9 +3676,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) if (transid != root->fs_info->generation) WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, " "found %llu running %llu\n", - (unsigned long long)buf->start, - (unsigned long long)transid, - (unsigned long long)root->fs_info->generation); + buf->start, transid, root->fs_info->generation); was_dirty = set_extent_buffer_dirty(buf); if (!was_dirty) __percpu_counter_add(&root->fs_info->dirty_metadata_bytes, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8cb1d41ba501..69083330ee16 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5718,7 +5718,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, if (ret) { btrfs_err(info, "umm, got %d back from search, was looking for %llu", - ret, (unsigned long long)bytenr); + ret, bytenr); if (ret > 0) btrfs_print_leaf(extent_root, path->nodes[0]); @@ -5734,11 +5734,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, WARN_ON(1); btrfs_err(info, "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu", - (unsigned long long)bytenr, - (unsigned long long)parent, - (unsigned long long)root_objectid, - (unsigned long long)owner_objectid, - (unsigned long long)owner_offset); + bytenr, parent, root_objectid, owner_objectid, + owner_offset); } else { btrfs_abort_transaction(trans, extent_root, ret); goto out; @@ -5767,7 +5764,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, -1, 1); if (ret) { btrfs_err(info, "umm, got %d back from search, was looking for %llu", - ret, (unsigned long long)bytenr); + ret, bytenr); btrfs_print_leaf(extent_root, path->nodes[0]); } if (ret < 0) { @@ -6529,19 +6526,15 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, spin_lock(&info->lock); printk(KERN_INFO "space_info %llu has %llu free, is %sfull\n", - (unsigned long long)info->flags, - (unsigned long long)(info->total_bytes - info->bytes_used - - info->bytes_pinned - info->bytes_reserved - - info->bytes_readonly), + info->flags, + info->total_bytes - info->bytes_used - info->bytes_pinned - + info->bytes_reserved - info->bytes_readonly, (info->full) ? "" : "not "); printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, " "reserved=%llu, may_use=%llu, readonly=%llu\n", - (unsigned long long)info->total_bytes, - (unsigned long long)info->bytes_used, - (unsigned long long)info->bytes_pinned, - (unsigned long long)info->bytes_reserved, - (unsigned long long)info->bytes_may_use, - (unsigned long long)info->bytes_readonly); + info->total_bytes, info->bytes_used, info->bytes_pinned, + info->bytes_reserved, info->bytes_may_use, + info->bytes_readonly); spin_unlock(&info->lock); if (!dump_block_groups) @@ -6552,12 +6545,9 @@ again: list_for_each_entry(cache, &info->block_groups[index], list) { spin_lock(&cache->lock); printk(KERN_INFO "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s\n", - (unsigned long long)cache->key.objectid, - (unsigned long long)cache->key.offset, - (unsigned long long)btrfs_block_group_used(&cache->item), - (unsigned long long)cache->pinned, - (unsigned long long)cache->reserved, - cache->ro ? "[readonly]" : ""); + cache->key.objectid, cache->key.offset, + btrfs_block_group_used(&cache->item), cache->pinned, + cache->reserved, cache->ro ? "[readonly]" : ""); btrfs_dump_free_space(cache, bytes); spin_unlock(&cache->lock); } @@ -6594,8 +6584,7 @@ again: sinfo = __find_space_info(root->fs_info, flags); btrfs_err(root->fs_info, "allocation failed flags %llu, wanted %llu", - (unsigned long long)flags, - (unsigned long long)num_bytes); + flags, num_bytes); if (sinfo) dump_space_info(sinfo, num_bytes, 1); } @@ -6615,7 +6604,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root, cache = btrfs_lookup_block_group(root->fs_info, start); if (!cache) { btrfs_err(root->fs_info, "Unable to find block group for %llu", - (unsigned long long)start); + start); return -ENOSPC; } @@ -6711,8 +6700,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, ret = update_block_group(root, ins->objectid, ins->offset, 1); if (ret) { /* -ENOENT, logic error */ btrfs_err(fs_info, "update block group failed for %llu %llu", - (unsigned long long)ins->objectid, - (unsigned long long)ins->offset); + ins->objectid, ins->offset); BUG(); } return ret; @@ -6784,8 +6772,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, ret = update_block_group(root, ins->objectid, root->leafsize, 1); if (ret) { /* -ENOENT, logic error */ btrfs_err(fs_info, "update block group failed for %llu %llu", - (unsigned long long)ins->objectid, - (unsigned long long)ins->offset); + ins->objectid, ins->offset); BUG(); } return ret; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3e7d604004f9..5172e8253c4a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -61,9 +61,8 @@ void btrfs_leak_debug_check(void) state = list_entry(states.next, struct extent_state, leak_list); printk(KERN_ERR "btrfs state leak: start %llu end %llu " "state %lu in tree %p refs %d\n", - (unsigned long long)state->start, - (unsigned long long)state->end, - state->state, state->tree, atomic_read(&state->refs)); + state->start, state->end, state->state, state->tree, + atomic_read(&state->refs)); list_del(&state->leak_list); kmem_cache_free(extent_state_cache, state); } @@ -71,8 +70,8 @@ void btrfs_leak_debug_check(void) while (!list_empty(&buffers)) { eb = list_entry(buffers.next, struct extent_buffer, leak_list); printk(KERN_ERR "btrfs buffer leak start %llu len %lu " - "refs %d\n", (unsigned long long)eb->start, - eb->len, atomic_read(&eb->refs)); + "refs %d\n", + eb->start, eb->len, atomic_read(&eb->refs)); list_del(&eb->leak_list); kmem_cache_free(extent_buffer_cache, eb); } @@ -88,11 +87,7 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller, if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) { printk_ratelimited(KERN_DEBUG "btrfs: %s: ino %llu isize %llu odd range [%llu,%llu]\n", - caller, - (unsigned long long)btrfs_ino(inode), - (unsigned long long)isize, - (unsigned long long)start, - (unsigned long long)end); + caller, btrfs_ino(inode), isize, start, end); } } #else @@ -388,8 +383,7 @@ static int insert_state(struct extent_io_tree *tree, if (end < start) WARN(1, KERN_ERR "btrfs end < start %llu %llu\n", - (unsigned long long)end, - (unsigned long long)start); + end, start); state->start = start; state->end = end; @@ -400,9 +394,8 @@ static int insert_state(struct extent_io_tree *tree, struct extent_state *found; found = rb_entry(node, struct extent_state, rb_node); printk(KERN_ERR "btrfs found node %llu %llu on insert of " - "%llu %llu\n", (unsigned long long)found->start, - (unsigned long long)found->end, - (unsigned long long)start, (unsigned long long)end); + "%llu %llu\n", + found->start, found->end, start, end); return -EEXIST; } state->tree = tree; @@ -3298,8 +3291,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, if (!PageWriteback(page)) { printk(KERN_ERR "btrfs warning page %lu not " "writeback, cur %llu end %llu\n", - page->index, (unsigned long long)cur, - (unsigned long long)end); + page->index, cur, end); } ret = submit_extent_page(write_flags, tree, page, @@ -4923,8 +4915,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, if (start + min_len > eb->len) { WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, " - "wanted %lu %lu\n", (unsigned long long)eb->start, - eb->len, start, min_len); + "wanted %lu %lu\n", + eb->start, eb->len, start, min_len); return -EINVAL; } diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index f87d09a0dfaa..4f53159bdb9d 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -248,9 +248,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, } else { printk(KERN_INFO "btrfs no csum found " "for inode %llu start %llu\n", - (unsigned long long) - btrfs_ino(inode), - (unsigned long long)offset); + btrfs_ino(inode), offset); } item = NULL; btrfs_release_path(path); diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 34ecc651c9ab..23182648976b 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -673,8 +673,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, btrfs_err(root->fs_info, "free space inode generation (%llu) " "did not match free space cache generation (%llu)", - (unsigned long long)BTRFS_I(inode)->generation, - (unsigned long long)generation); + BTRFS_I(inode)->generation, generation); return 0; } @@ -1991,8 +1990,7 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, if (info->bytes >= bytes && !block_group->ro) count++; printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n", - (unsigned long long)info->offset, - (unsigned long long)info->bytes, + info->offset, info->bytes, (info->bitmap) ? "yes" : "no"); } printk(KERN_INFO "block group has cluster?: %s\n", diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 264feefa0290..21d8674f7837 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2786,8 +2786,7 @@ good: zeroit: if (__ratelimit(&_rs)) btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u", - (unsigned long long)btrfs_ino(page->mapping->host), - (unsigned long long)start, csum, csum_expected); + btrfs_ino(page->mapping->host), start, csum, csum_expected); memset(kaddr + offset, 1, end - start + 1); flush_dcache_page(page); kunmap_atomic(kaddr); @@ -3572,8 +3571,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, if (ret) { btrfs_info(root->fs_info, "failed to delete reference to %.*s, inode %llu parent %llu", - name_len, name, - (unsigned long long)ino, (unsigned long long)dir_ino); + name_len, name, ino, dir_ino); btrfs_abort_transaction(trans, root, ret); goto err; } @@ -6081,10 +6079,7 @@ insert: btrfs_release_path(path); if (em->start > start || extent_map_end(em) <= start) { btrfs_err(root->fs_info, "bad extent! em: [%llu %llu] passed [%llu %llu]", - (unsigned long long)em->start, - (unsigned long long)em->len, - (unsigned long long)start, - (unsigned long long)len); + em->start, em->len, start, len); err = -EIO; goto out; } @@ -6753,9 +6748,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) flush_dcache_page(bvec->bv_page); if (csum != csums[index]) { btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u", - (unsigned long long)btrfs_ino(inode), - (unsigned long long)start, - csum, csums[index]); + btrfs_ino(inode), start, csum, + csums[index]); err = -EIO; } } @@ -6843,7 +6837,7 @@ static void btrfs_end_dio_bio(struct bio *bio, int err) if (err) { printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu " "sector %#Lx len %u err no %d\n", - (unsigned long long)btrfs_ino(dip->inode), bio->bi_rw, + btrfs_ino(dip->inode), bio->bi_rw, (unsigned long long)bio->bi_sector, bio->bi_size, err); dip->errors = 1; @@ -7775,7 +7769,7 @@ void btrfs_destroy_inode(struct inode *inode) if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, &BTRFS_I(inode)->runtime_flags)) { btrfs_info(root->fs_info, "inode %llu still on the orphan list", - (unsigned long long)btrfs_ino(inode)); + btrfs_ino(inode)); atomic_dec(&root->orphan_inodes); } @@ -7785,8 +7779,7 @@ void btrfs_destroy_inode(struct inode *inode) break; else { btrfs_err(root->fs_info, "found ordered extent %llu %llu on inode cleanup", - (unsigned long long)ordered->file_offset, - (unsigned long long)ordered->len); + ordered->file_offset, ordered->len); btrfs_remove_ordered_extent(inode, ordered); btrfs_put_ordered_extent(ordered); btrfs_put_ordered_extent(ordered); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 559cb161a60a..996951a8d28d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1421,14 +1421,13 @@ static noinline int btrfs_ioctl_resize(struct file *file, ret = -EINVAL; goto out_free; } - printk(KERN_INFO "btrfs: resizing devid %llu\n", - (unsigned long long)devid); + printk(KERN_INFO "btrfs: resizing devid %llu\n", devid); } device = btrfs_find_device(root->fs_info, devid, NULL, NULL); if (!device) { printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", - (unsigned long long)devid); + devid); ret = -ENODEV; goto out_free; } @@ -1436,7 +1435,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, if (!device->writeable) { printk(KERN_INFO "btrfs: resizer unable to apply on " "readonly device %llu\n", - (unsigned long long)devid); + devid); ret = -EPERM; goto out_free; } @@ -1488,8 +1487,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, new_size *= root->sectorsize; printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n", - rcu_str_deref(device->name), - (unsigned long long)new_size); + rcu_str_deref(device->name), new_size); if (new_size > old_size) { trans = btrfs_start_transaction(root, 0); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index b52b2c4010c2..f2023ccb7cf6 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -67,7 +67,7 @@ static void ordered_data_tree_panic(struct inode *inode, int errno, { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); btrfs_panic(fs_info, errno, "Inconsistency in ordered tree at offset " - "%llu\n", (unsigned long long)offset); + "%llu\n", offset); } /* @@ -336,14 +336,12 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode, *file_offset = dec_end; if (dec_start > dec_end) { printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n", - (unsigned long long)dec_start, - (unsigned long long)dec_end); + dec_start, dec_end); } to_dec = dec_end - dec_start; if (to_dec > entry->bytes_left) { printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n", - (unsigned long long)entry->bytes_left, - (unsigned long long)to_dec); + entry->bytes_left, to_dec); } entry->bytes_left -= to_dec; if (!uptodate) @@ -403,8 +401,7 @@ have_entry: if (io_size > entry->bytes_left) { printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n", - (unsigned long long)entry->bytes_left, - (unsigned long long)io_size); + entry->bytes_left, io_size); } entry->bytes_left -= io_size; if (!uptodate) diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 32022f3f169b..0088bedc8631 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -26,14 +26,12 @@ static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk) int i; printk(KERN_INFO "\t\tchunk length %llu owner %llu type %llu " "num_stripes %d\n", - (unsigned long long)btrfs_chunk_length(eb, chunk), - (unsigned long long)btrfs_chunk_owner(eb, chunk), - (unsigned long long)btrfs_chunk_type(eb, chunk), - num_stripes); + btrfs_chunk_length(eb, chunk), btrfs_chunk_owner(eb, chunk), + btrfs_chunk_type(eb, chunk), num_stripes); for (i = 0 ; i < num_stripes ; i++) { printk(KERN_INFO "\t\t\tstripe %d devid %llu offset %llu\n", i, - (unsigned long long)btrfs_stripe_devid_nr(eb, chunk, i), - (unsigned long long)btrfs_stripe_offset_nr(eb, chunk, i)); + btrfs_stripe_devid_nr(eb, chunk, i), + btrfs_stripe_offset_nr(eb, chunk, i)); } } static void print_dev_item(struct extent_buffer *eb, @@ -41,18 +39,18 @@ static void print_dev_item(struct extent_buffer *eb, { printk(KERN_INFO "\t\tdev item devid %llu " "total_bytes %llu bytes used %llu\n", - (unsigned long long)btrfs_device_id(eb, dev_item), - (unsigned long long)btrfs_device_total_bytes(eb, dev_item), - (unsigned long long)btrfs_device_bytes_used(eb, dev_item)); + btrfs_device_id(eb, dev_item), + btrfs_device_total_bytes(eb, dev_item), + btrfs_device_bytes_used(eb, dev_item)); } static void print_extent_data_ref(struct extent_buffer *eb, struct btrfs_extent_data_ref *ref) { printk(KERN_INFO "\t\textent data backref root %llu " "objectid %llu offset %llu count %u\n", - (unsigned long long)btrfs_extent_data_ref_root(eb, ref), - (unsigned long long)btrfs_extent_data_ref_objectid(eb, ref), - (unsigned long long)btrfs_extent_data_ref_offset(eb, ref), + btrfs_extent_data_ref_root(eb, ref), + btrfs_extent_data_ref_objectid(eb, ref), + btrfs_extent_data_ref_offset(eb, ref), btrfs_extent_data_ref_count(eb, ref)); } @@ -87,9 +85,8 @@ static void print_extent_item(struct extent_buffer *eb, int slot) flags = btrfs_extent_flags(eb, ei); printk(KERN_INFO "\t\textent refs %llu gen %llu flags %llu\n", - (unsigned long long)btrfs_extent_refs(eb, ei), - (unsigned long long)btrfs_extent_generation(eb, ei), - (unsigned long long)flags); + btrfs_extent_refs(eb, ei), btrfs_extent_generation(eb, ei), + flags); if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { struct btrfs_tree_block_info *info; @@ -97,9 +94,8 @@ static void print_extent_item(struct extent_buffer *eb, int slot) btrfs_tree_block_key(eb, info, &key); printk(KERN_INFO "\t\ttree block key (%llu %u %llu) " "level %d\n", - (unsigned long long)btrfs_disk_key_objectid(&key), - key.type, - (unsigned long long)btrfs_disk_key_offset(&key), + btrfs_disk_key_objectid(&key), key.type, + btrfs_disk_key_offset(&key), btrfs_tree_block_level(eb, info)); iref = (struct btrfs_extent_inline_ref *)(info + 1); } else { @@ -115,11 +111,11 @@ static void print_extent_item(struct extent_buffer *eb, int slot) switch (type) { case BTRFS_TREE_BLOCK_REF_KEY: printk(KERN_INFO "\t\ttree block backref " - "root %llu\n", (unsigned long long)offset); + "root %llu\n", offset); break; case BTRFS_SHARED_BLOCK_REF_KEY: printk(KERN_INFO "\t\tshared block backref " - "parent %llu\n", (unsigned long long)offset); + "parent %llu\n", offset); break; case BTRFS_EXTENT_DATA_REF_KEY: dref = (struct btrfs_extent_data_ref *)(&iref->offset); @@ -129,8 +125,7 @@ static void print_extent_item(struct extent_buffer *eb, int slot) sref = (struct btrfs_shared_data_ref *)(iref + 1); printk(KERN_INFO "\t\tshared data backref " "parent %llu count %u\n", - (unsigned long long)offset, - btrfs_shared_data_ref_count(eb, sref)); + offset, btrfs_shared_data_ref_count(eb, sref)); break; default: BUG(); @@ -148,9 +143,9 @@ static void print_extent_ref_v0(struct extent_buffer *eb, int slot) ref0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_ref_v0); printk("\t\textent back ref root %llu gen %llu " "owner %llu num_refs %lu\n", - (unsigned long long)btrfs_ref_root_v0(eb, ref0), - (unsigned long long)btrfs_ref_generation_v0(eb, ref0), - (unsigned long long)btrfs_ref_objectid_v0(eb, ref0), + btrfs_ref_root_v0(eb, ref0), + btrfs_ref_generation_v0(eb, ref0), + btrfs_ref_objectid_v0(eb, ref0), (unsigned long)btrfs_ref_count_v0(eb, ref0)); } #endif @@ -196,39 +191,34 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) nr = btrfs_header_nritems(l); btrfs_info(root->fs_info, "leaf %llu total ptrs %d free space %d", - (unsigned long long)btrfs_header_bytenr(l), nr, - btrfs_leaf_free_space(root, l)); + btrfs_header_bytenr(l), nr, btrfs_leaf_free_space(root, l)); for (i = 0 ; i < nr ; i++) { item = btrfs_item_nr(l, i); btrfs_item_key_to_cpu(l, &key, i); type = btrfs_key_type(&key); printk(KERN_INFO "\titem %d key (%llu %u %llu) itemoff %d " "itemsize %d\n", - i, - (unsigned long long)key.objectid, type, - (unsigned long long)key.offset, + i, key.objectid, type, key.offset, btrfs_item_offset(l, item), btrfs_item_size(l, item)); switch (type) { case BTRFS_INODE_ITEM_KEY: ii = btrfs_item_ptr(l, i, struct btrfs_inode_item); printk(KERN_INFO "\t\tinode generation %llu size %llu " "mode %o\n", - (unsigned long long) btrfs_inode_generation(l, ii), - (unsigned long long)btrfs_inode_size(l, ii), + btrfs_inode_size(l, ii), btrfs_inode_mode(l, ii)); break; case BTRFS_DIR_ITEM_KEY: di = btrfs_item_ptr(l, i, struct btrfs_dir_item); btrfs_dir_item_key_to_cpu(l, di, &found_key); printk(KERN_INFO "\t\tdir oid %llu type %u\n", - (unsigned long long)found_key.objectid, + found_key.objectid, btrfs_dir_type(l, di)); break; case BTRFS_ROOT_ITEM_KEY: ri = btrfs_item_ptr(l, i, struct btrfs_root_item); printk(KERN_INFO "\t\troot data bytenr %llu refs %u\n", - (unsigned long long) btrfs_disk_root_bytenr(l, ri), btrfs_disk_root_refs(l, ri)); break; @@ -264,17 +254,12 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) } printk(KERN_INFO "\t\textent data disk bytenr %llu " "nr %llu\n", - (unsigned long long) btrfs_file_extent_disk_bytenr(l, fi), - (unsigned long long) btrfs_file_extent_disk_num_bytes(l, fi)); printk(KERN_INFO "\t\textent data offset %llu " "nr %llu ram %llu\n", - (unsigned long long) btrfs_file_extent_offset(l, fi), - (unsigned long long) btrfs_file_extent_num_bytes(l, fi), - (unsigned long long) btrfs_file_extent_ram_bytes(l, fi)); break; case BTRFS_EXTENT_REF_V0_KEY: @@ -288,7 +273,6 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) bi = btrfs_item_ptr(l, i, struct btrfs_block_group_item); printk(KERN_INFO "\t\tblock group used %llu\n", - (unsigned long long) btrfs_disk_block_group_used(l, bi)); break; case BTRFS_CHUNK_ITEM_KEY: @@ -305,13 +289,9 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) printk(KERN_INFO "\t\tdev extent chunk_tree %llu\n" "\t\tchunk objectid %llu chunk offset %llu " "length %llu\n", - (unsigned long long) btrfs_dev_extent_chunk_tree(l, dev_extent), - (unsigned long long) btrfs_dev_extent_chunk_objectid(l, dev_extent), - (unsigned long long) btrfs_dev_extent_chunk_offset(l, dev_extent), - (unsigned long long) btrfs_dev_extent_length(l, dev_extent)); break; case BTRFS_DEV_STATS_KEY: @@ -344,16 +324,13 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) return; } btrfs_info(root->fs_info, "node %llu level %d total ptrs %d free spc %u", - (unsigned long long)btrfs_header_bytenr(c), - level, nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); + btrfs_header_bytenr(c), level, nr, + (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); for (i = 0; i < nr; i++) { btrfs_node_key_to_cpu(c, &key, i); printk(KERN_INFO "\tkey %d (%llu %u %llu) block %llu\n", - i, - (unsigned long long)key.objectid, - key.type, - (unsigned long long)key.offset, - (unsigned long long)btrfs_node_blockptr(c, i)); + i, key.objectid, key.type, key.offset, + btrfs_node_blockptr(c, i)); } for (i = 0; i < nr; i++) { struct extent_buffer *next = read_tree_block(root, diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 66c58e120b2c..4e6ef490619e 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -398,8 +398,7 @@ next1: if (ret == -ENOENT) { printk(KERN_WARNING "btrfs: orphan qgroup relation 0x%llx->0x%llx\n", - (unsigned long long)found_key.objectid, - (unsigned long long)found_key.offset); + found_key.objectid, found_key.offset); ret = 0; /* ignore the error */ } if (ret) @@ -1163,7 +1162,7 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, if (ret) { fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; printk(KERN_INFO "unable to update quota limit for %llu\n", - (unsigned long long)qgroupid); + qgroupid); } spin_lock(&fs_info->qgroup_lock); @@ -1873,10 +1872,9 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, path, 1, 0); pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n", - (unsigned long long)fs_info->qgroup_rescan_progress.objectid, + fs_info->qgroup_rescan_progress.objectid, fs_info->qgroup_rescan_progress.type, - (unsigned long long)fs_info->qgroup_rescan_progress.offset, - ret); + fs_info->qgroup_rescan_progress.offset, ret); if (ret) { /* diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 2ec59c4c51b7..cf5e30ff7093 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -335,7 +335,7 @@ static void backref_tree_panic(struct rb_node *rb_node, int errno, u64 bytenr) if (bnode->root) fs_info = bnode->root->fs_info; btrfs_panic(fs_info, errno, "Inconsistency in backref cache " - "found at offset %llu\n", (unsigned long long)bytenr); + "found at offset %llu\n", bytenr); } /* @@ -4234,8 +4234,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) } printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n", - (unsigned long long)rc->block_group->key.objectid, - (unsigned long long)rc->block_group->flags); + rc->block_group->key.objectid, rc->block_group->flags); ret = btrfs_start_all_delalloc_inodes(fs_info, 0); if (ret < 0) { @@ -4257,7 +4256,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) break; printk(KERN_INFO "btrfs: found %llu extents\n", - (unsigned long long)rc->extents_found); + rc->extents_found); if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) { btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index c67b432d867d..0b1f4ef8db98 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -155,8 +155,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root if (ret != 0) { btrfs_print_leaf(root, path->nodes[0]); printk(KERN_CRIT "unable to update root key %llu %u %llu\n", - (unsigned long long)key->objectid, key->type, - (unsigned long long)key->offset); + key->objectid, key->type, key->offset); BUG_ON(1); } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 266f24febce3..fcdf67ffacb0 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -754,8 +754,7 @@ out: num_uncorrectable_read_errors); printk_ratelimited_in_rcu(KERN_ERR "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n", - (unsigned long long)fixup->logical, - rcu_str_deref(fixup->dev->name)); + fixup->logical, rcu_str_deref(fixup->dev->name)); } btrfs_free_path(path); @@ -1154,8 +1153,7 @@ corrected_error: spin_unlock(&sctx->stat_lock); printk_ratelimited_in_rcu(KERN_ERR "btrfs: fixed up error at logical %llu on dev %s\n", - (unsigned long long)logical, - rcu_str_deref(dev->name)); + logical, rcu_str_deref(dev->name)); } } else { did_not_correct_error: @@ -1164,8 +1162,7 @@ did_not_correct_error: spin_unlock(&sctx->stat_lock); printk_ratelimited_in_rcu(KERN_ERR "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n", - (unsigned long long)logical, - rcu_str_deref(dev->name)); + logical, rcu_str_deref(dev->name)); } out: @@ -2455,8 +2452,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, printk(KERN_ERR "btrfs scrub: tree block %llu spanning " "stripes, ignored. logical=%llu\n", - (unsigned long long)key.objectid, - (unsigned long long)logical); + key.objectid, logical); goto next; } @@ -3176,9 +3172,7 @@ static void copy_nocow_pages_worker(struct btrfs_work *work) nocow_ctx); if (ret != 0 && ret != -ENOENT) { pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %llu, ret %d\n", - (unsigned long long)logical, - (unsigned long long)physical_for_dev_replace, - (unsigned long long)len, + logical, physical_for_dev_replace, len, (unsigned long long)mirror_num, ret); not_written = 1; goto out; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 57090b4284fe..4d5d0f3eb0d8 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -522,7 +522,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) root->sectorsize); } printk(KERN_INFO "btrfs: max_inline at %llu\n", - (unsigned long long)info->max_inline); + info->max_inline); } else { ret = -ENOMEM; goto out; @@ -537,7 +537,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) kfree(num); printk(KERN_INFO "btrfs: allocations start at %llu\n", - (unsigned long long)info->alloc_start); + info->alloc_start); } else { ret = -ENOMEM; goto out; @@ -947,11 +947,9 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) if (btrfs_test_opt(root, NOBARRIER)) seq_puts(seq, ",nobarrier"); if (info->max_inline != 8192 * 1024) - seq_printf(seq, ",max_inline=%llu", - (unsigned long long)info->max_inline); + seq_printf(seq, ",max_inline=%llu", info->max_inline); if (info->alloc_start != 0) - seq_printf(seq, ",alloc_start=%llu", - (unsigned long long)info->alloc_start); + seq_printf(seq, ",alloc_start=%llu", info->alloc_start); if (info->thread_pool_size != min_t(unsigned long, num_online_cpus() + 2, 8)) seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index e0336b9a4385..cac4a3f76323 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1948,8 +1948,7 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root) list_del_init(&root->root_list); spin_unlock(&fs_info->trans_lock); - pr_debug("btrfs: cleaner removing %llu\n", - (unsigned long long)root->objectid); + pr_debug("btrfs: cleaner removing %llu\n", root->objectid); btrfs_kill_all_delayed_nodes(root); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index ed685991b2c1..1534a134750f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -914,8 +914,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, printk(KERN_INFO "device fsid %pU ", disk_super->fsid); } - printk(KERN_CONT "devid %llu transid %llu %s\n", - (unsigned long long)devid, (unsigned long long)transid, path); + printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path); ret = device_list_add(path, disk_super, devid, fs_devices_ret); if (!ret && fs_devices_ret) @@ -3159,7 +3158,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl, (bctl->data.target & ~allowed))) { printk(KERN_ERR "btrfs: unable to start balance with target " "data profile %llu\n", - (unsigned long long)bctl->data.target); + bctl->data.target); ret = -EINVAL; goto out; } @@ -3168,7 +3167,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl, (bctl->meta.target & ~allowed))) { printk(KERN_ERR "btrfs: unable to start balance with target " "metadata profile %llu\n", - (unsigned long long)bctl->meta.target); + bctl->meta.target); ret = -EINVAL; goto out; } @@ -3177,7 +3176,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl, (bctl->sys.target & ~allowed))) { printk(KERN_ERR "btrfs: unable to start balance with target " "system profile %llu\n", - (unsigned long long)bctl->sys.target); + bctl->sys.target); ret = -EINVAL; goto out; } @@ -4652,8 +4651,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, if (!em) { btrfs_crit(fs_info, "unable to find logical %llu len %llu", - (unsigned long long)logical, - (unsigned long long)*length); + logical, *length); return -EINVAL; } @@ -5524,9 +5522,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, if (map_length < length) { btrfs_crit(root->fs_info, "mapping failed logical %llu bio len %llu len %llu", - (unsigned long long)logical, - (unsigned long long)length, - (unsigned long long)map_length); + logical, length, map_length); BUG(); } @@ -5846,8 +5842,7 @@ static int read_one_dev(struct btrfs_root *root, return -EIO; if (!device) { - btrfs_warn(root->fs_info, "devid %llu missing", - (unsigned long long)devid); + btrfs_warn(root->fs_info, "devid %llu missing", devid); device = add_missing_dev(root, devid, dev_uuid); if (!device) return -ENOMEM; -- cgit v1.2.3 From 6e71c47afee8178c0620323009b23813bfde7eac Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 20 Aug 2013 13:20:08 +0200 Subject: Btrfs: Make BTRFS_DEV_REPLACE_DEVID an unsigned long long constant The internal btrfs device id is a u64, hence make the constant BTRFS_DEV_REPLACE_DEVID "unsigned long long" as well, so we no longer need a cast to print it. Signed-off-by: Geert Uytterhoeven Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- fs/btrfs/dev-replace.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0ef9d866952f..7add85a645a3 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -146,7 +146,7 @@ struct btrfs_ordered_sum; #define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2 -#define BTRFS_DEV_REPLACE_DEVID 0 +#define BTRFS_DEV_REPLACE_DEVID 0ULL /* * the max metadata block size. This limit is somewhat artificial, diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index e34e72f3e1aa..af800ef677a0 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -154,7 +154,7 @@ no_valid_dev_replace_entry_found: !btrfs_test_opt(dev_root, DEGRADED)) { ret = -EIO; pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "tgtdev (devid %llu) is missing, need to run btrfs dev scan?\n", - (unsigned long long)BTRFS_DEV_REPLACE_DEVID); + BTRFS_DEV_REPLACE_DEVID); } if (dev_replace->tgtdev) { if (dev_replace->srcdev) { -- cgit v1.2.3 From 27f9f02357f2bff96fc5e8a000c78ec5f96d42af Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 20 Aug 2013 13:20:09 +0200 Subject: Btrfs: Format PAGE_SIZE as unsigned long PAGE_SIZE is "unsigned long" everywhere, so there's no need to cast it to "unsigned long long" and format it as a 64-bit number. Signed-off-by: Geert Uytterhoeven Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/scrub.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index fcdf67ffacb0..b2a5f12dc9e7 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2859,9 +2859,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, if (fs_info->chunk_root->sectorsize != PAGE_SIZE) { /* not supported for data w/o checksums */ printk(KERN_ERR - "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lld) fails\n", - fs_info->chunk_root->sectorsize, - (unsigned long long)PAGE_SIZE); + "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lu) fails\n", + fs_info->chunk_root->sectorsize, PAGE_SIZE); return -EINVAL; } -- cgit v1.2.3 From 118a0a251425b5ed5d5951097f9ef95f30611b03 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 20 Aug 2013 13:20:10 +0200 Subject: Btrfs: Format mirror_num as int mirror_num is always "int", hence don't cast it to "unsigned long long" and format it as a 64-bit number. Signed-off-by: Geert Uytterhoeven Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/scrub.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index b2a5f12dc9e7..ec6a33a20200 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3170,9 +3170,9 @@ static void copy_nocow_pages_worker(struct btrfs_work *work) copy_nocow_pages_for_inode, nocow_ctx); if (ret != 0 && ret != -ENOENT) { - pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %llu, ret %d\n", - logical, physical_for_dev_replace, len, - (unsigned long long)mirror_num, ret); + pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d\n", + logical, physical_for_dev_replace, len, mirror_num, + ret); not_written = 1; goto out; } -- cgit v1.2.3 From 410ba3a291081d98f0e58a868e1305110d986903 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 20 Aug 2013 13:20:11 +0200 Subject: Btrfs: Make btrfs_device_uuid() return unsigned long All callers of btrfs_device_uuid() cast its return type to unsigned long. Signed-off-by: Geert Uytterhoeven Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 ++-- fs/btrfs/volumes.c | 10 ++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7add85a645a3..b4e70537cada 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2164,9 +2164,9 @@ BTRFS_SETGET_STACK_FUNCS(stack_device_bandwidth, struct btrfs_dev_item, BTRFS_SETGET_STACK_FUNCS(stack_device_generation, struct btrfs_dev_item, generation, 64); -static inline char *btrfs_device_uuid(struct btrfs_dev_item *d) +static inline unsigned long btrfs_device_uuid(struct btrfs_dev_item *d) { - return (char *)d + offsetof(struct btrfs_dev_item, uuid); + return (unsigned long)d + offsetof(struct btrfs_dev_item, uuid); } static inline char *btrfs_device_fsid(struct btrfs_dev_item *d) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1534a134750f..9ea500465256 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1424,7 +1424,7 @@ static int btrfs_add_device(struct btrfs_trans_handle *trans, btrfs_set_device_bandwidth(leaf, dev_item, 0); btrfs_set_device_start_offset(leaf, dev_item, 0); - ptr = (unsigned long)btrfs_device_uuid(dev_item); + ptr = btrfs_device_uuid(dev_item); write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); ptr = (unsigned long)btrfs_device_fsid(dev_item); write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE); @@ -1924,8 +1924,7 @@ next_slot: dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item); devid = btrfs_device_id(leaf, dev_item); - read_extent_buffer(leaf, dev_uuid, - (unsigned long)btrfs_device_uuid(dev_item), + read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE); read_extent_buffer(leaf, fs_uuid, (unsigned long)btrfs_device_fsid(dev_item), @@ -5760,7 +5759,7 @@ static void fill_device_from_item(struct extent_buffer *leaf, WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID); device->is_tgtdev_for_dev_replace = 0; - ptr = (unsigned long)btrfs_device_uuid(dev_item); + ptr = btrfs_device_uuid(dev_item); read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); } @@ -5823,8 +5822,7 @@ static int read_one_dev(struct btrfs_root *root, u8 dev_uuid[BTRFS_UUID_SIZE]; devid = btrfs_device_id(leaf, dev_item); - read_extent_buffer(leaf, dev_uuid, - (unsigned long)btrfs_device_uuid(dev_item), + read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE); read_extent_buffer(leaf, fs_uuid, (unsigned long)btrfs_device_fsid(dev_item), -- cgit v1.2.3 From 1473b24ee0cb43d4b48aa0c5f1b8417928910a4d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 20 Aug 2013 13:20:12 +0200 Subject: Btrfs: Make btrfs_device_fsid() return unsigned long All callers of btrfs_device_fsid() cast its return type to unsigned long. Signed-off-by: Geert Uytterhoeven Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 ++-- fs/btrfs/volumes.c | 8 +++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b4e70537cada..b71bc3a99342 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2169,9 +2169,9 @@ static inline unsigned long btrfs_device_uuid(struct btrfs_dev_item *d) return (unsigned long)d + offsetof(struct btrfs_dev_item, uuid); } -static inline char *btrfs_device_fsid(struct btrfs_dev_item *d) +static inline unsigned long btrfs_device_fsid(struct btrfs_dev_item *d) { - return (char *)d + offsetof(struct btrfs_dev_item, fsid); + return (unsigned long)d + offsetof(struct btrfs_dev_item, fsid); } BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9ea500465256..8b8c4397165c 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1426,7 +1426,7 @@ static int btrfs_add_device(struct btrfs_trans_handle *trans, ptr = btrfs_device_uuid(dev_item); write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); - ptr = (unsigned long)btrfs_device_fsid(dev_item); + ptr = btrfs_device_fsid(dev_item); write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE); btrfs_mark_buffer_dirty(leaf); @@ -1926,8 +1926,7 @@ next_slot: devid = btrfs_device_id(leaf, dev_item); read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE); - read_extent_buffer(leaf, fs_uuid, - (unsigned long)btrfs_device_fsid(dev_item), + read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE); device = btrfs_find_device(root->fs_info, devid, dev_uuid, fs_uuid); @@ -5824,8 +5823,7 @@ static int read_one_dev(struct btrfs_root *root, devid = btrfs_device_id(leaf, dev_item); read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE); - read_extent_buffer(leaf, fs_uuid, - (unsigned long)btrfs_device_fsid(dev_item), + read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE); if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) { -- cgit v1.2.3 From 231e88f41027d90e9516d257d8085069b65686dd Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 20 Aug 2013 13:20:13 +0200 Subject: Btrfs: Make btrfs_dev_extent_chunk_tree_uuid() return unsigned long Internally, btrfs_dev_extent_chunk_tree_uuid() calculates an unsigned long, but casts it to a pointer, while all callers cast it to unsigned long again. Signed-off-by: Geert Uytterhoeven Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 ++-- fs/btrfs/volumes.c | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b71bc3a99342..2202bd8c5e16 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2330,10 +2330,10 @@ BTRFS_SETGET_FUNCS(dev_extent_chunk_offset, struct btrfs_dev_extent, chunk_offset, 64); BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64); -static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev) +static inline unsigned long btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev) { unsigned long ptr = offsetof(struct btrfs_dev_extent, chunk_tree_uuid); - return (u8 *)((unsigned long)dev + ptr); + return (unsigned long)dev + ptr; } BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8b8c4397165c..34068b887f14 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1311,8 +1311,7 @@ static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset); write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, - (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent), - BTRFS_UUID_SIZE); + btrfs_dev_extent_chunk_tree_uuid(extent), BTRFS_UUID_SIZE); btrfs_set_dev_extent_length(leaf, extent, num_bytes); btrfs_mark_buffer_dirty(leaf); -- cgit v1.2.3 From fba6aa75654394fccf2530041e9451414c28084f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 20 Aug 2013 13:20:14 +0200 Subject: Btrfs: Make btrfs_header_fsid() return unsigned long Internally, btrfs_header_fsid() calculates an unsigned long, but casts it to a pointer, while all callers cast it to unsigned long again. Signed-off-by: Geert Uytterhoeven Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 15 +++++---------- fs/btrfs/ctree.h | 5 ++--- fs/btrfs/disk-io.c | 9 +++------ fs/btrfs/ioctl.c | 3 +-- 4 files changed, 11 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1d94242ec002..8e3efe3bad24 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -274,8 +274,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, else btrfs_set_header_owner(cow, new_root_objectid); - write_extent_buffer(cow, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(cow), + write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(cow), BTRFS_FSID_SIZE); WARN_ON(btrfs_header_generation(buf) > trans->transid); @@ -997,8 +996,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, else btrfs_set_header_owner(cow, root->root_key.objectid); - write_extent_buffer(cow, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(cow), + write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(cow), BTRFS_FSID_SIZE); ret = update_ref_for_cow(trans, root, buf, cow, &last_ref); @@ -3109,8 +3107,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(c, root->root_key.objectid); - write_extent_buffer(c, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(c), + write_extent_buffer(c, root->fs_info->fsid, btrfs_header_fsid(c), BTRFS_FSID_SIZE); write_extent_buffer(c, root->fs_info->chunk_tree_uuid, @@ -3250,8 +3247,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(split, root->root_key.objectid); write_extent_buffer(split, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(split), - BTRFS_FSID_SIZE); + btrfs_header_fsid(split), BTRFS_FSID_SIZE); write_extent_buffer(split, root->fs_info->chunk_tree_uuid, (unsigned long)btrfs_header_chunk_tree_uuid(split), BTRFS_UUID_SIZE); @@ -4006,8 +4002,7 @@ again: btrfs_set_header_owner(right, root->root_key.objectid); btrfs_set_header_level(right, 0); write_extent_buffer(right, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(right), - BTRFS_FSID_SIZE); + btrfs_header_fsid(right), BTRFS_FSID_SIZE); write_extent_buffer(right, root->fs_info->chunk_tree_uuid, (unsigned long)btrfs_header_chunk_tree_uuid(right), diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2202bd8c5e16..1b8f5def29c7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2675,10 +2675,9 @@ static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb, btrfs_set_header_flags(eb, flags); } -static inline u8 *btrfs_header_fsid(struct extent_buffer *eb) +static inline unsigned long btrfs_header_fsid(struct extent_buffer *eb) { - unsigned long ptr = offsetof(struct btrfs_header, fsid); - return (u8 *)ptr; + return offsetof(struct btrfs_header, fsid); } static inline u8 *btrfs_header_chunk_tree_uuid(struct extent_buffer *eb) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5003466b00d2..d67a2ddb3ce6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -495,8 +495,7 @@ static int check_tree_block_fsid(struct btrfs_root *root, u8 fsid[BTRFS_UUID_SIZE]; int ret = 1; - read_extent_buffer(eb, fsid, (unsigned long)btrfs_header_fsid(eb), - BTRFS_FSID_SIZE); + read_extent_buffer(eb, fsid, btrfs_header_fsid(eb), BTRFS_FSID_SIZE); while (fs_devices) { if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE)) { ret = 0; @@ -1292,8 +1291,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, btrfs_set_header_owner(leaf, objectid); root->node = leaf; - write_extent_buffer(leaf, fs_info->fsid, - (unsigned long)btrfs_header_fsid(leaf), + write_extent_buffer(leaf, fs_info->fsid, btrfs_header_fsid(leaf), BTRFS_FSID_SIZE); write_extent_buffer(leaf, fs_info->chunk_tree_uuid, (unsigned long)btrfs_header_chunk_tree_uuid(leaf), @@ -1380,8 +1378,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, root->node = leaf; write_extent_buffer(root->node, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(root->node), - BTRFS_FSID_SIZE); + btrfs_header_fsid(root->node), BTRFS_FSID_SIZE); btrfs_mark_buffer_dirty(root->node); btrfs_tree_unlock(root->node); return root; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 996951a8d28d..41addd502bc9 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -436,8 +436,7 @@ static noinline int create_subvol(struct inode *dir, btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(leaf, objectid); - write_extent_buffer(leaf, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(leaf), + write_extent_buffer(leaf, root->fs_info->fsid, btrfs_header_fsid(leaf), BTRFS_FSID_SIZE); write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, (unsigned long)btrfs_header_chunk_tree_uuid(leaf), -- cgit v1.2.3 From b308bc2f05a86e728bd035e21a4974acd05f4d1e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 20 Aug 2013 13:20:15 +0200 Subject: Btrfs: Make btrfs_header_chunk_tree_uuid() return unsigned long Internally, btrfs_header_chunk_tree_uuid() calculates an unsigned long, but casts it to a pointer, while all callers cast it to unsigned long again. Signed-off-by: Geert Uytterhoeven Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 7 +++---- fs/btrfs/ctree.h | 5 ++--- fs/btrfs/disk-io.c | 5 ++--- fs/btrfs/ioctl.c | 2 +- 4 files changed, 8 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 8e3efe3bad24..5fa521bec07b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -3111,8 +3111,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, BTRFS_FSID_SIZE); write_extent_buffer(c, root->fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(c), - BTRFS_UUID_SIZE); + btrfs_header_chunk_tree_uuid(c), BTRFS_UUID_SIZE); btrfs_set_node_key(c, &lower_key, 0); btrfs_set_node_blockptr(c, 0, lower->start); @@ -3249,7 +3248,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, write_extent_buffer(split, root->fs_info->fsid, btrfs_header_fsid(split), BTRFS_FSID_SIZE); write_extent_buffer(split, root->fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(split), + btrfs_header_chunk_tree_uuid(split), BTRFS_UUID_SIZE); tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid); @@ -4005,7 +4004,7 @@ again: btrfs_header_fsid(right), BTRFS_FSID_SIZE); write_extent_buffer(right, root->fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(right), + btrfs_header_chunk_tree_uuid(right), BTRFS_UUID_SIZE); if (split == 0) { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1b8f5def29c7..c90be01cbe67 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2680,10 +2680,9 @@ static inline unsigned long btrfs_header_fsid(struct extent_buffer *eb) return offsetof(struct btrfs_header, fsid); } -static inline u8 *btrfs_header_chunk_tree_uuid(struct extent_buffer *eb) +static inline unsigned long btrfs_header_chunk_tree_uuid(struct extent_buffer *eb) { - unsigned long ptr = offsetof(struct btrfs_header, chunk_tree_uuid); - return (u8 *)ptr; + return offsetof(struct btrfs_header, chunk_tree_uuid); } static inline int btrfs_is_leaf(struct extent_buffer *eb) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d67a2ddb3ce6..940bc486a0f8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1294,7 +1294,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, write_extent_buffer(leaf, fs_info->fsid, btrfs_header_fsid(leaf), BTRFS_FSID_SIZE); write_extent_buffer(leaf, fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(leaf), + btrfs_header_chunk_tree_uuid(leaf), BTRFS_UUID_SIZE); btrfs_mark_buffer_dirty(leaf); @@ -2629,8 +2629,7 @@ int open_ctree(struct super_block *sb, chunk_root->commit_root = btrfs_root_node(chunk_root); read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), - BTRFS_UUID_SIZE); + btrfs_header_chunk_tree_uuid(chunk_root->node), BTRFS_UUID_SIZE); ret = btrfs_read_chunk_tree(chunk_root); if (ret) { diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 41addd502bc9..0ce93ac17f69 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -439,7 +439,7 @@ static noinline int create_subvol(struct inode *dir, write_extent_buffer(leaf, root->fs_info->fsid, btrfs_header_fsid(leaf), BTRFS_FSID_SIZE); write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(leaf), + btrfs_header_chunk_tree_uuid(leaf), BTRFS_UUID_SIZE); btrfs_mark_buffer_dirty(leaf); -- cgit v1.2.3 From 778746b53be8a83a640b9b855b249c8020157eae Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 20 Aug 2013 13:20:16 +0200 Subject: Btrfs: PAGE_CACHE_SIZE is already unsigned long PAGE_CACHE_SIZE == PAGE_SIZE is "unsigned long" everywhere, so there's no need to cast it to "unsigned long". Signed-off-by: Geert Uytterhoeven Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/check-integrity.c | 9 ++++----- fs/btrfs/extent_io.c | 18 +++++++++--------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index fa68c8ab9a15..3c6379987a97 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -1236,8 +1236,7 @@ static void btrfsic_read_from_block_data( unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT; WARN_ON(offset + len > block_ctx->len); - offset_in_page = (start_offset + offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); + offset_in_page = (start_offset + offset) & (PAGE_CACHE_SIZE - 1); while (len > 0) { cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page)); @@ -3122,19 +3121,19 @@ int btrfsic_mount(struct btrfs_root *root, if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) { printk(KERN_INFO "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", - root->nodesize, (unsigned long)PAGE_CACHE_SIZE); + root->nodesize, PAGE_CACHE_SIZE); return -1; } if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) { printk(KERN_INFO "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", - root->leafsize, (unsigned long)PAGE_CACHE_SIZE); + root->leafsize, PAGE_CACHE_SIZE); return -1; } if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) { printk(KERN_INFO "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", - root->sectorsize, (unsigned long)PAGE_CACHE_SIZE); + root->sectorsize, PAGE_CACHE_SIZE); return -1; } state = kzalloc(sizeof(*state), GFP_NOFS); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 5172e8253c4a..09582b81640c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4873,7 +4873,7 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); while (len > 0) { page = extent_buffer_page(eb, i); @@ -4943,7 +4943,7 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); while (len > 0) { page = extent_buffer_page(eb, i); @@ -4977,7 +4977,7 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); while (len > 0) { page = extent_buffer_page(eb, i); @@ -5007,7 +5007,7 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); while (len > 0) { page = extent_buffer_page(eb, i); @@ -5038,7 +5038,7 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, WARN_ON(src->len != dst_len); offset = (start_offset + dst_offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); + (PAGE_CACHE_SIZE - 1); while (len > 0) { page = extent_buffer_page(dst, i); @@ -5124,9 +5124,9 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, while (len > 0) { dst_off_in_page = (start_offset + dst_offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); + (PAGE_CACHE_SIZE - 1); src_off_in_page = (start_offset + src_offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); + (PAGE_CACHE_SIZE - 1); dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; @@ -5177,9 +5177,9 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; dst_off_in_page = (start_offset + dst_end) & - ((unsigned long)PAGE_CACHE_SIZE - 1); + (PAGE_CACHE_SIZE - 1); src_off_in_page = (start_offset + src_end) & - ((unsigned long)PAGE_CACHE_SIZE - 1); + (PAGE_CACHE_SIZE - 1); cur = min_t(unsigned long, len, src_off_in_page + 1); cur = min(cur, dst_off_in_page + 1); -- cgit v1.2.3 From fce29364e58666ed05dab8a513d2366f31d132bf Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 20 Aug 2013 13:20:17 +0200 Subject: Btrfs: Do not truncate sector_t on 32-bit with CONFIG_LBDAF=y sector_t may be either "u64" (always 64 bit) or "unsigned long" (32 or 64 bit). Casting it to "unsigned long" will truncate it on 32-bit platforms where CONFIG_LBDAF=y. Cast to "unsigned long long" and format using "ll" instead. Signed-off-by: Geert Uytterhoeven Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/check-integrity.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 3c6379987a97..45bc95570c7f 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -2958,9 +2958,10 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh) if (dev_state->state->print_mask & BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) printk(KERN_INFO - "submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu)," + "submit_bh(rw=0x%x, blocknr=%llu (bytenr %llu)," " size=%lu, data=%p, bdev=%p)\n", - rw, (unsigned long)bh->b_blocknr, dev_bytenr, + rw, (unsigned long long)bh->b_blocknr, + dev_bytenr, (unsigned long)bh->b_size, bh->b_data, bh->b_bdev); btrfsic_process_written_block(dev_state, dev_bytenr, @@ -3027,9 +3028,10 @@ void btrfsic_submit_bio(int rw, struct bio *bio) BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) printk(KERN_INFO "submit_bio(rw=0x%x, bi_vcnt=%u," - " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n", - rw, bio->bi_vcnt, (unsigned long)bio->bi_sector, - dev_bytenr, bio->bi_bdev); + " bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n", + rw, bio->bi_vcnt, + (unsigned long long)bio->bi_sector, dev_bytenr, + bio->bi_bdev); mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt, GFP_NOFS); -- cgit v1.2.3 From 8d78eb16638b4e9e57225fa32b0f6595ecbf4424 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 20 Aug 2013 13:20:18 +0200 Subject: Btrfs: Use %z to format size_t Signed-off-by: Geert Uytterhoeven Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/check-integrity.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 45bc95570c7f..1c47be187240 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -2959,11 +2959,9 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh) BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) printk(KERN_INFO "submit_bh(rw=0x%x, blocknr=%llu (bytenr %llu)," - " size=%lu, data=%p, bdev=%p)\n", + " size=%zu, data=%p, bdev=%p)\n", rw, (unsigned long long)bh->b_blocknr, - dev_bytenr, - (unsigned long)bh->b_size, bh->b_data, - bh->b_bdev); + dev_bytenr, bh->b_size, bh->b_data, bh->b_bdev); btrfsic_process_written_block(dev_state, dev_bytenr, &bh->b_data, 1, NULL, NULL, bh, rw); -- cgit v1.2.3 From bdab49d760c14dd7d5a8f64dbfedd7a3f1121e9b Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Tue, 20 Aug 2013 17:51:48 +0100 Subject: Btrfs: fix printing of non NULL terminated string The name buffer is not terminated by a '\0' character, therefore it needs to be printed with %.*s and use the length of the buffer. Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/delayed-inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 33dd4a5349fb..cbd9523ad09c 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1471,10 +1471,10 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, mutex_lock(&delayed_node->mutex); ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item); if (unlikely(ret)) { - printk(KERN_ERR "err add delayed dir index item(name: %s) into " - "the insertion tree of the delayed node" + printk(KERN_ERR "err add delayed dir index item(name: %.*s) " + "into the insertion tree of the delayed node" "(root id: %llu, inode id: %llu, errno: %d)\n", - name, delayed_node->root->objectid, + name_len, name, delayed_node->root->objectid, delayed_node->inode_id, ret); BUG(); } -- cgit v1.2.3 From 57cfd4627046efc43081d26b5db77dbfb7595caa Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 20 Aug 2013 15:55:39 -0400 Subject: Btrfs: fix send to deal with sparse files properly Send was just sending everything it found, even if the extent was a hole. This is unpleasant for users, so just skip holes when we are sending. This will also skip sending prealloc extents since the send spec doesn't have a prealloc command. Eventually we will add a prealloc command and rev the send version so we can send down the prealloc info. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/send.c | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index db7da682f0d2..cbe92da5f33b 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -3920,7 +3920,8 @@ static int is_extent_unchanged(struct send_ctx *sctx, btrfs_item_key_to_cpu(eb, &found_key, slot); if (found_key.objectid != key.objectid || found_key.type != key.type) { - ret = 0; + /* If we're a hole then just pretend nothing changed */ + ret = (left_disknr) ? 0 : 1; goto out; } @@ -3946,7 +3947,8 @@ static int is_extent_unchanged(struct send_ctx *sctx, * This may only happen on the first iteration. */ if (found_key.offset + right_len <= ekey->offset) { - ret = 0; + /* If we're a hole just pretend nothing changed */ + ret = (left_disknr) ? 0 : 1; goto out; } @@ -4011,8 +4013,8 @@ static int process_extent(struct send_ctx *sctx, struct btrfs_path *path, struct btrfs_key *key) { - int ret = 0; struct clone_root *found_clone = NULL; + int ret = 0; if (S_ISLNK(sctx->cur_inode_mode)) return 0; @@ -4025,6 +4027,32 @@ static int process_extent(struct send_ctx *sctx, ret = 0; goto out; } + } else { + struct btrfs_file_extent_item *ei; + u8 type; + + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + type = btrfs_file_extent_type(path->nodes[0], ei); + if (type == BTRFS_FILE_EXTENT_PREALLOC || + type == BTRFS_FILE_EXTENT_REG) { + /* + * The send spec does not have a prealloc command yet, + * so just leave a hole for prealloc'ed extents until + * we have enough commands queued up to justify rev'ing + * the send spec. + */ + if (type == BTRFS_FILE_EXTENT_PREALLOC) { + ret = 0; + goto out; + } + + /* Have a hole, just skip it. */ + if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) { + ret = 0; + goto out; + } + } } ret = find_extent_clone(sctx, path, key->objectid, key->offset, -- cgit v1.2.3 From e8e7cff667e674a886f4fbf1773c217bb9a0f664 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 21 Aug 2013 15:54:00 -0400 Subject: Btrfs: do not clear our orphan item runtime flag on eexist We were unconditionally clearing our runtime flag on the inode on error when trying to insert an orphan item. This is wrong in the case of -EEXIST since we obviously have an orphan item. This was causing us to not do the correct cleanup of our orphan items which caused issues on cleanup. This happens because currently when truncate fails we just leave the orphan item on there so it can be cleaned up, so if we go to remove the file later we will hit this issue. What we do for truncate isn't right either, but we shouldn't screw this sort of thing up on error either, so fix this and then I'll fix truncate in a different patch. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 21d8674f7837..4d4e2de3e879 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2950,14 +2950,14 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) if (insert >= 1) { ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); if (ret) { - clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, - &BTRFS_I(inode)->runtime_flags); if (reserve) { clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, &BTRFS_I(inode)->runtime_flags); btrfs_orphan_release_metadata(inode); } if (ret != -EEXIST) { + clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, + &BTRFS_I(inode)->runtime_flags); btrfs_abort_transaction(trans, root, ret); return ret; } -- cgit v1.2.3 From b8d0c69b9469ffd33df30fee3e990f2d4aa68a09 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 22 Aug 2013 17:03:29 -0400 Subject: Btrfs: remove ourselves from the cluster list under lock A user was reporting weird warnings from btrfs_put_delayed_ref() and I noticed that we were doing this list_del_init() on our head ref outside of delayed_refs->lock. This is a problem if we have people still on the list, we could end up modifying old pointers and such. Fix this by removing us from the list before we do our run_delayed_ref on our head ref. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 69083330ee16..cfb3cf711b34 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2440,6 +2440,8 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, default: WARN_ON(1); } + } else { + list_del_init(&locked_ref->cluster); } spin_unlock(&delayed_refs->lock); @@ -2462,7 +2464,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, * list before we release it. */ if (btrfs_delayed_ref_is_head(ref)) { - list_del_init(&locked_ref->cluster); btrfs_delayed_ref_unlock(locked_ref); locked_ref = NULL; } -- cgit v1.2.3 From d73068018419c5999f594a52998621947dc1f7d0 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Fri, 9 Aug 2013 15:41:36 +0100 Subject: Btrfs: fix race between removing a dev and writing sbs This change fixes an issue when removing a device and writing all super blocks run simultaneously. Here's the steps necessary for the issue to happen: 1) disk-io.c:write_all_supers() gets a number of N devices from the super_copy, so it will not panic if it fails to write super blocks for N - 1 devices; 2) Then it tries to acquire the device_list_mutex, but blocks because volumes.c:btrfs_rm_device() got it first; 3) btrfs_rm_device() removes the device from the list, then unlocks the mutex and after the unlock it updates the number of devices in super_copy to N - 1. 4) write_all_supers() finally acquires the mutex, iterates over all the devices in the list and gets N - 1 errors, that is, it failed to write super blocks to all the devices; 5) Because write_all_supers() thinks there are a total of N devices, it considers N - 1 errors to be ok, and therefore won't panic. So this change just makes sure that write_all_supers() reads the number of devices from super_copy after it acquires the device_list_mutex. Conversely, it changes btrfs_rm_device() to update the number of devices in super_copy before it releases the device list mutex. The code path to add a new device (volumes.c:btrfs_init_new_device), already has the right behaviour: it updates the number of devices in super_copy while holding the device_list_mutex. The only code path that doesn't lock the device list mutex before updating the number of devices in the super copy is disk-io.c:next_root_backup(), called by open_ctree() during mount time where concurrency issues can't happen. Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/volumes.c | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 940bc486a0f8..6d642e487229 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3365,7 +3365,6 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) int total_errors = 0; u64 flags; - max_errors = btrfs_super_num_devices(root->fs_info->super_copy) - 1; do_barriers = !btrfs_test_opt(root, NOBARRIER); backup_super_roots(root->fs_info); @@ -3374,6 +3373,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) mutex_lock(&root->fs_info->fs_devices->device_list_mutex); head = &root->fs_info->fs_devices->devices; + max_errors = btrfs_super_num_devices(root->fs_info->super_copy) - 1; if (do_barriers) { ret = barrier_all_devices(root->fs_info); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 34068b887f14..3f1c2c200691 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1620,7 +1620,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) /* * the device list mutex makes sure that we don't change * the device list while someone else is writing out all - * the device supers. + * the device supers. Whoever is writing all supers, should + * lock the device list mutex before getting the number of + * devices in the super block (super_copy). Conversely, + * whoever updates the number of devices in the super block + * (super_copy) should hold the device list mutex. */ cur_devices = device->fs_devices; @@ -1644,10 +1648,10 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) device->fs_devices->open_devices--; call_rcu(&device->rcu, free_device); - mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1; btrfs_set_super_num_devices(root->fs_info->super_copy, num_devices); + mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); if (cur_devices->open_devices == 0) { struct btrfs_fs_devices *fs_devices; -- cgit v1.2.3 From f71717502460c5cd6409b66835b17ae00af6d5f1 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Mon, 12 Aug 2013 20:56:58 +0100 Subject: Btrfs: fix race conditions in BTRFS_IOC_FS_INFO ioctl The handler for the ioctl BTRFS_IOC_FS_INFO was reading the number of devices before acquiring the device list mutex. This could lead to inconsistent results because the update of the device list and the number of devices counter (amongst other counters related to the device list) are updated in volumes.c while holding the device list mutex - except for 2 places, one was volumes.c:btrfs_prepare_sprout() and the other was volumes.c:device_list_add(). For example, if we have 2 devices, with IDs 1 and 2 and then add a new device, with ID 3, and while adding the device is in progress an BTRFS_IOC_FS_INFO ioctl arrives, it could return a number of devices of 2 and a max dev id of 3. This would be incorrect. Also, this ioctl handler was reading the fsid while it can be updated concurrently. This can happen when while a new device is being added and the current filesystem is in seeding mode. Example: $ mkfs.btrfs -f /dev/sdb1 $ mkfs.btrfs -f /dev/sdb2 $ btrfstune -S 1 /dev/sdb1 $ mount /dev/sdb1 /mnt/test $ btrfs device add /dev/sdb2 /mnt/test If during the last step a BTRFS_IOC_FS_INFO ioctl was requested, it could read an fsid that was never valid (some bits part of the old fsid and others part of the new fsid). Also, it could read a number of devices that doesn't match the number of devices in the list and the max device id, as explained before. Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 2 +- fs/btrfs/volumes.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0ce93ac17f69..ddb4bc1252ad 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2453,10 +2453,10 @@ static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg) if (!fi_args) return -ENOMEM; + mutex_lock(&fs_devices->device_list_mutex); fi_args->num_devices = fs_devices->num_devices; memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid)); - mutex_lock(&fs_devices->device_list_mutex); list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { if (device->devid > fi_args->max_id) fi_args->max_id = device->devid; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 3f1c2c200691..74614e3b5ad3 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -492,10 +492,10 @@ static noinline int device_list_add(const char *path, mutex_lock(&fs_devices->device_list_mutex); list_add_rcu(&device->dev_list, &fs_devices->devices); + fs_devices->num_devices++; mutex_unlock(&fs_devices->device_list_mutex); device->fs_devices = fs_devices; - fs_devices->num_devices++; } else if (!device->name || strcmp(device->name->str, path)) { name = rcu_string_strdup(path, GFP_NOFS); if (!name) @@ -1852,7 +1852,6 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) mutex_lock(&root->fs_info->fs_devices->device_list_mutex); list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices, synchronize_rcu); - mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list); list_for_each_entry(device, &seed_devices->devices, dev_list) { @@ -1868,6 +1867,8 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) generate_random_uuid(fs_devices->fsid); memcpy(root->fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE); memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE); + mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); + super_flags = btrfs_super_flags(disk_super) & ~BTRFS_SUPER_FLAG_SEEDING; btrfs_set_super_flags(disk_super, super_flags); -- cgit v1.2.3 From 742916b885edbc6453b4769458959929746e8e7e Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Tue, 6 Aug 2013 10:29:54 +0800 Subject: Btrfs: pass gfp_t to __add_prelim_ref() to avoid always using GFP_ATOMIC Currently, only add_delayed_refs have to allocate with GFP_ATOMIC, So just pass arg 'gfp_t' to decide which allocation mode. Signed-off-by: Wang Shilong Reviewed-by: Miao Xie Reviewed-by: Jan Schmidt Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 3da8f42e08a9..f3cb19114c95 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -160,12 +160,12 @@ struct __prelim_ref { static int __add_prelim_ref(struct list_head *head, u64 root_id, struct btrfs_key *key, int level, - u64 parent, u64 wanted_disk_byte, int count) + u64 parent, u64 wanted_disk_byte, int count, + gfp_t gfp_mask) { struct __prelim_ref *ref; - /* in case we're adding delayed refs, we're holding the refs spinlock */ - ref = kmalloc(sizeof(*ref), GFP_ATOMIC); + ref = kmalloc(sizeof(*ref), gfp_mask); if (!ref) return -ENOMEM; @@ -547,7 +547,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, ref = btrfs_delayed_node_to_tree_ref(node); ret = __add_prelim_ref(prefs, ref->root, &op_key, ref->level + 1, 0, node->bytenr, - node->ref_mod * sgn); + node->ref_mod * sgn, GFP_ATOMIC); break; } case BTRFS_SHARED_BLOCK_REF_KEY: { @@ -557,7 +557,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, ret = __add_prelim_ref(prefs, ref->root, NULL, ref->level + 1, ref->parent, node->bytenr, - node->ref_mod * sgn); + node->ref_mod * sgn, GFP_ATOMIC); break; } case BTRFS_EXTENT_DATA_REF_KEY: { @@ -569,7 +569,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, key.offset = ref->offset; ret = __add_prelim_ref(prefs, ref->root, &key, 0, 0, node->bytenr, - node->ref_mod * sgn); + node->ref_mod * sgn, GFP_ATOMIC); break; } case BTRFS_SHARED_DATA_REF_KEY: { @@ -582,7 +582,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, key.offset = ref->offset; ret = __add_prelim_ref(prefs, ref->root, &key, 0, ref->parent, node->bytenr, - node->ref_mod * sgn); + node->ref_mod * sgn, GFP_ATOMIC); break; } default: @@ -656,7 +656,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, case BTRFS_SHARED_BLOCK_REF_KEY: ret = __add_prelim_ref(prefs, 0, NULL, *info_level + 1, offset, - bytenr, 1); + bytenr, 1, GFP_NOFS); break; case BTRFS_SHARED_DATA_REF_KEY: { struct btrfs_shared_data_ref *sdref; @@ -665,13 +665,13 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, sdref = (struct btrfs_shared_data_ref *)(iref + 1); count = btrfs_shared_data_ref_count(leaf, sdref); ret = __add_prelim_ref(prefs, 0, NULL, 0, offset, - bytenr, count); + bytenr, count, GFP_NOFS); break; } case BTRFS_TREE_BLOCK_REF_KEY: ret = __add_prelim_ref(prefs, offset, NULL, *info_level + 1, 0, - bytenr, 1); + bytenr, 1, GFP_NOFS); break; case BTRFS_EXTENT_DATA_REF_KEY: { struct btrfs_extent_data_ref *dref; @@ -686,7 +686,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, key.offset = btrfs_extent_data_ref_offset(leaf, dref); root = btrfs_extent_data_ref_root(leaf, dref); ret = __add_prelim_ref(prefs, root, &key, 0, 0, - bytenr, count); + bytenr, count, GFP_NOFS); break; } default: @@ -737,7 +737,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, case BTRFS_SHARED_BLOCK_REF_KEY: ret = __add_prelim_ref(prefs, 0, NULL, info_level + 1, key.offset, - bytenr, 1); + bytenr, 1, GFP_NOFS); break; case BTRFS_SHARED_DATA_REF_KEY: { struct btrfs_shared_data_ref *sdref; @@ -747,13 +747,13 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, struct btrfs_shared_data_ref); count = btrfs_shared_data_ref_count(leaf, sdref); ret = __add_prelim_ref(prefs, 0, NULL, 0, key.offset, - bytenr, count); + bytenr, count, GFP_NOFS); break; } case BTRFS_TREE_BLOCK_REF_KEY: ret = __add_prelim_ref(prefs, key.offset, NULL, info_level + 1, 0, - bytenr, 1); + bytenr, 1, GFP_NOFS); break; case BTRFS_EXTENT_DATA_REF_KEY: { struct btrfs_extent_data_ref *dref; @@ -769,7 +769,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, key.offset = btrfs_extent_data_ref_offset(leaf, dref); root = btrfs_extent_data_ref_root(leaf, dref); ret = __add_prelim_ref(prefs, root, &key, 0, 0, - bytenr, count); + bytenr, count, GFP_NOFS); break; } default: -- cgit v1.2.3 From b9e9a6cbc6d25b89d8007e5a680319e07921ead8 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Fri, 9 Aug 2013 13:25:36 +0800 Subject: Btrfs: allocate prelim_ref with a slab allocater struct __prelim_ref is allocated and freed frequently when walking backref tree, using slab allocater can not only speed up allocating but also detect memory leaks. Signed-off-by: Wang Shilong Reviewed-by: Miao Xie Reviewed-by: Jan Schmidt Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 33 +++++++++++++++++++++++++++------ fs/btrfs/backref.h | 2 ++ fs/btrfs/super.c | 8 ++++++++ 3 files changed, 37 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index f3cb19114c95..0552a599b28f 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -119,6 +119,26 @@ struct __prelim_ref { u64 wanted_disk_byte; }; +static struct kmem_cache *btrfs_prelim_ref_cache; + +int __init btrfs_prelim_ref_init(void) +{ + btrfs_prelim_ref_cache = kmem_cache_create("btrfs_prelim_ref", + sizeof(struct __prelim_ref), + 0, + SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, + NULL); + if (!btrfs_prelim_ref_cache) + return -ENOMEM; + return 0; +} + +void btrfs_prelim_ref_exit(void) +{ + if (btrfs_prelim_ref_cache) + kmem_cache_destroy(btrfs_prelim_ref_cache); +} + /* * the rules for all callers of this function are: * - obtaining the parent is the goal @@ -165,7 +185,7 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id, { struct __prelim_ref *ref; - ref = kmalloc(sizeof(*ref), gfp_mask); + ref = kmem_cache_alloc(btrfs_prelim_ref_cache, gfp_mask); if (!ref) return -ENOMEM; @@ -368,7 +388,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, /* additional parents require new refs being added here */ while ((node = ulist_next(parents, &uiter))) { - new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS); + new_ref = kmem_cache_alloc(btrfs_prelim_ref_cache, + GFP_NOFS); if (!new_ref) { ret = -ENOMEM; goto out; @@ -492,7 +513,7 @@ static void __merge_refs(struct list_head *head, int mode) ref1->count += ref2->count; list_del(&ref2->list); - kfree(ref2); + kmem_cache_free(btrfs_prelim_ref_cache, ref2); } } @@ -955,7 +976,7 @@ again: } } list_del(&ref->list); - kfree(ref); + kmem_cache_free(btrfs_prelim_ref_cache, ref); } out: @@ -963,13 +984,13 @@ out: while (!list_empty(&prefs)) { ref = list_first_entry(&prefs, struct __prelim_ref, list); list_del(&ref->list); - kfree(ref); + kmem_cache_free(btrfs_prelim_ref_cache, ref); } while (!list_empty(&prefs_delayed)) { ref = list_first_entry(&prefs_delayed, struct __prelim_ref, list); list_del(&ref->list); - kfree(ref); + kmem_cache_free(btrfs_prelim_ref_cache, ref); } return ret; diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 8f2e76702932..a910b27a8ad9 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -72,4 +72,6 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, struct btrfs_inode_extref **ret_extref, u64 *found_off); +int __init btrfs_prelim_ref_init(void); +void btrfs_prelim_ref_exit(void); #endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4d5d0f3eb0d8..3aab10ce63e8 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -56,6 +56,7 @@ #include "rcu-string.h" #include "dev-replace.h" #include "free-space-cache.h" +#include "backref.h" #include "tests/btrfs-tests.h" #define CREATE_TRACE_POINTS @@ -1810,6 +1811,10 @@ static int __init init_btrfs_fs(void) if (err) goto free_auto_defrag; + err = btrfs_prelim_ref_init(); + if (err) + goto free_prelim_ref; + err = btrfs_interface_init(); if (err) goto free_delayed_ref; @@ -1830,6 +1835,8 @@ static int __init init_btrfs_fs(void) unregister_ioctl: btrfs_interface_exit(); +free_prelim_ref: + btrfs_prelim_ref_exit(); free_delayed_ref: btrfs_delayed_ref_exit(); free_auto_defrag: @@ -1856,6 +1863,7 @@ static void __exit exit_btrfs_fs(void) btrfs_delayed_ref_exit(); btrfs_auto_defrag_exit(); btrfs_delayed_inode_exit(); + btrfs_prelim_ref_exit(); ordered_data_exit(); extent_map_exit(); extent_io_exit(); -- cgit v1.2.3 From 9d565ba4330db102127f19d159609df5c3a375d8 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Fri, 9 Aug 2013 17:08:40 +0200 Subject: Btrfs: get rid of one BUG() in write_all_supers() The second round uses btrfs_error() and return -EIO, the first round can handle write errors the same way. Signed-off-by: Stefan Behrens Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6d642e487229..0f75a01f69e2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3416,8 +3416,10 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) printk(KERN_ERR "btrfs: %d errors while writing supers\n", total_errors); - /* This shouldn't happen. FUA is masked off if unsupported */ - BUG(); + /* FUA is masked off if unsupported and can't be the reason */ + btrfs_error(root->fs_info, -EIO, + "%d errors while writing supers", total_errors); + return -EIO; } total_errors = 0; -- cgit v1.2.3 From 48475471728f060bfd2e686f592ef208d3ba8b7d Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Fri, 23 Aug 2013 10:34:42 +0200 Subject: Btrfs: fix for patch "cleanup: don't check the same thing twice" Mitch Harder noticed that the patch 3c64a1a mentioned in the subject line was causing a kernel BUG() on snapshot deletion. The patch was wrong. It did not handle cached roots correctly. The check for root_refs == 0 was removed everywhere where btrfs_read_fs_root_no_name() had been used to retrieve the root, because this check was already dealt with in btrfs_read_fs_root_no_name(). But in the case when the root was found in the cache, there was no such check. This patch adds the missing check in the case where the root is found in the cache. Reported-by: Mitch Harder Signed-off-by: Stefan Behrens Reviewed-by: Miao Xie Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0f75a01f69e2..21d1d066758e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1584,8 +1584,11 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, ERR_PTR(-ENOENT); again: root = btrfs_lookup_fs_root(fs_info, location->objectid); - if (root) + if (root) { + if (btrfs_root_refs(&root->root_item) == 0) + return ERR_PTR(-ENOENT); return root; + } root = btrfs_read_fs_root(fs_info->tree_root, location); if (IS_ERR(root)) -- cgit v1.2.3 From 23fa76b0ba78b7d84708d9ee683587d8a5bbceef Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Fri, 23 Aug 2013 10:34:43 +0200 Subject: Btrf: cleanup: don't check for root_refs == 0 twice btrfs_read_fs_root_no_name() already checks if btrfs_root_refs() is zero and returns ENOENT in this case. There is no need to do it again in three more places. Signed-off-by: Stefan Behrens Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/file.c | 5 ----- fs/btrfs/relocation.c | 3 --- fs/btrfs/scrub.c | 5 ----- 3 files changed, 13 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5e7ea996f105..5ba87b0d2ef8 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -310,11 +310,6 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, goto cleanup; } - if (btrfs_root_refs(&inode_root->root_item) == 0) { - ret = -ENOENT; - goto cleanup; - } - key.objectid = defrag->ino; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); key.offset = 0; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index cf5e30ff7093..aacc2121e87c 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2354,9 +2354,6 @@ again: if (IS_ERR(root)) continue; - if (btrfs_root_refs(&root->root_item) == 0) - continue; - trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index ec6a33a20200..0afcd452fcb3 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3217,11 +3217,6 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) return PTR_ERR(local_root); } - if (btrfs_root_refs(&local_root->root_item) == 0) { - srcu_read_unlock(&fs_info->subvol_srcu, srcu_index); - return -ENOENT; - } - key.type = BTRFS_INODE_ITEM_KEY; key.objectid = inum; key.offset = 0; -- cgit v1.2.3 From 726551ebc79c0a41f66376463ebe8c84c89c1151 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 26 Aug 2013 13:45:53 -0400 Subject: Btrfs: adjust the fs_devices->missing count on unmount I noticed that if I tried to mount a file system with -o degraded after having done it once already we would fail to mount. This is because the fs_devices->missing count was getting bumped everytime we mounted, but not getting reset whenever we unmounted. To fix this we just drop the missing count as we're closing devices to make sure this doesn't happen. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 74614e3b5ad3..f42e41290124 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -673,6 +673,8 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) if (device->can_discard) fs_devices->num_can_discard--; + if (device->missing) + fs_devices->missing_devices--; new_device = btrfs_alloc_device(NULL, &device->devid, device->uuid); -- cgit v1.2.3 From 2e17c7c65e5dcbb53a6384a3406244f62bca189c Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 26 Aug 2013 16:53:15 -0400 Subject: Btrfs: add support for asserts One of the complaints we get a lot is how many BUG_ON()'s we have. So to help with this I'm introducing a kconfig option to enable/disable a new ASSERT() mechanism much like what XFS does. This will allow us developers to still get our nice panics but allow users/distros to compile them out. With this we can go through and convert any BUG_ON()'s that we have to catch actual programming mistakes to the new ASSERT() and then fix everybody else to return errors. This will also allow developers to leave sanity checks in their new code to make sure we don't trip over problems while testing stuff and vetting new features. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/Kconfig | 9 +++++++++ fs/btrfs/ctree.h | 16 ++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index 2b3b83296977..398cbd517be2 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -72,3 +72,12 @@ config BTRFS_DEBUG performance, or export extra information via sysfs. If unsure, say N. + +config BTRFS_ASSERT + bool "Btrfs assert support" + depends on BTRFS_FS + help + Enable run-time assertion checking. This will result in panics if + any of the assertions trip. This is meant for btrfs developers only. + + If unsure, say N. diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c90be01cbe67..1f3fd584e5f4 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3814,6 +3814,22 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) #define btrfs_debug(fs_info, fmt, args...) \ btrfs_printk(fs_info, KERN_DEBUG fmt, ##args) +#ifdef CONFIG_BTRFS_ASSERT + +static inline void assfail(char *expr, char *file, int line) +{ + printk(KERN_ERR "BTRFS assertion failed: %s, file: %s, line: %d", + expr, file, line); + BUG(); +} + +#define ASSERT(expr) \ + (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) +#else +#define ASSERT(expr) ((void)0) +#endif + +#define btrfs_assert() __printf(5, 6) void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, unsigned int line, int errno, const char *fmt, ...); -- cgit v1.2.3 From b12d6869f67a95692017d26313ea5736d4043d0f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 26 Aug 2013 17:14:08 -0400 Subject: Btrfs: convert all bug_ons in free-space-cache.c All of these are logic checks to make sure we're not breaking anything, so convert them over to ASSERT(). Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/free-space-cache.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 23182648976b..ef3bea7bb257 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -308,7 +308,7 @@ static void io_ctl_unmap_page(struct io_ctl *io_ctl) static void io_ctl_map_page(struct io_ctl *io_ctl, int clear) { - BUG_ON(io_ctl->index >= io_ctl->num_pages); + ASSERT(io_ctl->index < io_ctl->num_pages); io_ctl->page = io_ctl->pages[io_ctl->index++]; io_ctl->cur = kmap(io_ctl->page); io_ctl->orig = io_ctl->cur; @@ -728,7 +728,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, goto free_cache; } } else { - BUG_ON(!num_bitmaps); + ASSERT(num_bitmaps); num_bitmaps--; e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); if (!e->bitmap) { @@ -1028,7 +1028,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, leaf = path->nodes[0]; if (ret > 0) { struct btrfs_key found_key; - BUG_ON(!path->slots[0]); + ASSERT(path->slots[0]); path->slots[0]--; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || @@ -1116,7 +1116,7 @@ int btrfs_write_out_cache(struct btrfs_root *root, static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit, u64 offset) { - BUG_ON(offset < bitmap_start); + ASSERT(offset >= bitmap_start); offset -= bitmap_start; return (unsigned long)(div_u64(offset, unit)); } @@ -1271,7 +1271,7 @@ tree_search_offset(struct btrfs_free_space_ctl *ctl, if (n) { entry = rb_entry(n, struct btrfs_free_space, offset_index); - BUG_ON(entry->offset > offset); + ASSERT(entry->offset <= offset); } else { if (fuzzy) return entry; @@ -1335,7 +1335,7 @@ static int link_free_space(struct btrfs_free_space_ctl *ctl, { int ret = 0; - BUG_ON(!info->bitmap && !info->bytes); + ASSERT(info->bytes || info->bitmap); ret = tree_insert_offset(&ctl->free_space_offset, info->offset, &info->offset_index, (info->bitmap != NULL)); if (ret) @@ -1358,7 +1358,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) max_bitmaps = max(max_bitmaps, 1); - BUG_ON(ctl->total_bitmaps > max_bitmaps); + ASSERT(ctl->total_bitmaps <= max_bitmaps); /* * The goal is to keep the total amount of memory used per 1gb of space @@ -1402,7 +1402,7 @@ static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, start = offset_to_bit(info->offset, ctl->unit, offset); count = bytes_to_bits(bytes, ctl->unit); - BUG_ON(start + count > BITS_PER_BITMAP); + ASSERT(start + count <= BITS_PER_BITMAP); bitmap_clear(info->bitmap, start, count); @@ -1425,7 +1425,7 @@ static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl, start = offset_to_bit(info->offset, ctl->unit, offset); count = bytes_to_bits(bytes, ctl->unit); - BUG_ON(start + count > BITS_PER_BITMAP); + ASSERT(start + count <= BITS_PER_BITMAP); bitmap_set(info->bitmap, start, count); @@ -1741,7 +1741,7 @@ no_cluster_bitmap: bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1, 0); if (!bitmap_info) { - BUG_ON(added); + ASSERT(added == 0); goto new_bitmap; } @@ -1881,7 +1881,7 @@ out: if (ret) { printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret); - BUG_ON(ret == -EEXIST); + ASSERT(ret != -EEXIST); } return ret; @@ -2369,7 +2369,7 @@ again: rb_erase(&entry->offset_index, &ctl->free_space_offset); ret = tree_insert_offset(&cluster->root, entry->offset, &entry->offset_index, 1); - BUG_ON(ret); /* -EEXIST; Logic error */ + ASSERT(!ret); /* -EEXIST; Logic error */ trace_btrfs_setup_cluster(block_group, cluster, total_found * ctl->unit, 1); @@ -2462,7 +2462,7 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, ret = tree_insert_offset(&cluster->root, entry->offset, &entry->offset_index, 0); total_size += entry->bytes; - BUG_ON(ret); /* -EEXIST; Logic error */ + ASSERT(!ret); /* -EEXIST; Logic error */ } while (node && entry != last); cluster->max_size = max_extent; @@ -2853,7 +2853,7 @@ u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root) ret = search_bitmap(ctl, entry, &offset, &count); /* Logic error; Should be empty if it can't find anything */ - BUG_ON(ret); + ASSERT(!ret); ino = offset; bitmap_clear_bits(ctl, entry, offset, 1); -- cgit v1.2.3 From 77cef2ec5484564eca6bd12a2b4a1e88fd766fbc Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 29 Aug 2013 13:57:21 -0400 Subject: Btrfs: allow partial ordered extent completion We currently have this problem where you can truncate pages that have not yet been written for an ordered extent. We do this because the truncate will be coming behind to clean us up anyway so what's the harm right? Well if truncate fails for whatever reason we leave an orphan item around for the file to be cleaned up later. But if the user goes and truncates up the file and tries to read from the area that had been discarded previously they will get a csum error because we never actually wrote that data out. This patch fixes this by allowing us to either discard the ordered extent completely, by which I mean we just free up the space we had allocated and not add the file extent, or adjust the length of the file extent we write. We do this by setting the length we truncated down to in the ordered extent, and then we set the file extent length and ram bytes to this length. The total disk space stays unchanged since we may be compressed and we can't just chop off the disk space, but at least this way the file extent only points to the valid data. Then when the file extent is free'd the extent and csums will be freed normally. This patch is needed for the next series which will give us more graceful recovery of failed truncates. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 60 +++++++++++++++++++++++++++++++++++++------------ fs/btrfs/ordered-data.c | 13 +++++++---- fs/btrfs/ordered-data.h | 7 ++++++ 3 files changed, 62 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4d4e2de3e879..f0e41b840739 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2562,8 +2562,10 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) struct extent_state *cached_state = NULL; struct new_sa_defrag_extent *new = NULL; int compress_type = 0; - int ret; + int ret = 0; + u64 logical_len = ordered_extent->len; bool nolock; + bool truncated = false; nolock = btrfs_is_free_space_inode(inode); @@ -2572,6 +2574,14 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) goto out; } + if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) { + truncated = true; + logical_len = ordered_extent->truncated_len; + /* Truncated the entire extent, don't bother adding */ + if (!logical_len) + goto out; + } + if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */ btrfs_ordered_update_i_size(inode, 0, ordered_extent); @@ -2627,15 +2637,14 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) ret = btrfs_mark_extent_written(trans, inode, ordered_extent->file_offset, ordered_extent->file_offset + - ordered_extent->len); + logical_len); } else { BUG_ON(root == root->fs_info->tree_root); ret = insert_reserved_file_extent(trans, inode, ordered_extent->file_offset, ordered_extent->start, ordered_extent->disk_len, - ordered_extent->len, - ordered_extent->len, + logical_len, logical_len, compress_type, 0, 0, BTRFS_FILE_EXTENT_REG); } @@ -2667,17 +2676,27 @@ out: if (trans) btrfs_end_transaction(trans, root); - if (ret) { - clear_extent_uptodate(io_tree, ordered_extent->file_offset, - ordered_extent->file_offset + - ordered_extent->len - 1, NULL, GFP_NOFS); + if (ret || truncated) { + u64 start, end; + + if (truncated) + start = ordered_extent->file_offset + logical_len; + else + start = ordered_extent->file_offset; + end = ordered_extent->file_offset + ordered_extent->len - 1; + clear_extent_uptodate(io_tree, start, end, NULL, GFP_NOFS); + + /* Drop the cache for the part of the extent we didn't write. */ + btrfs_drop_extent_cache(inode, start, end, 0); /* * If the ordered extent had an IOERR or something else went * wrong we need to return the space for this ordered extent - * back to the allocator. + * back to the allocator. We only free the extent in the + * truncated case if we didn't write out the extent at all. */ - if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && + if ((ret || !logical_len) && + !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) btrfs_free_reserved_extent(root, ordered_extent->start, ordered_extent->disk_len); @@ -7336,10 +7355,23 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset, * whoever cleared the private bit is responsible * for the finish_ordered_io */ - if (TestClearPagePrivate2(page) && - btrfs_dec_test_ordered_pending(inode, &ordered, page_start, - PAGE_CACHE_SIZE, 1)) { - btrfs_finish_ordered_io(ordered); + if (TestClearPagePrivate2(page)) { + struct btrfs_ordered_inode_tree *tree; + u64 new_len; + + tree = &BTRFS_I(inode)->ordered_tree; + + spin_lock_irq(&tree->lock); + set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags); + new_len = page_start - ordered->file_offset; + if (new_len < ordered->truncated_len) + ordered->truncated_len = new_len; + spin_unlock_irq(&tree->lock); + + if (btrfs_dec_test_ordered_pending(inode, &ordered, + page_start, + PAGE_CACHE_SIZE, 1)) + btrfs_finish_ordered_io(ordered); } btrfs_put_ordered_extent(ordered); cached_state = NULL; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index f2023ccb7cf6..966b413a33b8 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -205,6 +205,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, entry->bytes_left = len; entry->inode = igrab(inode); entry->compress_type = compress_type; + entry->truncated_len = (u64)-1; if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) set_bit(type, &entry->flags); @@ -920,12 +921,16 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, struct btrfs_ordered_extent *test; int ret = 1; - if (ordered) + spin_lock_irq(&tree->lock); + if (ordered) { offset = entry_end(ordered); - else + if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags)) + offset = min(offset, + ordered->file_offset + + ordered->truncated_len); + } else { offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize); - - spin_lock_irq(&tree->lock); + } disk_i_size = BTRFS_I(inode)->disk_i_size; /* truncate file */ diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 68844d59ee6f..d9a5aa097b4f 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -69,6 +69,7 @@ struct btrfs_ordered_sum { * the isize. */ #define BTRFS_ORDERED_LOGGED_CSUM 8 /* We've logged the csums on this ordered ordered extent */ +#define BTRFS_ORDERED_TRUNCATED 9 /* Set when we have to truncate an extent */ struct btrfs_ordered_extent { /* logical offset in the file */ @@ -96,6 +97,12 @@ struct btrfs_ordered_extent { */ u64 outstanding_isize; + /* + * If we get truncated we need to adjust the file extent we enter for + * this ordered extent so that we do not expose stale data. + */ + u64 truncated_len; + /* flags (described above) */ unsigned long flags; -- cgit v1.2.3 From e57138b3e96e45a63124492e736612378096290f Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Wed, 21 Aug 2013 11:44:48 +0800 Subject: btrfs: return btrfs error code for dev excl ops err now threads can return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS as defined in btrfs.h for the dev excl operation error in the FS, which means with this kernel would stop logging (almost an user error) into the /var/log/messages v2: accepts Josef' comment Signed-off-by: Anand Jain Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ddb4bc1252ad..1a5b9462dd9a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1394,9 +1394,8 @@ static noinline int btrfs_ioctl_resize(struct file *file, if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 1)) { - pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); mnt_drop_write_file(file); - return -EINVAL; + return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; } mutex_lock(&root->fs_info->volume_mutex); @@ -2379,8 +2378,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 1)) { - pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); - return -EINVAL; + return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; } mutex_lock(&root->fs_info->volume_mutex); @@ -3673,8 +3671,7 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg) if (atomic_xchg( &root->fs_info->mutually_exclusive_operation_running, 1)) { - pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); - ret = -EINPROGRESS; + ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; } else { ret = btrfs_dev_replace_start(root, p); atomic_set( @@ -3918,8 +3915,7 @@ again: } else { /* this is (1) */ mutex_unlock(&fs_info->balance_mutex); - pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); - ret = -EINVAL; + ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; goto out; } -- cgit v1.2.3 From ed84885d1e03839b8508ab707657d8d1002edea6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 21 Aug 2013 10:32:13 +0300 Subject: btrfs: reuse kbasename helper To get name of the file from a pathname let's use kbasename() helper. It allows to simplify code a bit. Signed-off-by: Andy Shevchenko Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/send.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index cbe92da5f33b..b4b15467426b 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "send.h" #include "backref.h" @@ -2601,7 +2602,6 @@ static int record_ref(struct list_head *head, u64 dir, u64 dir_gen, struct fs_path *path) { struct recorded_ref *ref; - char *tmp; ref = kmalloc(sizeof(*ref), GFP_NOFS); if (!ref) @@ -2611,20 +2611,14 @@ static int record_ref(struct list_head *head, u64 dir, ref->dir_gen = dir_gen; ref->full_path = path; - tmp = strrchr(ref->full_path->start, '/'); - if (!tmp) { - ref->name_len = ref->full_path->end - ref->full_path->start; - ref->name = ref->full_path->start; + ref->name = (char *)kbasename(ref->full_path->start); + ref->name_len = ref->full_path->end - ref->name; + ref->dir_path = ref->full_path->start; + if (ref->name == ref->full_path->start) ref->dir_path_len = 0; - ref->dir_path = ref->full_path->start; - } else { - tmp++; - ref->name_len = ref->full_path->end - tmp; - ref->name = tmp; - ref->dir_path = ref->full_path->start; + else ref->dir_path_len = ref->full_path->end - ref->full_path->start - 1 - ref->name_len; - } list_add_tail(&ref->list, head); return 0; -- cgit v1.2.3 From d8f980391f418e567c805d3e40be5d63bfc4c8ea Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Sat, 24 Aug 2013 19:51:06 +0100 Subject: Btrfs: fix memory leak of uuid_root in free_fs_info Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1f3fd584e5f4..3c1da6f98a4d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3460,6 +3460,7 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info) kfree(fs_info->dev_root); kfree(fs_info->csum_root); kfree(fs_info->quota_root); + kfree(fs_info->uuid_root); kfree(fs_info->super_copy); kfree(fs_info->super_for_commit); kfree(fs_info); -- cgit v1.2.3 From 795a33213973cf0195198132162862fb05929425 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 27 Aug 2013 13:50:44 +0300 Subject: Btrfs: stop refusing the relocation of chunk 0 AFAICT chunk 0 is no longer special, and so it should be restriped just like every other chunk. One reason for this change is us refusing the relocation can lead to filesystems that can only be mounted ro, and never rw -- see the bugzilla [1] for details. The other reason is that device removal code is already doing this: it will happily relocate chunk 0 is part of shrinking the device. [1] https://bugzilla.kernel.org/show_bug.cgi?id=60594 Reported-by: Xavier Bassery Signed-off-by: Ilya Dryomov Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f42e41290124..603cce85764f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2997,10 +2997,6 @@ again: if (found_key.objectid != key.objectid) break; - /* chunk zero is special */ - if (found_key.offset == 0) - break; - chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); if (!counting) { @@ -3036,6 +3032,8 @@ again: spin_unlock(&fs_info->balance_lock); } loop: + if (found_key.offset == 0) + break; key.offset = found_key.offset - 1; } -- cgit v1.2.3 From f45388f3874535062526ec071284d836f5a4b5de Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Wed, 28 Aug 2013 10:28:34 +0100 Subject: Btrfs: fix deadlock in uuid scan kthread If there's an ongoing transaction when the uuid scan kthread attempts to create one, the kthread will block, waiting for that transaction to finish while it's keeping locks on the tree root, and in turn the existing transaction is waiting for those locks to be free. The stack trace reported by the kernel follows. [36700.671601] INFO: task btrfs-uuid:15480 blocked for more than 120 seconds. [36700.671602] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [36700.671602] btrfs-uuid D 0000000000000000 0 15480 2 0x00000000 [36700.671604] ffff880710bd5b88 0000000000000046 ffff8803d36ba850 0000000000030000 [36700.671605] ffff8806d76dc530 ffff880710bd5fd8 ffff880710bd5fd8 ffff880710bd5fd8 [36700.671607] ffff8808098ac530 ffff8806d76dc530 ffff880710bd5b98 ffff8805e4508e40 [36700.671608] Call Trace: [36700.671610] [] schedule+0x29/0x70 [36700.671620] [] wait_current_trans.isra.33+0xbf/0x120 [btrfs] [36700.671623] [] ? add_wait_queue+0x60/0x60 [36700.671629] [] start_transaction+0x3d6/0x530 [btrfs] [36700.671636] [] ? btrfs_get_token_32+0x64/0xf0 [btrfs] [36700.671642] [] btrfs_start_transaction+0x1b/0x20 [btrfs] [36700.671649] [] btrfs_uuid_scan_kthread+0x211/0x3d0 [btrfs] [36700.671655] [] ? __btrfs_open_devices+0x2a0/0x2a0 [btrfs] [36700.671657] [] kthread+0xc0/0xd0 [36700.671659] [] ? flush_kthread_worker+0xb0/0xb0 [36700.671661] [] ret_from_fork+0x7c/0xb0 [36700.671662] [] ? flush_kthread_worker+0xb0/0xb0 [36700.671663] INFO: task btrfs:15481 blocked for more than 120 seconds. [36700.671664] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [36700.671665] btrfs D 0000000000000000 0 15481 15212 0x00000004 [36700.671666] ffff880248cbf4c8 0000000000000086 ffff8803d36ba700 ffff8801dbd5c280 [36700.671668] ffff880807815c40 ffff880248cbffd8 ffff880248cbffd8 ffff880248cbffd8 [36700.671669] ffff8805e86a0000 ffff880807815c40 ffff880248cbf4d8 ffff8801dbd5c280 [36700.671670] Call Trace: [36700.671672] [] schedule+0x29/0x70 [36700.671679] [] btrfs_tree_lock+0x6d/0x230 [btrfs] [36700.671680] [] ? add_wait_queue+0x60/0x60 [36700.671685] [] btrfs_search_slot+0x999/0xb00 [btrfs] [36700.671691] [] ? btrfs_lookup_first_ordered_extent+0x5e/0xb0 [btrfs] [36700.671698] [] __btrfs_write_out_cache+0x8c4/0xa80 [btrfs] [36700.671704] [] btrfs_write_out_cache+0xb2/0xf0 [btrfs] [36700.671710] [] ? free_extent_buffer+0x61/0xc0 [btrfs] [36700.671716] [] btrfs_write_dirty_block_groups+0x562/0x650 [btrfs] [36700.671723] [] commit_cowonly_roots+0x171/0x24b [btrfs] [36700.671729] [] btrfs_commit_transaction+0x4fe/0xa10 [btrfs] [36700.671735] [] create_subvol+0x5c0/0x636 [btrfs] [36700.671742] [] btrfs_mksubvol.isra.60+0x33f/0x3f0 [btrfs] [36700.671747] [] btrfs_ioctl_snap_create_transid+0x142/0x190 [btrfs] [36700.671752] [] ? btrfs_ioctl_snap_create+0x2c/0x80 [btrfs] [36700.671757] [] btrfs_ioctl_snap_create+0x5e/0x80 [btrfs] [36700.671759] [] ? handle_pte_fault+0x84/0x920 [36700.671764] [] btrfs_ioctl+0xf0b/0x1d00 [btrfs] [36700.671766] [] ? handle_mm_fault+0x210/0x310 [36700.671768] [] ? __do_page_fault+0x284/0x4e0 [36700.671770] [] do_vfs_ioctl+0x96/0x550 [36700.671772] [] ? __sb_end_write+0x33/0x70 [36700.671774] [] SyS_ioctl+0x91/0xb0 [36700.671775] [] system_call_fastpath+0x16/0x1b Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 603cce85764f..0db165ee4340 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3463,7 +3463,7 @@ static int btrfs_uuid_scan_kthread(void *data) int slot; struct btrfs_root_item root_item; u32 item_size; - struct btrfs_trans_handle *trans; + struct btrfs_trans_handle *trans = NULL; path = btrfs_alloc_path(); if (!path) { @@ -3501,13 +3501,18 @@ static int btrfs_uuid_scan_kthread(void *data) if (item_size < sizeof(root_item)) goto skip; - trans = NULL; read_extent_buffer(eb, &root_item, btrfs_item_ptr_offset(eb, slot), (int)sizeof(root_item)); if (btrfs_root_refs(&root_item) == 0) goto skip; - if (!btrfs_is_empty_uuid(root_item.uuid)) { + + if (!btrfs_is_empty_uuid(root_item.uuid) || + !btrfs_is_empty_uuid(root_item.received_uuid)) { + if (trans) + goto update_tree; + + btrfs_release_path(path); /* * 1 - subvol uuid item * 1 - received_subvol uuid item @@ -3517,6 +3522,12 @@ static int btrfs_uuid_scan_kthread(void *data) ret = PTR_ERR(trans); break; } + continue; + } else { + goto skip; + } +update_tree: + if (!btrfs_is_empty_uuid(root_item.uuid)) { ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, root_item.uuid, BTRFS_UUID_KEY_SUBVOL, @@ -3524,22 +3535,11 @@ static int btrfs_uuid_scan_kthread(void *data) if (ret < 0) { pr_warn("btrfs: uuid_tree_add failed %d\n", ret); - btrfs_end_transaction(trans, - fs_info->uuid_root); break; } } if (!btrfs_is_empty_uuid(root_item.received_uuid)) { - if (!trans) { - /* 1 - received_subvol uuid item */ - trans = btrfs_start_transaction( - fs_info->uuid_root, 1); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - break; - } - } ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, root_item.received_uuid, BTRFS_UUID_KEY_RECEIVED_SUBVOL, @@ -3547,19 +3547,18 @@ static int btrfs_uuid_scan_kthread(void *data) if (ret < 0) { pr_warn("btrfs: uuid_tree_add failed %d\n", ret); - btrfs_end_transaction(trans, - fs_info->uuid_root); break; } } +skip: if (trans) { ret = btrfs_end_transaction(trans, fs_info->uuid_root); + trans = NULL; if (ret) break; } -skip: btrfs_release_path(path); if (key.offset < (u64)-1) { key.offset++; @@ -3578,6 +3577,8 @@ skip: out: btrfs_free_path(path); + if (trans && !IS_ERR(trans)) + btrfs_end_transaction(trans, fs_info->uuid_root); if (ret) pr_warn("btrfs: btrfs_uuid_scan_kthread failed %d\n", ret); else -- cgit v1.2.3 From 7f4f6e0a3f6db9c214de38c792c483b8076bcd6a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 29 Aug 2013 16:43:28 -0400 Subject: Btrfs: only update disk_i_size as we remove extents This fixes a problem where if we fail a truncate we will leave the i_size set where we wanted to truncate to instead of where we were able to truncate to. Fix this by making btrfs_truncate_inode_items do the disk_i_size update as it removes extents, that way it will always be consistent with where its extents are. Then if the truncate fails at all we can update the in-ram i_size with what we have on disk and delete the orphan item. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f0e41b840739..6091ba9d2494 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3861,6 +3861,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, u64 extent_num_bytes = 0; u64 extent_offset = 0; u64 item_end = 0; + u64 last_size = (u64)-1; u32 found_type = (u8)-1; int found_extent; int del_item; @@ -3958,6 +3959,11 @@ search_again: if (found_type != BTRFS_EXTENT_DATA_KEY) goto delete; + if (del_item) + last_size = found_key.offset; + else + last_size = new_size; + if (extent_type != BTRFS_FILE_EXTENT_INLINE) { u64 num_dec; extent_start = btrfs_file_extent_disk_bytenr(leaf, fi); @@ -4069,6 +4075,8 @@ out: btrfs_abort_transaction(trans, root, ret); } error: + if (last_size != (u64)-1) + btrfs_ordered_update_i_size(inode, last_size, NULL); btrfs_free_path(path); return err; } @@ -4397,8 +4405,26 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) btrfs_inode_resume_unlocked_dio(inode); ret = btrfs_truncate(inode); - if (ret && inode->i_nlink) - btrfs_orphan_del(NULL, inode); + if (ret && inode->i_nlink) { + int err; + + /* + * failed to truncate, disk_i_size is only adjusted down + * as we remove extents, so it should represent the true + * size of the inode, so reset the in memory size and + * delete our orphan entry. + */ + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) { + btrfs_orphan_del(NULL, inode); + return ret; + } + i_size_write(inode, BTRFS_I(inode)->disk_i_size); + err = btrfs_orphan_del(trans, inode); + if (err) + btrfs_abort_transaction(trans, root, err); + btrfs_end_transaction(trans, root); + } } return ret; @@ -7537,7 +7563,6 @@ static int btrfs_truncate(struct inode *inode) u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); - btrfs_ordered_update_i_size(inode, inode->i_size, NULL); /* * Yes ladies and gentelment, this is indeed ugly. The fact is we have -- cgit v1.2.3 From 45d5fd14d22304c9a40d5aae75ec610f5d1cbb53 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 29 Aug 2013 16:51:22 -0400 Subject: Btrfs: don't use an async starter for most of our workers We only need an async starter if we can't make a GFP_NOFS allocation in our current path. This is the case for the endio stuff since it happens in IRQ context, but things like the caching thread workers and the delalloc flushers we can easily make this allocation and start threads right away. Also change the worker count for the caching thread pool. Traditionally we limited this to 2 since we took read locks while caching, but nowadays we do this lockless so there's no reason to limit the number of caching threads. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 21d1d066758e..4cbb00af92ff 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2483,20 +2483,17 @@ int open_ctree(struct super_block *sb, &fs_info->generic_worker); btrfs_init_workers(&fs_info->delalloc_workers, "delalloc", - fs_info->thread_pool_size, - &fs_info->generic_worker); + fs_info->thread_pool_size, NULL); btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc", - fs_info->thread_pool_size, - &fs_info->generic_worker); + fs_info->thread_pool_size, NULL); btrfs_init_workers(&fs_info->submit_workers, "submit", min_t(u64, fs_devices->num_devices, - fs_info->thread_pool_size), - &fs_info->generic_worker); + fs_info->thread_pool_size), NULL); btrfs_init_workers(&fs_info->caching_workers, "cache", - 2, &fs_info->generic_worker); + fs_info->thread_pool_size, NULL); /* a higher idle thresh on the submit workers makes it much more * likely that bios will be send down in a sane order to the -- cgit v1.2.3 From d7396f07358a7c6e22c238d36d1d85f9d652a414 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Fri, 30 Aug 2013 15:46:43 +0100 Subject: Btrfs: optimize key searches in btrfs_search_slot When the binary search returns 0 (exact match), the target key will necessarily be at slot 0 of all nodes below the current one, so in this case the binary search is not needed because it will always return 0, and we waste time doing it, holding node locks for longer than necessary, etc. Below follow histograms with the times spent on the current approach of doing a binary search when the previous binary search returned 0, and times for the new approach, which directly picks the first item/child node in the leaf/node. Current approach: Count: 6682 Range: 35.000 - 8370.000; Mean: 85.837; Median: 75.000; Stddev: 106.429 Percentiles: 90th: 124.000; 95th: 145.000; 99th: 206.000 35.000 - 61.080: 1235 ################ 61.080 - 106.053: 4207 ##################################################### 106.053 - 183.606: 1122 ############## 183.606 - 317.341: 111 # 317.341 - 547.959: 6 | 547.959 - 8370.000: 1 | Approach proposed by this patch: Count: 6682 Range: 6.000 - 135.000; Mean: 16.690; Median: 16.000; Stddev: 7.160 Percentiles: 90th: 23.000; 95th: 27.000; 99th: 40.000 6.000 - 8.418: 58 # 8.418 - 11.670: 1149 ######################### 11.670 - 16.046: 2418 ##################################################### 16.046 - 21.934: 2098 ############################################## 21.934 - 29.854: 744 ################ 29.854 - 40.511: 154 ### 40.511 - 54.848: 41 # 54.848 - 74.136: 5 | 74.136 - 100.087: 9 | 100.087 - 135.000: 6 | These samples were captured during a run of the btrfs tests 001, 002 and 004 in the xfstests, with a leaf/node size of 4Kb. Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 5fa521bec07b..64346721173f 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2426,6 +2426,40 @@ done: return ret; } +static void key_search_validate(struct extent_buffer *b, + struct btrfs_key *key, + int level) +{ +#ifdef CONFIG_BTRFS_ASSERT + struct btrfs_disk_key disk_key; + + btrfs_cpu_key_to_disk(&disk_key, key); + + if (level == 0) + ASSERT(!memcmp_extent_buffer(b, &disk_key, + offsetof(struct btrfs_leaf, items[0].key), + sizeof(disk_key))); + else + ASSERT(!memcmp_extent_buffer(b, &disk_key, + offsetof(struct btrfs_node, ptrs[0].key), + sizeof(disk_key))); +#endif +} + +static int key_search(struct extent_buffer *b, struct btrfs_key *key, + int level, int *prev_cmp, int *slot) +{ + if (*prev_cmp != 0) { + *prev_cmp = bin_search(b, key, level, slot); + return *prev_cmp; + } + + key_search_validate(b, key, level); + *slot = 0; + + return 0; +} + /* * look for key in the tree. path is filled in with nodes along the way * if key is found, we return zero and you can find the item in the leaf @@ -2454,6 +2488,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root int write_lock_level = 0; u8 lowest_level = 0; int min_write_lock_level; + int prev_cmp; lowest_level = p->lowest_level; WARN_ON(lowest_level && ins_len > 0); @@ -2484,6 +2519,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root min_write_lock_level = write_lock_level; again: + prev_cmp = -1; /* * we try very hard to do read locks on the root */ @@ -2584,7 +2620,7 @@ cow_done: if (!cow) btrfs_unlock_up_safe(p, level + 1); - ret = bin_search(b, key, level, &slot); + ret = key_search(b, key, level, &prev_cmp, &slot); if (level != 0) { int dec = 0; @@ -2719,6 +2755,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, int level; int lowest_unlock = 1; u8 lowest_level = 0; + int prev_cmp; lowest_level = p->lowest_level; WARN_ON(p->nodes[0] != NULL); @@ -2729,6 +2766,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, } again: + prev_cmp = -1; b = get_old_root(root, time_seq); level = btrfs_header_level(b); p->locks[level] = BTRFS_READ_LOCK; @@ -2746,7 +2784,7 @@ again: */ btrfs_unlock_up_safe(p, level + 1); - ret = bin_search(b, key, level, &slot); + ret = key_search(b, key, level, &prev_cmp, &slot); if (level != 0) { int dec = 0; -- cgit v1.2.3