summaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/Makefile4
-rw-r--r--fs/btrfs/acl.c87
-rw-r--r--fs/btrfs/btrfs_inode.h16
-rw-r--r--fs/btrfs/compression.c14
-rw-r--r--fs/btrfs/ctree.c455
-rw-r--r--fs/btrfs/ctree.h74
-rw-r--r--fs/btrfs/delayed-inode.c138
-rw-r--r--fs/btrfs/delayed-inode.h8
-rw-r--r--fs/btrfs/dir-item.c39
-rw-r--r--fs/btrfs/disk-io.c146
-rw-r--r--fs/btrfs/disk-io.h10
-rw-r--r--fs/btrfs/extent-tree.c389
-rw-r--r--fs/btrfs/extent_io.c309
-rw-r--r--fs/btrfs/extent_io.h57
-rw-r--r--fs/btrfs/extent_map.c155
-rw-r--r--fs/btrfs/file-item.c48
-rw-r--r--fs/btrfs/file.c201
-rw-r--r--fs/btrfs/free-space-cache.c345
-rw-r--r--fs/btrfs/inode.c292
-rw-r--r--fs/btrfs/ioctl.c52
-rw-r--r--fs/btrfs/locking.c280
-rw-r--r--fs/btrfs/locking.h36
-rw-r--r--fs/btrfs/ref-cache.c68
-rw-r--r--fs/btrfs/ref-cache.h52
-rw-r--r--fs/btrfs/relocation.c33
-rw-r--r--fs/btrfs/root-tree.c5
-rw-r--r--fs/btrfs/scrub.c69
-rw-r--r--fs/btrfs/struct-funcs.c100
-rw-r--r--fs/btrfs/super.c8
-rw-r--r--fs/btrfs/sysfs.c146
-rw-r--r--fs/btrfs/transaction.c233
-rw-r--r--fs/btrfs/tree-log.c20
-rw-r--r--fs/btrfs/volumes.c25
-rw-r--r--fs/btrfs/xattr.c66
34 files changed, 2001 insertions, 1979 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 9b72dcf1cd25..40e6ac08c21f 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -6,5 +6,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
transaction.o inode.o file.o tree-defrag.o \
extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
- export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \
+ export.o tree-log.o free-space-cache.o zlib.o lzo.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o
+
+btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index f66fc9959733..eb159aaa5a11 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -28,9 +28,7 @@
#include "btrfs_inode.h"
#include "xattr.h"
-#ifdef CONFIG_BTRFS_FS_POSIX_ACL
-
-static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
+struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
{
int size;
const char *name;
@@ -111,7 +109,6 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans,
int ret, size = 0;
const char *name;
char *value = NULL;
- mode_t mode;
if (acl) {
ret = posix_acl_valid(acl);
@@ -122,13 +119,11 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans,
switch (type) {
case ACL_TYPE_ACCESS:
- mode = inode->i_mode;
name = POSIX_ACL_XATTR_ACCESS;
if (acl) {
- ret = posix_acl_equiv_mode(acl, &mode);
+ ret = posix_acl_equiv_mode(acl, &inode->i_mode);
if (ret < 0)
return ret;
- inode->i_mode = mode;
}
ret = 0;
break;
@@ -195,28 +190,6 @@ out:
return ret;
}
-int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags)
-{
- int error = -EAGAIN;
-
- if (flags & IPERM_FLAG_RCU) {
- if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
- error = -ECHILD;
-
- } else {
- struct posix_acl *acl;
- acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- if (acl) {
- error = posix_acl_permission(inode, acl, mask);
- posix_acl_release(acl);
- }
- }
-
- return error;
-}
-
/*
* btrfs_init_acl is already generally called under fs_mutex, so the locking
* stuff has been fixed to work with that. If the locking stuff changes, we
@@ -244,31 +217,20 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
}
if (IS_POSIXACL(dir) && acl) {
- struct posix_acl *clone;
- mode_t mode;
-
if (S_ISDIR(inode->i_mode)) {
ret = btrfs_set_acl(trans, inode, acl,
ACL_TYPE_DEFAULT);
if (ret)
goto failed;
}
- clone = posix_acl_clone(acl, GFP_NOFS);
- ret = -ENOMEM;
- if (!clone)
- goto failed;
-
- mode = inode->i_mode;
- ret = posix_acl_create_masq(clone, &mode);
- if (ret >= 0) {
- inode->i_mode = mode;
- if (ret > 0) {
- /* we need an acl */
- ret = btrfs_set_acl(trans, inode, clone,
- ACL_TYPE_ACCESS);
- }
+ ret = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
+ if (ret < 0)
+ return ret;
+
+ if (ret > 0) {
+ /* we need an acl */
+ ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS);
}
- posix_acl_release(clone);
}
failed:
posix_acl_release(acl);
@@ -278,7 +240,7 @@ failed:
int btrfs_acl_chmod(struct inode *inode)
{
- struct posix_acl *acl, *clone;
+ struct posix_acl *acl;
int ret = 0;
if (S_ISLNK(inode->i_mode))
@@ -291,17 +253,11 @@ int btrfs_acl_chmod(struct inode *inode)
if (IS_ERR_OR_NULL(acl))
return PTR_ERR(acl);
- clone = posix_acl_clone(acl, GFP_KERNEL);
+ ret = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
+ if (ret)
+ return ret;
+ ret = btrfs_set_acl(NULL, inode, acl, ACL_TYPE_ACCESS);
posix_acl_release(acl);
- if (!clone)
- return -ENOMEM;
-
- ret = posix_acl_chmod_masq(clone, inode->i_mode);
- if (!ret)
- ret = btrfs_set_acl(NULL, inode, clone, ACL_TYPE_ACCESS);
-
- posix_acl_release(clone);
-
return ret;
}
@@ -318,18 +274,3 @@ const struct xattr_handler btrfs_xattr_acl_access_handler = {
.get = btrfs_xattr_acl_get,
.set = btrfs_xattr_acl_set,
};
-
-#else /* CONFIG_BTRFS_FS_POSIX_ACL */
-
-int btrfs_acl_chmod(struct inode *inode)
-{
- return 0;
-}
-
-int btrfs_init_acl(struct btrfs_trans_handle *trans,
- struct inode *inode, struct inode *dir)
-{
- return 0;
-}
-
-#endif /* CONFIG_BTRFS_FS_POSIX_ACL */
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 52d7eca8c7bf..502b9e988679 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -34,6 +34,9 @@ struct btrfs_inode {
*/
struct btrfs_key location;
+ /* Lock for counters */
+ spinlock_t lock;
+
/* the extent_tree has caches of all the extent mappings to disk */
struct extent_map_tree extent_tree;
@@ -134,8 +137,8 @@ struct btrfs_inode {
* items we think we'll end up using, and reserved_extents is the number
* of extent items we've reserved metadata for.
*/
- atomic_t outstanding_extents;
- atomic_t reserved_extents;
+ unsigned outstanding_extents;
+ unsigned reserved_extents;
/*
* ordered_data_close is set by truncate when a file that used
@@ -184,4 +187,13 @@ static inline void btrfs_i_size_write(struct inode *inode, u64 size)
BTRFS_I(inode)->disk_i_size = size;
}
+static inline bool btrfs_is_free_space_inode(struct btrfs_root *root,
+ struct inode *inode)
+{
+ if (root == root->fs_info->tree_root ||
+ BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
+ return true;
+ return false;
+}
+
#endif
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index bfe42b03eaf9..8ec5d86f1734 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -338,6 +338,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
u64 first_byte = disk_start;
struct block_device *bdev;
int ret;
+ int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
@@ -392,8 +393,11 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
BUG_ON(ret);
- ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
- BUG_ON(ret);
+ if (!skip_sum) {
+ ret = btrfs_csum_one_bio(root, inode, bio,
+ start, 1);
+ BUG_ON(ret);
+ }
ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
BUG_ON(ret);
@@ -418,8 +422,10 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
BUG_ON(ret);
- ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
- BUG_ON(ret);
+ if (!skip_sum) {
+ ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
+ BUG_ON(ret);
+ }
ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
BUG_ON(ret);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index d84089349c82..011cab3aca8d 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -54,8 +54,13 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)
{
int i;
for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
- if (p->nodes[i] && p->locks[i])
- btrfs_set_lock_blocking(p->nodes[i]);
+ if (!p->nodes[i] || !p->locks[i])
+ continue;
+ btrfs_set_lock_blocking_rw(p->nodes[i], p->locks[i]);
+ if (p->locks[i] == BTRFS_READ_LOCK)
+ p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
+ else if (p->locks[i] == BTRFS_WRITE_LOCK)
+ p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
}
}
@@ -68,7 +73,7 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)
* for held
*/
noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
- struct extent_buffer *held)
+ struct extent_buffer *held, int held_rw)
{
int i;
@@ -79,19 +84,29 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
* really sure by forcing the path to blocking before we clear
* the path blocking.
*/
- if (held)
- btrfs_set_lock_blocking(held);
+ if (held) {
+ btrfs_set_lock_blocking_rw(held, held_rw);
+ if (held_rw == BTRFS_WRITE_LOCK)
+ held_rw = BTRFS_WRITE_LOCK_BLOCKING;
+ else if (held_rw == BTRFS_READ_LOCK)
+ held_rw = BTRFS_READ_LOCK_BLOCKING;
+ }
btrfs_set_path_blocking(p);
#endif
for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
- if (p->nodes[i] && p->locks[i])
- btrfs_clear_lock_blocking(p->nodes[i]);
+ if (p->nodes[i] && p->locks[i]) {
+ btrfs_clear_lock_blocking_rw(p->nodes[i], p->locks[i]);
+ if (p->locks[i] == BTRFS_WRITE_LOCK_BLOCKING)
+ p->locks[i] = BTRFS_WRITE_LOCK;
+ else if (p->locks[i] == BTRFS_READ_LOCK_BLOCKING)
+ p->locks[i] = BTRFS_READ_LOCK;
+ }
}
#ifdef CONFIG_DEBUG_LOCK_ALLOC
if (held)
- btrfs_clear_lock_blocking(held);
+ btrfs_clear_lock_blocking_rw(held, held_rw);
#endif
}
@@ -119,7 +134,7 @@ noinline void btrfs_release_path(struct btrfs_path *p)
if (!p->nodes[i])
continue;
if (p->locks[i]) {
- btrfs_tree_unlock(p->nodes[i]);
+ btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
p->locks[i] = 0;
}
free_extent_buffer(p->nodes[i]);
@@ -167,6 +182,25 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
return eb;
}
+/* loop around taking references on and locking the root node of the
+ * tree until you end up with a lock on the root. A locked buffer
+ * is returned, with a reference held.
+ */
+struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
+{
+ struct extent_buffer *eb;
+
+ while (1) {
+ eb = btrfs_root_node(root);
+ btrfs_tree_read_lock(eb);
+ if (eb == root->node)
+ break;
+ btrfs_tree_read_unlock(eb);
+ free_extent_buffer(eb);
+ }
+ return eb;
+}
+
/* cowonly root (everything not a reference counted cow subvolume), just get
* put onto a simple dirty list. transaction.c walks this to make sure they
* get properly updated on disk.
@@ -626,14 +660,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
for (i = start_slot; i < end_slot; i++) {
int close = 1;
- if (!parent->map_token) {
- map_extent_buffer(parent,
- btrfs_node_key_ptr_offset(i),
- sizeof(struct btrfs_key_ptr),
- &parent->map_token, &parent->kaddr,
- &parent->map_start, &parent->map_len,
- KM_USER1);
- }
btrfs_node_key(parent, &disk_key, i);
if (!progress_passed && comp_keys(&disk_key, progress) < 0)
continue;
@@ -656,11 +682,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
last_block = blocknr;
continue;
}
- if (parent->map_token) {
- unmap_extent_buffer(parent, parent->map_token,
- KM_USER1);
- parent->map_token = NULL;
- }
cur = btrfs_find_tree_block(root, blocknr, blocksize);
if (cur)
@@ -701,11 +722,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
btrfs_tree_unlock(cur);
free_extent_buffer(cur);
}
- if (parent->map_token) {
- unmap_extent_buffer(parent, parent->map_token,
- KM_USER1);
- parent->map_token = NULL;
- }
return err;
}
@@ -746,7 +762,6 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
struct btrfs_disk_key *tmp = NULL;
struct btrfs_disk_key unaligned;
unsigned long offset;
- char *map_token = NULL;
char *kaddr = NULL;
unsigned long map_start = 0;
unsigned long map_len = 0;
@@ -756,18 +771,13 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
mid = (low + high) / 2;
offset = p + mid * item_size;
- if (!map_token || offset < map_start ||
+ if (!kaddr || offset < map_start ||
(offset + sizeof(struct btrfs_disk_key)) >
map_start + map_len) {
- if (map_token) {
- unmap_extent_buffer(eb, map_token, KM_USER0);
- map_token = NULL;
- }
err = map_private_extent_buffer(eb, offset,
sizeof(struct btrfs_disk_key),
- &map_token, &kaddr,
- &map_start, &map_len, KM_USER0);
+ &kaddr, &map_start, &map_len);
if (!err) {
tmp = (struct btrfs_disk_key *)(kaddr + offset -
@@ -790,14 +800,10 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
high = mid;
else {
*slot = mid;
- if (map_token)
- unmap_extent_buffer(eb, map_token, KM_USER0);
return 0;
}
}
*slot = low;
- if (map_token)
- unmap_extent_buffer(eb, map_token, KM_USER0);
return 1;
}
@@ -890,7 +896,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
mid = path->nodes[level];
- WARN_ON(!path->locks[level]);
+ WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK &&
+ path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING);
WARN_ON(btrfs_header_generation(mid) != trans->transid);
orig_ptr = btrfs_node_blockptr(mid, orig_slot);
@@ -1249,16 +1256,8 @@ static void reada_for_search(struct btrfs_root *root,
nritems = btrfs_header_nritems(node);
nr = slot;
+
while (1) {
- if (!node->map_token) {
- unsigned long offset = btrfs_node_key_ptr_offset(nr);
- map_private_extent_buffer(node, offset,
- sizeof(struct btrfs_key_ptr),
- &node->map_token,
- &node->kaddr,
- &node->map_start,
- &node->map_len, KM_USER1);
- }
if (direction < 0) {
if (nr == 0)
break;
@@ -1277,11 +1276,6 @@ static void reada_for_search(struct btrfs_root *root,
if ((search <= target && target - search <= 65536) ||
(search > target && search - target <= 65536)) {
gen = btrfs_node_ptr_generation(node, nr);
- if (node->map_token) {
- unmap_extent_buffer(node, node->map_token,
- KM_USER1);
- node->map_token = NULL;
- }
readahead_tree_block(root, search, blocksize, gen);
nread += blocksize;
}
@@ -1289,10 +1283,6 @@ static void reada_for_search(struct btrfs_root *root,
if ((nread > 65536 || nscan > 32))
break;
}
- if (node->map_token) {
- unmap_extent_buffer(node, node->map_token, KM_USER1);
- node->map_token = NULL;
- }
}
/*
@@ -1405,7 +1395,7 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
t = path->nodes[i];
if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
- btrfs_tree_unlock(t);
+ btrfs_tree_unlock_rw(t, path->locks[i]);
path->locks[i] = 0;
}
}
@@ -1432,7 +1422,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
continue;
if (!path->locks[i])
continue;
- btrfs_tree_unlock(path->nodes[i]);
+ btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
path->locks[i] = 0;
}
}
@@ -1481,6 +1471,8 @@ read_block_for_search(struct btrfs_trans_handle *trans,
* we can trust our generation number
*/
free_extent_buffer(tmp);
+ btrfs_set_path_blocking(p);
+
tmp = read_tree_block(root, blocknr, blocksize, gen);
if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
*eb_ret = tmp;
@@ -1536,20 +1528,27 @@ read_block_for_search(struct btrfs_trans_handle *trans,
static int
setup_nodes_for_search(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_path *p,
- struct extent_buffer *b, int level, int ins_len)
+ struct extent_buffer *b, int level, int ins_len,
+ int *write_lock_level)
{
int ret;
if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
int sret;
+ if (*write_lock_level < level + 1) {
+ *write_lock_level = level + 1;
+ btrfs_release_path(p);
+ goto again;
+ }
+
sret = reada_for_balance(root, p, level);
if (sret)
goto again;
btrfs_set_path_blocking(p);
sret = split_node(trans, root, p, level);
- btrfs_clear_path_blocking(p, NULL);
+ btrfs_clear_path_blocking(p, NULL, 0);
BUG_ON(sret > 0);
if (sret) {
@@ -1561,13 +1560,19 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
BTRFS_NODEPTRS_PER_BLOCK(root) / 2) {
int sret;
+ if (*write_lock_level < level + 1) {
+ *write_lock_level = level + 1;
+ btrfs_release_path(p);
+ goto again;
+ }
+
sret = reada_for_balance(root, p, level);
if (sret)
goto again;
btrfs_set_path_blocking(p);
sret = balance_level(trans, root, p, level);
- btrfs_clear_path_blocking(p, NULL);
+ btrfs_clear_path_blocking(p, NULL, 0);
if (sret) {
ret = sret;
@@ -1611,27 +1616,78 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
int err;
int level;
int lowest_unlock = 1;
+ int root_lock;
+ /* everything at write_lock_level or lower must be write locked */
+ int write_lock_level = 0;
u8 lowest_level = 0;
lowest_level = p->lowest_level;
WARN_ON(lowest_level && ins_len > 0);
WARN_ON(p->nodes[0] != NULL);
- if (ins_len < 0)
+ if (ins_len < 0) {
lowest_unlock = 2;
+ /* when we are removing items, we might have to go up to level
+ * two as we update tree pointers Make sure we keep write
+ * for those levels as well
+ */
+ write_lock_level = 2;
+ } else if (ins_len > 0) {
+ /*
+ * for inserting items, make sure we have a write lock on
+ * level 1 so we can update keys
+ */
+ write_lock_level = 1;
+ }
+
+ if (!cow)
+ write_lock_level = -1;
+
+ if (cow && (p->keep_locks || p->lowest_level))
+ write_lock_level = BTRFS_MAX_LEVEL;
+
again:
+ /*
+ * we try very hard to do read locks on the root
+ */
+ root_lock = BTRFS_READ_LOCK;
+ level = 0;
if (p->search_commit_root) {
+ /*
+ * the commit roots are read only
+ * so we always do read locks
+ */
b = root->commit_root;
extent_buffer_get(b);
+ level = btrfs_header_level(b);
if (!p->skip_locking)
- btrfs_tree_lock(b);
+ btrfs_tree_read_lock(b);
} else {
- if (p->skip_locking)
+ if (p->skip_locking) {
b = btrfs_root_node(root);
- else
- b = btrfs_lock_root_node(root);
+ level = btrfs_header_level(b);
+ } else {
+ /* we don't know the level of the root node
+ * until we actually have it read locked
+ */
+ b = btrfs_read_lock_root_node(root);
+ level = btrfs_header_level(b);
+ if (level <= write_lock_level) {
+ /* whoops, must trade for write lock */
+ btrfs_tree_read_unlock(b);
+ free_extent_buffer(b);
+ b = btrfs_lock_root_node(root);
+ root_lock = BTRFS_WRITE_LOCK;
+
+ /* the level might have changed, check again */
+ level = btrfs_header_level(b);
+ }
+ }
}
+ p->nodes[level] = b;
+ if (!p->skip_locking)
+ p->locks[level] = root_lock;
while (b) {
level = btrfs_header_level(b);
@@ -1640,10 +1696,6 @@ again:
* setup the path here so we can release it under lock
* contention with the cow code
*/
- p->nodes[level] = b;
- if (!p->skip_locking)
- p->locks[level] = 1;
-
if (cow) {
/*
* if we don't really need to cow this block
@@ -1655,6 +1707,16 @@ again:
btrfs_set_path_blocking(p);
+ /*
+ * must have write locks on this node and the
+ * parent
+ */
+ if (level + 1 > write_lock_level) {
+ write_lock_level = level + 1;
+ btrfs_release_path(p);
+ goto again;
+ }
+
err = btrfs_cow_block(trans, root, b,
p->nodes[level + 1],
p->slots[level + 1], &b);
@@ -1667,10 +1729,7 @@ cow_done:
BUG_ON(!cow && ins_len);
p->nodes[level] = b;
- if (!p->skip_locking)
- p->locks[level] = 1;
-
- btrfs_clear_path_blocking(p, NULL);
+ btrfs_clear_path_blocking(p, NULL, 0);
/*
* we have a lock on b and as long as we aren't changing
@@ -1696,7 +1755,7 @@ cow_done:
}
p->slots[level] = slot;
err = setup_nodes_for_search(trans, root, p, b, level,
- ins_len);
+ ins_len, &write_lock_level);
if (err == -EAGAIN)
goto again;
if (err) {
@@ -1706,6 +1765,19 @@ cow_done:
b = p->nodes[level];
slot = p->slots[level];
+ /*
+ * slot 0 is special, if we change the key
+ * we have to update the parent pointer
+ * which means we must have a write lock
+ * on the parent
+ */
+ if (slot == 0 && cow &&
+ write_lock_level < level + 1) {
+ write_lock_level = level + 1;
+ btrfs_release_path(p);
+ goto again;
+ }
+
unlock_up(p, level, lowest_unlock);
if (level == lowest_level) {
@@ -1724,23 +1796,42 @@ cow_done:
}
if (!p->skip_locking) {
- btrfs_clear_path_blocking(p, NULL);
- err = btrfs_try_spin_lock(b);
-
- if (!err) {
- btrfs_set_path_blocking(p);
- btrfs_tree_lock(b);
- btrfs_clear_path_blocking(p, b);
+ level = btrfs_header_level(b);
+ if (level <= write_lock_level) {
+ err = btrfs_try_tree_write_lock(b);
+ if (!err) {
+ btrfs_set_path_blocking(p);
+ btrfs_tree_lock(b);
+ btrfs_clear_path_blocking(p, b,
+ BTRFS_WRITE_LOCK);
+ }
+ p->locks[level] = BTRFS_WRITE_LOCK;
+ } else {
+ err = btrfs_try_tree_read_lock(b);
+ if (!err) {
+ btrfs_set_path_blocking(p);
+ btrfs_tree_read_lock(b);
+ btrfs_clear_path_blocking(p, b,
+ BTRFS_READ_LOCK);
+ }
+ p->locks[level] = BTRFS_READ_LOCK;
}
+ p->nodes[level] = b;
}
} else {
p->slots[level] = slot;
if (ins_len > 0 &&
btrfs_leaf_free_space(root, b) < ins_len) {
+ if (write_lock_level < 1) {
+ write_lock_level = 1;
+ btrfs_release_path(p);
+ goto again;
+ }
+
btrfs_set_path_blocking(p);
err = split_leaf(trans, root, key,
p, ins_len, ret == 0);
- btrfs_clear_path_blocking(p, NULL);
+ btrfs_clear_path_blocking(p, NULL, 0);
BUG_ON(err > 0);
if (err) {
@@ -2021,7 +2112,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
add_root_to_dirty_list(root);
extent_buffer_get(c);
path->nodes[level] = c;
- path->locks[level] = 1;
+ path->locks[level] = BTRFS_WRITE_LOCK;
path->slots[level] = 0;
return 0;
}
@@ -2249,14 +2340,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
if (path->slots[0] == i)
push_space += data_size;
- if (!left->map_token) {
- map_extent_buffer(left, (unsigned long)item,
- sizeof(struct btrfs_item),
- &left->map_token, &left->kaddr,
- &left->map_start, &left->map_len,
- KM_USER1);
- }
-
this_item_size = btrfs_item_size(left, item);
if (this_item_size + sizeof(*item) + push_space > free_space)
break;
@@ -2267,10 +2350,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
break;
i--;
}
- if (left->map_token) {
- unmap_extent_buffer(left, left->map_token, KM_USER1);
- left->map_token = NULL;
- }
if (push_items == 0)
goto out_unlock;
@@ -2312,21 +2391,10 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
push_space = BTRFS_LEAF_DATA_SIZE(root);
for (i = 0; i < right_nritems; i++) {
item = btrfs_item_nr(right, i);
- if (!right->map_token) {
- map_extent_buffer(right, (unsigned long)item,
- sizeof(struct btrfs_item),
- &right->map_token, &right->kaddr,
- &right->map_start, &right->map_len,
- KM_USER1);
- }
push_space -= btrfs_item_size(right, item);
btrfs_set_item_offset(right, item, push_space);
}
- if (right->map_token) {
- unmap_extent_buffer(right, right->map_token, KM_USER1);
- right->map_token = NULL;
- }
left_nritems -= push_items;
btrfs_set_header_nritems(left, left_nritems);
@@ -2463,13 +2531,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
for (i = 0; i < nr; i++) {
item = btrfs_item_nr(right, i);
- if (!right->map_token) {
- map_extent_buffer(right, (unsigned long)item,
- sizeof(struct btrfs_item),
- &right->map_token, &right->kaddr,
- &right->map_start, &right->map_len,
- KM_USER1);
- }
if (!empty && push_items > 0) {
if (path->slots[0] < i)
@@ -2492,11 +2553,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
push_space += this_item_size + sizeof(*item);
}
- if (right->map_token) {
- unmap_extent_buffer(right, right->map_token, KM_USER1);
- right->map_token = NULL;
- }
-
if (push_items == 0) {
ret = 1;
goto out;
@@ -2526,23 +2582,12 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
u32 ioff;
item = btrfs_item_nr(left, i);
- if (!left->map_token) {
- map_extent_buffer(left, (unsigned long)item,
- sizeof(struct btrfs_item),
- &left->map_token, &left->kaddr,
- &left->map_start, &left->map_len,
- KM_USER1);
- }
ioff = btrfs_item_offset(left, item);
btrfs_set_item_offset(left, item,
ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
}
btrfs_set_header_nritems(left, old_left_nritems + push_items);
- if (left->map_token) {
- unmap_extent_buffer(left, left->map_token, KM_USER1);
- left->map_token = NULL;
- }
/* fixup right node */
if (push_items > right_nritems) {
@@ -2570,21 +2615,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
for (i = 0; i < right_nritems; i++) {
item = btrfs_item_nr(right, i);
- if (!right->map_token) {
- map_extent_buffer(right, (unsigned long)item,
- sizeof(struct btrfs_item),
- &right->map_token, &right->kaddr,
- &right->map_start, &right->map_len,
- KM_USER1);
- }
-
push_space = push_space - btrfs_item_size(right, item);
btrfs_set_item_offset(right, item, push_space);
}
- if (right->map_token) {
- unmap_extent_buffer(right, right->map_token, KM_USER1);
- right->map_token = NULL;
- }
btrfs_mark_buffer_dirty(left);
if (right_nritems)
@@ -2725,23 +2758,10 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
struct btrfs_item *item = btrfs_item_nr(right, i);
u32 ioff;
- if (!right->map_token) {
- map_extent_buffer(right, (unsigned long)item,
- sizeof(struct btrfs_item),
- &right->map_token, &right->kaddr,
- &right->map_start, &right->map_len,
- KM_USER1);
- }
-
ioff = btrfs_item_offset(right, item);
btrfs_set_item_offset(right, item, ioff + rt_data_off);
}
- if (right->map_token) {
- unmap_extent_buffer(right, right->map_token, KM_USER1);
- right->map_token = NULL;
- }
-
btrfs_set_header_nritems(l, mid);
ret = 0;
btrfs_item_key(right, &disk_key, 0);
@@ -3260,23 +3280,10 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
u32 ioff;
item = btrfs_item_nr(leaf, i);
- if (!leaf->map_token) {
- map_extent_buffer(leaf, (unsigned long)item,
- sizeof(struct btrfs_item),
- &leaf->map_token, &leaf->kaddr,
- &leaf->map_start, &leaf->map_len,
- KM_USER1);
- }
-
ioff = btrfs_item_offset(leaf, item);
btrfs_set_item_offset(leaf, item, ioff + size_diff);
}
- if (leaf->map_token) {
- unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
- leaf->map_token = NULL;
- }
-
/* shift the data */
if (from_end) {
memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
@@ -3373,22 +3380,10 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
u32 ioff;
item = btrfs_item_nr(leaf, i);
- if (!leaf->map_token) {
- map_extent_buffer(leaf, (unsigned long)item,
- sizeof(struct btrfs_item),
- &leaf->map_token, &leaf->kaddr,
- &leaf->map_start, &leaf->map_len,
- KM_USER1);
- }
ioff = btrfs_item_offset(leaf, item);
btrfs_set_item_offset(leaf, item, ioff - data_size);
}
- if (leaf->map_token) {
- unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
- leaf->map_token = NULL;
- }
-
/* shift the data */
memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
data_end - data_size, btrfs_leaf_data(leaf) +
@@ -3490,27 +3485,13 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
* item0..itemN ... dataN.offset..dataN.size .. data0.size
*/
/* first correct the data pointers */
- WARN_ON(leaf->map_token);
for (i = slot; i < nritems; i++) {
u32 ioff;
item = btrfs_item_nr(leaf, i);
- if (!leaf->map_token) {
- map_extent_buffer(leaf, (unsigned long)item,
- sizeof(struct btrfs_item),
- &leaf->map_token, &leaf->kaddr,
- &leaf->map_start, &leaf->map_len,
- KM_USER1);
- }
-
ioff = btrfs_item_offset(leaf, item);
btrfs_set_item_offset(leaf, item, ioff - total_data);
}
- if (leaf->map_token) {
- unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
- leaf->map_token = NULL;
- }
-
/* shift the items */
memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
btrfs_item_nr_offset(slot),
@@ -3604,27 +3585,13 @@ int setup_items_for_insert(struct btrfs_trans_handle *trans,
* item0..itemN ... dataN.offset..dataN.size .. data0.size
*/
/* first correct the data pointers */
- WARN_ON(leaf->map_token);
for (i = slot; i < nritems; i++) {
u32 ioff;
item = btrfs_item_nr(leaf, i);
- if (!leaf->map_token) {
- map_extent_buffer(leaf, (unsigned long)item,
- sizeof(struct btrfs_item),
- &leaf->map_token, &leaf->kaddr,
- &leaf->map_start, &leaf->map_len,
- KM_USER1);
- }
-
ioff = btrfs_item_offset(leaf, item);
btrfs_set_item_offset(leaf, item, ioff - total_data);
}
- if (leaf->map_token) {
- unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
- leaf->map_token = NULL;
- }
-
/* shift the items */
memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
btrfs_item_nr_offset(slot),
@@ -3836,22 +3803,10 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
u32 ioff;
item = btrfs_item_nr(leaf, i);
- if (!leaf->map_token) {
- map_extent_buffer(leaf, (unsigned long)item,
- sizeof(struct btrfs_item),
- &leaf->map_token, &leaf->kaddr,
- &leaf->map_start, &leaf->map_len,
- KM_USER1);
- }
ioff = btrfs_item_offset(leaf, item);
btrfs_set_item_offset(leaf, item, ioff + dsize);
}
- if (leaf->map_token) {
- unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
- leaf->map_token = NULL;
- }
-
memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
btrfs_item_nr_offset(slot + nr),
sizeof(struct btrfs_item) *
@@ -4000,11 +3955,11 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
WARN_ON(!path->keep_locks);
again:
- cur = btrfs_lock_root_node(root);
+ cur = btrfs_read_lock_root_node(root);
level = btrfs_header_level(cur);
WARN_ON(path->nodes[level]);
path->nodes[level] = cur;
- path->locks[level] = 1;
+ path->locks[level] = BTRFS_READ_LOCK;
if (btrfs_header_generation(cur) < min_trans) {
ret = 1;
@@ -4094,12 +4049,12 @@ find_next_key:
cur = read_node_slot(root, cur, slot);
BUG_ON(!cur);
- btrfs_tree_lock(cur);
+ btrfs_tree_read_lock(cur);
- path->locks[level - 1] = 1;
+ path->locks[level - 1] = BTRFS_READ_LOCK;
path->nodes[level - 1] = cur;
unlock_up(path, level, 1);
- btrfs_clear_path_blocking(path, NULL);
+ btrfs_clear_path_blocking(path, NULL, 0);
}
out:
if (ret == 0)
@@ -4214,30 +4169,21 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
u32 nritems;
int ret;
int old_spinning = path->leave_spinning;
- int force_blocking = 0;
+ int next_rw_lock = 0;
nritems = btrfs_header_nritems(path->nodes[0]);
if (nritems == 0)
return 1;
- /*
- * we take the blocks in an order that upsets lockdep. Using
- * blocking mode is the only way around it.
- */
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- force_blocking = 1;
-#endif
-
btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
again:
level = 1;
next = NULL;
+ next_rw_lock = 0;
btrfs_release_path(path);
path->keep_locks = 1;
-
- if (!force_blocking)
- path->leave_spinning = 1;
+ path->leave_spinning = 1;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
path->keep_locks = 0;
@@ -4277,11 +4223,12 @@ again:
}
if (next) {
- btrfs_tree_unlock(next);
+ btrfs_tree_unlock_rw(next, next_rw_lock);
free_extent_buffer(next);
}
next = c;
+ next_rw_lock = path->locks[level];
ret = read_block_for_search(NULL, root, path, &next, level,
slot, &key);
if (ret == -EAGAIN)
@@ -4293,15 +4240,14 @@ again:
}
if (!path->skip_locking) {
- ret = btrfs_try_spin_lock(next);
+ ret = btrfs_try_tree_read_lock(next);
if (!ret) {
btrfs_set_path_blocking(path);
- btrfs_tree_lock(next);
- if (!force_blocking)
- btrfs_clear_path_blocking(path, next);
+ btrfs_tree_read_lock(next);
+ btrfs_clear_path_blocking(path, next,
+ BTRFS_READ_LOCK);
}
- if (force_blocking)
- btrfs_set_lock_blocking(next);
+ next_rw_lock = BTRFS_READ_LOCK;
}
break;
}
@@ -4310,14 +4256,13 @@ again:
level--;
c = path->nodes[level];
if (path->locks[level])
- btrfs_tree_unlock(c);
+ btrfs_tree_unlock_rw(c, path->locks[level]);
free_extent_buffer(c);
path->nodes[level] = next;
path->slots[level] = 0;
if (!path->skip_locking)
- path->locks[level] = 1;
-
+ path->locks[level] = next_rw_lock;
if (!level)
break;
@@ -4332,16 +4277,14 @@ again:
}
if (!path->skip_locking) {
- btrfs_assert_tree_locked(path->nodes[level]);
- ret = btrfs_try_spin_lock(next);
+ ret = btrfs_try_tree_read_lock(next);
if (!ret) {
btrfs_set_path_blocking(path);
- btrfs_tree_lock(next);
- if (!force_blocking)
- btrfs_clear_path_blocking(path, next);
+ btrfs_tree_read_lock(next);
+ btrfs_clear_path_blocking(path, next,
+ BTRFS_READ_LOCK);
}
- if (force_blocking)
- btrfs_set_lock_blocking(next);
+ next_rw_lock = BTRFS_READ_LOCK;
}
}
ret = 0;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 378b5b4443f3..0469263e327e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -19,7 +19,6 @@
#ifndef __BTRFS_CTREE__
#define __BTRFS_CTREE__
-#include <linux/version.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/fs.h>
@@ -756,6 +755,8 @@ struct btrfs_space_info {
chunks for this space */
unsigned int chunk_alloc:1; /* set if we are allocating a chunk */
+ unsigned int flush:1; /* set if we are trying to make space */
+
unsigned int force_alloc; /* set if we need to force a chunk
alloc for this space */
@@ -765,7 +766,7 @@ struct btrfs_space_info {
struct list_head block_groups[BTRFS_NR_RAID_TYPES];
spinlock_t lock;
struct rw_semaphore groups_sem;
- atomic_t caching_threads;
+ wait_queue_head_t wait;
};
struct btrfs_block_rsv {
@@ -825,6 +826,7 @@ struct btrfs_caching_control {
struct list_head list;
struct mutex mutex;
wait_queue_head_t wait;
+ struct btrfs_work work;
struct btrfs_block_group_cache *block_group;
u64 progress;
atomic_t count;
@@ -967,6 +969,12 @@ struct btrfs_fs_info {
struct srcu_struct subvol_srcu;
spinlock_t trans_lock;
+ /*
+ * the reloc mutex goes with the trans lock, it is taken
+ * during commit to protect us from the relocation code
+ */
+ struct mutex reloc_mutex;
+
struct list_head trans_list;
struct list_head hashers;
struct list_head dead_roots;
@@ -1027,6 +1035,8 @@ struct btrfs_fs_info {
struct btrfs_workers endio_write_workers;
struct btrfs_workers endio_freespace_worker;
struct btrfs_workers submit_workers;
+ struct btrfs_workers caching_workers;
+
/*
* fixup workers take dirty pages that didn't properly go through
* the cow mechanism and make them safe to write. It happens
@@ -1172,6 +1182,14 @@ struct btrfs_root {
u32 type;
u64 highest_objectid;
+
+ /* btrfs_record_root_in_trans is a multi-step process,
+ * and it can race with the balancing code. But the
+ * race is very small, and only the first time the root
+ * is added to each transaction. So in_trans_setup
+ * is used to tell us when more checks are required
+ */
+ unsigned long in_trans_setup;
int ref_cows;
int track_dirty;
int in_radix;
@@ -1181,7 +1199,6 @@ struct btrfs_root {
struct btrfs_key defrag_max;
int defrag_running;
char *name;
- int in_sysfs;
/* the dirty list is only used by non-reference counted roots */
struct list_head dirty_list;
@@ -1207,7 +1224,7 @@ struct btrfs_root {
* right now this just gets used so that a root has its own devid
* for stat. It may be used for more later
*/
- struct super_block anon_super;
+ dev_t anon_dev;
};
struct btrfs_ioctl_defrag_range_args {
@@ -1323,6 +1340,11 @@ struct btrfs_ioctl_defrag_range_args {
*/
#define BTRFS_STRING_ITEM_KEY 253
+/*
+ * Flags for mount options.
+ *
+ * Note: don't forget to add new options to btrfs_show_options()
+ */
#define BTRFS_MOUNT_NODATASUM (1 << 0)
#define BTRFS_MOUNT_NODATACOW (1 << 1)
#define BTRFS_MOUNT_NOBARRIER (1 << 2)
@@ -2111,7 +2133,7 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
/* extent-tree.c */
static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
- int num_items)
+ unsigned num_items)
{
return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
3 * num_items;
@@ -2205,9 +2227,6 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
-int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- int num_items);
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
@@ -2313,7 +2332,7 @@ struct btrfs_path *btrfs_alloc_path(void);
void btrfs_free_path(struct btrfs_path *p);
void btrfs_set_path_blocking(struct btrfs_path *p);
void btrfs_clear_path_blocking(struct btrfs_path *p,
- struct extent_buffer *held);
+ struct extent_buffer *held, int held_rw);
void btrfs_unlock_up_safe(struct btrfs_path *p, int level);
int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -2387,8 +2406,8 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
btrfs_root_item *item, struct btrfs_key *key);
int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
int btrfs_find_orphan_roots(struct btrfs_root *tree_root);
-int btrfs_set_root_node(struct btrfs_root_item *item,
- struct extent_buffer *node);
+void btrfs_set_root_node(struct btrfs_root_item *item,
+ struct extent_buffer *node);
void btrfs_check_and_init_root_item(struct btrfs_root_item *item);
/* dir-item.c */
@@ -2493,6 +2512,9 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit);
/* inode.c */
+struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
+ size_t pg_offset, u64 start, u64 len,
+ int create);
/* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */
#if defined(ClearPageFsMisc) && !defined(ClearPageChecked)
@@ -2501,6 +2523,14 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
#define PageChecked PageFsMisc
#endif
+/* This forces readahead on a given range of bytes in an inode */
+static inline void btrfs_force_ra(struct address_space *mapping,
+ struct file_ra_state *ra, struct file *file,
+ pgoff_t offset, unsigned long req_size)
+{
+ page_cache_sync_readahead(mapping, ra, file, offset, req_size);
+}
+
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
int btrfs_set_inode_index(struct inode *dir, u64 *index);
int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
@@ -2529,9 +2559,6 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
size_t size, struct bio *bio, unsigned long bio_flags);
-unsigned long btrfs_force_ra(struct address_space *mapping,
- struct file_ra_state *ra, struct file *file,
- pgoff_t offset, pgoff_t last_index);
int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
int btrfs_readpage(struct file *file, struct page *page);
void btrfs_evict_inode(struct inode *inode);
@@ -2585,7 +2612,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
struct inode *inode);
int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info);
-int btrfs_sync_file(struct file *file, int datasync);
+int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
int skip_pinned);
extern const struct file_operations btrfs_file_operations;
@@ -2625,13 +2652,22 @@ do { \
/* acl.c */
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
-int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags);
-#else
-#define btrfs_check_acl NULL
-#endif
+struct posix_acl *btrfs_get_acl(struct inode *inode, int type);
int btrfs_init_acl(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *dir);
int btrfs_acl_chmod(struct inode *inode);
+#else
+#define btrfs_get_acl NULL
+static inline int btrfs_init_acl(struct btrfs_trans_handle *trans,
+ struct inode *inode, struct inode *dir)
+{
+ return 0;
+}
+static inline int btrfs_acl_chmod(struct inode *inode)
+{
+ return 0;
+}
+#endif
/* relocation.c */
int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start);
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 6462c29d2d37..b52c672f4c18 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -82,19 +82,16 @@ static inline struct btrfs_delayed_root *btrfs_get_delayed_root(
return root->fs_info->delayed_root;
}
-static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
- struct inode *inode)
+static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
{
- struct btrfs_delayed_node *node;
struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
struct btrfs_root *root = btrfs_inode->root;
u64 ino = btrfs_ino(inode);
- int ret;
+ struct btrfs_delayed_node *node;
-again:
node = ACCESS_ONCE(btrfs_inode->delayed_node);
if (node) {
- atomic_inc(&node->refs); /* can be accessed */
+ atomic_inc(&node->refs);
return node;
}
@@ -102,8 +99,10 @@ again:
node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
if (node) {
if (btrfs_inode->delayed_node) {
+ atomic_inc(&node->refs); /* can be accessed */
+ BUG_ON(btrfs_inode->delayed_node != node);
spin_unlock(&root->inode_lock);
- goto again;
+ return node;
}
btrfs_inode->delayed_node = node;
atomic_inc(&node->refs); /* can be accessed */
@@ -113,6 +112,23 @@ again:
}
spin_unlock(&root->inode_lock);
+ return NULL;
+}
+
+static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
+ struct inode *inode)
+{
+ struct btrfs_delayed_node *node;
+ struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
+ struct btrfs_root *root = btrfs_inode->root;
+ u64 ino = btrfs_ino(inode);
+ int ret;
+
+again:
+ node = btrfs_get_delayed_node(inode);
+ if (node)
+ return node;
+
node = kmem_cache_alloc(delayed_node_cache, GFP_NOFS);
if (!node)
return ERR_PTR(-ENOMEM);
@@ -297,7 +313,6 @@ struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
item->data_len = data_len;
item->ins_or_del = 0;
item->bytes_reserved = 0;
- item->block_rsv = NULL;
item->delayed_node = NULL;
atomic_set(&item->refs, 1);
}
@@ -549,19 +564,6 @@ struct btrfs_delayed_item *__btrfs_next_delayed_item(
return next;
}
-static inline struct btrfs_delayed_node *btrfs_get_delayed_node(
- struct inode *inode)
-{
- struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
- struct btrfs_delayed_node *delayed_node;
-
- delayed_node = btrfs_inode->delayed_node;
- if (delayed_node)
- atomic_inc(&delayed_node->refs);
-
- return delayed_node;
-}
-
static inline struct btrfs_root *btrfs_get_fs_root(struct btrfs_root *root,
u64 root_id)
{
@@ -593,10 +595,8 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
num_bytes = btrfs_calc_trans_metadata_size(root, 1);
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
- if (!ret) {
+ if (!ret)
item->bytes_reserved = num_bytes;
- item->block_rsv = dst_rsv;
- }
return ret;
}
@@ -604,10 +604,13 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
struct btrfs_delayed_item *item)
{
+ struct btrfs_block_rsv *rsv;
+
if (!item->bytes_reserved)
return;
- btrfs_block_rsv_release(root, item->block_rsv,
+ rsv = &root->fs_info->global_block_rsv;
+ btrfs_block_rsv_release(root, rsv,
item->bytes_reserved);
}
@@ -732,7 +735,7 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
}
/* reset all the locked nodes in the patch to spinning locks. */
- btrfs_clear_path_blocking(path, NULL);
+ btrfs_clear_path_blocking(path, NULL, 0);
/* insert the keys of the items */
ret = setup_items_for_insert(trans, root, path, keys, data_size,
@@ -1014,6 +1017,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_delayed_root *delayed_root;
struct btrfs_delayed_node *curr_node, *prev_node;
struct btrfs_path *path;
+ struct btrfs_block_rsv *block_rsv;
int ret = 0;
path = btrfs_alloc_path();
@@ -1021,6 +1025,9 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
return -ENOMEM;
path->leave_spinning = 1;
+ block_rsv = trans->block_rsv;
+ trans->block_rsv = &root->fs_info->global_block_rsv;
+
delayed_root = btrfs_get_delayed_root(root);
curr_node = btrfs_first_delayed_node(delayed_root);
@@ -1045,6 +1052,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
}
btrfs_free_path(path);
+ trans->block_rsv = block_rsv;
return ret;
}
@@ -1052,6 +1060,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_delayed_node *node)
{
struct btrfs_path *path;
+ struct btrfs_block_rsv *block_rsv;
int ret;
path = btrfs_alloc_path();
@@ -1059,6 +1068,9 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
return -ENOMEM;
path->leave_spinning = 1;
+ block_rsv = trans->block_rsv;
+ trans->block_rsv = &node->root->fs_info->global_block_rsv;
+
ret = btrfs_insert_delayed_items(trans, path, node->root, node);
if (!ret)
ret = btrfs_delete_delayed_items(trans, path, node->root, node);
@@ -1066,6 +1078,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
ret = btrfs_update_delayed_inode(trans, node->root, path, node);
btrfs_free_path(path);
+ trans->block_rsv = block_rsv;
return ret;
}
@@ -1116,6 +1129,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
struct btrfs_path *path;
struct btrfs_delayed_node *delayed_node = NULL;
struct btrfs_root *root;
+ struct btrfs_block_rsv *block_rsv;
unsigned long nr = 0;
int need_requeue = 0;
int ret;
@@ -1134,6 +1148,9 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
if (IS_ERR(trans))
goto free_path;
+ block_rsv = trans->block_rsv;
+ trans->block_rsv = &root->fs_info->global_block_rsv;
+
ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);
if (!ret)
ret = btrfs_delete_delayed_items(trans, path, root,
@@ -1176,6 +1193,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
nr = trans->blocks_used;
+ trans->block_rsv = block_rsv;
btrfs_end_transaction_dmeta(trans, root);
__btrfs_btree_balance_dirty(root, nr);
free_path:
@@ -1222,6 +1240,13 @@ again:
return 0;
}
+void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
+{
+ struct btrfs_delayed_root *delayed_root;
+ delayed_root = btrfs_get_delayed_root(root);
+ WARN_ON(btrfs_first_delayed_node(delayed_root));
+}
+
void btrfs_balance_delayed_items(struct btrfs_root *root)
{
struct btrfs_delayed_root *delayed_root;
@@ -1382,8 +1407,7 @@ end:
int btrfs_inode_delayed_dir_index_count(struct inode *inode)
{
- struct btrfs_delayed_node *delayed_node = BTRFS_I(inode)->delayed_node;
- int ret = 0;
+ struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
if (!delayed_node)
return -ENOENT;
@@ -1393,11 +1417,14 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode)
* a new directory index is added into the delayed node and index_cnt
* is updated now. So we needn't lock the delayed node.
*/
- if (!delayed_node->index_cnt)
+ if (!delayed_node->index_cnt) {
+ btrfs_release_delayed_node(delayed_node);
return -EINVAL;
+ }
BTRFS_I(inode)->index_cnt = delayed_node->index_cnt;
- return ret;
+ btrfs_release_delayed_node(delayed_node);
+ return 0;
}
void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
@@ -1591,6 +1618,57 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
inode->i_ctime.tv_nsec);
}
+int btrfs_fill_inode(struct inode *inode, u32 *rdev)
+{
+ struct btrfs_delayed_node *delayed_node;
+ struct btrfs_inode_item *inode_item;
+ struct btrfs_timespec *tspec;
+
+ delayed_node = btrfs_get_delayed_node(inode);
+ if (!delayed_node)
+ return -ENOENT;
+
+ mutex_lock(&delayed_node->mutex);
+ if (!delayed_node->inode_dirty) {
+ mutex_unlock(&delayed_node->mutex);
+ btrfs_release_delayed_node(delayed_node);
+ return -ENOENT;
+ }
+
+ inode_item = &delayed_node->inode_item;
+
+ inode->i_uid = btrfs_stack_inode_uid(inode_item);
+ inode->i_gid = btrfs_stack_inode_gid(inode_item);
+ btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item));
+ inode->i_mode = btrfs_stack_inode_mode(inode_item);
+ inode->i_nlink = btrfs_stack_inode_nlink(inode_item);
+ inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
+ BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
+ BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item);
+ inode->i_rdev = 0;
+ *rdev = btrfs_stack_inode_rdev(inode_item);
+ BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
+
+ tspec = btrfs_inode_atime(inode_item);
+ inode->i_atime.tv_sec = btrfs_stack_timespec_sec(tspec);
+ inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
+
+ tspec = btrfs_inode_mtime(inode_item);
+ inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(tspec);
+ inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
+
+ tspec = btrfs_inode_ctime(inode_item);
+ inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(tspec);
+ inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
+
+ inode->i_generation = BTRFS_I(inode)->generation;
+ BTRFS_I(inode)->index_cnt = (u64)-1;
+
+ mutex_unlock(&delayed_node->mutex);
+ btrfs_release_delayed_node(delayed_node);
+ return 0;
+}
+
int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode)
{
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index eb7d240aa648..7083d08b2a21 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -25,7 +25,7 @@
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/wait.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include "ctree.h"
@@ -75,7 +75,6 @@ struct btrfs_delayed_item {
struct list_head tree_list; /* used for batch insert/delete items */
struct list_head readdir_list; /* used for readdir items */
u64 bytes_reserved;
- struct btrfs_block_rsv *block_rsv;
struct btrfs_delayed_node *delayed_node;
atomic_t refs;
int ins_or_del;
@@ -120,6 +119,7 @@ void btrfs_kill_delayed_inode_items(struct inode *inode);
int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode);
+int btrfs_fill_inode(struct inode *inode, u32 *rdev);
/* Used for drop dead root */
void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
@@ -138,4 +138,8 @@ int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
/* for init */
int __init btrfs_delayed_inode_init(void);
void btrfs_delayed_inode_exit(void);
+
+/* for debugging */
+void btrfs_assert_delayed_root_empty(struct btrfs_root *root);
+
#endif
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 685f2593c4f0..31d84e78129b 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -89,13 +89,8 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
data_size = sizeof(*dir_item) + name_len + data_len;
dir_item = insert_with_overflow(trans, root, path, &key, data_size,
name, name_len);
- /*
- * FIXME: at some point we should handle xattr's that are larger than
- * what we can fit in our leaf. We set location to NULL b/c we arent
- * pointing at anything else, that will change if we store the xattr
- * data in a separate inode.
- */
- BUG_ON(IS_ERR(dir_item));
+ if (IS_ERR(dir_item))
+ return PTR_ERR(dir_item);
memset(&location, 0, sizeof(location));
leaf = path->nodes[0];
@@ -203,8 +198,6 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
struct btrfs_key key;
int ins_len = mod < 0 ? -1 : 0;
int cow = mod != 0;
- struct btrfs_key found_key;
- struct extent_buffer *leaf;
key.objectid = dir;
btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
@@ -214,18 +207,7 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
if (ret < 0)
return ERR_PTR(ret);
- if (ret > 0) {
- if (path->slots[0] == 0)
- return NULL;
- path->slots[0]--;
- }
-
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-
- if (found_key.objectid != dir ||
- btrfs_key_type(&found_key) != BTRFS_DIR_ITEM_KEY ||
- found_key.offset != key.offset)
+ if (ret > 0)
return NULL;
return btrfs_match_dir_item_name(root, path, name, name_len);
@@ -320,8 +302,6 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
struct btrfs_key key;
int ins_len = mod < 0 ? -1 : 0;
int cow = mod != 0;
- struct btrfs_key found_key;
- struct extent_buffer *leaf;
key.objectid = dir;
btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
@@ -329,18 +309,7 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
if (ret < 0)
return ERR_PTR(ret);
- if (ret > 0) {
- if (path->slots[0] == 0)
- return NULL;
- path->slots[0]--;
- }
-
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-
- if (found_key.objectid != dir ||
- btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY ||
- found_key.offset != key.offset)
+ if (ret > 0)
return NULL;
return btrfs_match_dir_item_name(root, path, name, name_len);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a203d363184d..07b3ac662e19 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -100,38 +100,83 @@ struct async_submit_bio {
struct btrfs_work work;
};
-/* These are used to set the lockdep class on the extent buffer locks.
- * The class is set by the readpage_end_io_hook after the buffer has
- * passed csum validation but before the pages are unlocked.
+/*
+ * Lockdep class keys for extent_buffer->lock's in this root. For a given
+ * eb, the lockdep key is determined by the btrfs_root it belongs to and
+ * the level the eb occupies in the tree.
+ *
+ * Different roots are used for different purposes and may nest inside each
+ * other and they require separate keysets. As lockdep keys should be
+ * static, assign keysets according to the purpose of the root as indicated
+ * by btrfs_root->objectid. This ensures that all special purpose roots
+ * have separate keysets.
*
- * The lockdep class is also set by btrfs_init_new_buffer on freshly
- * allocated blocks.
+ * Lock-nesting across peer nodes is always done with the immediate parent
+ * node locked thus preventing deadlock. As lockdep doesn't know this, use
+ * subclass to avoid triggering lockdep warning in such cases.
*
- * The class is based on the level in the tree block, which allows lockdep
- * to know that lower nodes nest inside the locks of higher nodes.
+ * The key is set by the readpage_end_io_hook after the buffer has passed
+ * csum validation but before the pages are unlocked. It is also set by
+ * btrfs_init_new_buffer on freshly allocated blocks.
*
- * We also add a check to make sure the highest level of the tree is
- * the same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this
- * code needs update as well.
+ * We also add a check to make sure the highest level of the tree is the
+ * same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code
+ * needs update as well.
*/
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# if BTRFS_MAX_LEVEL != 8
# error
# endif
-static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1];
-static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = {
- /* leaf */
- "btrfs-extent-00",
- "btrfs-extent-01",
- "btrfs-extent-02",
- "btrfs-extent-03",
- "btrfs-extent-04",
- "btrfs-extent-05",
- "btrfs-extent-06",
- "btrfs-extent-07",
- /* highest possible level */
- "btrfs-extent-08",
+
+static struct btrfs_lockdep_keyset {
+ u64 id; /* root objectid */
+ const char *name_stem; /* lock name stem */
+ char names[BTRFS_MAX_LEVEL + 1][20];
+ struct lock_class_key keys[BTRFS_MAX_LEVEL + 1];
+} btrfs_lockdep_keysets[] = {
+ { .id = BTRFS_ROOT_TREE_OBJECTID, .name_stem = "root" },
+ { .id = BTRFS_EXTENT_TREE_OBJECTID, .name_stem = "extent" },
+ { .id = BTRFS_CHUNK_TREE_OBJECTID, .name_stem = "chunk" },
+ { .id = BTRFS_DEV_TREE_OBJECTID, .name_stem = "dev" },
+ { .id = BTRFS_FS_TREE_OBJECTID, .name_stem = "fs" },
+ { .id = BTRFS_CSUM_TREE_OBJECTID, .name_stem = "csum" },
+ { .id = BTRFS_ORPHAN_OBJECTID, .name_stem = "orphan" },
+ { .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" },
+ { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" },
+ { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" },
+ { .id = 0, .name_stem = "tree" },
};
+
+void __init btrfs_init_lockdep(void)
+{
+ int i, j;
+
+ /* initialize lockdep class names */
+ for (i = 0; i < ARRAY_SIZE(btrfs_lockdep_keysets); i++) {
+ struct btrfs_lockdep_keyset *ks = &btrfs_lockdep_keysets[i];
+
+ for (j = 0; j < ARRAY_SIZE(ks->names); j++)
+ snprintf(ks->names[j], sizeof(ks->names[j]),
+ "btrfs-%s-%02d", ks->name_stem, j);
+ }
+}
+
+void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb,
+ int level)
+{
+ struct btrfs_lockdep_keyset *ks;
+
+ BUG_ON(level >= ARRAY_SIZE(ks->keys));
+
+ /* find the matching keyset, id 0 is the default entry */
+ for (ks = btrfs_lockdep_keysets; ks->id; ks++)
+ if (ks->id == objectid)
+ break;
+
+ lockdep_set_class_and_name(&eb->lock,
+ &ks->keys[level], ks->names[level]);
+}
+
#endif
/*
@@ -217,7 +262,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
unsigned long len;
unsigned long cur_len;
unsigned long offset = BTRFS_CSUM_SIZE;
- char *map_token = NULL;
char *kaddr;
unsigned long map_start;
unsigned long map_len;
@@ -228,8 +272,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
len = buf->len - offset;
while (len > 0) {
err = map_private_extent_buffer(buf, offset, 32,
- &map_token, &kaddr,
- &map_start, &map_len, KM_USER0);
+ &kaddr, &map_start, &map_len);
if (err)
return 1;
cur_len = min(len, map_len - (offset - map_start));
@@ -237,7 +280,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
crc, cur_len);
len -= cur_len;
offset += cur_len;
- unmap_extent_buffer(buf, map_token, KM_USER0);
}
if (csum_size > sizeof(inline_result)) {
result = kzalloc(csum_size * sizeof(char), GFP_NOFS);
@@ -494,15 +536,6 @@ static noinline int check_leaf(struct btrfs_root *root,
return 0;
}
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
-{
- lockdep_set_class_and_name(&eb->lock,
- &btrfs_eb_class[level],
- btrfs_eb_name[level]);
-}
-#endif
-
static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state)
{
@@ -553,7 +586,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
}
found_level = btrfs_header_level(eb);
- btrfs_set_buffer_lockdep_class(eb, found_level);
+ btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
+ eb, found_level);
ret = csum_tree_block(root, eb, 1);
if (ret) {
@@ -1044,7 +1078,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->last_trans = 0;
root->highest_objectid = 0;
root->name = NULL;
- root->in_sysfs = 0;
root->inode_tree = RB_ROOT;
INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
root->block_rsv = NULL;
@@ -1078,12 +1111,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
init_completion(&root->kobj_unregister);
root->defrag_running = 0;
root->root_key.objectid = objectid;
- root->anon_super.s_root = NULL;
- root->anon_super.s_dev = 0;
- INIT_LIST_HEAD(&root->anon_super.s_list);
- INIT_LIST_HEAD(&root->anon_super.s_instances);
- init_rwsem(&root->anon_super.s_umount);
-
+ root->anon_dev = 0;
return 0;
}
@@ -1300,19 +1328,21 @@ again:
return root;
root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
- if (!root->free_ino_ctl)
- goto fail;
root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
GFP_NOFS);
- if (!root->free_ino_pinned)
+ if (!root->free_ino_pinned || !root->free_ino_ctl) {
+ ret = -ENOMEM;
goto fail;
+ }
btrfs_init_free_ino_ctl(root);
mutex_init(&root->fs_commit_mutex);
spin_lock_init(&root->cache_lock);
init_waitqueue_head(&root->cache_wait);
- set_anon_super(&root->anon_super, NULL);
+ ret = get_anon_bdev(&root->anon_dev);
+ if (ret)
+ goto fail;
if (btrfs_root_refs(&root->root_item) == 0) {
ret = -ENOENT;
@@ -1602,7 +1632,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
goto fail_bdi;
}
- fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS;
+ mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
INIT_LIST_HEAD(&fs_info->trans_list);
@@ -1618,6 +1648,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->fs_roots_radix_lock);
spin_lock_init(&fs_info->delayed_iput_lock);
spin_lock_init(&fs_info->defrag_inodes_lock);
+ mutex_init(&fs_info->reloc_mutex);
init_completion(&fs_info->kobj_unregister);
fs_info->tree_root = tree_root;
@@ -1668,8 +1699,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
init_waitqueue_head(&fs_info->scrub_pause_wait);
init_rwsem(&fs_info->scrub_super_lock);
fs_info->scrub_workers_refcnt = 0;
- btrfs_init_workers(&fs_info->scrub_workers, "scrub",
- fs_info->thread_pool_size, &fs_info->generic_worker);
sb->s_blocksize = 4096;
sb->s_blocksize_bits = blksize_bits(4096);
@@ -1807,6 +1836,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->thread_pool_size),
&fs_info->generic_worker);
+ btrfs_init_workers(&fs_info->caching_workers, "cache",
+ 2, &fs_info->generic_worker);
+
/* a higher idle thresh on the submit workers makes it much more
* likely that bios will be send down in a sane order to the
* devices
@@ -1860,6 +1892,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_start_workers(&fs_info->endio_write_workers, 1);
btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
btrfs_start_workers(&fs_info->delayed_workers, 1);
+ btrfs_start_workers(&fs_info->caching_workers, 1);
fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -2117,6 +2150,7 @@ fail_sb_buffer:
btrfs_stop_workers(&fs_info->endio_freespace_worker);
btrfs_stop_workers(&fs_info->submit_workers);
btrfs_stop_workers(&fs_info->delayed_workers);
+ btrfs_stop_workers(&fs_info->caching_workers);
fail_alloc:
kfree(fs_info->delayed_root);
fail_iput:
@@ -2393,10 +2427,8 @@ static void free_fs_root(struct btrfs_root *root)
{
iput(root->cache_inode);
WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
- if (root->anon_super.s_dev) {
- down_write(&root->anon_super.s_umount);
- kill_anon_super(&root->anon_super);
- }
+ if (root->anon_dev)
+ free_anon_bdev(root->anon_dev);
free_extent_buffer(root->node);
free_extent_buffer(root->commit_root);
kfree(root->free_ino_ctl);
@@ -2584,6 +2616,7 @@ int close_ctree(struct btrfs_root *root)
btrfs_stop_workers(&fs_info->endio_freespace_worker);
btrfs_stop_workers(&fs_info->submit_workers);
btrfs_stop_workers(&fs_info->delayed_workers);
+ btrfs_stop_workers(&fs_info->caching_workers);
btrfs_close_devices(fs_info->fs_devices);
btrfs_mapping_tree_free(&fs_info->mapping_tree);
@@ -2911,9 +2944,8 @@ static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
INIT_LIST_HEAD(&splice);
- list_splice_init(&root->fs_info->delalloc_inodes, &splice);
-
spin_lock(&root->fs_info->delalloc_lock);
+ list_splice_init(&root->fs_info->delalloc_inodes, &splice);
while (!list_empty(&splice)) {
btrfs_inode = list_entry(splice.next, struct btrfs_inode,
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index a0b610a67aae..bec3ea4bd67f 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -87,10 +87,14 @@ int btree_lock_page_hook(struct page *page);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
-void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level);
+void btrfs_init_lockdep(void);
+void btrfs_set_buffer_lockdep_class(u64 objectid,
+ struct extent_buffer *eb, int level);
#else
-static inline void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb,
- int level)
+static inline void btrfs_init_lockdep(void)
+{ }
+static inline void btrfs_set_buffer_lockdep_class(u64 objectid,
+ struct extent_buffer *eb, int level)
{
}
#endif
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5b9b6b6df242..66bac226944e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -320,12 +320,12 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
return total_added;
}
-static int caching_kthread(void *data)
+static noinline void caching_thread(struct btrfs_work *work)
{
- struct btrfs_block_group_cache *block_group = data;
- struct btrfs_fs_info *fs_info = block_group->fs_info;
- struct btrfs_caching_control *caching_ctl = block_group->caching_ctl;
- struct btrfs_root *extent_root = fs_info->extent_root;
+ struct btrfs_block_group_cache *block_group;
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_caching_control *caching_ctl;
+ struct btrfs_root *extent_root;
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_key key;
@@ -334,9 +334,14 @@ static int caching_kthread(void *data)
u32 nritems;
int ret = 0;
+ caching_ctl = container_of(work, struct btrfs_caching_control, work);
+ block_group = caching_ctl->block_group;
+ fs_info = block_group->fs_info;
+ extent_root = fs_info->extent_root;
+
path = btrfs_alloc_path();
if (!path)
- return -ENOMEM;
+ goto out;
last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
@@ -433,13 +438,11 @@ err:
free_excluded_extents(extent_root, block_group);
mutex_unlock(&caching_ctl->mutex);
+out:
wake_up(&caching_ctl->wait);
put_caching_control(caching_ctl);
- atomic_dec(&block_group->space_info->caching_threads);
btrfs_put_block_group(block_group);
-
- return 0;
}
static int cache_block_group(struct btrfs_block_group_cache *cache,
@@ -449,7 +452,6 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
{
struct btrfs_fs_info *fs_info = cache->fs_info;
struct btrfs_caching_control *caching_ctl;
- struct task_struct *tsk;
int ret = 0;
smp_mb();
@@ -501,6 +503,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
caching_ctl->progress = cache->key.objectid;
/* one for caching kthread, one for caching block group list */
atomic_set(&caching_ctl->count, 2);
+ caching_ctl->work.func = caching_thread;
spin_lock(&cache->lock);
if (cache->cached != BTRFS_CACHE_NO) {
@@ -516,16 +519,9 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
up_write(&fs_info->extent_commit_sem);
- atomic_inc(&cache->space_info->caching_threads);
btrfs_get_block_group(cache);
- tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
- cache->key.objectid);
- if (IS_ERR(tsk)) {
- ret = PTR_ERR(tsk);
- printk(KERN_ERR "error running thread %d\n", ret);
- BUG();
- }
+ btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work);
return ret;
}
@@ -667,7 +663,9 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
struct btrfs_path *path;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
+
key.objectid = start;
key.offset = len;
btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
@@ -2932,9 +2930,10 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->full = 0;
found->force_alloc = CHUNK_ALLOC_NO_FORCE;
found->chunk_alloc = 0;
+ found->flush = 0;
+ init_waitqueue_head(&found->wait);
*space_info = found;
list_add_rcu(&found->list, &info->space_info);
- atomic_set(&found->caching_threads, 0);
return 0;
}
@@ -3089,6 +3088,13 @@ alloc:
}
goto again;
}
+
+ /*
+ * If we have less pinned bytes than we want to allocate then
+ * don't bother committing the transaction, it won't help us.
+ */
+ if (data_sinfo->bytes_pinned < bytes)
+ committed = 1;
spin_unlock(&data_sinfo->lock);
/* commit the current transaction and try again */
@@ -3268,6 +3274,9 @@ again:
}
ret = btrfs_alloc_chunk(trans, extent_root, flags);
+ if (ret < 0 && ret != -ENOSPC)
+ goto out;
+
spin_lock(&space_info->lock);
if (ret)
space_info->full = 1;
@@ -3277,6 +3286,7 @@ again:
space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
space_info->chunk_alloc = 0;
spin_unlock(&space_info->lock);
+out:
mutex_unlock(&extent_root->fs_info->chunk_mutex);
return ret;
}
@@ -3307,9 +3317,13 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
if (reserved == 0)
return 0;
- /* nothing to shrink - nothing to reclaim */
- if (root->fs_info->delalloc_bytes == 0)
+ smp_mb();
+ if (root->fs_info->delalloc_bytes == 0) {
+ if (trans)
+ return 0;
+ btrfs_wait_ordered_extents(root, 0, 0);
return 0;
+ }
max_reclaim = min(reserved, to_reclaim);
@@ -3353,6 +3367,8 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
}
}
+ if (reclaimed >= to_reclaim && !trans)
+ btrfs_wait_ordered_extents(root, 0, 0);
return reclaimed >= to_reclaim;
}
@@ -3377,15 +3393,36 @@ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
u64 num_bytes = orig_bytes;
int retries = 0;
int ret = 0;
- bool reserved = false;
bool committed = false;
+ bool flushing = false;
again:
- ret = -ENOSPC;
- if (reserved)
- num_bytes = 0;
-
+ ret = 0;
spin_lock(&space_info->lock);
+ /*
+ * We only want to wait if somebody other than us is flushing and we are
+ * actually alloed to flush.
+ */
+ while (flush && !flushing && space_info->flush) {
+ spin_unlock(&space_info->lock);
+ /*
+ * If we have a trans handle we can't wait because the flusher
+ * may have to commit the transaction, which would mean we would
+ * deadlock since we are waiting for the flusher to finish, but
+ * hold the current transaction open.
+ */
+ if (trans)
+ return -EAGAIN;
+ ret = wait_event_interruptible(space_info->wait,
+ !space_info->flush);
+ /* Must have been interrupted, return */
+ if (ret)
+ return -EINTR;
+
+ spin_lock(&space_info->lock);
+ }
+
+ ret = -ENOSPC;
unused = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly +
space_info->bytes_may_use;
@@ -3400,8 +3437,7 @@ again:
if (unused <= space_info->total_bytes) {
unused = space_info->total_bytes - unused;
if (unused >= num_bytes) {
- if (!reserved)
- space_info->bytes_reserved += orig_bytes;
+ space_info->bytes_reserved += orig_bytes;
ret = 0;
} else {
/*
@@ -3426,17 +3462,14 @@ again:
* to reclaim space we can actually use it instead of somebody else
* stealing it from us.
*/
- if (ret && !reserved) {
- space_info->bytes_reserved += orig_bytes;
- reserved = true;
+ if (ret && flush) {
+ flushing = true;
+ space_info->flush = 1;
}
spin_unlock(&space_info->lock);
- if (!ret)
- return 0;
-
- if (!flush)
+ if (!ret || !flush)
goto out;
/*
@@ -3444,11 +3477,11 @@ again:
* metadata until after the IO is completed.
*/
ret = shrink_delalloc(trans, root, num_bytes, 1);
- if (ret > 0)
- return 0;
- else if (ret < 0)
+ if (ret < 0)
goto out;
+ ret = 0;
+
/*
* So if we were overcommitted it's possible that somebody else flushed
* out enough space and we simply didn't have enough space to reclaim,
@@ -3459,11 +3492,11 @@ again:
goto again;
}
- spin_lock(&space_info->lock);
/*
* Not enough space to be reclaimed, don't bother committing the
* transaction.
*/
+ spin_lock(&space_info->lock);
if (space_info->bytes_pinned < orig_bytes)
ret = -ENOSPC;
spin_unlock(&space_info->lock);
@@ -3471,10 +3504,13 @@ again:
goto out;
ret = -EAGAIN;
- if (trans || committed)
+ if (trans)
goto out;
ret = -ENOSPC;
+ if (committed)
+ goto out;
+
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
goto out;
@@ -3486,12 +3522,12 @@ again:
}
out:
- if (reserved) {
+ if (flushing) {
spin_lock(&space_info->lock);
- space_info->bytes_reserved -= orig_bytes;
+ space_info->flush = 0;
+ wake_up_all(&space_info->wait);
spin_unlock(&space_info->lock);
}
-
return ret;
}
@@ -3701,7 +3737,6 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
if (commit_trans) {
if (trans)
return -EAGAIN;
-
trans = btrfs_join_transaction(root);
BUG_ON(IS_ERR(trans));
ret = btrfs_commit_transaction(trans, root);
@@ -3871,26 +3906,6 @@ int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
return 0;
}
-int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- int num_items)
-{
- u64 num_bytes;
- int ret;
-
- if (num_items == 0 || root->fs_info->chunk_root == root)
- return 0;
-
- num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
- ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
- num_bytes);
- if (!ret) {
- trans->bytes_reserved += num_bytes;
- trans->block_rsv = &root->fs_info->trans_block_rsv;
- }
- return ret;
-}
-
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
@@ -3941,6 +3956,30 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
}
+static unsigned drop_outstanding_extent(struct inode *inode)
+{
+ unsigned dropped_extents = 0;
+
+ spin_lock(&BTRFS_I(inode)->lock);
+ BUG_ON(!BTRFS_I(inode)->outstanding_extents);
+ BTRFS_I(inode)->outstanding_extents--;
+
+ /*
+ * If we have more or the same amount of outsanding extents than we have
+ * reserved then we need to leave the reserved extents count alone.
+ */
+ if (BTRFS_I(inode)->outstanding_extents >=
+ BTRFS_I(inode)->reserved_extents)
+ goto out;
+
+ dropped_extents = BTRFS_I(inode)->reserved_extents -
+ BTRFS_I(inode)->outstanding_extents;
+ BTRFS_I(inode)->reserved_extents -= dropped_extents;
+out:
+ spin_unlock(&BTRFS_I(inode)->lock);
+ return dropped_extents;
+}
+
static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes)
{
return num_bytes >>= 3;
@@ -3950,9 +3989,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
- u64 to_reserve;
- int nr_extents;
- int reserved_extents;
+ u64 to_reserve = 0;
+ unsigned nr_extents = 0;
int ret;
if (btrfs_transaction_in_commit(root->fs_info))
@@ -3960,66 +3998,49 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
num_bytes = ALIGN(num_bytes, root->sectorsize);
- nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
- reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
+ spin_lock(&BTRFS_I(inode)->lock);
+ BTRFS_I(inode)->outstanding_extents++;
+
+ if (BTRFS_I(inode)->outstanding_extents >
+ BTRFS_I(inode)->reserved_extents) {
+ nr_extents = BTRFS_I(inode)->outstanding_extents -
+ BTRFS_I(inode)->reserved_extents;
+ BTRFS_I(inode)->reserved_extents += nr_extents;
- if (nr_extents > reserved_extents) {
- nr_extents -= reserved_extents;
to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
- } else {
- nr_extents = 0;
- to_reserve = 0;
}
+ spin_unlock(&BTRFS_I(inode)->lock);
to_reserve += calc_csum_metadata_size(inode, num_bytes);
ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
- if (ret)
+ if (ret) {
+ unsigned dropped;
+ /*
+ * We don't need the return value since our reservation failed,
+ * we just need to clean up our counter.
+ */
+ dropped = drop_outstanding_extent(inode);
+ WARN_ON(dropped > 1);
return ret;
-
- atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents);
- atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+ }
block_rsv_add_bytes(block_rsv, to_reserve, 1);
- if (block_rsv->size > 512 * 1024 * 1024)
- shrink_delalloc(NULL, root, to_reserve, 0);
-
return 0;
}
void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- u64 to_free;
- int nr_extents;
- int reserved_extents;
+ u64 to_free = 0;
+ unsigned dropped;
num_bytes = ALIGN(num_bytes, root->sectorsize);
- atomic_dec(&BTRFS_I(inode)->outstanding_extents);
- WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0);
-
- reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
- do {
- int old, new;
-
- nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
- if (nr_extents >= reserved_extents) {
- nr_extents = 0;
- break;
- }
- old = reserved_extents;
- nr_extents = reserved_extents - nr_extents;
- new = reserved_extents - nr_extents;
- old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents,
- reserved_extents, new);
- if (likely(old == reserved_extents))
- break;
- reserved_extents = old;
- } while (1);
+ dropped = drop_outstanding_extent(inode);
to_free = calc_csum_metadata_size(inode, num_bytes);
- if (nr_extents > 0)
- to_free += btrfs_calc_trans_metadata_size(root, nr_extents);
+ if (dropped > 0)
+ to_free += btrfs_calc_trans_metadata_size(root, dropped);
btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
to_free);
@@ -4441,7 +4462,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
printk(KERN_ERR "umm, got %d back from search"
", was looking for %llu\n", ret,
(unsigned long long)bytenr);
- btrfs_print_leaf(extent_root, path->nodes[0]);
+ if (ret > 0)
+ btrfs_print_leaf(extent_root,
+ path->nodes[0]);
}
BUG_ON(ret);
extent_slot = path->slots[0];
@@ -4839,7 +4862,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
u64 num_bytes, u64 empty_size,
u64 search_start, u64 search_end,
u64 hint_byte, struct btrfs_key *ins,
- int data)
+ u64 data)
{
int ret = 0;
struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -4866,7 +4889,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
space_info = __find_space_info(root->fs_info, data);
if (!space_info) {
- printk(KERN_ERR "No space info for %d\n", data);
+ printk(KERN_ERR "No space info for %llu\n", data);
return -ENOSPC;
}
@@ -4987,14 +5010,10 @@ have_block_group:
}
/*
- * We only want to start kthread caching if we are at
- * the point where we will wait for caching to make
- * progress, or if our ideal search is over and we've
- * found somebody to start caching.
+ * The caching workers are limited to 2 threads, so we
+ * can queue as much work as we care to.
*/
- if (loop > LOOP_CACHING_NOWAIT ||
- (loop > LOOP_FIND_IDEAL &&
- atomic_read(&space_info->caching_threads) < 2)) {
+ if (loop > LOOP_FIND_IDEAL) {
ret = cache_block_group(block_group, trans,
orig_root, 0);
BUG_ON(ret);
@@ -5062,7 +5081,9 @@ have_block_group:
* group is does point to and try again
*/
if (!last_ptr_loop && last_ptr->block_group &&
- last_ptr->block_group != block_group) {
+ last_ptr->block_group != block_group &&
+ index <=
+ get_block_group_index(last_ptr->block_group)) {
btrfs_put_block_group(block_group);
block_group = last_ptr->block_group;
@@ -5211,15 +5232,12 @@ loop:
* LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
* again
*/
- if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&
- (found_uncached_bg || empty_size || empty_cluster ||
- allowed_chunk_alloc)) {
+ if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) {
index = 0;
if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
found_uncached_bg = false;
loop++;
- if (!ideal_cache_percent &&
- atomic_read(&space_info->caching_threads))
+ if (!ideal_cache_percent)
goto search;
/*
@@ -5253,32 +5271,36 @@ loop:
goto search;
}
- if (loop < LOOP_CACHING_WAIT) {
- loop++;
- goto search;
- }
+ loop++;
if (loop == LOOP_ALLOC_CHUNK) {
- empty_size = 0;
- empty_cluster = 0;
- }
+ if (allowed_chunk_alloc) {
+ ret = do_chunk_alloc(trans, root, num_bytes +
+ 2 * 1024 * 1024, data,
+ CHUNK_ALLOC_LIMITED);
+ allowed_chunk_alloc = 0;
+ if (ret == 1)
+ done_chunk_alloc = 1;
+ } else if (!done_chunk_alloc &&
+ space_info->force_alloc ==
+ CHUNK_ALLOC_NO_FORCE) {
+ space_info->force_alloc = CHUNK_ALLOC_LIMITED;
+ }
- if (allowed_chunk_alloc) {
- ret = do_chunk_alloc(trans, root, num_bytes +
- 2 * 1024 * 1024, data,
- CHUNK_ALLOC_LIMITED);
- allowed_chunk_alloc = 0;
- done_chunk_alloc = 1;
- } else if (!done_chunk_alloc &&
- space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) {
- space_info->force_alloc = CHUNK_ALLOC_LIMITED;
+ /*
+ * We didn't allocate a chunk, go ahead and drop the
+ * empty size and loop again.
+ */
+ if (!done_chunk_alloc)
+ loop = LOOP_NO_EMPTY_SIZE;
}
- if (loop < LOOP_NO_EMPTY_SIZE) {
- loop++;
- goto search;
+ if (loop == LOOP_NO_EMPTY_SIZE) {
+ empty_size = 0;
+ empty_cluster = 0;
}
- ret = -ENOSPC;
+
+ goto search;
} else if (!ins->objectid) {
ret = -ENOSPC;
} else if (ins->objectid) {
@@ -5489,7 +5511,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref);
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
path->leave_spinning = 1;
ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
@@ -5618,7 +5641,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
if (!buf)
return ERR_PTR(-ENOMEM);
btrfs_set_header_generation(buf, trans->transid);
- btrfs_set_buffer_lockdep_class(buf, level);
+ btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
btrfs_tree_lock(buf);
clean_tree_block(trans, root, buf);
@@ -5905,7 +5928,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
return 1;
if (path->locks[level] && !wc->keep_locks) {
- btrfs_tree_unlock(eb);
+ btrfs_tree_unlock_rw(eb, path->locks[level]);
path->locks[level] = 0;
}
return 0;
@@ -5929,7 +5952,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
* keep the tree lock
*/
if (path->locks[level] && level > 0) {
- btrfs_tree_unlock(eb);
+ btrfs_tree_unlock_rw(eb, path->locks[level]);
path->locks[level] = 0;
}
return 0;
@@ -6042,7 +6065,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
BUG_ON(level != btrfs_header_level(next));
path->nodes[level] = next;
path->slots[level] = 0;
- path->locks[level] = 1;
+ path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
wc->level = level;
if (wc->level == 1)
wc->reada_slot = 0;
@@ -6113,7 +6136,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
BUG_ON(level == 0);
btrfs_tree_lock(eb);
btrfs_set_lock_blocking(eb);
- path->locks[level] = 1;
+ path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
ret = btrfs_lookup_extent_info(trans, root,
eb->start, eb->len,
@@ -6122,8 +6145,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
BUG_ON(ret);
BUG_ON(wc->refs[level] == 0);
if (wc->refs[level] == 1) {
- btrfs_tree_unlock(eb);
- path->locks[level] = 0;
+ btrfs_tree_unlock_rw(eb, path->locks[level]);
return 1;
}
}
@@ -6145,7 +6167,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
btrfs_header_generation(eb) == trans->transid) {
btrfs_tree_lock(eb);
btrfs_set_lock_blocking(eb);
- path->locks[level] = 1;
+ path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
}
clean_tree_block(trans, root, eb);
}
@@ -6224,7 +6246,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
return 0;
if (path->locks[level]) {
- btrfs_tree_unlock(path->nodes[level]);
+ btrfs_tree_unlock_rw(path->nodes[level],
+ path->locks[level]);
path->locks[level] = 0;
}
free_extent_buffer(path->nodes[level]);
@@ -6260,10 +6283,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
int level;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
wc = kzalloc(sizeof(*wc), GFP_NOFS);
- BUG_ON(!wc);
+ if (!wc) {
+ btrfs_free_path(path);
+ return -ENOMEM;
+ }
trans = btrfs_start_transaction(tree_root, 0);
BUG_ON(IS_ERR(trans));
@@ -6276,7 +6303,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
path->nodes[level] = btrfs_lock_root_node(root);
btrfs_set_lock_blocking(path->nodes[level]);
path->slots[level] = 0;
- path->locks[level] = 1;
+ path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
memset(&wc->update_progress, 0,
sizeof(wc->update_progress));
} else {
@@ -6444,7 +6471,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
level = btrfs_header_level(node);
path->nodes[level] = node;
path->slots[level] = 0;
- path->locks[level] = 1;
+ path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
wc->refs[parent_level] = 1;
wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
@@ -6519,30 +6546,48 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
return flags;
}
-static int set_block_group_ro(struct btrfs_block_group_cache *cache)
+static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
{
struct btrfs_space_info *sinfo = cache->space_info;
u64 num_bytes;
+ u64 min_allocable_bytes;
int ret = -ENOSPC;
- if (cache->ro)
- return 0;
+
+ /*
+ * We need some metadata space and system metadata space for
+ * allocating chunks in some corner cases until we force to set
+ * it to be readonly.
+ */
+ if ((sinfo->flags &
+ (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
+ !force)
+ min_allocable_bytes = 1 * 1024 * 1024;
+ else
+ min_allocable_bytes = 0;
spin_lock(&sinfo->lock);
spin_lock(&cache->lock);
+
+ if (cache->ro) {
+ ret = 0;
+ goto out;
+ }
+
num_bytes = cache->key.offset - cache->reserved - cache->pinned -
cache->bytes_super - btrfs_block_group_used(&cache->item);
if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
sinfo->bytes_may_use + sinfo->bytes_readonly +
- cache->reserved_pinned + num_bytes <= sinfo->total_bytes) {
+ cache->reserved_pinned + num_bytes + min_allocable_bytes <=
+ sinfo->total_bytes) {
sinfo->bytes_readonly += num_bytes;
sinfo->bytes_reserved += cache->reserved_pinned;
cache->reserved_pinned = 0;
cache->ro = 1;
ret = 0;
}
-
+out:
spin_unlock(&cache->lock);
spin_unlock(&sinfo->lock);
return ret;
@@ -6566,7 +6611,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
CHUNK_ALLOC_FORCE);
- ret = set_block_group_ro(cache);
+ ret = set_block_group_ro(cache, 0);
if (!ret)
goto out;
alloc_flags = get_alloc_profile(root, cache->space_info->flags);
@@ -6574,7 +6619,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
CHUNK_ALLOC_FORCE);
if (ret < 0)
goto out;
- ret = set_block_group_ro(cache);
+ ret = set_block_group_ro(cache, 0);
out:
btrfs_end_transaction(trans, root);
return ret;
@@ -7011,7 +7056,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
set_avail_alloc_bits(root->fs_info, cache->flags);
if (btrfs_chunk_readonly(root, cache->key.objectid))
- set_block_group_ro(cache);
+ set_block_group_ro(cache, 1);
}
list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
@@ -7025,9 +7070,9 @@ int btrfs_read_block_groups(struct btrfs_root *root)
* mirrored block groups.
*/
list_for_each_entry(cache, &space_info->block_groups[3], list)
- set_block_group_ro(cache);
+ set_block_group_ro(cache, 1);
list_for_each_entry(cache, &space_info->block_groups[4], list)
- set_block_group_ro(cache);
+ set_block_group_ro(cache, 1);
}
init_global_block_rsv(info);
@@ -7157,11 +7202,15 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&cluster->refill_lock);
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
inode = lookup_free_space_inode(root, block_group, path);
if (!IS_ERR(inode)) {
- btrfs_orphan_add(trans, inode);
+ ret = btrfs_orphan_add(trans, inode);
+ BUG_ON(ret);
clear_nlink(inode);
/* One for the block groups ref */
spin_lock(&block_group->lock);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 7055d11c1efd..d418164a35f1 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -254,14 +254,14 @@ static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
*
* This should be called with the tree lock held.
*/
-static int merge_state(struct extent_io_tree *tree,
- struct extent_state *state)
+static void merge_state(struct extent_io_tree *tree,
+ struct extent_state *state)
{
struct extent_state *other;
struct rb_node *other_node;
if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
- return 0;
+ return;
other_node = rb_prev(&state->rb_node);
if (other_node) {
@@ -281,26 +281,19 @@ static int merge_state(struct extent_io_tree *tree,
if (other->start == state->end + 1 &&
other->state == state->state) {
merge_cb(tree, state, other);
- other->start = state->start;
- state->tree = NULL;
- rb_erase(&state->rb_node, &tree->state);
- free_extent_state(state);
- state = NULL;
+ state->end = other->end;
+ other->tree = NULL;
+ rb_erase(&other->rb_node, &tree->state);
+ free_extent_state(other);
}
}
-
- return 0;
}
-static int set_state_cb(struct extent_io_tree *tree,
+static void set_state_cb(struct extent_io_tree *tree,
struct extent_state *state, int *bits)
{
- if (tree->ops && tree->ops->set_bit_hook) {
- return tree->ops->set_bit_hook(tree->mapping->host,
- state, bits);
- }
-
- return 0;
+ if (tree->ops && tree->ops->set_bit_hook)
+ tree->ops->set_bit_hook(tree->mapping->host, state, bits);
}
static void clear_state_cb(struct extent_io_tree *tree,
@@ -310,6 +303,9 @@ static void clear_state_cb(struct extent_io_tree *tree,
tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
}
+static void set_state_bits(struct extent_io_tree *tree,
+ struct extent_state *state, int *bits);
+
/*
* insert an extent_state struct into the tree. 'bits' are set on the
* struct before it is inserted.
@@ -325,8 +321,6 @@ static int insert_state(struct extent_io_tree *tree,
int *bits)
{
struct rb_node *node;
- int bits_to_set = *bits & ~EXTENT_CTLBITS;
- int ret;
if (end < start) {
printk(KERN_ERR "btrfs end < start %llu %llu\n",
@@ -336,13 +330,9 @@ static int insert_state(struct extent_io_tree *tree,
}
state->start = start;
state->end = end;
- ret = set_state_cb(tree, state, bits);
- if (ret)
- return ret;
- if (bits_to_set & EXTENT_DIRTY)
- tree->dirty_bytes += end - start + 1;
- state->state |= bits_to_set;
+ set_state_bits(tree, state, bits);
+
node = tree_insert(&tree->state, end, &state->rb_node);
if (node) {
struct extent_state *found;
@@ -351,7 +341,6 @@ static int insert_state(struct extent_io_tree *tree,
"%llu %llu\n", (unsigned long long)found->start,
(unsigned long long)found->end,
(unsigned long long)start, (unsigned long long)end);
- free_extent_state(state);
return -EEXIST;
}
state->tree = tree;
@@ -359,13 +348,11 @@ static int insert_state(struct extent_io_tree *tree,
return 0;
}
-static int split_cb(struct extent_io_tree *tree, struct extent_state *orig,
+static void split_cb(struct extent_io_tree *tree, struct extent_state *orig,
u64 split)
{
if (tree->ops && tree->ops->split_extent_hook)
- return tree->ops->split_extent_hook(tree->mapping->host,
- orig, split);
- return 0;
+ tree->ops->split_extent_hook(tree->mapping->host, orig, split);
}
/*
@@ -500,7 +487,8 @@ again:
cached_state = NULL;
}
- if (cached && cached->tree && cached->start == start) {
+ if (cached && cached->tree && cached->start <= start &&
+ cached->end > start) {
if (clear)
atomic_dec(&cached->refs);
state = cached;
@@ -660,34 +648,25 @@ again:
if (start > end)
break;
- if (need_resched()) {
- spin_unlock(&tree->lock);
- cond_resched();
- spin_lock(&tree->lock);
- }
+ cond_resched_lock(&tree->lock);
}
out:
spin_unlock(&tree->lock);
return 0;
}
-static int set_state_bits(struct extent_io_tree *tree,
+static void set_state_bits(struct extent_io_tree *tree,
struct extent_state *state,
int *bits)
{
- int ret;
int bits_to_set = *bits & ~EXTENT_CTLBITS;
- ret = set_state_cb(tree, state, bits);
- if (ret)
- return ret;
+ set_state_cb(tree, state, bits);
if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
u64 range = state->end - state->start + 1;
tree->dirty_bytes += range;
}
state->state |= bits_to_set;
-
- return 0;
}
static void cache_state(struct extent_state *state,
@@ -742,7 +721,8 @@ again:
spin_lock(&tree->lock);
if (cached_state && *cached_state) {
state = *cached_state;
- if (state->start == start && state->tree) {
+ if (state->start <= start && state->end > start &&
+ state->tree) {
node = &state->rb_node;
goto hit_next;
}
@@ -779,17 +759,15 @@ hit_next:
goto out;
}
- err = set_state_bits(tree, state, &bits);
- if (err)
- goto out;
+ set_state_bits(tree, state, &bits);
- next_node = rb_next(node);
cache_state(state, cached_state);
merge_state(tree, state);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
+ next_node = rb_next(&state->rb_node);
if (next_node && start < end && prealloc && !need_resched()) {
state = rb_entry(next_node, struct extent_state,
rb_node);
@@ -830,9 +808,7 @@ hit_next:
if (err)
goto out;
if (state->end <= end) {
- err = set_state_bits(tree, state, &bits);
- if (err)
- goto out;
+ set_state_bits(tree, state, &bits);
cache_state(state, cached_state);
merge_state(tree, state);
if (last_end == (u64)-1)
@@ -862,7 +838,6 @@ hit_next:
* Avoid to free 'prealloc' if it can be merged with
* the later extent.
*/
- atomic_inc(&prealloc->refs);
err = insert_state(tree, prealloc, start, this_end,
&bits);
BUG_ON(err == -EEXIST);
@@ -872,7 +847,6 @@ hit_next:
goto out;
}
cache_state(prealloc, cached_state);
- free_extent_state(prealloc);
prealloc = NULL;
start = this_end + 1;
goto search_again;
@@ -895,11 +869,7 @@ hit_next:
err = split_state(tree, state, prealloc, end + 1);
BUG_ON(err == -EEXIST);
- err = set_state_bits(tree, prealloc, &bits);
- if (err) {
- prealloc = NULL;
- goto out;
- }
+ set_state_bits(tree, prealloc, &bits);
cache_state(prealloc, cached_state);
merge_state(tree, prealloc);
prealloc = NULL;
@@ -1061,46 +1031,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
return 0;
}
-/*
- * find the first offset in the io tree with 'bits' set. zero is
- * returned if we find something, and *start_ret and *end_ret are
- * set to reflect the state struct that was found.
- *
- * If nothing was found, 1 is returned, < 0 on error
- */
-int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
- u64 *start_ret, u64 *end_ret, int bits)
-{
- struct rb_node *node;
- struct extent_state *state;
- int ret = 1;
-
- spin_lock(&tree->lock);
- /*
- * this search will find all the extents that end after
- * our range starts.
- */
- node = tree_search(tree, start);
- if (!node)
- goto out;
-
- while (1) {
- state = rb_entry(node, struct extent_state, rb_node);
- if (state->end >= start && (state->state & bits)) {
- *start_ret = state->start;
- *end_ret = state->end;
- ret = 0;
- break;
- }
- node = rb_next(node);
- if (!node)
- break;
- }
-out:
- spin_unlock(&tree->lock);
- return ret;
-}
-
/* find the first state struct with 'bits' set after 'start', and
* return it. tree->lock must be held. NULL will returned if
* nothing was found after 'start'
@@ -1133,6 +1063,30 @@ out:
}
/*
+ * find the first offset in the io tree with 'bits' set. zero is
+ * returned if we find something, and *start_ret and *end_ret are
+ * set to reflect the state struct that was found.
+ *
+ * If nothing was found, 1 is returned, < 0 on error
+ */
+int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
+ u64 *start_ret, u64 *end_ret, int bits)
+{
+ struct extent_state *state;
+ int ret = 1;
+
+ spin_lock(&tree->lock);
+ state = find_first_extent_bit_state(tree, start, bits);
+ if (state) {
+ *start_ret = state->start;
+ *end_ret = state->end;
+ ret = 0;
+ }
+ spin_unlock(&tree->lock);
+ return ret;
+}
+
+/*
* find a contiguous range of bytes in the file marked as delalloc, not
* more than 'max_bytes'. start and end are used to return the range,
*
@@ -1564,7 +1518,8 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
int bitset = 0;
spin_lock(&tree->lock);
- if (cached && cached->tree && cached->start == start)
+ if (cached && cached->tree && cached->start <= start &&
+ cached->end > start)
node = &cached->rb_node;
else
node = tree_search(tree, start);
@@ -2432,6 +2387,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
pgoff_t index;
pgoff_t end; /* Inclusive */
int scanned = 0;
+ int tag;
pagevec_init(&pvec, 0);
if (wbc->range_cyclic) {
@@ -2442,11 +2398,16 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
end = wbc->range_end >> PAGE_CACHE_SHIFT;
scanned = 1;
}
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ tag = PAGECACHE_TAG_TOWRITE;
+ else
+ tag = PAGECACHE_TAG_DIRTY;
retry:
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ tag_pages_for_writeback(mapping, index, end);
while (!done && !nr_to_write_done && (index <= end) &&
- (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
- PAGECACHE_TAG_DIRTY, min(end - index,
- (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+ (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
+ min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
unsigned i;
scanned = 1;
@@ -2541,7 +2502,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
struct writeback_control *wbc)
{
int ret;
- struct address_space *mapping = page->mapping;
struct extent_page_data epd = {
.bio = NULL,
.tree = tree,
@@ -2549,18 +2509,9 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
.extent_locked = 0,
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
- struct writeback_control wbc_writepages = {
- .sync_mode = wbc->sync_mode,
- .older_than_this = NULL,
- .nr_to_write = 64,
- .range_start = page_offset(page) + PAGE_CACHE_SIZE,
- .range_end = (loff_t)-1,
- };
ret = __extent_writepage(page, wbc, &epd);
- extent_write_cache_pages(tree, mapping, &wbc_writepages,
- __extent_writepage, &epd, flush_write_bio);
flush_epd_write_bio(&epd);
return ret;
}
@@ -2584,7 +2535,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
};
struct writeback_control wbc_writepages = {
.sync_mode = mode,
- .older_than_this = NULL,
.nr_to_write = nr_pages * 2,
.range_start = start,
.range_end = end + 1,
@@ -3022,8 +2972,15 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
return NULL;
eb->start = start;
eb->len = len;
- spin_lock_init(&eb->lock);
- init_waitqueue_head(&eb->lock_wq);
+ rwlock_init(&eb->lock);
+ atomic_set(&eb->write_locks, 0);
+ atomic_set(&eb->read_locks, 0);
+ atomic_set(&eb->blocking_readers, 0);
+ atomic_set(&eb->blocking_writers, 0);
+ atomic_set(&eb->spinning_readers, 0);
+ atomic_set(&eb->spinning_writers, 0);
+ init_waitqueue_head(&eb->write_lock_wq);
+ init_waitqueue_head(&eb->read_lock_wq);
#if LEAK_DEBUG
spin_lock_irqsave(&leak_lock, flags);
@@ -3119,7 +3076,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
i = 0;
}
for (; i < num_pages; i++, index++) {
- p = find_or_create_page(mapping, index, GFP_NOFS | __GFP_HIGHMEM);
+ p = find_or_create_page(mapping, index, GFP_NOFS);
if (!p) {
WARN_ON(1);
goto free_eb;
@@ -3266,6 +3223,22 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree,
return was_dirty;
}
+static int __eb_straddles_pages(u64 start, u64 len)
+{
+ if (len < PAGE_CACHE_SIZE)
+ return 1;
+ if (start & (PAGE_CACHE_SIZE - 1))
+ return 1;
+ if ((start + len) & (PAGE_CACHE_SIZE - 1))
+ return 1;
+ return 0;
+}
+
+static int eb_straddles_pages(struct extent_buffer *eb)
+{
+ return __eb_straddles_pages(eb->start, eb->len);
+}
+
int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
struct extent_buffer *eb,
struct extent_state **cached_state)
@@ -3277,8 +3250,10 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
num_pages = num_extent_pages(eb->start, eb->len);
clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
- clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
- cached_state, GFP_NOFS);
+ if (eb_straddles_pages(eb)) {
+ clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
+ cached_state, GFP_NOFS);
+ }
for (i = 0; i < num_pages; i++) {
page = extent_buffer_page(eb, i);
if (page)
@@ -3296,8 +3271,10 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree,
num_pages = num_extent_pages(eb->start, eb->len);
- set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
- NULL, GFP_NOFS);
+ if (eb_straddles_pages(eb)) {
+ set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
+ NULL, GFP_NOFS);
+ }
for (i = 0; i < num_pages; i++) {
page = extent_buffer_page(eb, i);
if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
@@ -3320,9 +3297,12 @@ int extent_range_uptodate(struct extent_io_tree *tree,
int uptodate;
unsigned long index;
- ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL);
- if (ret)
- return 1;
+ if (__eb_straddles_pages(start, end - start + 1)) {
+ ret = test_range_bit(tree, start, end,
+ EXTENT_UPTODATE, 1, NULL);
+ if (ret)
+ return 1;
+ }
while (start <= end) {
index = start >> PAGE_CACHE_SHIFT;
page = find_get_page(tree->mapping, index);
@@ -3350,10 +3330,12 @@ int extent_buffer_uptodate(struct extent_io_tree *tree,
if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
return 1;
- ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
- EXTENT_UPTODATE, 1, cached_state);
- if (ret)
- return ret;
+ if (eb_straddles_pages(eb)) {
+ ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+ EXTENT_UPTODATE, 1, cached_state);
+ if (ret)
+ return ret;
+ }
num_pages = num_extent_pages(eb->start, eb->len);
for (i = 0; i < num_pages; i++) {
@@ -3386,9 +3368,11 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
return 0;
- if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
- EXTENT_UPTODATE, 1, NULL)) {
- return 0;
+ if (eb_straddles_pages(eb)) {
+ if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+ EXTENT_UPTODATE, 1, NULL)) {
+ return 0;
+ }
}
if (start) {
@@ -3492,9 +3476,8 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
page = extent_buffer_page(eb, i);
cur = min(len, (PAGE_CACHE_SIZE - offset));
- kaddr = kmap_atomic(page, KM_USER1);
+ kaddr = page_address(page);
memcpy(dst, kaddr + offset, cur);
- kunmap_atomic(kaddr, KM_USER1);
dst += cur;
len -= cur;
@@ -3504,9 +3487,9 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
}
int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
- unsigned long min_len, char **token, char **map,
+ unsigned long min_len, char **map,
unsigned long *map_start,
- unsigned long *map_len, int km)
+ unsigned long *map_len)
{
size_t offset = start & (PAGE_CACHE_SIZE - 1);
char *kaddr;
@@ -3536,42 +3519,12 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
}
p = extent_buffer_page(eb, i);
- kaddr = kmap_atomic(p, km);
- *token = kaddr;
+ kaddr = page_address(p);
*map = kaddr + offset;
*map_len = PAGE_CACHE_SIZE - offset;
return 0;
}
-int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
- unsigned long min_len,
- char **token, char **map,
- unsigned long *map_start,
- unsigned long *map_len, int km)
-{
- int err;
- int save = 0;
- if (eb->map_token) {
- unmap_extent_buffer(eb, eb->map_token, km);
- eb->map_token = NULL;
- save = 1;
- }
- err = map_private_extent_buffer(eb, start, min_len, token, map,
- map_start, map_len, km);
- if (!err && save) {
- eb->map_token = *token;
- eb->kaddr = *map;
- eb->map_start = *map_start;
- eb->map_len = *map_len;
- }
- return err;
-}
-
-void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
-{
- kunmap_atomic(token, km);
-}
-
int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
unsigned long start,
unsigned long len)
@@ -3595,9 +3548,8 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
cur = min(len, (PAGE_CACHE_SIZE - offset));
- kaddr = kmap_atomic(page, KM_USER0);
+ kaddr = page_address(page);
ret = memcmp(ptr, kaddr + offset, cur);
- kunmap_atomic(kaddr, KM_USER0);
if (ret)
break;
@@ -3630,9 +3582,8 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
WARN_ON(!PageUptodate(page));
cur = min(len, PAGE_CACHE_SIZE - offset);
- kaddr = kmap_atomic(page, KM_USER1);
+ kaddr = page_address(page);
memcpy(kaddr + offset, src, cur);
- kunmap_atomic(kaddr, KM_USER1);
src += cur;
len -= cur;
@@ -3661,9 +3612,8 @@ void memset_extent_buffer(struct extent_buffer *eb, char c,
WARN_ON(!PageUptodate(page));
cur = min(len, PAGE_CACHE_SIZE - offset);
- kaddr = kmap_atomic(page, KM_USER0);
+ kaddr = page_address(page);
memset(kaddr + offset, c, cur);
- kunmap_atomic(kaddr, KM_USER0);
len -= cur;
offset = 0;
@@ -3694,9 +3644,8 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
- kaddr = kmap_atomic(page, KM_USER0);
+ kaddr = page_address(page);
read_extent_buffer(src, kaddr + offset, src_offset, cur);
- kunmap_atomic(kaddr, KM_USER0);
src_offset += cur;
len -= cur;
@@ -3709,20 +3658,17 @@ static void move_pages(struct page *dst_page, struct page *src_page,
unsigned long dst_off, unsigned long src_off,
unsigned long len)
{
- char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
+ char *dst_kaddr = page_address(dst_page);
if (dst_page == src_page) {
memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
} else {
- char *src_kaddr = kmap_atomic(src_page, KM_USER1);
+ char *src_kaddr = page_address(src_page);
char *p = dst_kaddr + dst_off + len;
char *s = src_kaddr + src_off + len;
while (len--)
*--p = *--s;
-
- kunmap_atomic(src_kaddr, KM_USER1);
}
- kunmap_atomic(dst_kaddr, KM_USER0);
}
static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
@@ -3735,20 +3681,17 @@ static void copy_pages(struct page *dst_page, struct page *src_page,
unsigned long dst_off, unsigned long src_off,
unsigned long len)
{
- char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
+ char *dst_kaddr = page_address(dst_page);
char *src_kaddr;
if (dst_page != src_page) {
- src_kaddr = kmap_atomic(src_page, KM_USER1);
+ src_kaddr = page_address(src_page);
} else {
src_kaddr = dst_kaddr;
BUG_ON(areas_overlap(src_off, dst_off, len));
}
memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
- kunmap_atomic(dst_kaddr, KM_USER0);
- if (dst_page != src_page)
- kunmap_atomic(src_kaddr, KM_USER1);
}
void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 4e8445a4757c..7b2f0c3e7929 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -76,15 +76,15 @@ struct extent_io_ops {
struct extent_state *state);
int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
struct extent_state *state, int uptodate);
- int (*set_bit_hook)(struct inode *inode, struct extent_state *state,
- int *bits);
- int (*clear_bit_hook)(struct inode *inode, struct extent_state *state,
- int *bits);
- int (*merge_extent_hook)(struct inode *inode,
- struct extent_state *new,
- struct extent_state *other);
- int (*split_extent_hook)(struct inode *inode,
- struct extent_state *orig, u64 split);
+ void (*set_bit_hook)(struct inode *inode, struct extent_state *state,
+ int *bits);
+ void (*clear_bit_hook)(struct inode *inode, struct extent_state *state,
+ int *bits);
+ void (*merge_extent_hook)(struct inode *inode,
+ struct extent_state *new,
+ struct extent_state *other);
+ void (*split_extent_hook)(struct inode *inode,
+ struct extent_state *orig, u64 split);
int (*write_cache_pages_lock_hook)(struct page *page);
};
@@ -108,8 +108,6 @@ struct extent_state {
wait_queue_head_t wq;
atomic_t refs;
unsigned long state;
- u64 split_start;
- u64 split_end;
/* for use by the FS */
u64 private;
@@ -120,24 +118,34 @@ struct extent_state {
struct extent_buffer {
u64 start;
unsigned long len;
- char *map_token;
- char *kaddr;
unsigned long map_start;
unsigned long map_len;
struct page *first_page;
unsigned long bflags;
- atomic_t refs;
struct list_head leak_list;
struct rcu_head rcu_head;
+ atomic_t refs;
- /* the spinlock is used to protect most operations */
- spinlock_t lock;
+ /* count of read lock holders on the extent buffer */
+ atomic_t write_locks;
+ atomic_t read_locks;
+ atomic_t blocking_writers;
+ atomic_t blocking_readers;
+ atomic_t spinning_readers;
+ atomic_t spinning_writers;
+
+ /* protects write locks */
+ rwlock_t lock;
- /*
- * when we keep the lock held while blocking, waiters go onto
- * the wq
+ /* readers use lock_wq while they wait for the write
+ * lock holders to unlock
*/
- wait_queue_head_t lock_wq;
+ wait_queue_head_t write_lock_wq;
+
+ /* writers use read_lock_wq while they wait for readers
+ * to unlock
+ */
+ wait_queue_head_t read_lock_wq;
};
static inline void extent_set_compress_type(unsigned long *bio_flags,
@@ -279,15 +287,10 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
int extent_buffer_uptodate(struct extent_io_tree *tree,
struct extent_buffer *eb,
struct extent_state *cached_state);
-int map_extent_buffer(struct extent_buffer *eb, unsigned long offset,
- unsigned long min_len, char **token, char **map,
- unsigned long *map_start,
- unsigned long *map_len, int km);
int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
- unsigned long min_len, char **token, char **map,
+ unsigned long min_len, char **map,
unsigned long *map_start,
- unsigned long *map_len, int km);
-void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km);
+ unsigned long *map_len);
int extent_range_uptodate(struct extent_io_tree *tree,
u64 start, u64 end);
int extent_clear_unlock_delalloc(struct inode *inode,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 2d0410344ea3..7c97b3301459 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -183,22 +183,10 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
return 0;
}
-int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len)
+static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
{
- int ret = 0;
struct extent_map *merge = NULL;
struct rb_node *rb;
- struct extent_map *em;
-
- write_lock(&tree->lock);
- em = lookup_extent_mapping(tree, start, len);
-
- WARN_ON(!em || em->start != start);
-
- if (!em)
- goto out;
-
- clear_bit(EXTENT_FLAG_PINNED, &em->flags);
if (em->start != 0) {
rb = rb_prev(&em->rb_node);
@@ -225,6 +213,24 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len)
merge->in_tree = 0;
free_extent_map(merge);
}
+}
+
+int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len)
+{
+ int ret = 0;
+ struct extent_map *em;
+
+ write_lock(&tree->lock);
+ em = lookup_extent_mapping(tree, start, len);
+
+ WARN_ON(!em || em->start != start);
+
+ if (!em)
+ goto out;
+
+ clear_bit(EXTENT_FLAG_PINNED, &em->flags);
+
+ try_merge_map(tree, em);
free_extent_map(em);
out:
@@ -247,7 +253,6 @@ int add_extent_mapping(struct extent_map_tree *tree,
struct extent_map *em)
{
int ret = 0;
- struct extent_map *merge = NULL;
struct rb_node *rb;
struct extent_map *exist;
@@ -263,30 +268,8 @@ int add_extent_mapping(struct extent_map_tree *tree,
goto out;
}
atomic_inc(&em->refs);
- if (em->start != 0) {
- rb = rb_prev(&em->rb_node);
- if (rb)
- merge = rb_entry(rb, struct extent_map, rb_node);
- if (rb && mergable_maps(merge, em)) {
- em->start = merge->start;
- em->len += merge->len;
- em->block_len += merge->block_len;
- em->block_start = merge->block_start;
- merge->in_tree = 0;
- rb_erase(&merge->rb_node, &tree->map);
- free_extent_map(merge);
- }
- }
- rb = rb_next(&em->rb_node);
- if (rb)
- merge = rb_entry(rb, struct extent_map, rb_node);
- if (rb && mergable_maps(em, merge)) {
- em->len += merge->len;
- em->block_len += merge->len;
- rb_erase(&merge->rb_node, &tree->map);
- merge->in_tree = 0;
- free_extent_map(merge);
- }
+
+ try_merge_map(tree, em);
out:
return ret;
}
@@ -299,19 +282,8 @@ static u64 range_end(u64 start, u64 len)
return start + len;
}
-/**
- * lookup_extent_mapping - lookup extent_map
- * @tree: tree to lookup in
- * @start: byte offset to start the search
- * @len: length of the lookup range
- *
- * Find and return the first extent_map struct in @tree that intersects the
- * [start, len] range. There may be additional objects in the tree that
- * intersect, so check the object returned carefully to make sure that no
- * additional lookups are needed.
- */
-struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
- u64 start, u64 len)
+struct extent_map *__lookup_extent_mapping(struct extent_map_tree *tree,
+ u64 start, u64 len, int strict)
{
struct extent_map *em;
struct rb_node *rb_node;
@@ -320,38 +292,42 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
u64 end = range_end(start, len);
rb_node = __tree_search(&tree->map, start, &prev, &next);
- if (!rb_node && prev) {
- em = rb_entry(prev, struct extent_map, rb_node);
- if (end > em->start && start < extent_map_end(em))
- goto found;
- }
- if (!rb_node && next) {
- em = rb_entry(next, struct extent_map, rb_node);
- if (end > em->start && start < extent_map_end(em))
- goto found;
- }
if (!rb_node) {
- em = NULL;
- goto out;
- }
- if (IS_ERR(rb_node)) {
- em = ERR_CAST(rb_node);
- goto out;
+ if (prev)
+ rb_node = prev;
+ else if (next)
+ rb_node = next;
+ else
+ return NULL;
}
+
em = rb_entry(rb_node, struct extent_map, rb_node);
- if (end > em->start && start < extent_map_end(em))
- goto found;
- em = NULL;
- goto out;
+ if (strict && !(end > em->start && start < extent_map_end(em)))
+ return NULL;
-found:
atomic_inc(&em->refs);
-out:
return em;
}
/**
+ * lookup_extent_mapping - lookup extent_map
+ * @tree: tree to lookup in
+ * @start: byte offset to start the search
+ * @len: length of the lookup range
+ *
+ * Find and return the first extent_map struct in @tree that intersects the
+ * [start, len] range. There may be additional objects in the tree that
+ * intersect, so check the object returned carefully to make sure that no
+ * additional lookups are needed.
+ */
+struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
+ u64 start, u64 len)
+{
+ return __lookup_extent_mapping(tree, start, len, 1);
+}
+
+/**
* search_extent_mapping - find a nearby extent map
* @tree: tree to lookup in
* @start: byte offset to start the search
@@ -365,38 +341,7 @@ out:
struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
u64 start, u64 len)
{
- struct extent_map *em;
- struct rb_node *rb_node;
- struct rb_node *prev = NULL;
- struct rb_node *next = NULL;
-
- rb_node = __tree_search(&tree->map, start, &prev, &next);
- if (!rb_node && prev) {
- em = rb_entry(prev, struct extent_map, rb_node);
- goto found;
- }
- if (!rb_node && next) {
- em = rb_entry(next, struct extent_map, rb_node);
- goto found;
- }
- if (!rb_node) {
- em = NULL;
- goto out;
- }
- if (IS_ERR(rb_node)) {
- em = ERR_CAST(rb_node);
- goto out;
- }
- em = rb_entry(rb_node, struct extent_map, rb_node);
- goto found;
-
- em = NULL;
- goto out;
-
-found:
- atomic_inc(&em->refs);
-out:
- return em;
+ return __lookup_extent_mapping(tree, start, len, 0);
}
/**
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 90d4ee52cd45..b910694f61ed 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -177,6 +177,15 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
WARN_ON(bio->bi_vcnt <= 0);
+ /*
+ * the free space stuff is only read when it hasn't been
+ * updated in the current transaction. So, we can safely
+ * read from the commit root and sidestep a nasty deadlock
+ * between reading the free space cache and updating the csum tree.
+ */
+ if (btrfs_is_free_space_inode(root, inode))
+ path->search_commit_root = 1;
+
disk_bytenr = (u64)bio->bi_sector << 9;
if (dio)
offset = logical_offset;
@@ -282,7 +291,8 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy);
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
if (search_commit) {
path->skip_locking = 1;
@@ -664,15 +674,13 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_sector_sum *sector_sum;
u32 nritems;
u32 ins_size;
- char *eb_map;
- char *eb_token;
- unsigned long map_len;
- unsigned long map_start;
u16 csum_size =
btrfs_super_csum_size(&root->fs_info->super_copy);
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
+
sector_sum = sums->sums;
again:
next_offset = (u64)-1;
@@ -814,30 +822,9 @@ found:
item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
item_end = (struct btrfs_csum_item *)((unsigned char *)item_end +
btrfs_item_size_nr(leaf, path->slots[0]));
- eb_token = NULL;
next_sector:
- if (!eb_token ||
- (unsigned long)item + csum_size >= map_start + map_len) {
- int err;
-
- if (eb_token)
- unmap_extent_buffer(leaf, eb_token, KM_USER1);
- eb_token = NULL;
- err = map_private_extent_buffer(leaf, (unsigned long)item,
- csum_size,
- &eb_token, &eb_map,
- &map_start, &map_len, KM_USER1);
- if (err)
- eb_token = NULL;
- }
- if (eb_token) {
- memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)),
- &sector_sum->sum, csum_size);
- } else {
- write_extent_buffer(leaf, &sector_sum->sum,
- (unsigned long)item, csum_size);
- }
+ write_extent_buffer(leaf, &sector_sum->sum, (unsigned long)item, csum_size);
total_bytes += root->sectorsize;
sector_sum++;
@@ -850,10 +837,7 @@ next_sector:
goto next_sector;
}
}
- if (eb_token) {
- unmap_extent_buffer(leaf, eb_token, KM_USER1);
- eb_token = NULL;
- }
+
btrfs_mark_buffer_dirty(path->nodes[0]);
if (total_bytes < sums->len) {
btrfs_release_path(path);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index fa4ef18b66b1..658d66959abe 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -74,7 +74,7 @@ struct inode_defrag {
* If an existing record is found the defrag item you
* pass in is freed
*/
-static int __btrfs_add_inode_defrag(struct inode *inode,
+static void __btrfs_add_inode_defrag(struct inode *inode,
struct inode_defrag *defrag)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -106,11 +106,11 @@ static int __btrfs_add_inode_defrag(struct inode *inode,
BTRFS_I(inode)->in_defrag = 1;
rb_link_node(&defrag->rb_node, parent, p);
rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
- return 0;
+ return;
exists:
kfree(defrag);
- return 0;
+ return;
}
@@ -123,7 +123,6 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct inode_defrag *defrag;
- int ret = 0;
u64 transid;
if (!btrfs_test_opt(root, AUTO_DEFRAG))
@@ -150,9 +149,9 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
spin_lock(&root->fs_info->defrag_inodes_lock);
if (!BTRFS_I(inode)->in_defrag)
- ret = __btrfs_add_inode_defrag(inode, defrag);
+ __btrfs_add_inode_defrag(inode, defrag);
spin_unlock(&root->fs_info->defrag_inodes_lock);
- return ret;
+ return 0;
}
/*
@@ -855,7 +854,8 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
btrfs_drop_extent_cache(inode, start, end - 1, 0);
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
again:
recow = 0;
split = start;
@@ -1059,7 +1059,7 @@ static int prepare_uptodate_page(struct page *page, u64 pos)
static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
struct page **pages, size_t num_pages,
loff_t pos, unsigned long first_index,
- unsigned long last_index, size_t write_bytes)
+ size_t write_bytes)
{
struct extent_state *cached_state = NULL;
int i;
@@ -1081,7 +1081,8 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
again:
for (i = 0; i < num_pages; i++) {
- pages[i] = grab_cache_page(inode->i_mapping, index + i);
+ pages[i] = find_or_create_page(inode->i_mapping, index + i,
+ GFP_NOFS);
if (!pages[i]) {
faili = i - 1;
err = -ENOMEM;
@@ -1158,7 +1159,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
struct btrfs_root *root = BTRFS_I(inode)->root;
struct page **pages = NULL;
unsigned long first_index;
- unsigned long last_index;
size_t num_written = 0;
int nrptrs;
int ret = 0;
@@ -1171,7 +1171,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
return -ENOMEM;
first_index = pos >> PAGE_CACHE_SHIFT;
- last_index = (pos + iov_iter_count(i)) >> PAGE_CACHE_SHIFT;
while (iov_iter_count(i) > 0) {
size_t offset = pos & (PAGE_CACHE_SIZE - 1);
@@ -1205,8 +1204,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
* contents of pages from loop to loop
*/
ret = prepare_pages(root, file, pages, num_pages,
- pos, first_index, last_index,
- write_bytes);
+ pos, first_index, write_bytes);
if (ret) {
btrfs_delalloc_release_space(inode,
num_pages << PAGE_CACHE_SHIFT);
@@ -1238,9 +1236,11 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
* managed to copy.
*/
if (num_pages > dirty_pages) {
- if (copied > 0)
- atomic_inc(
- &BTRFS_I(inode)->outstanding_extents);
+ if (copied > 0) {
+ spin_lock(&BTRFS_I(inode)->lock);
+ BTRFS_I(inode)->outstanding_extents++;
+ spin_unlock(&BTRFS_I(inode)->lock);
+ }
btrfs_delalloc_release_space(inode,
(num_pages - dirty_pages) <<
PAGE_CACHE_SHIFT);
@@ -1452,7 +1452,7 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
* important optimization for directories because holding the mutex prevents
* new operations on the dir while we write to disk.
*/
-int btrfs_sync_file(struct file *file, int datasync)
+int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = dentry->d_inode;
@@ -1462,9 +1462,13 @@ int btrfs_sync_file(struct file *file, int datasync)
trace_btrfs_sync_file(file, datasync);
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret)
+ return ret;
+ mutex_lock(&inode->i_mutex);
+
/* we wait first, since the writeback may change the inode */
root->log_batch++;
- /* the VFS called filemap_fdatawrite for us */
btrfs_wait_ordered_range(inode, 0, (u64)-1);
root->log_batch++;
@@ -1472,8 +1476,10 @@ int btrfs_sync_file(struct file *file, int datasync)
* check the transaction that last modified this inode
* and see if its already been committed
*/
- if (!BTRFS_I(inode)->last_trans)
+ if (!BTRFS_I(inode)->last_trans) {
+ mutex_unlock(&inode->i_mutex);
goto out;
+ }
/*
* if the last transaction that changed this file was before
@@ -1484,6 +1490,7 @@ int btrfs_sync_file(struct file *file, int datasync)
if (BTRFS_I(inode)->last_trans <=
root->fs_info->last_trans_committed) {
BTRFS_I(inode)->last_trans = 0;
+ mutex_unlock(&inode->i_mutex);
goto out;
}
@@ -1496,12 +1503,15 @@ int btrfs_sync_file(struct file *file, int datasync)
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
+ mutex_unlock(&inode->i_mutex);
goto out;
}
ret = btrfs_log_dentry_safe(trans, root, dentry);
- if (ret < 0)
+ if (ret < 0) {
+ mutex_unlock(&inode->i_mutex);
goto out;
+ }
/* we've logged all the items and now have a consistent
* version of the file in the log. It is possible that
@@ -1513,7 +1523,7 @@ int btrfs_sync_file(struct file *file, int datasync)
* file again, but that will end up using the synchronization
* inside btrfs_sync_log to keep things safe.
*/
- mutex_unlock(&dentry->d_inode->i_mutex);
+ mutex_unlock(&inode->i_mutex);
if (ret != BTRFS_NO_LOG_SYNC) {
if (ret > 0) {
@@ -1528,7 +1538,6 @@ int btrfs_sync_file(struct file *file, int datasync)
} else {
ret = btrfs_end_transaction(trans, root);
}
- mutex_lock(&dentry->d_inode->i_mutex);
out:
return ret > 0 ? -EIO : ret;
}
@@ -1664,8 +1673,154 @@ out:
return ret;
}
+static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct extent_map *em;
+ struct extent_state *cached_state = NULL;
+ u64 lockstart = *offset;
+ u64 lockend = i_size_read(inode);
+ u64 start = *offset;
+ u64 orig_start = *offset;
+ u64 len = i_size_read(inode);
+ u64 last_end = 0;
+ int ret = 0;
+
+ lockend = max_t(u64, root->sectorsize, lockend);
+ if (lockend <= lockstart)
+ lockend = lockstart + root->sectorsize;
+
+ len = lockend - lockstart + 1;
+
+ len = max_t(u64, len, root->sectorsize);
+ if (inode->i_size == 0)
+ return -ENXIO;
+
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0,
+ &cached_state, GFP_NOFS);
+
+ /*
+ * Delalloc is such a pain. If we have a hole and we have pending
+ * delalloc for a portion of the hole we will get back a hole that
+ * exists for the entire range since it hasn't been actually written
+ * yet. So to take care of this case we need to look for an extent just
+ * before the position we want in case there is outstanding delalloc
+ * going on here.
+ */
+ if (origin == SEEK_HOLE && start != 0) {
+ if (start <= root->sectorsize)
+ em = btrfs_get_extent_fiemap(inode, NULL, 0, 0,
+ root->sectorsize, 0);
+ else
+ em = btrfs_get_extent_fiemap(inode, NULL, 0,
+ start - root->sectorsize,
+ root->sectorsize, 0);
+ if (IS_ERR(em)) {
+ ret = -ENXIO;
+ goto out;
+ }
+ last_end = em->start + em->len;
+ if (em->block_start == EXTENT_MAP_DELALLOC)
+ last_end = min_t(u64, last_end, inode->i_size);
+ free_extent_map(em);
+ }
+
+ while (1) {
+ em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0);
+ if (IS_ERR(em)) {
+ ret = -ENXIO;
+ break;
+ }
+
+ if (em->block_start == EXTENT_MAP_HOLE) {
+ if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
+ if (last_end <= orig_start) {
+ free_extent_map(em);
+ ret = -ENXIO;
+ break;
+ }
+ }
+
+ if (origin == SEEK_HOLE) {
+ *offset = start;
+ free_extent_map(em);
+ break;
+ }
+ } else {
+ if (origin == SEEK_DATA) {
+ if (em->block_start == EXTENT_MAP_DELALLOC) {
+ if (start >= inode->i_size) {
+ free_extent_map(em);
+ ret = -ENXIO;
+ break;
+ }
+ }
+
+ *offset = start;
+ free_extent_map(em);
+ break;
+ }
+ }
+
+ start = em->start + em->len;
+ last_end = em->start + em->len;
+
+ if (em->block_start == EXTENT_MAP_DELALLOC)
+ last_end = min_t(u64, last_end, inode->i_size);
+
+ if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
+ free_extent_map(em);
+ ret = -ENXIO;
+ break;
+ }
+ free_extent_map(em);
+ cond_resched();
+ }
+ if (!ret)
+ *offset = min(*offset, inode->i_size);
+out:
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+ &cached_state, GFP_NOFS);
+ return ret;
+}
+
+static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin)
+{
+ struct inode *inode = file->f_mapping->host;
+ int ret;
+
+ mutex_lock(&inode->i_mutex);
+ switch (origin) {
+ case SEEK_END:
+ case SEEK_CUR:
+ offset = generic_file_llseek_unlocked(file, offset, origin);
+ goto out;
+ case SEEK_DATA:
+ case SEEK_HOLE:
+ ret = find_desired_extent(inode, &offset, origin);
+ if (ret) {
+ mutex_unlock(&inode->i_mutex);
+ return ret;
+ }
+ }
+
+ if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
+ return -EINVAL;
+ if (offset > inode->i_sb->s_maxbytes)
+ return -EINVAL;
+
+ /* Special lock needed here? */
+ if (offset != file->f_pos) {
+ file->f_pos = offset;
+ file->f_version = 0;
+ }
+out:
+ mutex_unlock(&inode->i_mutex);
+ return offset;
+}
+
const struct file_operations btrfs_file_operations = {
- .llseek = generic_file_llseek,
+ .llseek = btrfs_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
.aio_read = generic_file_aio_read,
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index ad144736a5fd..6377713f639c 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -98,6 +98,12 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
return inode;
spin_lock(&block_group->lock);
+ if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) {
+ printk(KERN_INFO "Old style space inode found, converting.\n");
+ BTRFS_I(inode)->flags &= ~BTRFS_INODE_NODATASUM;
+ block_group->disk_cache_state = BTRFS_DC_CLEAR;
+ }
+
if (!btrfs_fs_closing(root->fs_info)) {
block_group->inode = igrab(inode);
block_group->iref = 1;
@@ -135,7 +141,7 @@ int __create_free_space_inode(struct btrfs_root *root,
btrfs_set_inode_gid(leaf, inode_item, 0);
btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
- BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM);
+ BTRFS_INODE_PREALLOC);
btrfs_set_inode_nlink(leaf, inode_item, 1);
btrfs_set_inode_transid(leaf, inode_item, trans->transid);
btrfs_set_inode_block_group(leaf, inode_item, offset);
@@ -239,18 +245,13 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
struct btrfs_free_space_header *header;
struct extent_buffer *leaf;
struct page *page;
- u32 *checksums = NULL, *crc;
- char *disk_crcs = NULL;
struct btrfs_key key;
struct list_head bitmaps;
u64 num_entries;
u64 num_bitmaps;
u64 generation;
- u32 cur_crc = ~(u32)0;
pgoff_t index = 0;
- unsigned long first_page_offset;
- int num_checksums;
- int ret = 0, ret2;
+ int ret = 0;
INIT_LIST_HEAD(&bitmaps);
@@ -292,16 +293,6 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
if (!num_entries)
goto out;
- /* Setup everything for doing checksumming */
- num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
- checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
- if (!checksums)
- goto out;
- first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
- disk_crcs = kzalloc(first_page_offset, GFP_NOFS);
- if (!disk_crcs)
- goto out;
-
ret = readahead_cache(inode);
if (ret)
goto out;
@@ -311,18 +302,12 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
struct btrfs_free_space *e;
void *addr;
unsigned long offset = 0;
- unsigned long start_offset = 0;
int need_loop = 0;
if (!num_entries && !num_bitmaps)
break;
- if (index == 0) {
- start_offset = first_page_offset;
- offset = start_offset;
- }
-
- page = grab_cache_page(inode->i_mapping, index);
+ page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
if (!page)
goto free_cache;
@@ -342,8 +327,15 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
if (index == 0) {
u64 *gen;
- memcpy(disk_crcs, addr, first_page_offset);
- gen = addr + (sizeof(u32) * num_checksums);
+ /*
+ * We put a bogus crc in the front of the first page in
+ * case old kernels try to mount a fs with the new
+ * format to make sure they discard the cache.
+ */
+ addr += sizeof(u64);
+ offset += sizeof(u64);
+
+ gen = addr;
if (*gen != BTRFS_I(inode)->generation) {
printk(KERN_ERR "btrfs: space cache generation"
" (%llu) does not match inode (%llu)\n",
@@ -355,24 +347,10 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
page_cache_release(page);
goto free_cache;
}
- crc = (u32 *)disk_crcs;
+ addr += sizeof(u64);
+ offset += sizeof(u64);
}
- entry = addr + start_offset;
-
- /* First lets check our crc before we do anything fun */
- cur_crc = ~(u32)0;
- cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc,
- PAGE_CACHE_SIZE - start_offset);
- btrfs_csum_final(cur_crc, (char *)&cur_crc);
- if (cur_crc != *crc) {
- printk(KERN_ERR "btrfs: crc mismatch for page %lu\n",
- index);
- kunmap(page);
- unlock_page(page);
- page_cache_release(page);
- goto free_cache;
- }
- crc++;
+ entry = addr;
while (1) {
if (!num_entries)
@@ -421,11 +399,10 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
goto free_cache;
}
spin_lock(&ctl->tree_lock);
- ret2 = link_free_space(ctl, e);
+ ret = link_free_space(ctl, e);
ctl->total_bitmaps++;
ctl->op->recalc_thresholds(ctl);
spin_unlock(&ctl->tree_lock);
- list_add_tail(&e->list, &bitmaps);
if (ret) {
printk(KERN_ERR "Duplicate entries in "
"free space cache, dumping\n");
@@ -434,6 +411,7 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
page_cache_release(page);
goto free_cache;
}
+ list_add_tail(&e->list, &bitmaps);
}
num_entries--;
@@ -470,8 +448,6 @@ next:
ret = 1;
out:
- kfree(checksums);
- kfree(disk_crcs);
return ret;
free_cache:
__btrfs_remove_free_space_cache(ctl);
@@ -569,8 +545,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
struct btrfs_key key;
u64 start, end, len;
u64 bytes = 0;
- u32 *crc, *checksums;
- unsigned long first_page_offset;
+ u32 crc = ~(u32)0;
int index = 0, num_pages = 0;
int entries = 0;
int bitmaps = 0;
@@ -590,34 +565,13 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
PAGE_CACHE_SHIFT;
- /* Since the first page has all of our checksums and our generation we
- * need to calculate the offset into the page that we can start writing
- * our entries.
- */
- first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
-
filemap_write_and_wait(inode->i_mapping);
btrfs_wait_ordered_range(inode, inode->i_size &
~(root->sectorsize - 1), (u64)-1);
- /* make sure we don't overflow that first page */
- if (first_page_offset + sizeof(struct btrfs_free_space_entry) >= PAGE_CACHE_SIZE) {
- /* this is really the same as running out of space, where we also return 0 */
- printk(KERN_CRIT "Btrfs: free space cache was too big for the crc page\n");
- ret = 0;
- goto out_update;
- }
-
- /* We need a checksum per page. */
- crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
- if (!crc)
- return -1;
-
pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
- if (!pages) {
- kfree(crc);
+ if (!pages)
return -1;
- }
/* Get the cluster for this block_group if it exists */
if (block_group && !list_empty(&block_group->cluster_list))
@@ -640,7 +594,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
* know and don't freak out.
*/
while (index < num_pages) {
- page = grab_cache_page(inode->i_mapping, index);
+ page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
if (!page) {
int i;
@@ -648,7 +602,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
unlock_page(pages[i]);
page_cache_release(pages[i]);
}
- goto out_free;
+ goto out;
}
pages[index] = page;
index++;
@@ -668,17 +622,11 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
/* Write out the extent entries */
do {
struct btrfs_free_space_entry *entry;
- void *addr;
+ void *addr, *orig;
unsigned long offset = 0;
- unsigned long start_offset = 0;
next_page = false;
- if (index == 0) {
- start_offset = first_page_offset;
- offset = start_offset;
- }
-
if (index >= num_pages) {
out_of_space = true;
break;
@@ -686,10 +634,26 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
page = pages[index];
- addr = kmap(page);
- entry = addr + start_offset;
+ orig = addr = kmap(page);
+ if (index == 0) {
+ u64 *gen;
- memset(addr, 0, PAGE_CACHE_SIZE);
+ /*
+ * We're going to put in a bogus crc for this page to
+ * make sure that old kernels who aren't aware of this
+ * format will be sure to discard the cache.
+ */
+ addr += sizeof(u64);
+ offset += sizeof(u64);
+
+ gen = addr;
+ *gen = trans->transid;
+ addr += sizeof(u64);
+ offset += sizeof(u64);
+ }
+ entry = addr;
+
+ memset(addr, 0, PAGE_CACHE_SIZE - offset);
while (node && !next_page) {
struct btrfs_free_space *e;
@@ -752,13 +716,19 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
next_page = true;
entry++;
}
- *crc = ~(u32)0;
- *crc = btrfs_csum_data(root, addr + start_offset, *crc,
- PAGE_CACHE_SIZE - start_offset);
- kunmap(page);
- btrfs_csum_final(*crc, (char *)crc);
- crc++;
+ /* Generate bogus crc value */
+ if (index == 0) {
+ u32 *tmp;
+ crc = btrfs_csum_data(root, orig + sizeof(u64), crc,
+ PAGE_CACHE_SIZE - sizeof(u64));
+ btrfs_csum_final(crc, (char *)&crc);
+ crc++;
+ tmp = orig;
+ *tmp = crc;
+ }
+
+ kunmap(page);
bytes += PAGE_CACHE_SIZE;
@@ -779,11 +749,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
addr = kmap(page);
memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
- *crc = ~(u32)0;
- *crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE);
kunmap(page);
- btrfs_csum_final(*crc, (char *)crc);
- crc++;
bytes += PAGE_CACHE_SIZE;
list_del_init(&entry->list);
@@ -796,7 +762,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
i_size_read(inode) - 1, &cached_state,
GFP_NOFS);
ret = 0;
- goto out_free;
+ goto out;
}
/* Zero out the rest of the pages just to make sure */
@@ -811,20 +777,6 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
index++;
}
- /* Write the checksums and trans id to the first page */
- {
- void *addr;
- u64 *gen;
-
- page = pages[0];
-
- addr = kmap(page);
- memcpy(addr, checksums, sizeof(u32) * num_pages);
- gen = addr + (sizeof(u32) * num_pages);
- *gen = trans->transid;
- kunmap(page);
- }
-
ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
bytes, &cached_state);
btrfs_drop_pages(pages, num_pages);
@@ -833,7 +785,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
if (ret) {
ret = 0;
- goto out_free;
+ goto out;
}
BTRFS_I(inode)->generation = trans->transid;
@@ -850,7 +802,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
- goto out_free;
+ goto out;
}
leaf = path->nodes[0];
if (ret > 0) {
@@ -866,7 +818,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
EXTENT_DO_ACCOUNTING, 0, 0, NULL,
GFP_NOFS);
btrfs_release_path(path);
- goto out_free;
+ goto out;
}
}
header = btrfs_item_ptr(leaf, path->slots[0],
@@ -879,11 +831,8 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
ret = 1;
-out_free:
- kfree(checksums);
+out:
kfree(pages);
-
-out_update:
if (ret != 1) {
invalidate_inode_pages2_range(inode->i_mapping, 0, index);
BTRFS_I(inode)->generation = 0;
@@ -1417,6 +1366,23 @@ again:
return 0;
}
+static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
+ struct btrfs_free_space *info, u64 offset,
+ u64 bytes)
+{
+ u64 bytes_to_set = 0;
+ u64 end;
+
+ end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
+
+ bytes_to_set = min(end - offset, bytes);
+
+ bitmap_set_bits(ctl, info, offset, bytes_to_set);
+
+ return bytes_to_set;
+
+}
+
static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info)
{
@@ -1453,12 +1419,18 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
return true;
}
+static struct btrfs_free_space_op free_space_op = {
+ .recalc_thresholds = recalculate_thresholds,
+ .use_bitmap = use_bitmap,
+};
+
static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info)
{
struct btrfs_free_space *bitmap_info;
+ struct btrfs_block_group_cache *block_group = NULL;
int added = 0;
- u64 bytes, offset, end;
+ u64 bytes, offset, bytes_added;
int ret;
bytes = info->bytes;
@@ -1467,7 +1439,49 @@ static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
if (!ctl->op->use_bitmap(ctl, info))
return 0;
+ if (ctl->op == &free_space_op)
+ block_group = ctl->private;
again:
+ /*
+ * Since we link bitmaps right into the cluster we need to see if we
+ * have a cluster here, and if so and it has our bitmap we need to add
+ * the free space to that bitmap.
+ */
+ if (block_group && !list_empty(&block_group->cluster_list)) {
+ struct btrfs_free_cluster *cluster;
+ struct rb_node *node;
+ struct btrfs_free_space *entry;
+
+ cluster = list_entry(block_group->cluster_list.next,
+ struct btrfs_free_cluster,
+ block_group_list);
+ spin_lock(&cluster->lock);
+ node = rb_first(&cluster->root);
+ if (!node) {
+ spin_unlock(&cluster->lock);
+ goto no_cluster_bitmap;
+ }
+
+ entry = rb_entry(node, struct btrfs_free_space, offset_index);
+ if (!entry->bitmap) {
+ spin_unlock(&cluster->lock);
+ goto no_cluster_bitmap;
+ }
+
+ if (entry->offset == offset_to_bitmap(ctl, offset)) {
+ bytes_added = add_bytes_to_bitmap(ctl, entry,
+ offset, bytes);
+ bytes -= bytes_added;
+ offset += bytes_added;
+ }
+ spin_unlock(&cluster->lock);
+ if (!bytes) {
+ ret = 1;
+ goto out;
+ }
+ }
+
+no_cluster_bitmap:
bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1, 0);
if (!bitmap_info) {
@@ -1475,19 +1489,10 @@ again:
goto new_bitmap;
}
- end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
-
- if (offset >= bitmap_info->offset && offset + bytes > end) {
- bitmap_set_bits(ctl, bitmap_info, offset, end - offset);
- bytes -= end - offset;
- offset = end;
- added = 0;
- } else if (offset >= bitmap_info->offset && offset + bytes <= end) {
- bitmap_set_bits(ctl, bitmap_info, offset, bytes);
- bytes = 0;
- } else {
- BUG();
- }
+ bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
+ bytes -= bytes_added;
+ offset += bytes_added;
+ added = 0;
if (!bytes) {
ret = 1;
@@ -1766,11 +1771,6 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
"\n", count);
}
-static struct btrfs_free_space_op free_space_op = {
- .recalc_thresholds = recalculate_thresholds,
- .use_bitmap = use_bitmap,
-};
-
void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
@@ -1842,9 +1842,12 @@ void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
info = rb_entry(node, struct btrfs_free_space, offset_index);
- unlink_free_space(ctl, info);
- kfree(info->bitmap);
- kmem_cache_free(btrfs_free_space_cachep, info);
+ if (!info->bitmap) {
+ unlink_free_space(ctl, info);
+ kmem_cache_free(btrfs_free_space_cachep, info);
+ } else {
+ free_bitmap(ctl, info);
+ }
if (need_resched()) {
spin_unlock(&ctl->tree_lock);
cond_resched();
@@ -2142,9 +2145,11 @@ again:
/*
* This searches the block group for just extents to fill the cluster with.
*/
-static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
- struct btrfs_free_cluster *cluster,
- u64 offset, u64 bytes, u64 min_bytes)
+static noinline int
+setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
+ struct btrfs_free_cluster *cluster,
+ struct list_head *bitmaps, u64 offset, u64 bytes,
+ u64 min_bytes)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *first = NULL;
@@ -2166,6 +2171,8 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
* extent entry.
*/
while (entry->bitmap) {
+ if (list_empty(&entry->list))
+ list_add_tail(&entry->list, bitmaps);
node = rb_next(&entry->offset_index);
if (!node)
return -ENOSPC;
@@ -2185,8 +2192,12 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
return -ENOSPC;
entry = rb_entry(node, struct btrfs_free_space, offset_index);
- if (entry->bitmap)
+ if (entry->bitmap) {
+ if (list_empty(&entry->list))
+ list_add_tail(&entry->list, bitmaps);
continue;
+ }
+
/*
* we haven't filled the empty size and the window is
* very large. reset and try again
@@ -2238,9 +2249,11 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
* This specifically looks for bitmaps that may work in the cluster, we assume
* that we have already failed to find extents that will work.
*/
-static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
- struct btrfs_free_cluster *cluster,
- u64 offset, u64 bytes, u64 min_bytes)
+static noinline int
+setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
+ struct btrfs_free_cluster *cluster,
+ struct list_head *bitmaps, u64 offset, u64 bytes,
+ u64 min_bytes)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *entry;
@@ -2250,10 +2263,39 @@ static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
if (ctl->total_bitmaps == 0)
return -ENOSPC;
+ /*
+ * First check our cached list of bitmaps and see if there is an entry
+ * here that will work.
+ */
+ list_for_each_entry(entry, bitmaps, list) {
+ if (entry->bytes < min_bytes)
+ continue;
+ ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
+ bytes, min_bytes);
+ if (!ret)
+ return 0;
+ }
+
+ /*
+ * If we do have entries on our list and we are here then we didn't find
+ * anything, so go ahead and get the next entry after the last entry in
+ * this list and start the search from there.
+ */
+ if (!list_empty(bitmaps)) {
+ entry = list_entry(bitmaps->prev, struct btrfs_free_space,
+ list);
+ node = rb_next(&entry->offset_index);
+ if (!node)
+ return -ENOSPC;
+ entry = rb_entry(node, struct btrfs_free_space, offset_index);
+ goto search;
+ }
+
entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1);
if (!entry)
return -ENOSPC;
+search:
node = &entry->offset_index;
do {
entry = rb_entry(node, struct btrfs_free_space, offset_index);
@@ -2284,6 +2326,8 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
u64 offset, u64 bytes, u64 empty_size)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
+ struct list_head bitmaps;
+ struct btrfs_free_space *entry, *tmp;
u64 min_bytes;
int ret;
@@ -2322,11 +2366,16 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
goto out;
}
- ret = setup_cluster_no_bitmap(block_group, cluster, offset, bytes,
- min_bytes);
+ INIT_LIST_HEAD(&bitmaps);
+ ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
+ bytes, min_bytes);
if (ret)
- ret = setup_cluster_bitmap(block_group, cluster, offset,
- bytes, min_bytes);
+ ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
+ offset, bytes, min_bytes);
+
+ /* Clear our temporary list */
+ list_for_each_entry_safe(entry, tmp, &bitmaps, list)
+ list_del_init(&entry->list);
if (!ret) {
atomic_inc(&block_group->count);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ebf95f7a44d6..15fceefbca0a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -750,15 +750,6 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
return alloc_hint;
}
-static inline bool is_free_space_inode(struct btrfs_root *root,
- struct inode *inode)
-{
- if (root == root->fs_info->tree_root ||
- BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
- return true;
- return false;
-}
-
/*
* when extent_io.c finds a delayed allocation range in the file,
* the call backs end up in this code. The basic idea is to
@@ -791,7 +782,7 @@ static noinline int cow_file_range(struct inode *inode,
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
int ret = 0;
- BUG_ON(is_free_space_inode(root, inode));
+ BUG_ON(btrfs_is_free_space_inode(root, inode));
trans = btrfs_join_transaction(root);
BUG_ON(IS_ERR(trans));
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -1070,9 +1061,10 @@ static noinline int run_delalloc_nocow(struct inode *inode,
u64 ino = btrfs_ino(inode);
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
- nolock = is_free_space_inode(root, inode);
+ nolock = btrfs_is_free_space_inode(root, inode);
if (nolock)
trans = btrfs_join_transaction_nolock(root);
@@ -1291,15 +1283,16 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
return ret;
}
-static int btrfs_split_extent_hook(struct inode *inode,
- struct extent_state *orig, u64 split)
+static void btrfs_split_extent_hook(struct inode *inode,
+ struct extent_state *orig, u64 split)
{
/* not delalloc, ignore it */
if (!(orig->state & EXTENT_DELALLOC))
- return 0;
+ return;
- atomic_inc(&BTRFS_I(inode)->outstanding_extents);
- return 0;
+ spin_lock(&BTRFS_I(inode)->lock);
+ BTRFS_I(inode)->outstanding_extents++;
+ spin_unlock(&BTRFS_I(inode)->lock);
}
/*
@@ -1308,16 +1301,17 @@ static int btrfs_split_extent_hook(struct inode *inode,
* extents, such as when we are doing sequential writes, so we can properly
* account for the metadata space we'll need.
*/
-static int btrfs_merge_extent_hook(struct inode *inode,
- struct extent_state *new,
- struct extent_state *other)
+static void btrfs_merge_extent_hook(struct inode *inode,
+ struct extent_state *new,
+ struct extent_state *other)
{
/* not delalloc, ignore it */
if (!(other->state & EXTENT_DELALLOC))
- return 0;
+ return;
- atomic_dec(&BTRFS_I(inode)->outstanding_extents);
- return 0;
+ spin_lock(&BTRFS_I(inode)->lock);
+ BTRFS_I(inode)->outstanding_extents--;
+ spin_unlock(&BTRFS_I(inode)->lock);
}
/*
@@ -1325,8 +1319,8 @@ static int btrfs_merge_extent_hook(struct inode *inode,
* bytes in this file, and to maintain the list of inodes that
* have pending delalloc work to be done.
*/
-static int btrfs_set_bit_hook(struct inode *inode,
- struct extent_state *state, int *bits)
+static void btrfs_set_bit_hook(struct inode *inode,
+ struct extent_state *state, int *bits)
{
/*
@@ -1337,12 +1331,15 @@ static int btrfs_set_bit_hook(struct inode *inode,
if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
- bool do_list = !is_free_space_inode(root, inode);
+ bool do_list = !btrfs_is_free_space_inode(root, inode);
- if (*bits & EXTENT_FIRST_DELALLOC)
+ if (*bits & EXTENT_FIRST_DELALLOC) {
*bits &= ~EXTENT_FIRST_DELALLOC;
- else
- atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+ } else {
+ spin_lock(&BTRFS_I(inode)->lock);
+ BTRFS_I(inode)->outstanding_extents++;
+ spin_unlock(&BTRFS_I(inode)->lock);
+ }
spin_lock(&root->fs_info->delalloc_lock);
BTRFS_I(inode)->delalloc_bytes += len;
@@ -1353,14 +1350,13 @@ static int btrfs_set_bit_hook(struct inode *inode,
}
spin_unlock(&root->fs_info->delalloc_lock);
}
- return 0;
}
/*
* extent_io.c clear_bit_hook, see set_bit_hook for why
*/
-static int btrfs_clear_bit_hook(struct inode *inode,
- struct extent_state *state, int *bits)
+static void btrfs_clear_bit_hook(struct inode *inode,
+ struct extent_state *state, int *bits)
{
/*
* set_bit and clear bit hooks normally require _irqsave/restore
@@ -1370,12 +1366,15 @@ static int btrfs_clear_bit_hook(struct inode *inode,
if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
- bool do_list = !is_free_space_inode(root, inode);
+ bool do_list = !btrfs_is_free_space_inode(root, inode);
- if (*bits & EXTENT_FIRST_DELALLOC)
+ if (*bits & EXTENT_FIRST_DELALLOC) {
*bits &= ~EXTENT_FIRST_DELALLOC;
- else if (!(*bits & EXTENT_DO_ACCOUNTING))
- atomic_dec(&BTRFS_I(inode)->outstanding_extents);
+ } else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
+ spin_lock(&BTRFS_I(inode)->lock);
+ BTRFS_I(inode)->outstanding_extents--;
+ spin_unlock(&BTRFS_I(inode)->lock);
+ }
if (*bits & EXTENT_DO_ACCOUNTING)
btrfs_delalloc_release_metadata(inode, len);
@@ -1394,7 +1393,6 @@ static int btrfs_clear_bit_hook(struct inode *inode,
}
spin_unlock(&root->fs_info->delalloc_lock);
}
- return 0;
}
/*
@@ -1477,7 +1475,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
- if (is_free_space_inode(root, inode))
+ if (btrfs_is_free_space_inode(root, inode))
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
else
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
@@ -1644,7 +1642,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
int ret;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
path->leave_spinning = 1;
@@ -1726,7 +1725,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
return 0;
BUG_ON(!ordered_extent);
- nolock = is_free_space_inode(root, inode);
+ nolock = btrfs_is_free_space_inode(root, inode);
if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
BUG_ON(!list_empty(&ordered_extent->list));
@@ -1986,7 +1985,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
}
if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
- return 0;
+ goto good;
if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
@@ -2214,7 +2213,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
if (!root->orphan_block_rsv) {
block_rsv = btrfs_alloc_block_rsv(root);
- BUG_ON(!block_rsv);
+ if (!block_rsv)
+ return -ENOMEM;
}
spin_lock(&root->orphan_lock);
@@ -2509,9 +2509,16 @@ static void btrfs_read_locked_inode(struct inode *inode)
int maybe_acls;
u32 rdev;
int ret;
+ bool filled = false;
+
+ ret = btrfs_fill_inode(inode, &rdev);
+ if (!ret)
+ filled = true;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ goto make_bad;
+
path->leave_spinning = 1;
memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
@@ -2520,15 +2527,12 @@ static void btrfs_read_locked_inode(struct inode *inode)
goto make_bad;
leaf = path->nodes[0];
+
+ if (filled)
+ goto cache_acl;
+
inode_item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_inode_item);
- if (!leaf->map_token)
- map_private_extent_buffer(leaf, (unsigned long)inode_item,
- sizeof(struct btrfs_inode_item),
- &leaf->map_token, &leaf->kaddr,
- &leaf->map_start, &leaf->map_len,
- KM_USER1);
-
inode->i_mode = btrfs_inode_mode(leaf, inode_item);
inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
inode->i_uid = btrfs_inode_uid(leaf, inode_item);
@@ -2556,7 +2560,7 @@ static void btrfs_read_locked_inode(struct inode *inode)
BTRFS_I(inode)->index_cnt = (u64)-1;
BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
-
+cache_acl:
/*
* try to precache a NULL acl entry for files that don't have
* any xattrs or acls
@@ -2566,13 +2570,7 @@ static void btrfs_read_locked_inode(struct inode *inode)
if (!maybe_acls)
cache_no_acl(inode);
- if (leaf->map_token) {
- unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
- leaf->map_token = NULL;
- }
-
btrfs_free_path(path);
- inode_item = NULL;
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
@@ -2616,13 +2614,6 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
struct btrfs_inode_item *item,
struct inode *inode)
{
- if (!leaf->map_token)
- map_private_extent_buffer(leaf, (unsigned long)item,
- sizeof(struct btrfs_inode_item),
- &leaf->map_token, &leaf->kaddr,
- &leaf->map_start, &leaf->map_len,
- KM_USER1);
-
btrfs_set_inode_uid(leaf, item, inode->i_uid);
btrfs_set_inode_gid(leaf, item, inode->i_gid);
btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
@@ -2651,11 +2642,6 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
btrfs_set_inode_block_group(leaf, item, 0);
-
- if (leaf->map_token) {
- unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
- leaf->map_token = NULL;
- }
}
/*
@@ -2670,12 +2656,14 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
int ret;
/*
- * If root is tree root, it means this inode is used to
- * store free space information. And these inodes are updated
- * when committing the transaction, so they needn't delaye to
- * be updated, or deadlock will occured.
+ * If the inode is a free space inode, we can deadlock during commit
+ * if we put it into the delayed code.
+ *
+ * The data relocation inode should also be directly updated
+ * without delay
*/
- if (!is_free_space_inode(root, inode)) {
+ if (!btrfs_is_free_space_inode(root, inode)
+ && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
ret = btrfs_delayed_update_inode(trans, root, inode);
if (!ret)
btrfs_set_inode_last_trans(trans, inode);
@@ -3011,13 +2999,16 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
dentry->d_name.name, dentry->d_name.len);
- BUG_ON(ret);
+ if (ret)
+ goto out;
if (inode->i_nlink == 0) {
ret = btrfs_orphan_add(trans, inode);
- BUG_ON(ret);
+ if (ret)
+ goto out;
}
+out:
nr = trans->blocks_used;
__unlink_end_trans(trans, root);
btrfs_btree_balance_dirty(root, nr);
@@ -3076,6 +3067,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
ret = btrfs_update_inode(trans, root, dir);
BUG_ON(ret);
+ btrfs_free_path(path);
return 0;
}
@@ -3159,6 +3151,11 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+ path->reada = -1;
+
if (root->ref_cows || root == root->fs_info->tree_root)
btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
@@ -3171,10 +3168,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
if (min_type == 0 && root == BTRFS_I(inode)->root)
btrfs_kill_delayed_inode_items(inode);
- path = btrfs_alloc_path();
- BUG_ON(!path);
- path->reada = -1;
-
key.objectid = ino;
key.offset = (u64)-1;
key.type = (u8)-1;
@@ -3387,7 +3380,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
ret = -ENOMEM;
again:
- page = grab_cache_page(mapping, index);
+ page = find_or_create_page(mapping, index, GFP_NOFS);
if (!page) {
btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
goto out;
@@ -3623,7 +3616,7 @@ void btrfs_evict_inode(struct inode *inode)
truncate_inode_pages(&inode->i_data, 0);
if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
- is_free_space_inode(root, inode)))
+ btrfs_is_free_space_inode(root, inode)))
goto no_delete;
if (is_bad_inode(inode)) {
@@ -3646,7 +3639,7 @@ void btrfs_evict_inode(struct inode *inode)
btrfs_i_size_write(inode, 0);
while (1) {
- trans = btrfs_start_transaction(root, 0);
+ trans = btrfs_join_transaction(root);
BUG_ON(IS_ERR(trans));
trans->block_rsv = root->orphan_block_rsv;
@@ -3702,7 +3695,8 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
int ret = 0;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name,
namelen, 0);
@@ -3958,6 +3952,7 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
struct btrfs_root *root, int *new)
{
struct inode *inode;
+ int bad_inode = 0;
inode = btrfs_iget_locked(s, location->objectid, root);
if (!inode)
@@ -3967,10 +3962,19 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
BTRFS_I(inode)->root = root;
memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
btrfs_read_locked_inode(inode);
- inode_tree_add(inode);
- unlock_new_inode(inode);
- if (new)
- *new = 1;
+ if (!is_bad_inode(inode)) {
+ inode_tree_add(inode);
+ unlock_new_inode(inode);
+ if (new)
+ *new = 1;
+ } else {
+ bad_inode = 1;
+ }
+ }
+
+ if (bad_inode) {
+ iput(inode);
+ inode = ERR_PTR(-ESTALE);
}
return inode;
@@ -4005,12 +4009,19 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
struct btrfs_root *sub_root = root;
struct btrfs_key location;
int index;
- int ret;
+ int ret = 0;
if (dentry->d_name.len > BTRFS_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
- ret = btrfs_inode_by_name(dir, dentry, &location);
+ if (unlikely(d_need_lookup(dentry))) {
+ memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key));
+ kfree(dentry->d_fsdata);
+ dentry->d_fsdata = NULL;
+ d_clear_need_lookup(dentry);
+ } else {
+ ret = btrfs_inode_by_name(dir, dentry, &location);
+ }
if (ret < 0)
return ERR_PTR(ret);
@@ -4065,16 +4076,16 @@ static int btrfs_dentry_delete(const struct dentry *dentry)
return 0;
}
+static void btrfs_dentry_release(struct dentry *dentry)
+{
+ if (dentry->d_fsdata)
+ kfree(dentry->d_fsdata);
+}
+
static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
struct nameidata *nd)
{
- struct inode *inode;
-
- inode = btrfs_lookup_dentry(dir, dentry);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
-
- return d_splice_alias(inode, dentry);
+ return d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry);
}
unsigned char btrfs_filetype_table[] = {
@@ -4093,6 +4104,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
struct btrfs_path *path;
struct list_head ins_list;
struct list_head del_list;
+ struct qstr q;
int ret;
struct extent_buffer *leaf;
int slot;
@@ -4182,6 +4194,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
while (di_cur < di_total) {
struct btrfs_key location;
+ struct dentry *tmp;
if (verify_dir_item(root, leaf, di))
break;
@@ -4202,6 +4215,33 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
btrfs_dir_item_key_to_cpu(leaf, di, &location);
+ q.name = name_ptr;
+ q.len = name_len;
+ q.hash = full_name_hash(q.name, q.len);
+ tmp = d_lookup(filp->f_dentry, &q);
+ if (!tmp) {
+ struct btrfs_key *newkey;
+
+ newkey = kzalloc(sizeof(struct btrfs_key),
+ GFP_NOFS);
+ if (!newkey)
+ goto no_dentry;
+ tmp = d_alloc(filp->f_dentry, &q);
+ if (!tmp) {
+ kfree(newkey);
+ dput(tmp);
+ goto no_dentry;
+ }
+ memcpy(newkey, &location,
+ sizeof(struct btrfs_key));
+ tmp->d_fsdata = newkey;
+ tmp->d_flags |= DCACHE_NEED_LOOKUP;
+ d_rehash(tmp);
+ dput(tmp);
+ } else {
+ dput(tmp);
+ }
+no_dentry:
/* is this a reference to our own snapshot? If so
* skip it
*/
@@ -4266,7 +4306,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
if (BTRFS_I(inode)->dummy_inode)
return 0;
- if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode))
+ if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode))
nolock = true;
if (wbc->sync_mode == WB_SYNC_ALL) {
@@ -4427,7 +4467,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
int owner;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return ERR_PTR(-ENOMEM);
inode = new_inode(root->fs_info->sb);
if (!inode) {
@@ -4462,7 +4503,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
inode->i_generation = BTRFS_I(inode)->generation;
btrfs_set_inode_space_info(root, inode);
- if (mode & S_IFDIR)
+ if (S_ISDIR(mode))
owner = 0;
else
owner = 1;
@@ -4507,7 +4548,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
btrfs_inherit_iflags(inode, dir);
- if ((mode & S_IFREG)) {
+ if (S_ISREG(mode)) {
if (btrfs_test_opt(root, NODATASUM))
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
if (btrfs_test_opt(root, NODATACOW) ||
@@ -4519,6 +4560,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
inode_tree_add(inode);
trace_btrfs_inode_new(inode);
+ btrfs_set_inode_last_trans(trans, inode);
return inode;
fail:
@@ -4760,11 +4802,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
if (err) {
drop_inode = 1;
} else {
- struct dentry *parent = dget_parent(dentry);
+ struct dentry *parent = dentry->d_parent;
err = btrfs_update_inode(trans, root, inode);
BUG_ON(err);
btrfs_log_new_name(trans, inode, NULL, parent);
- dput(parent);
}
nr = trans->blocks_used;
@@ -6687,19 +6728,6 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
return 0;
}
-/* helper function for file defrag and space balancing. This
- * forces readahead on a given range of bytes in an inode
- */
-unsigned long btrfs_force_ra(struct address_space *mapping,
- struct file_ra_state *ra, struct file *file,
- pgoff_t offset, pgoff_t last_index)
-{
- pgoff_t req_size = last_index - offset + 1;
-
- page_cache_sync_readahead(mapping, ra, file, offset, req_size);
- return offset + req_size;
-}
-
struct inode *btrfs_alloc_inode(struct super_block *sb)
{
struct btrfs_inode *ei;
@@ -6723,8 +6751,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->index_cnt = (u64)-1;
ei->last_unlink_trans = 0;
- atomic_set(&ei->outstanding_extents, 0);
- atomic_set(&ei->reserved_extents, 0);
+ spin_lock_init(&ei->lock);
+ ei->outstanding_extents = 0;
+ ei->reserved_extents = 0;
ei->ordered_data_close = 0;
ei->orphan_meta_reserved = 0;
@@ -6762,8 +6791,8 @@ void btrfs_destroy_inode(struct inode *inode)
WARN_ON(!list_empty(&inode->i_dentry));
WARN_ON(inode->i_data.nrpages);
- WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents));
- WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents));
+ WARN_ON(BTRFS_I(inode)->outstanding_extents);
+ WARN_ON(BTRFS_I(inode)->reserved_extents);
/*
* This can happen where we create an inode, but somebody else also
@@ -6818,7 +6847,7 @@ int btrfs_drop_inode(struct inode *inode)
struct btrfs_root *root = BTRFS_I(inode)->root;
if (btrfs_root_refs(&root->root_item) == 0 &&
- !is_free_space_inode(root, inode))
+ !btrfs_is_free_space_inode(root, inode))
return 1;
else
return generic_drop_inode(inode);
@@ -6888,7 +6917,7 @@ static int btrfs_getattr(struct vfsmount *mnt,
{
struct inode *inode = dentry->d_inode;
generic_fillattr(inode, stat);
- stat->dev = BTRFS_I(inode)->root->anon_super.s_dev;
+ stat->dev = BTRFS_I(inode)->root->anon_dev;
stat->blksize = PAGE_CACHE_SIZE;
stat->blocks = (inode_get_bytes(inode) +
BTRFS_I(inode)->delalloc_bytes) >> 9;
@@ -7056,9 +7085,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
BUG_ON(ret);
if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
- struct dentry *parent = dget_parent(new_dentry);
+ struct dentry *parent = new_dentry->d_parent;
btrfs_log_new_name(trans, old_inode, old_dir, parent);
- dput(parent);
btrfs_end_log_trans(root);
}
out_fail:
@@ -7182,7 +7210,11 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
goto out_unlock;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path) {
+ err = -ENOMEM;
+ drop_inode = 1;
+ goto out_unlock;
+ }
key.objectid = btrfs_ino(inode);
key.offset = 0;
btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
@@ -7319,7 +7351,7 @@ static int btrfs_set_page_dirty(struct page *page)
return __set_page_dirty_nobuffers(page);
}
-static int btrfs_permission(struct inode *inode, int mask, unsigned int flags)
+static int btrfs_permission(struct inode *inode, int mask)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -7327,7 +7359,7 @@ static int btrfs_permission(struct inode *inode, int mask, unsigned int flags)
return -EROFS;
if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE))
return -EACCES;
- return generic_permission(inode, mask, flags, btrfs_check_acl);
+ return generic_permission(inode, mask);
}
static const struct inode_operations btrfs_dir_inode_operations = {
@@ -7347,10 +7379,12 @@ static const struct inode_operations btrfs_dir_inode_operations = {
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
.permission = btrfs_permission,
+ .get_acl = btrfs_get_acl,
};
static const struct inode_operations btrfs_dir_ro_inode_operations = {
.lookup = btrfs_lookup,
.permission = btrfs_permission,
+ .get_acl = btrfs_get_acl,
};
static const struct file_operations btrfs_dir_file_operations = {
@@ -7419,6 +7453,7 @@ static const struct inode_operations btrfs_file_inode_operations = {
.removexattr = btrfs_removexattr,
.permission = btrfs_permission,
.fiemap = btrfs_fiemap,
+ .get_acl = btrfs_get_acl,
};
static const struct inode_operations btrfs_special_inode_operations = {
.getattr = btrfs_getattr,
@@ -7428,6 +7463,7 @@ static const struct inode_operations btrfs_special_inode_operations = {
.getxattr = btrfs_getxattr,
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
+ .get_acl = btrfs_get_acl,
};
static const struct inode_operations btrfs_symlink_inode_operations = {
.readlink = generic_readlink,
@@ -7439,8 +7475,10 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
.getxattr = btrfs_getxattr,
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
+ .get_acl = btrfs_get_acl,
};
const struct dentry_operations btrfs_dentry_operations = {
.d_delete = btrfs_dentry_delete,
+ .d_release = btrfs_dentry_release,
};
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index ac37040e426a..7cf013349941 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -323,7 +323,7 @@ static noinline int create_subvol(struct btrfs_root *root,
struct btrfs_inode_item *inode_item;
struct extent_buffer *leaf;
struct btrfs_root *new_root;
- struct dentry *parent = dget_parent(dentry);
+ struct dentry *parent = dentry->d_parent;
struct inode *dir;
int ret;
int err;
@@ -332,10 +332,8 @@ static noinline int create_subvol(struct btrfs_root *root,
u64 index = 0;
ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid);
- if (ret) {
- dput(parent);
+ if (ret)
return ret;
- }
dir = parent->d_inode;
@@ -346,10 +344,8 @@ static noinline int create_subvol(struct btrfs_root *root,
* 2 - dir items
*/
trans = btrfs_start_transaction(root, 6);
- if (IS_ERR(trans)) {
- dput(parent);
+ if (IS_ERR(trans))
return PTR_ERR(trans);
- }
leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
0, objectid, NULL, 0, 0, 0);
@@ -439,7 +435,6 @@ static noinline int create_subvol(struct btrfs_root *root,
d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
fail:
- dput(parent);
if (async_transid) {
*async_transid = trans->transid;
err = btrfs_commit_transaction_async(trans, root, 1);
@@ -456,7 +451,6 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
bool readonly)
{
struct inode *inode;
- struct dentry *parent;
struct btrfs_pending_snapshot *pending_snapshot;
struct btrfs_trans_handle *trans;
int ret;
@@ -482,8 +476,10 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
ret = btrfs_snap_reserve_metadata(trans, pending_snapshot);
BUG_ON(ret);
+ spin_lock(&root->fs_info->trans_lock);
list_add(&pending_snapshot->list,
&trans->transaction->pending_snapshots);
+ spin_unlock(&root->fs_info->trans_lock);
if (async_transid) {
*async_transid = trans->transid;
ret = btrfs_commit_transaction_async(trans,
@@ -502,9 +498,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
if (ret)
goto fail;
- parent = dget_parent(dentry);
- inode = btrfs_lookup_dentry(parent->d_inode, dentry);
- dput(parent);
+ inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
goto fail;
@@ -865,8 +859,8 @@ again:
/* step one, lock all the pages */
for (i = 0; i < num_pages; i++) {
struct page *page;
- page = grab_cache_page(inode->i_mapping,
- start_index + i);
+ page = find_or_create_page(inode->i_mapping,
+ start_index + i, GFP_NOFS);
if (!page)
break;
@@ -936,7 +930,9 @@ again:
GFP_NOFS);
if (i_done != num_pages) {
- atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+ spin_lock(&BTRFS_I(inode)->lock);
+ BTRFS_I(inode)->outstanding_extents++;
+ spin_unlock(&BTRFS_I(inode)->lock);
btrfs_delalloc_release_space(inode,
(num_pages - i_done) << PAGE_CACHE_SHIFT);
}
@@ -1753,11 +1749,10 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
key.objectid = key.offset;
key.offset = (u64)-1;
dirid = key.objectid;
-
}
if (ptr < name)
goto out;
- memcpy(name, ptr, total_len);
+ memmove(name, ptr, total_len);
name[total_len]='\0';
ret = 0;
out:
@@ -2054,29 +2049,34 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)
{
- struct btrfs_ioctl_fs_info_args fi_args;
+ struct btrfs_ioctl_fs_info_args *fi_args;
struct btrfs_device *device;
struct btrfs_device *next;
struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
+ int ret = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- fi_args.num_devices = fs_devices->num_devices;
- fi_args.max_id = 0;
- memcpy(&fi_args.fsid, root->fs_info->fsid, sizeof(fi_args.fsid));
+ fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL);
+ if (!fi_args)
+ return -ENOMEM;
+
+ fi_args->num_devices = fs_devices->num_devices;
+ memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid));
mutex_lock(&fs_devices->device_list_mutex);
list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
- if (device->devid > fi_args.max_id)
- fi_args.max_id = device->devid;
+ if (device->devid > fi_args->max_id)
+ fi_args->max_id = device->devid;
}
mutex_unlock(&fs_devices->device_list_mutex);
- if (copy_to_user(arg, &fi_args, sizeof(fi_args)))
- return -EFAULT;
+ if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
+ ret = -EFAULT;
- return 0;
+ kfree(fi_args);
+ return ret;
}
static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 66fa43dc3f0f..d77b67c4b275 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -24,185 +24,197 @@
#include "extent_io.h"
#include "locking.h"
-static inline void spin_nested(struct extent_buffer *eb)
-{
- spin_lock(&eb->lock);
-}
+void btrfs_assert_tree_read_locked(struct extent_buffer *eb);
/*
- * Setting a lock to blocking will drop the spinlock and set the
- * flag that forces other procs who want the lock to wait. After
- * this you can safely schedule with the lock held.
+ * if we currently have a spinning reader or writer lock
+ * (indicated by the rw flag) this will bump the count
+ * of blocking holders and drop the spinlock.
*/
-void btrfs_set_lock_blocking(struct extent_buffer *eb)
+void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw)
{
- if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
- set_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags);
- spin_unlock(&eb->lock);
+ if (rw == BTRFS_WRITE_LOCK) {
+ if (atomic_read(&eb->blocking_writers) == 0) {
+ WARN_ON(atomic_read(&eb->spinning_writers) != 1);
+ atomic_dec(&eb->spinning_writers);
+ btrfs_assert_tree_locked(eb);
+ atomic_inc(&eb->blocking_writers);
+ write_unlock(&eb->lock);
+ }
+ } else if (rw == BTRFS_READ_LOCK) {
+ btrfs_assert_tree_read_locked(eb);
+ atomic_inc(&eb->blocking_readers);
+ WARN_ON(atomic_read(&eb->spinning_readers) == 0);
+ atomic_dec(&eb->spinning_readers);
+ read_unlock(&eb->lock);
}
- /* exit with the spin lock released and the bit set */
+ return;
}
/*
- * clearing the blocking flag will take the spinlock again.
- * After this you can't safely schedule
+ * if we currently have a blocking lock, take the spinlock
+ * and drop our blocking count
*/
-void btrfs_clear_lock_blocking(struct extent_buffer *eb)
+void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
{
- if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
- spin_nested(eb);
- clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags);
- smp_mb__after_clear_bit();
+ if (rw == BTRFS_WRITE_LOCK_BLOCKING) {
+ BUG_ON(atomic_read(&eb->blocking_writers) != 1);
+ write_lock(&eb->lock);
+ WARN_ON(atomic_read(&eb->spinning_writers));
+ atomic_inc(&eb->spinning_writers);
+ if (atomic_dec_and_test(&eb->blocking_writers))
+ wake_up(&eb->write_lock_wq);
+ } else if (rw == BTRFS_READ_LOCK_BLOCKING) {
+ BUG_ON(atomic_read(&eb->blocking_readers) == 0);
+ read_lock(&eb->lock);
+ atomic_inc(&eb->spinning_readers);
+ if (atomic_dec_and_test(&eb->blocking_readers))
+ wake_up(&eb->read_lock_wq);
}
- /* exit with the spin lock held */
+ return;
}
/*
- * unfortunately, many of the places that currently set a lock to blocking
- * don't end up blocking for very long, and often they don't block
- * at all. For a dbench 50 run, if we don't spin on the blocking bit
- * at all, the context switch rate can jump up to 400,000/sec or more.
- *
- * So, we're still stuck with this crummy spin on the blocking bit,
- * at least until the most common causes of the short blocks
- * can be dealt with.
+ * take a spinning read lock. This will wait for any blocking
+ * writers
*/
-static int btrfs_spin_on_block(struct extent_buffer *eb)
+void btrfs_tree_read_lock(struct extent_buffer *eb)
{
- int i;
-
- for (i = 0; i < 512; i++) {
- if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
- return 1;
- if (need_resched())
- break;
- cpu_relax();
+again:
+ wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
+ read_lock(&eb->lock);
+ if (atomic_read(&eb->blocking_writers)) {
+ read_unlock(&eb->lock);
+ wait_event(eb->write_lock_wq,
+ atomic_read(&eb->blocking_writers) == 0);
+ goto again;
}
- return 0;
+ atomic_inc(&eb->read_locks);
+ atomic_inc(&eb->spinning_readers);
}
/*
- * This is somewhat different from trylock. It will take the
- * spinlock but if it finds the lock is set to blocking, it will
- * return without the lock held.
- *
- * returns 1 if it was able to take the lock and zero otherwise
- *
- * After this call, scheduling is not safe without first calling
- * btrfs_set_lock_blocking()
+ * returns 1 if we get the read lock and 0 if we don't
+ * this won't wait for blocking writers
*/
-int btrfs_try_spin_lock(struct extent_buffer *eb)
+int btrfs_try_tree_read_lock(struct extent_buffer *eb)
{
- int i;
+ if (atomic_read(&eb->blocking_writers))
+ return 0;
- if (btrfs_spin_on_block(eb)) {
- spin_nested(eb);
- if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
- return 1;
- spin_unlock(&eb->lock);
+ read_lock(&eb->lock);
+ if (atomic_read(&eb->blocking_writers)) {
+ read_unlock(&eb->lock);
+ return 0;
}
- /* spin for a bit on the BLOCKING flag */
- for (i = 0; i < 2; i++) {
- cpu_relax();
- if (!btrfs_spin_on_block(eb))
- break;
-
- spin_nested(eb);
- if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
- return 1;
- spin_unlock(&eb->lock);
- }
- return 0;
+ atomic_inc(&eb->read_locks);
+ atomic_inc(&eb->spinning_readers);
+ return 1;
}
/*
- * the autoremove wake function will return 0 if it tried to wake up
- * a process that was already awake, which means that process won't
- * count as an exclusive wakeup. The waitq code will continue waking
- * procs until it finds one that was actually sleeping.
- *
- * For btrfs, this isn't quite what we want. We want a single proc
- * to be notified that the lock is ready for taking. If that proc
- * already happen to be awake, great, it will loop around and try for
- * the lock.
- *
- * So, btrfs_wake_function always returns 1, even when the proc that we
- * tried to wake up was already awake.
+ * returns 1 if we get the read lock and 0 if we don't
+ * this won't wait for blocking writers or readers
*/
-static int btrfs_wake_function(wait_queue_t *wait, unsigned mode,
- int sync, void *key)
+int btrfs_try_tree_write_lock(struct extent_buffer *eb)
{
- autoremove_wake_function(wait, mode, sync, key);
+ if (atomic_read(&eb->blocking_writers) ||
+ atomic_read(&eb->blocking_readers))
+ return 0;
+ write_lock(&eb->lock);
+ if (atomic_read(&eb->blocking_writers) ||
+ atomic_read(&eb->blocking_readers)) {
+ write_unlock(&eb->lock);
+ return 0;
+ }
+ atomic_inc(&eb->write_locks);
+ atomic_inc(&eb->spinning_writers);
return 1;
}
/*
- * returns with the extent buffer spinlocked.
- *
- * This will spin and/or wait as required to take the lock, and then
- * return with the spinlock held.
- *
- * After this call, scheduling is not safe without first calling
- * btrfs_set_lock_blocking()
+ * drop a spinning read lock
+ */
+void btrfs_tree_read_unlock(struct extent_buffer *eb)
+{
+ btrfs_assert_tree_read_locked(eb);
+ WARN_ON(atomic_read(&eb->spinning_readers) == 0);
+ atomic_dec(&eb->spinning_readers);
+ atomic_dec(&eb->read_locks);
+ read_unlock(&eb->lock);
+}
+
+/*
+ * drop a blocking read lock
+ */
+void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
+{
+ btrfs_assert_tree_read_locked(eb);
+ WARN_ON(atomic_read(&eb->blocking_readers) == 0);
+ if (atomic_dec_and_test(&eb->blocking_readers))
+ wake_up(&eb->read_lock_wq);
+ atomic_dec(&eb->read_locks);
+}
+
+/*
+ * take a spinning write lock. This will wait for both
+ * blocking readers or writers
*/
int btrfs_tree_lock(struct extent_buffer *eb)
{
- DEFINE_WAIT(wait);
- wait.func = btrfs_wake_function;
-
- if (!btrfs_spin_on_block(eb))
- goto sleep;
-
- while(1) {
- spin_nested(eb);
-
- /* nobody is blocking, exit with the spinlock held */
- if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
- return 0;
-
- /*
- * we have the spinlock, but the real owner is blocking.
- * wait for them
- */
- spin_unlock(&eb->lock);
-
- /*
- * spin for a bit, and if the blocking flag goes away,
- * loop around
- */
- cpu_relax();
- if (btrfs_spin_on_block(eb))
- continue;
-sleep:
- prepare_to_wait_exclusive(&eb->lock_wq, &wait,
- TASK_UNINTERRUPTIBLE);
-
- if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
- schedule();
-
- finish_wait(&eb->lock_wq, &wait);
+again:
+ wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
+ wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
+ write_lock(&eb->lock);
+ if (atomic_read(&eb->blocking_readers)) {
+ write_unlock(&eb->lock);
+ wait_event(eb->read_lock_wq,
+ atomic_read(&eb->blocking_readers) == 0);
+ goto again;
}
+ if (atomic_read(&eb->blocking_writers)) {
+ write_unlock(&eb->lock);
+ wait_event(eb->write_lock_wq,
+ atomic_read(&eb->blocking_writers) == 0);
+ goto again;
+ }
+ WARN_ON(atomic_read(&eb->spinning_writers));
+ atomic_inc(&eb->spinning_writers);
+ atomic_inc(&eb->write_locks);
return 0;
}
+/*
+ * drop a spinning or a blocking write lock.
+ */
int btrfs_tree_unlock(struct extent_buffer *eb)
{
- /*
- * if we were a blocking owner, we don't have the spinlock held
- * just clear the bit and look for waiters
- */
- if (test_and_clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
- smp_mb__after_clear_bit();
- else
- spin_unlock(&eb->lock);
-
- if (waitqueue_active(&eb->lock_wq))
- wake_up(&eb->lock_wq);
+ int blockers = atomic_read(&eb->blocking_writers);
+
+ BUG_ON(blockers > 1);
+
+ btrfs_assert_tree_locked(eb);
+ atomic_dec(&eb->write_locks);
+
+ if (blockers) {
+ WARN_ON(atomic_read(&eb->spinning_writers));
+ atomic_dec(&eb->blocking_writers);
+ smp_wmb();
+ wake_up(&eb->write_lock_wq);
+ } else {
+ WARN_ON(atomic_read(&eb->spinning_writers) != 1);
+ atomic_dec(&eb->spinning_writers);
+ write_unlock(&eb->lock);
+ }
return 0;
}
void btrfs_assert_tree_locked(struct extent_buffer *eb)
{
- if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
- assert_spin_locked(&eb->lock);
+ BUG_ON(!atomic_read(&eb->write_locks));
+}
+
+void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
+{
+ BUG_ON(!atomic_read(&eb->read_locks));
}
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index 5c33a560a2f1..17247ddb81a0 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -19,11 +19,43 @@
#ifndef __BTRFS_LOCKING_
#define __BTRFS_LOCKING_
+#define BTRFS_WRITE_LOCK 1
+#define BTRFS_READ_LOCK 2
+#define BTRFS_WRITE_LOCK_BLOCKING 3
+#define BTRFS_READ_LOCK_BLOCKING 4
+
int btrfs_tree_lock(struct extent_buffer *eb);
int btrfs_tree_unlock(struct extent_buffer *eb);
int btrfs_try_spin_lock(struct extent_buffer *eb);
-void btrfs_set_lock_blocking(struct extent_buffer *eb);
-void btrfs_clear_lock_blocking(struct extent_buffer *eb);
+void btrfs_tree_read_lock(struct extent_buffer *eb);
+void btrfs_tree_read_unlock(struct extent_buffer *eb);
+void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb);
+void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw);
+void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw);
void btrfs_assert_tree_locked(struct extent_buffer *eb);
+int btrfs_try_tree_read_lock(struct extent_buffer *eb);
+int btrfs_try_tree_write_lock(struct extent_buffer *eb);
+
+static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw)
+{
+ if (rw == BTRFS_WRITE_LOCK || rw == BTRFS_WRITE_LOCK_BLOCKING)
+ btrfs_tree_unlock(eb);
+ else if (rw == BTRFS_READ_LOCK_BLOCKING)
+ btrfs_tree_read_unlock_blocking(eb);
+ else if (rw == BTRFS_READ_LOCK)
+ btrfs_tree_read_unlock(eb);
+ else
+ BUG();
+}
+
+static inline void btrfs_set_lock_blocking(struct extent_buffer *eb)
+{
+ btrfs_set_lock_blocking_rw(eb, BTRFS_WRITE_LOCK);
+}
+
+static inline void btrfs_clear_lock_blocking(struct extent_buffer *eb)
+{
+ btrfs_clear_lock_blocking_rw(eb, BTRFS_WRITE_LOCK_BLOCKING);
+}
#endif
diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c
deleted file mode 100644
index 82d569cb6267..000000000000
--- a/fs/btrfs/ref-cache.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2008 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/sort.h>
-#include "ctree.h"
-#include "ref-cache.h"
-#include "transaction.h"
-
-static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
- struct rb_node *node)
-{
- struct rb_node **p = &root->rb_node;
- struct rb_node *parent = NULL;
- struct btrfs_leaf_ref *entry;
-
- while (*p) {
- parent = *p;
- entry = rb_entry(parent, struct btrfs_leaf_ref, rb_node);
-
- if (bytenr < entry->bytenr)
- p = &(*p)->rb_left;
- else if (bytenr > entry->bytenr)
- p = &(*p)->rb_right;
- else
- return parent;
- }
-
- entry = rb_entry(node, struct btrfs_leaf_ref, rb_node);
- rb_link_node(node, parent, p);
- rb_insert_color(node, root);
- return NULL;
-}
-
-static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
-{
- struct rb_node *n = root->rb_node;
- struct btrfs_leaf_ref *entry;
-
- while (n) {
- entry = rb_entry(n, struct btrfs_leaf_ref, rb_node);
- WARN_ON(!entry->in_tree);
-
- if (bytenr < entry->bytenr)
- n = n->rb_left;
- else if (bytenr > entry->bytenr)
- n = n->rb_right;
- else
- return n;
- }
- return NULL;
-}
diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h
deleted file mode 100644
index 24f7001f6387..000000000000
--- a/fs/btrfs/ref-cache.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (C) 2008 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-#ifndef __REFCACHE__
-#define __REFCACHE__
-
-struct btrfs_extent_info {
- /* bytenr and num_bytes find the extent in the extent allocation tree */
- u64 bytenr;
- u64 num_bytes;
-
- /* objectid and offset find the back reference for the file */
- u64 objectid;
- u64 offset;
-};
-
-struct btrfs_leaf_ref {
- struct rb_node rb_node;
- struct btrfs_leaf_ref_tree *tree;
- int in_tree;
- atomic_t usage;
-
- u64 root_gen;
- u64 bytenr;
- u64 owner;
- u64 generation;
- int nritems;
-
- struct list_head list;
- struct btrfs_extent_info extents[];
-};
-
-static inline size_t btrfs_leaf_ref_size(int nr_extents)
-{
- return sizeof(struct btrfs_leaf_ref) +
- sizeof(struct btrfs_extent_info) * nr_extents;
-}
-#endif
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b1ef27cc673b..59bb1764273d 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1368,7 +1368,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
int ret;
if (!root->reloc_root)
- return 0;
+ goto out;
reloc_root = root->reloc_root;
root_item = &reloc_root->root_item;
@@ -1390,6 +1390,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
ret = btrfs_update_root(trans, root->fs_info->tree_root,
&reloc_root->root_key, root_item);
BUG_ON(ret);
+
+out:
return 0;
}
@@ -2142,10 +2144,11 @@ int prepare_to_merge(struct reloc_control *rc, int err)
u64 num_bytes = 0;
int ret;
- spin_lock(&root->fs_info->trans_lock);
+ mutex_lock(&root->fs_info->reloc_mutex);
rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
rc->merging_rsv_size += rc->nodes_relocated * 2;
- spin_unlock(&root->fs_info->trans_lock);
+ mutex_unlock(&root->fs_info->reloc_mutex);
+
again:
if (!err) {
num_bytes = rc->merging_rsv_size;
@@ -2214,9 +2217,16 @@ int merge_reloc_roots(struct reloc_control *rc)
int ret;
again:
root = rc->extent_root;
- spin_lock(&root->fs_info->trans_lock);
+
+ /*
+ * this serializes us with btrfs_record_root_in_transaction,
+ * we have to make sure nobody is in the middle of
+ * adding their roots to the list while we are
+ * doing this splice
+ */
+ mutex_lock(&root->fs_info->reloc_mutex);
list_splice_init(&rc->reloc_roots, &reloc_roots);
- spin_unlock(&root->fs_info->trans_lock);
+ mutex_unlock(&root->fs_info->reloc_mutex);
while (!list_empty(&reloc_roots)) {
found = 1;
@@ -2945,7 +2955,8 @@ static int relocate_file_extent_cluster(struct inode *inode,
page_cache_sync_readahead(inode->i_mapping,
ra, NULL, index,
last_index + 1 - index);
- page = grab_cache_page(inode->i_mapping, index);
+ page = find_or_create_page(inode->i_mapping, index,
+ GFP_NOFS);
if (!page) {
btrfs_delalloc_release_metadata(inode,
PAGE_CACHE_SIZE);
@@ -3590,17 +3601,19 @@ next:
static void set_reloc_control(struct reloc_control *rc)
{
struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
- spin_lock(&fs_info->trans_lock);
+
+ mutex_lock(&fs_info->reloc_mutex);
fs_info->reloc_ctl = rc;
- spin_unlock(&fs_info->trans_lock);
+ mutex_unlock(&fs_info->reloc_mutex);
}
static void unset_reloc_control(struct reloc_control *rc)
{
struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
- spin_lock(&fs_info->trans_lock);
+
+ mutex_lock(&fs_info->reloc_mutex);
fs_info->reloc_ctl = NULL;
- spin_unlock(&fs_info->trans_lock);
+ mutex_unlock(&fs_info->reloc_mutex);
}
static int check_extent_flags(u64 flags)
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index ebe45443de06..f4099904565a 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -71,13 +71,12 @@ out:
return ret;
}
-int btrfs_set_root_node(struct btrfs_root_item *item,
- struct extent_buffer *node)
+void btrfs_set_root_node(struct btrfs_root_item *item,
+ struct extent_buffer *node)
{
btrfs_set_root_bytenr(item, node->start);
btrfs_set_root_level(item, btrfs_header_level(node));
btrfs_set_root_generation(item, btrfs_header_generation(node));
- return 0;
}
/*
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index df50fd1eca8f..a8d03d5efb5d 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -16,13 +16,7 @@
* Boston, MA 021110-1307, USA.
*/
-#include <linux/sched.h>
-#include <linux/pagemap.h>
-#include <linux/writeback.h>
#include <linux/blkdev.h>
-#include <linux/rbtree.h>
-#include <linux/slab.h>
-#include <linux/workqueue.h>
#include "ctree.h"
#include "volumes.h"
#include "disk-io.h"
@@ -804,18 +798,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
- goto out;
-
- l = path->nodes[0];
- slot = path->slots[0];
- btrfs_item_key_to_cpu(l, &key, slot);
- if (key.objectid != logical) {
- ret = btrfs_previous_item(root, path, 0,
- BTRFS_EXTENT_ITEM_KEY);
- if (ret < 0)
- goto out;
- }
+ goto out_noplug;
+ /*
+ * we might miss half an extent here, but that doesn't matter,
+ * as it's only the prefetch
+ */
while (1) {
l = path->nodes[0];
slot = path->slots[0];
@@ -824,7 +812,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
if (ret == 0)
continue;
if (ret < 0)
- goto out;
+ goto out_noplug;
break;
}
@@ -906,15 +894,20 @@ again:
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto out;
-
- l = path->nodes[0];
- slot = path->slots[0];
- btrfs_item_key_to_cpu(l, &key, slot);
- if (key.objectid != logical) {
+ if (ret > 0) {
ret = btrfs_previous_item(root, path, 0,
BTRFS_EXTENT_ITEM_KEY);
if (ret < 0)
goto out;
+ if (ret > 0) {
+ /* there's no smaller item, so stick with the
+ * larger one */
+ btrfs_release_path(path);
+ ret = btrfs_search_slot(NULL, root, &key,
+ path, 0, 0);
+ if (ret < 0)
+ goto out;
+ }
}
while (1) {
@@ -989,6 +982,7 @@ next:
out:
blk_finish_plug(&plug);
+out_noplug:
btrfs_free_path(path);
return ret < 0 ? ret : 0;
}
@@ -1064,8 +1058,15 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
while (1) {
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
- goto out;
- ret = 0;
+ break;
+ if (ret > 0) {
+ if (path->slots[0] >=
+ btrfs_header_nritems(path->nodes[0])) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret)
+ break;
+ }
+ }
l = path->nodes[0];
slot = path->slots[0];
@@ -1075,7 +1076,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
if (found_key.objectid != sdev->dev->devid)
break;
- if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
+ if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY)
break;
if (found_key.offset >= end)
@@ -1104,7 +1105,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
cache = btrfs_lookup_block_group(fs_info, chunk_offset);
if (!cache) {
ret = -ENOENT;
- goto out;
+ break;
}
ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
chunk_offset, length);
@@ -1116,9 +1117,13 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
btrfs_release_path(path);
}
-out:
btrfs_free_path(path);
- return ret;
+
+ /*
+ * ret can still be 1 from search_slot or next_leaf,
+ * that's not an error
+ */
+ return ret < 0 ? ret : 0;
}
static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
@@ -1155,8 +1160,12 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
struct btrfs_fs_info *fs_info = root->fs_info;
mutex_lock(&fs_info->scrub_lock);
- if (fs_info->scrub_workers_refcnt == 0)
+ if (fs_info->scrub_workers_refcnt == 0) {
+ btrfs_init_workers(&fs_info->scrub_workers, "scrub",
+ fs_info->thread_pool_size, &fs_info->generic_worker);
+ fs_info->scrub_workers.idle_thresh = 4;
btrfs_start_workers(&fs_info->scrub_workers, 1);
+ }
++fs_info->scrub_workers_refcnt;
mutex_unlock(&fs_info->scrub_lock);
diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c
index c0f7ecaf1e79..bc1f6ad18442 100644
--- a/fs/btrfs/struct-funcs.c
+++ b/fs/btrfs/struct-funcs.c
@@ -50,36 +50,22 @@ u##bits btrfs_##name(struct extent_buffer *eb, \
unsigned long part_offset = (unsigned long)s; \
unsigned long offset = part_offset + offsetof(type, member); \
type *p; \
- /* ugly, but we want the fast path here */ \
- if (eb->map_token && offset >= eb->map_start && \
- offset + sizeof(((type *)0)->member) <= eb->map_start + \
- eb->map_len) { \
- p = (type *)(eb->kaddr + part_offset - eb->map_start); \
- return le##bits##_to_cpu(p->member); \
- } \
- { \
- int err; \
- char *map_token; \
- char *kaddr; \
- int unmap_on_exit = (eb->map_token == NULL); \
- unsigned long map_start; \
- unsigned long map_len; \
- u##bits res; \
- err = map_extent_buffer(eb, offset, \
- sizeof(((type *)0)->member), \
- &map_token, &kaddr, \
- &map_start, &map_len, KM_USER1); \
- if (err) { \
- __le##bits leres; \
- read_eb_member(eb, s, type, member, &leres); \
- return le##bits##_to_cpu(leres); \
- } \
- p = (type *)(kaddr + part_offset - map_start); \
- res = le##bits##_to_cpu(p->member); \
- if (unmap_on_exit) \
- unmap_extent_buffer(eb, map_token, KM_USER1); \
- return res; \
- } \
+ int err; \
+ char *kaddr; \
+ unsigned long map_start; \
+ unsigned long map_len; \
+ u##bits res; \
+ err = map_private_extent_buffer(eb, offset, \
+ sizeof(((type *)0)->member), \
+ &kaddr, &map_start, &map_len); \
+ if (err) { \
+ __le##bits leres; \
+ read_eb_member(eb, s, type, member, &leres); \
+ return le##bits##_to_cpu(leres); \
+ } \
+ p = (type *)(kaddr + part_offset - map_start); \
+ res = le##bits##_to_cpu(p->member); \
+ return res; \
} \
void btrfs_set_##name(struct extent_buffer *eb, \
type *s, u##bits val) \
@@ -87,36 +73,21 @@ void btrfs_set_##name(struct extent_buffer *eb, \
unsigned long part_offset = (unsigned long)s; \
unsigned long offset = part_offset + offsetof(type, member); \
type *p; \
- /* ugly, but we want the fast path here */ \
- if (eb->map_token && offset >= eb->map_start && \
- offset + sizeof(((type *)0)->member) <= eb->map_start + \
- eb->map_len) { \
- p = (type *)(eb->kaddr + part_offset - eb->map_start); \
- p->member = cpu_to_le##bits(val); \
- return; \
- } \
- { \
- int err; \
- char *map_token; \
- char *kaddr; \
- int unmap_on_exit = (eb->map_token == NULL); \
- unsigned long map_start; \
- unsigned long map_len; \
- err = map_extent_buffer(eb, offset, \
- sizeof(((type *)0)->member), \
- &map_token, &kaddr, \
- &map_start, &map_len, KM_USER1); \
- if (err) { \
- __le##bits val2; \
- val2 = cpu_to_le##bits(val); \
- write_eb_member(eb, s, type, member, &val2); \
- return; \
- } \
- p = (type *)(kaddr + part_offset - map_start); \
- p->member = cpu_to_le##bits(val); \
- if (unmap_on_exit) \
- unmap_extent_buffer(eb, map_token, KM_USER1); \
- } \
+ int err; \
+ char *kaddr; \
+ unsigned long map_start; \
+ unsigned long map_len; \
+ err = map_private_extent_buffer(eb, offset, \
+ sizeof(((type *)0)->member), \
+ &kaddr, &map_start, &map_len); \
+ if (err) { \
+ __le##bits val2; \
+ val2 = cpu_to_le##bits(val); \
+ write_eb_member(eb, s, type, member, &val2); \
+ return; \
+ } \
+ p = (type *)(kaddr + part_offset - map_start); \
+ p->member = cpu_to_le##bits(val); \
}
#include "ctree.h"
@@ -125,15 +96,6 @@ void btrfs_node_key(struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr)
{
unsigned long ptr = btrfs_node_key_ptr_offset(nr);
- if (eb->map_token && ptr >= eb->map_start &&
- ptr + sizeof(*disk_key) <= eb->map_start + eb->map_len) {
- memcpy(disk_key, eb->kaddr + ptr - eb->map_start,
- sizeof(*disk_key));
- return;
- } else if (eb->map_token) {
- unmap_extent_buffer(eb, eb->map_token, KM_USER1);
- eb->map_token = NULL;
- }
read_eb_member(eb, (struct btrfs_key_ptr *)ptr,
struct btrfs_key_ptr, key, disk_key);
}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 117e74e3604b..15634d4648d7 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -723,6 +723,12 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
seq_puts(seq, ",clear_cache");
if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
seq_puts(seq, ",user_subvol_rm_allowed");
+ if (btrfs_test_opt(root, ENOSPC_DEBUG))
+ seq_puts(seq, ",enospc_debug");
+ if (btrfs_test_opt(root, AUTO_DEFRAG))
+ seq_puts(seq, ",autodefrag");
+ if (btrfs_test_opt(root, INODE_MAP_CACHE))
+ seq_puts(seq, ",inode_cache");
return 0;
}
@@ -825,7 +831,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
} else {
char b[BDEVNAME_SIZE];
- s->s_flags = flags;
+ s->s_flags = flags | MS_NOSEC;
strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
error = btrfs_fill_super(s, fs_devices, data,
flags & MS_SILENT ? 1 : 0);
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index c3c223ae6691..daac9ae6d731 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -28,152 +28,6 @@
#include "disk-io.h"
#include "transaction.h"
-static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- (unsigned long long)btrfs_root_used(&root->root_item));
-}
-
-static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- (unsigned long long)btrfs_root_limit(&root->root_item));
-}
-
-static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf)
-{
-
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- (unsigned long long)btrfs_super_bytes_used(&fs->super_copy));
-}
-
-static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- (unsigned long long)btrfs_super_total_bytes(&fs->super_copy));
-}
-
-static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- (unsigned long long)btrfs_super_sectorsize(&fs->super_copy));
-}
-
-/* this is for root attrs (subvols/snapshots) */
-struct btrfs_root_attr {
- struct attribute attr;
- ssize_t (*show)(struct btrfs_root *, char *);
- ssize_t (*store)(struct btrfs_root *, const char *, size_t);
-};
-
-#define ROOT_ATTR(name, mode, show, store) \
-static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, \
- show, store)
-
-ROOT_ATTR(blocks_used, 0444, root_blocks_used_show, NULL);
-ROOT_ATTR(block_limit, 0644, root_block_limit_show, NULL);
-
-static struct attribute *btrfs_root_attrs[] = {
- &btrfs_root_attr_blocks_used.attr,
- &btrfs_root_attr_block_limit.attr,
- NULL,
-};
-
-/* this is for super attrs (actual full fs) */
-struct btrfs_super_attr {
- struct attribute attr;
- ssize_t (*show)(struct btrfs_fs_info *, char *);
- ssize_t (*store)(struct btrfs_fs_info *, const char *, size_t);
-};
-
-#define SUPER_ATTR(name, mode, show, store) \
-static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, \
- show, store)
-
-SUPER_ATTR(blocks_used, 0444, super_blocks_used_show, NULL);
-SUPER_ATTR(total_blocks, 0444, super_total_blocks_show, NULL);
-SUPER_ATTR(blocksize, 0444, super_blocksize_show, NULL);
-
-static struct attribute *btrfs_super_attrs[] = {
- &btrfs_super_attr_blocks_used.attr,
- &btrfs_super_attr_total_blocks.attr,
- &btrfs_super_attr_blocksize.attr,
- NULL,
-};
-
-static ssize_t btrfs_super_attr_show(struct kobject *kobj,
- struct attribute *attr, char *buf)
-{
- struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
- super_kobj);
- struct btrfs_super_attr *a = container_of(attr,
- struct btrfs_super_attr,
- attr);
-
- return a->show ? a->show(fs, buf) : 0;
-}
-
-static ssize_t btrfs_super_attr_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buf, size_t len)
-{
- struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
- super_kobj);
- struct btrfs_super_attr *a = container_of(attr,
- struct btrfs_super_attr,
- attr);
-
- return a->store ? a->store(fs, buf, len) : 0;
-}
-
-static ssize_t btrfs_root_attr_show(struct kobject *kobj,
- struct attribute *attr, char *buf)
-{
- struct btrfs_root *root = container_of(kobj, struct btrfs_root,
- root_kobj);
- struct btrfs_root_attr *a = container_of(attr,
- struct btrfs_root_attr,
- attr);
-
- return a->show ? a->show(root, buf) : 0;
-}
-
-static ssize_t btrfs_root_attr_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buf, size_t len)
-{
- struct btrfs_root *root = container_of(kobj, struct btrfs_root,
- root_kobj);
- struct btrfs_root_attr *a = container_of(attr,
- struct btrfs_root_attr,
- attr);
- return a->store ? a->store(root, buf, len) : 0;
-}
-
-static void btrfs_super_release(struct kobject *kobj)
-{
- struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
- super_kobj);
- complete(&fs->kobj_unregister);
-}
-
-static void btrfs_root_release(struct kobject *kobj)
-{
- struct btrfs_root *root = container_of(kobj, struct btrfs_root,
- root_kobj);
- complete(&root->kobj_unregister);
-}
-
-static const struct sysfs_ops btrfs_super_attr_ops = {
- .show = btrfs_super_attr_show,
- .store = btrfs_super_attr_store,
-};
-
-static const struct sysfs_ops btrfs_root_attr_ops = {
- .show = btrfs_root_attr_show,
- .store = btrfs_root_attr_store,
-};
-
/* /sys/fs/btrfs/ entry */
static struct kset *btrfs_kset;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index dd719662340e..7dc36fab4afc 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -126,28 +126,85 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail)
* to make sure the old root from before we joined the transaction is deleted
* when the transaction commits
*/
-int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
+static int record_root_in_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
if (root->ref_cows && root->last_trans < trans->transid) {
WARN_ON(root == root->fs_info->extent_root);
WARN_ON(root->commit_root != root->node);
+ /*
+ * see below for in_trans_setup usage rules
+ * we have the reloc mutex held now, so there
+ * is only one writer in this function
+ */
+ root->in_trans_setup = 1;
+
+ /* make sure readers find in_trans_setup before
+ * they find our root->last_trans update
+ */
+ smp_wmb();
+
spin_lock(&root->fs_info->fs_roots_radix_lock);
if (root->last_trans == trans->transid) {
spin_unlock(&root->fs_info->fs_roots_radix_lock);
return 0;
}
- root->last_trans = trans->transid;
radix_tree_tag_set(&root->fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid,
BTRFS_ROOT_TRANS_TAG);
spin_unlock(&root->fs_info->fs_roots_radix_lock);
+ root->last_trans = trans->transid;
+
+ /* this is pretty tricky. We don't want to
+ * take the relocation lock in btrfs_record_root_in_trans
+ * unless we're really doing the first setup for this root in
+ * this transaction.
+ *
+ * Normally we'd use root->last_trans as a flag to decide
+ * if we want to take the expensive mutex.
+ *
+ * But, we have to set root->last_trans before we
+ * init the relocation root, otherwise, we trip over warnings
+ * in ctree.c. The solution used here is to flag ourselves
+ * with root->in_trans_setup. When this is 1, we're still
+ * fixing up the reloc trees and everyone must wait.
+ *
+ * When this is zero, they can trust root->last_trans and fly
+ * through btrfs_record_root_in_trans without having to take the
+ * lock. smp_wmb() makes sure that all the writes above are
+ * done before we pop in the zero below
+ */
btrfs_init_reloc_root(trans, root);
+ smp_wmb();
+ root->in_trans_setup = 0;
}
return 0;
}
+
+int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{
+ if (!root->ref_cows)
+ return 0;
+
+ /*
+ * see record_root_in_trans for comments about in_trans_setup usage
+ * and barriers
+ */
+ smp_rmb();
+ if (root->last_trans == trans->transid &&
+ !root->in_trans_setup)
+ return 0;
+
+ mutex_lock(&root->fs_info->reloc_mutex);
+ record_root_in_trans(trans, root);
+ mutex_unlock(&root->fs_info->reloc_mutex);
+
+ return 0;
+}
+
/* wait for commit against the current transaction to become unblocked
* when this is done, it is safe to start a new transaction, but the current
* transaction might not be fully on disk.
@@ -159,17 +216,11 @@ static void wait_current_trans(struct btrfs_root *root)
spin_lock(&root->fs_info->trans_lock);
cur_trans = root->fs_info->running_transaction;
if (cur_trans && cur_trans->blocked) {
- DEFINE_WAIT(wait);
atomic_inc(&cur_trans->use_count);
spin_unlock(&root->fs_info->trans_lock);
- while (1) {
- prepare_to_wait(&root->fs_info->transaction_wait, &wait,
- TASK_UNINTERRUPTIBLE);
- if (!cur_trans->blocked)
- break;
- schedule();
- }
- finish_wait(&root->fs_info->transaction_wait, &wait);
+
+ wait_event(root->fs_info->transaction_wait,
+ !cur_trans->blocked);
put_transaction(cur_trans);
} else {
spin_unlock(&root->fs_info->trans_lock);
@@ -203,7 +254,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
{
struct btrfs_trans_handle *h;
struct btrfs_transaction *cur_trans;
- int retries = 0;
+ u64 num_bytes = 0;
int ret;
if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
@@ -217,6 +268,19 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
h->block_rsv = NULL;
goto got_it;
}
+
+ /*
+ * Do the reservation before we join the transaction so we can do all
+ * the appropriate flushing if need be.
+ */
+ if (num_items > 0 && root != root->fs_info->chunk_root) {
+ num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
+ ret = btrfs_block_rsv_add(NULL, root,
+ &root->fs_info->trans_block_rsv,
+ num_bytes);
+ if (ret)
+ return ERR_PTR(ret);
+ }
again:
h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
if (!h)
@@ -253,24 +317,9 @@ again:
goto again;
}
- if (num_items > 0) {
- ret = btrfs_trans_reserve_metadata(h, root, num_items);
- if (ret == -EAGAIN && !retries) {
- retries++;
- btrfs_commit_transaction(h, root);
- goto again;
- } else if (ret == -EAGAIN) {
- /*
- * We have already retried and got EAGAIN, so really we
- * don't have space, so set ret to -ENOSPC.
- */
- ret = -ENOSPC;
- }
-
- if (ret < 0) {
- btrfs_end_transaction(h, root);
- return ERR_PTR(ret);
- }
+ if (num_bytes) {
+ h->block_rsv = &root->fs_info->trans_block_rsv;
+ h->bytes_reserved = num_bytes;
}
got_it:
@@ -302,19 +351,10 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root
}
/* wait for a transaction commit to be fully complete */
-static noinline int wait_for_commit(struct btrfs_root *root,
+static noinline void wait_for_commit(struct btrfs_root *root,
struct btrfs_transaction *commit)
{
- DEFINE_WAIT(wait);
- while (!commit->commit_done) {
- prepare_to_wait(&commit->commit_wait, &wait,
- TASK_UNINTERRUPTIBLE);
- if (commit->commit_done)
- break;
- schedule();
- }
- finish_wait(&commit->commit_wait, &wait);
- return 0;
+ wait_event(commit->commit_wait, commit->commit_done);
}
int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
@@ -349,7 +389,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
list) {
if (t->in_commit) {
if (t->commit_done)
- goto out;
+ break;
cur_trans = t;
atomic_inc(&cur_trans->use_count);
break;
@@ -442,10 +482,17 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
}
if (lock && cur_trans->blocked && !cur_trans->in_commit) {
- if (throttle)
+ if (throttle) {
+ /*
+ * We may race with somebody else here so end up having
+ * to call end_transaction on ourselves again, so inc
+ * our use_count.
+ */
+ trans->use_count++;
return btrfs_commit_transaction(trans, root);
- else
+ } else {
wake_up_process(info->transaction_kthread);
+ }
}
WARN_ON(cur_trans != info->running_transaction);
@@ -882,7 +929,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
parent = dget_parent(dentry);
parent_inode = parent->d_inode;
parent_root = BTRFS_I(parent_inode)->root;
- btrfs_record_root_in_trans(trans, parent_root);
+ record_root_in_trans(trans, parent_root);
/*
* insert the directory item
@@ -900,7 +947,16 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
ret = btrfs_update_inode(trans, parent_root, parent_inode);
BUG_ON(ret);
- btrfs_record_root_in_trans(trans, root);
+ /*
+ * pull in the delayed directory update
+ * and the delayed inode item
+ * otherwise we corrupt the FS during
+ * snapshot
+ */
+ ret = btrfs_run_delayed_items(trans, root);
+ BUG_ON(ret);
+
+ record_root_in_trans(trans, root);
btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
btrfs_check_and_init_root_item(new_root_item);
@@ -961,14 +1017,6 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
int ret;
list_for_each_entry(pending, head, list) {
- /*
- * We must deal with the delayed items before creating
- * snapshots, or we will create a snapthot with inconsistent
- * information.
- */
- ret = btrfs_run_delayed_items(trans, fs_info->fs_root);
- BUG_ON(ret);
-
ret = create_pending_snapshot(trans, fs_info, pending);
BUG_ON(ret);
}
@@ -1022,22 +1070,7 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info)
static void wait_current_trans_commit_start(struct btrfs_root *root,
struct btrfs_transaction *trans)
{
- DEFINE_WAIT(wait);
-
- if (trans->in_commit)
- return;
-
- while (1) {
- prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait,
- TASK_UNINTERRUPTIBLE);
- if (trans->in_commit) {
- finish_wait(&root->fs_info->transaction_blocked_wait,
- &wait);
- break;
- }
- schedule();
- finish_wait(&root->fs_info->transaction_blocked_wait, &wait);
- }
+ wait_event(root->fs_info->transaction_blocked_wait, trans->in_commit);
}
/*
@@ -1047,24 +1080,8 @@ static void wait_current_trans_commit_start(struct btrfs_root *root,
static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
struct btrfs_transaction *trans)
{
- DEFINE_WAIT(wait);
-
- if (trans->commit_done || (trans->in_commit && !trans->blocked))
- return;
-
- while (1) {
- prepare_to_wait(&root->fs_info->transaction_wait, &wait,
- TASK_UNINTERRUPTIBLE);
- if (trans->commit_done ||
- (trans->in_commit && !trans->blocked)) {
- finish_wait(&root->fs_info->transaction_wait,
- &wait);
- break;
- }
- schedule();
- finish_wait(&root->fs_info->transaction_wait,
- &wait);
- }
+ wait_event(root->fs_info->transaction_wait,
+ trans->commit_done || (trans->in_commit && !trans->blocked));
}
/*
@@ -1118,8 +1135,11 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
wait_current_trans_commit_start_and_unblock(root, cur_trans);
else
wait_current_trans_commit_start(root, cur_trans);
- put_transaction(cur_trans);
+ if (current->journal_info == trans)
+ current->journal_info = NULL;
+
+ put_transaction(cur_trans);
return 0;
}
@@ -1168,8 +1188,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
atomic_inc(&cur_trans->use_count);
btrfs_end_transaction(trans, root);
- ret = wait_for_commit(root, cur_trans);
- BUG_ON(ret);
+ wait_for_commit(root, cur_trans);
put_transaction(cur_trans);
@@ -1238,21 +1257,42 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
schedule_timeout(1);
finish_wait(&cur_trans->writer_wait, &wait);
- spin_lock(&root->fs_info->trans_lock);
- root->fs_info->trans_no_join = 1;
- spin_unlock(&root->fs_info->trans_lock);
} while (atomic_read(&cur_trans->num_writers) > 1 ||
(should_grow && cur_trans->num_joined != joined));
- ret = create_pending_snapshots(trans, root->fs_info);
- BUG_ON(ret);
+ /*
+ * Ok now we need to make sure to block out any other joins while we
+ * commit the transaction. We could have started a join before setting
+ * no_join so make sure to wait for num_writers to == 1 again.
+ */
+ spin_lock(&root->fs_info->trans_lock);
+ root->fs_info->trans_no_join = 1;
+ spin_unlock(&root->fs_info->trans_lock);
+ wait_event(cur_trans->writer_wait,
+ atomic_read(&cur_trans->num_writers) == 1);
+
+ /*
+ * the reloc mutex makes sure that we stop
+ * the balancing code from coming in and moving
+ * extents around in the middle of the commit
+ */
+ mutex_lock(&root->fs_info->reloc_mutex);
ret = btrfs_run_delayed_items(trans, root);
BUG_ON(ret);
+ ret = create_pending_snapshots(trans, root->fs_info);
+ BUG_ON(ret);
+
ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
BUG_ON(ret);
+ /*
+ * make sure none of the code above managed to slip in a
+ * delayed item
+ */
+ btrfs_assert_delayed_root_empty(root);
+
WARN_ON(cur_trans != trans->transaction);
btrfs_scrub_pause(root);
@@ -1309,6 +1349,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
root->fs_info->running_transaction = NULL;
root->fs_info->trans_no_join = 0;
spin_unlock(&root->fs_info->trans_lock);
+ mutex_unlock(&root->fs_info->reloc_mutex);
wake_up(&root->fs_info->transaction_wait);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 592396c6dc47..babee65f8eda 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1617,7 +1617,8 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
return 0;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
nritems = btrfs_header_nritems(eb);
for (i = 0; i < nritems; i++) {
@@ -1723,15 +1724,17 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
return -ENOMEM;
if (*level == 1) {
- wc->process_func(root, next, wc, ptr_gen);
+ ret = wc->process_func(root, next, wc, ptr_gen);
+ if (ret)
+ return ret;
path->slots[*level]++;
if (wc->free) {
btrfs_read_buffer(next, ptr_gen);
btrfs_tree_lock(next);
- clean_tree_block(trans, root, next);
btrfs_set_lock_blocking(next);
+ clean_tree_block(trans, root, next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
@@ -1788,16 +1791,19 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
parent = path->nodes[*level + 1];
root_owner = btrfs_header_owner(parent);
- wc->process_func(root, path->nodes[*level], wc,
+ ret = wc->process_func(root, path->nodes[*level], wc,
btrfs_header_generation(path->nodes[*level]));
+ if (ret)
+ return ret;
+
if (wc->free) {
struct extent_buffer *next;
next = path->nodes[*level];
btrfs_tree_lock(next);
- clean_tree_block(trans, root, next);
btrfs_set_lock_blocking(next);
+ clean_tree_block(trans, root, next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
@@ -1864,8 +1870,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
next = path->nodes[orig_level];
btrfs_tree_lock(next);
- clean_tree_block(trans, log, next);
btrfs_set_lock_blocking(next);
+ clean_tree_block(trans, log, next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
@@ -3177,7 +3183,7 @@ again:
tmp_key.offset = (u64)-1;
wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
- BUG_ON(!wc.replay_dest);
+ BUG_ON(IS_ERR_OR_NULL(wc.replay_dest));
wc.replay_dest->log_root = log;
btrfs_record_root_in_trans(trans, wc.replay_dest);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index da541dfca2e3..53875ae73ad4 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -689,12 +689,8 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
transid = btrfs_super_generation(disk_super);
if (disk_super->label[0])
printk(KERN_INFO "device label %s ", disk_super->label);
- else {
- /* FIXME, make a readl uuid parser */
- printk(KERN_INFO "device fsid %llx-%llx ",
- *(unsigned long long *)disk_super->fsid,
- *(unsigned long long *)(disk_super->fsid + 8));
- }
+ else
+ printk(KERN_INFO "device fsid %pU ", disk_super->fsid);
printk(KERN_CONT "devid %llu transid %llu %s\n",
(unsigned long long)devid, (unsigned long long)transid, path);
ret = device_list_add(path, disk_super, devid, fs_devices_ret);
@@ -1041,7 +1037,8 @@ static noinline int find_next_chunk(struct btrfs_root *root,
struct btrfs_key found_key;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
key.objectid = objectid;
key.offset = (u64)-1;
@@ -2065,8 +2062,10 @@ int btrfs_balance(struct btrfs_root *dev_root)
/* step two, relocate all the chunks */
path = btrfs_alloc_path();
- BUG_ON(!path);
-
+ if (!path) {
+ ret = -ENOMEM;
+ goto error;
+ }
key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
key.offset = (u64)-1;
key.type = BTRFS_CHUNK_ITEM_KEY;
@@ -2102,7 +2101,8 @@ int btrfs_balance(struct btrfs_root *dev_root)
chunk_root->root_key.objectid,
found_key.objectid,
found_key.offset);
- BUG_ON(ret && ret != -ENOSPC);
+ if (ret && ret != -ENOSPC)
+ goto error;
key.offset = found_key.offset - 1;
}
ret = 0;
@@ -2664,7 +2664,8 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
ret = find_next_chunk(fs_info->chunk_root,
BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset);
- BUG_ON(ret);
+ if (ret)
+ return ret;
alloc_profile = BTRFS_BLOCK_GROUP_METADATA |
(fs_info->metadata_alloc_profile &
@@ -3598,7 +3599,7 @@ int btrfs_read_sys_array(struct btrfs_root *root)
if (!sb)
return -ENOMEM;
btrfs_set_buffer_uptodate(sb);
- btrfs_set_buffer_lockdep_class(sb, 0);
+ btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
array_size = btrfs_super_sys_array_size(super_copy);
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 5366fe452ab0..d733b9cfea34 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -102,43 +102,57 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
- /* first lets see if we already have this xattr */
- di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
- strlen(name), -1);
- if (IS_ERR(di)) {
- ret = PTR_ERR(di);
- goto out;
- }
-
- /* ok we already have this xattr, lets remove it */
- if (di) {
- /* if we want create only exit */
- if (flags & XATTR_CREATE) {
- ret = -EEXIST;
+ if (flags & XATTR_REPLACE) {
+ di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
+ name_len, -1);
+ if (IS_ERR(di)) {
+ ret = PTR_ERR(di);
+ goto out;
+ } else if (!di) {
+ ret = -ENODATA;
goto out;
}
-
ret = btrfs_delete_one_dir_name(trans, root, path, di);
- BUG_ON(ret);
+ if (ret)
+ goto out;
btrfs_release_path(path);
+ }
- /* if we don't have a value then we are removing the xattr */
- if (!value)
+again:
+ ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
+ name, name_len, value, size);
+ if (ret == -EEXIST) {
+ if (flags & XATTR_CREATE)
goto out;
- } else {
+ /*
+ * We can't use the path we already have since we won't have the
+ * proper locking for a delete, so release the path and
+ * re-lookup to delete the thing.
+ */
btrfs_release_path(path);
+ di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
+ name, name_len, -1);
+ if (IS_ERR(di)) {
+ ret = PTR_ERR(di);
+ goto out;
+ } else if (!di) {
+ /* Shouldn't happen but just in case... */
+ btrfs_release_path(path);
+ goto again;
+ }
- if (flags & XATTR_REPLACE) {
- /* we couldn't find the attr to replace */
- ret = -ENODATA;
+ ret = btrfs_delete_one_dir_name(trans, root, path, di);
+ if (ret)
goto out;
+
+ /*
+ * We have a value to set, so go back and try to insert it now.
+ */
+ if (value) {
+ btrfs_release_path(path);
+ goto again;
}
}
-
- /* ok we have to create a completely new xattr */
- ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
- name, name_len, value, size);
- BUG_ON(ret);
out:
btrfs_free_path(path);
return ret;