summaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/Makefile2
-rw-r--r--fs/btrfs/async-thread.c9
-rw-r--r--fs/btrfs/backref.c30
-rw-r--r--fs/btrfs/backref.h3
-rw-r--r--fs/btrfs/btrfs_inode.h14
-rw-r--r--fs/btrfs/ctree.c276
-rw-r--r--fs/btrfs/ctree.h303
-rw-r--r--fs/btrfs/delayed-inode.c23
-rw-r--r--fs/btrfs/delayed-inode.h2
-rw-r--r--fs/btrfs/delayed-ref.c56
-rw-r--r--fs/btrfs/delayed-ref.h62
-rw-r--r--fs/btrfs/disk-io.c142
-rw-r--r--fs/btrfs/disk-io.h6
-rw-r--r--fs/btrfs/extent-tree.c358
-rw-r--r--fs/btrfs/extent_io.c58
-rw-r--r--fs/btrfs/file-item.c4
-rw-r--r--fs/btrfs/free-space-cache.c2
-rw-r--r--fs/btrfs/inode.c38
-rw-r--r--fs/btrfs/ioctl.c357
-rw-r--r--fs/btrfs/ioctl.h70
-rw-r--r--fs/btrfs/locking.c14
-rw-r--r--fs/btrfs/qgroup.c1571
-rw-r--r--fs/btrfs/relocation.c3
-rw-r--r--fs/btrfs/struct-funcs.c196
-rw-r--r--fs/btrfs/super.c28
-rw-r--r--fs/btrfs/transaction.c84
-rw-r--r--fs/btrfs/transaction.h12
-rw-r--r--fs/btrfs/tree-log.c4
-rw-r--r--fs/btrfs/volumes.c25
-rw-r--r--fs/btrfs/volumes.h4
30 files changed, 3146 insertions, 610 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index f740644bb5a5..d7fcdba141a2 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -8,7 +8,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
- reada.o backref.o ulist.o send.o
+ reada.o backref.o ulist.o qgroup.o send.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 42704149b723..58b7d14b08ee 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -206,10 +206,17 @@ static noinline void run_ordered_completions(struct btrfs_workers *workers,
work->ordered_func(work);
- /* now take the lock again and call the freeing code */
+ /* now take the lock again and drop our item from the list */
spin_lock(&workers->order_lock);
list_del(&work->order_list);
+ spin_unlock(&workers->order_lock);
+
+ /*
+ * we don't want to call the ordered free functions
+ * with the lock held though
+ */
work->ordered_free(work);
+ spin_lock(&workers->order_lock);
}
spin_unlock(&workers->order_lock);
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index e99fe0e31da2..a256f3b2a845 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -773,9 +773,8 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
*/
static int find_parent_nodes(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
- u64 delayed_ref_seq, u64 time_seq,
- struct ulist *refs, struct ulist *roots,
- const u64 *extent_item_pos)
+ u64 time_seq, struct ulist *refs,
+ struct ulist *roots, const u64 *extent_item_pos)
{
struct btrfs_key key;
struct btrfs_path *path;
@@ -837,7 +836,7 @@ again:
btrfs_put_delayed_ref(&head->node);
goto again;
}
- ret = __add_delayed_refs(head, delayed_ref_seq,
+ ret = __add_delayed_refs(head, time_seq,
&prefs_delayed);
mutex_unlock(&head->mutex);
if (ret) {
@@ -981,8 +980,7 @@ static void free_leaf_list(struct ulist *blocks)
*/
static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
- u64 delayed_ref_seq, u64 time_seq,
- struct ulist **leafs,
+ u64 time_seq, struct ulist **leafs,
const u64 *extent_item_pos)
{
struct ulist *tmp;
@@ -997,7 +995,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
return -ENOMEM;
}
- ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq,
+ ret = find_parent_nodes(trans, fs_info, bytenr,
time_seq, *leafs, tmp, extent_item_pos);
ulist_free(tmp);
@@ -1024,8 +1022,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
*/
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
- u64 delayed_ref_seq, u64 time_seq,
- struct ulist **roots)
+ u64 time_seq, struct ulist **roots)
{
struct ulist *tmp;
struct ulist_node *node = NULL;
@@ -1043,7 +1040,7 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
ULIST_ITER_INIT(&uiter);
while (1) {
- ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq,
+ ret = find_parent_nodes(trans, fs_info, bytenr,
time_seq, tmp, *roots, NULL);
if (ret < 0 && ret != -ENOENT) {
ulist_free(tmp);
@@ -1376,11 +1373,9 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
struct ulist *roots = NULL;
struct ulist_node *ref_node = NULL;
struct ulist_node *root_node = NULL;
- struct seq_list seq_elem = {};
struct seq_list tree_mod_seq_elem = {};
struct ulist_iterator ref_uiter;
struct ulist_iterator root_uiter;
- struct btrfs_delayed_ref_root *delayed_refs = NULL;
pr_debug("resolving all inodes for extent %llu\n",
extent_item_objectid);
@@ -1391,16 +1386,11 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
trans = btrfs_join_transaction(fs_info->extent_root);
if (IS_ERR(trans))
return PTR_ERR(trans);
-
- delayed_refs = &trans->transaction->delayed_refs;
- spin_lock(&delayed_refs->lock);
- btrfs_get_delayed_seq(delayed_refs, &seq_elem);
- spin_unlock(&delayed_refs->lock);
btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
}
ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
- seq_elem.seq, tree_mod_seq_elem.seq, &refs,
+ tree_mod_seq_elem.seq, &refs,
&extent_item_pos);
if (ret)
goto out;
@@ -1408,8 +1398,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
ULIST_ITER_INIT(&ref_uiter);
while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
ret = btrfs_find_all_roots(trans, fs_info, ref_node->val,
- seq_elem.seq,
- tree_mod_seq_elem.seq, &roots);
+ tree_mod_seq_elem.seq, &roots);
if (ret)
break;
ULIST_ITER_INIT(&root_uiter);
@@ -1431,7 +1420,6 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
out:
if (!search_commit_root) {
btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
- btrfs_put_delayed_seq(delayed_refs, &seq_elem);
btrfs_end_transaction(trans, fs_info->extent_root);
}
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 1a765792fbf2..032f4dc7eab8 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -59,8 +59,7 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
- u64 delayed_ref_seq, u64 time_seq,
- struct ulist **roots);
+ u64 time_seq, struct ulist **roots);
char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
struct btrfs_inode_ref *iref, struct extent_buffer *eb,
u64 parent, char *dest, u32 size);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 12394a90d60f..5b2ad6bc4fe7 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -87,9 +87,6 @@ struct btrfs_inode {
/* node for the red-black tree that links inodes in subvolume root */
struct rb_node rb_node;
- /* the space_info for where this inode's data allocations are done */
- struct btrfs_space_info *space_info;
-
unsigned long runtime_flags;
/* full 64 bit generation number, struct vfs_inode doesn't have a big
@@ -191,11 +188,14 @@ static inline void btrfs_i_size_write(struct inode *inode, u64 size)
BTRFS_I(inode)->disk_i_size = size;
}
-static inline bool btrfs_is_free_space_inode(struct btrfs_root *root,
- struct inode *inode)
+static inline bool btrfs_is_free_space_inode(struct inode *inode)
{
- if (root == root->fs_info->tree_root ||
- BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+
+ if (root == root->fs_info->tree_root &&
+ btrfs_ino(inode) != BTRFS_BTREE_INODE_OBJECTID)
+ return true;
+ if (BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
return true;
return false;
}
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 4c10fd19d481..9d7621f271ff 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -321,7 +321,7 @@ struct tree_mod_root {
struct tree_mod_elem {
struct rb_node node;
u64 index; /* shifted logical */
- struct seq_list elem;
+ u64 seq;
enum mod_log_op op;
/* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */
@@ -341,20 +341,50 @@ struct tree_mod_elem {
struct tree_mod_root old_root;
};
-static inline void
-__get_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem)
+static inline void tree_mod_log_read_lock(struct btrfs_fs_info *fs_info)
{
- elem->seq = atomic_inc_return(&fs_info->tree_mod_seq);
- list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
+ read_lock(&fs_info->tree_mod_log_lock);
}
-void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
- struct seq_list *elem)
+static inline void tree_mod_log_read_unlock(struct btrfs_fs_info *fs_info)
+{
+ read_unlock(&fs_info->tree_mod_log_lock);
+}
+
+static inline void tree_mod_log_write_lock(struct btrfs_fs_info *fs_info)
+{
+ write_lock(&fs_info->tree_mod_log_lock);
+}
+
+static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info)
{
- elem->flags = 1;
+ write_unlock(&fs_info->tree_mod_log_lock);
+}
+
+/*
+ * This adds a new blocker to the tree mod log's blocker list if the @elem
+ * passed does not already have a sequence number set. So when a caller expects
+ * to record tree modifications, it should ensure to set elem->seq to zero
+ * before calling btrfs_get_tree_mod_seq.
+ * Returns a fresh, unused tree log modification sequence number, even if no new
+ * blocker was added.
+ */
+u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
+ struct seq_list *elem)
+{
+ u64 seq;
+
+ tree_mod_log_write_lock(fs_info);
spin_lock(&fs_info->tree_mod_seq_lock);
- __get_tree_mod_seq(fs_info, elem);
+ if (!elem->seq) {
+ elem->seq = btrfs_inc_tree_mod_seq(fs_info);
+ list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
+ }
+ seq = btrfs_inc_tree_mod_seq(fs_info);
spin_unlock(&fs_info->tree_mod_seq_lock);
+ tree_mod_log_write_unlock(fs_info);
+
+ return seq;
}
void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
@@ -371,41 +401,46 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
if (!seq_putting)
return;
- BUG_ON(!(elem->flags & 1));
spin_lock(&fs_info->tree_mod_seq_lock);
list_del(&elem->list);
+ elem->seq = 0;
list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) {
- if ((cur_elem->flags & 1) && cur_elem->seq < min_seq) {
+ if (cur_elem->seq < min_seq) {
if (seq_putting > cur_elem->seq) {
/*
* blocker with lower sequence number exists, we
* cannot remove anything from the log
*/
- goto out;
+ spin_unlock(&fs_info->tree_mod_seq_lock);
+ return;
}
min_seq = cur_elem->seq;
}
}
+ spin_unlock(&fs_info->tree_mod_seq_lock);
+
+ /*
+ * we removed the lowest blocker from the blocker list, so there may be
+ * more processible delayed refs.
+ */
+ wake_up(&fs_info->tree_mod_seq_wait);
/*
* anything that's lower than the lowest existing (read: blocked)
* sequence number can be removed from the tree.
*/
- write_lock(&fs_info->tree_mod_log_lock);
+ tree_mod_log_write_lock(fs_info);
tm_root = &fs_info->tree_mod_log;
for (node = rb_first(tm_root); node; node = next) {
next = rb_next(node);
tm = container_of(node, struct tree_mod_elem, node);
- if (tm->elem.seq > min_seq)
+ if (tm->seq > min_seq)
continue;
rb_erase(node, tm_root);
- list_del(&tm->elem.list);
kfree(tm);
}
- write_unlock(&fs_info->tree_mod_log_lock);
-out:
- spin_unlock(&fs_info->tree_mod_seq_lock);
+ tree_mod_log_write_unlock(fs_info);
}
/*
@@ -423,11 +458,9 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
struct rb_node **new;
struct rb_node *parent = NULL;
struct tree_mod_elem *cur;
- int ret = 0;
- BUG_ON(!tm || !tm->elem.seq);
+ BUG_ON(!tm || !tm->seq);
- write_lock(&fs_info->tree_mod_log_lock);
tm_root = &fs_info->tree_mod_log;
new = &tm_root->rb_node;
while (*new) {
@@ -437,88 +470,81 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
new = &((*new)->rb_left);
else if (cur->index > tm->index)
new = &((*new)->rb_right);
- else if (cur->elem.seq < tm->elem.seq)
+ else if (cur->seq < tm->seq)
new = &((*new)->rb_left);
- else if (cur->elem.seq > tm->elem.seq)
+ else if (cur->seq > tm->seq)
new = &((*new)->rb_right);
else {
kfree(tm);
- ret = -EEXIST;
- goto unlock;
+ return -EEXIST;
}
}
rb_link_node(&tm->node, parent, new);
rb_insert_color(&tm->node, tm_root);
-unlock:
- write_unlock(&fs_info->tree_mod_log_lock);
- return ret;
+ return 0;
}
+/*
+ * Determines if logging can be omitted. Returns 1 if it can. Otherwise, it
+ * returns zero with the tree_mod_log_lock acquired. The caller must hold
+ * this until all tree mod log insertions are recorded in the rb tree and then
+ * call tree_mod_log_write_unlock() to release.
+ */
static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb) {
smp_mb();
if (list_empty(&(fs_info)->tree_mod_seq_list))
return 1;
- if (!eb)
- return 0;
- if (btrfs_header_level(eb) == 0)
+ if (eb && btrfs_header_level(eb) == 0)
+ return 1;
+
+ tree_mod_log_write_lock(fs_info);
+ if (list_empty(&fs_info->tree_mod_seq_list)) {
+ /*
+ * someone emptied the list while we were waiting for the lock.
+ * we must not add to the list when no blocker exists.
+ */
+ tree_mod_log_write_unlock(fs_info);
return 1;
+ }
+
return 0;
}
/*
- * This allocates memory and gets a tree modification sequence number when
- * needed.
+ * This allocates memory and gets a tree modification sequence number.
*
- * Returns 0 when no sequence number is needed, < 0 on error.
- * Returns 1 when a sequence number was added. In this case,
- * fs_info->tree_mod_seq_lock was acquired and must be released by the caller
- * after inserting into the rb tree.
+ * Returns <0 on error.
+ * Returns >0 (the added sequence number) on success.
*/
static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
struct tree_mod_elem **tm_ret)
{
struct tree_mod_elem *tm;
- int seq;
- if (tree_mod_dont_log(fs_info, NULL))
- return 0;
-
- tm = *tm_ret = kzalloc(sizeof(*tm), flags);
+ /*
+ * once we switch from spin locks to something different, we should
+ * honor the flags parameter here.
+ */
+ tm = *tm_ret = kzalloc(sizeof(*tm), GFP_ATOMIC);
if (!tm)
return -ENOMEM;
- tm->elem.flags = 0;
- spin_lock(&fs_info->tree_mod_seq_lock);
- if (list_empty(&fs_info->tree_mod_seq_list)) {
- /*
- * someone emptied the list while we were waiting for the lock.
- * we must not add to the list, because no blocker exists. items
- * are removed from the list only when the existing blocker is
- * removed from the list.
- */
- kfree(tm);
- seq = 0;
- spin_unlock(&fs_info->tree_mod_seq_lock);
- } else {
- __get_tree_mod_seq(fs_info, &tm->elem);
- seq = tm->elem.seq;
- }
-
- return seq;
+ tm->seq = btrfs_inc_tree_mod_seq(fs_info);
+ return tm->seq;
}
-static noinline int
-tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
- struct extent_buffer *eb, int slot,
- enum mod_log_op op, gfp_t flags)
+static inline int
+__tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *eb, int slot,
+ enum mod_log_op op, gfp_t flags)
{
- struct tree_mod_elem *tm;
int ret;
+ struct tree_mod_elem *tm;
ret = tree_mod_alloc(fs_info, flags, &tm);
- if (ret <= 0)
+ if (ret < 0)
return ret;
tm->index = eb->start >> PAGE_CACHE_SHIFT;
@@ -530,8 +556,22 @@ tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
tm->slot = slot;
tm->generation = btrfs_node_ptr_generation(eb, slot);
- ret = __tree_mod_log_insert(fs_info, tm);
- spin_unlock(&fs_info->tree_mod_seq_lock);
+ return __tree_mod_log_insert(fs_info, tm);
+}
+
+static noinline int
+tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *eb, int slot,
+ enum mod_log_op op, gfp_t flags)
+{
+ int ret;
+
+ if (tree_mod_dont_log(fs_info, eb))
+ return 0;
+
+ ret = __tree_mod_log_insert_key(fs_info, eb, slot, op, flags);
+
+ tree_mod_log_write_unlock(fs_info);
return ret;
}
@@ -543,6 +583,14 @@ tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
}
static noinline int
+tree_mod_log_insert_key_locked(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *eb, int slot,
+ enum mod_log_op op)
+{
+ return __tree_mod_log_insert_key(fs_info, eb, slot, op, GFP_NOFS);
+}
+
+static noinline int
tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int dst_slot, int src_slot,
int nr_items, gfp_t flags)
@@ -555,14 +603,14 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
return 0;
for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
- ret = tree_mod_log_insert_key(fs_info, eb, i + dst_slot,
+ ret = tree_mod_log_insert_key_locked(fs_info, eb, i + dst_slot,
MOD_LOG_KEY_REMOVE_WHILE_MOVING);
BUG_ON(ret < 0);
}
ret = tree_mod_alloc(fs_info, flags, &tm);
- if (ret <= 0)
- return ret;
+ if (ret < 0)
+ goto out;
tm->index = eb->start >> PAGE_CACHE_SHIFT;
tm->slot = src_slot;
@@ -571,10 +619,26 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
tm->op = MOD_LOG_MOVE_KEYS;
ret = __tree_mod_log_insert(fs_info, tm);
- spin_unlock(&fs_info->tree_mod_seq_lock);
+out:
+ tree_mod_log_write_unlock(fs_info);
return ret;
}
+static inline void
+__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
+{
+ int i;
+ u32 nritems;
+ int ret;
+
+ nritems = btrfs_header_nritems(eb);
+ for (i = nritems - 1; i >= 0; i--) {
+ ret = tree_mod_log_insert_key_locked(fs_info, eb, i,
+ MOD_LOG_KEY_REMOVE_WHILE_FREEING);
+ BUG_ON(ret < 0);
+ }
+}
+
static noinline int
tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
struct extent_buffer *old_root,
@@ -583,9 +647,14 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
struct tree_mod_elem *tm;
int ret;
+ if (tree_mod_dont_log(fs_info, NULL))
+ return 0;
+
+ __tree_mod_log_free_eb(fs_info, old_root);
+
ret = tree_mod_alloc(fs_info, flags, &tm);
- if (ret <= 0)
- return ret;
+ if (ret < 0)
+ goto out;
tm->index = new_root->start >> PAGE_CACHE_SHIFT;
tm->old_root.logical = old_root->start;
@@ -594,7 +663,8 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
tm->op = MOD_LOG_ROOT_REPLACE;
ret = __tree_mod_log_insert(fs_info, tm);
- spin_unlock(&fs_info->tree_mod_seq_lock);
+out:
+ tree_mod_log_write_unlock(fs_info);
return ret;
}
@@ -608,7 +678,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
struct tree_mod_elem *found = NULL;
u64 index = start >> PAGE_CACHE_SHIFT;
- read_lock(&fs_info->tree_mod_log_lock);
+ tree_mod_log_read_lock(fs_info);
tm_root = &fs_info->tree_mod_log;
node = tm_root->rb_node;
while (node) {
@@ -617,18 +687,18 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
node = node->rb_left;
} else if (cur->index > index) {
node = node->rb_right;
- } else if (cur->elem.seq < min_seq) {
+ } else if (cur->seq < min_seq) {
node = node->rb_left;
} else if (!smallest) {
/* we want the node with the highest seq */
if (found)
- BUG_ON(found->elem.seq > cur->elem.seq);
+ BUG_ON(found->seq > cur->seq);
found = cur;
node = node->rb_left;
- } else if (cur->elem.seq > min_seq) {
+ } else if (cur->seq > min_seq) {
/* we want the node with the smallest seq */
if (found)
- BUG_ON(found->elem.seq < cur->elem.seq);
+ BUG_ON(found->seq < cur->seq);
found = cur;
node = node->rb_right;
} else {
@@ -636,7 +706,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
break;
}
}
- read_unlock(&fs_info->tree_mod_log_lock);
+ tree_mod_log_read_unlock(fs_info);
return found;
}
@@ -664,7 +734,7 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
return __tree_mod_log_search(fs_info, start, min_seq, 0);
}
-static inline void
+static noinline void
tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
struct extent_buffer *src, unsigned long dst_offset,
unsigned long src_offset, int nr_items)
@@ -675,18 +745,23 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
if (tree_mod_dont_log(fs_info, NULL))
return;
- if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
+ if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) {
+ tree_mod_log_write_unlock(fs_info);
return;
+ }
- /* speed this up by single seq for all operations? */
for (i = 0; i < nr_items; i++) {
- ret = tree_mod_log_insert_key(fs_info, src, i + src_offset,
- MOD_LOG_KEY_REMOVE);
+ ret = tree_mod_log_insert_key_locked(fs_info, src,
+ i + src_offset,
+ MOD_LOG_KEY_REMOVE);
BUG_ON(ret < 0);
- ret = tree_mod_log_insert_key(fs_info, dst, i + dst_offset,
- MOD_LOG_KEY_ADD);
+ ret = tree_mod_log_insert_key_locked(fs_info, dst,
+ i + dst_offset,
+ MOD_LOG_KEY_ADD);
BUG_ON(ret < 0);
}
+
+ tree_mod_log_write_unlock(fs_info);
}
static inline void
@@ -699,7 +774,7 @@ tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
BUG_ON(ret < 0);
}
-static inline void
+static noinline void
tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int slot, int atomic)
@@ -712,30 +787,22 @@ tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
BUG_ON(ret < 0);
}
-static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
- struct extent_buffer *eb)
+static noinline void
+tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
{
- int i;
- int ret;
- u32 nritems;
-
if (tree_mod_dont_log(fs_info, eb))
return;
- nritems = btrfs_header_nritems(eb);
- for (i = nritems - 1; i >= 0; i--) {
- ret = tree_mod_log_insert_key(fs_info, eb, i,
- MOD_LOG_KEY_REMOVE_WHILE_FREEING);
- BUG_ON(ret < 0);
- }
+ __tree_mod_log_free_eb(fs_info, eb);
+
+ tree_mod_log_write_unlock(fs_info);
}
-static inline void
+static noinline void
tree_mod_log_set_root_pointer(struct btrfs_root *root,
struct extent_buffer *new_root_node)
{
int ret;
- tree_mod_log_free_eb(root->fs_info, root->node);
ret = tree_mod_log_insert_root(root->fs_info, root->node,
new_root_node, GFP_NOFS);
BUG_ON(ret < 0);
@@ -1069,7 +1136,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
unsigned long p_size = sizeof(struct btrfs_key_ptr);
n = btrfs_header_nritems(eb);
- while (tm && tm->elem.seq >= time_seq) {
+ while (tm && tm->seq >= time_seq) {
/*
* all the operations are recorded with the operator used for
* the modification. as we're going backwards, we do the
@@ -5626,6 +5693,7 @@ again:
* locked. To solve this situation, we give up
* on our lock and cycle.
*/
+ free_extent_buffer(next);
btrfs_release_path(path);
cond_resched();
goto again;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2fbbe738caed..adb1cd7ceb9b 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -91,6 +91,9 @@ struct btrfs_ordered_sum;
/* for storing balance parameters in the root tree */
#define BTRFS_BALANCE_OBJECTID -4ULL
+/* holds quota configuration and tracking */
+#define BTRFS_QUOTA_TREE_OBJECTID 8ULL
+
/* orhpan objectid for tracking unlinked/truncated files */
#define BTRFS_ORPHAN_OBJECTID -5ULL
@@ -913,6 +916,72 @@ struct btrfs_block_group_item {
__le64 flags;
} __attribute__ ((__packed__));
+/*
+ * is subvolume quota turned on?
+ */
+#define BTRFS_QGROUP_STATUS_FLAG_ON (1ULL << 0)
+/*
+ * SCANNING is set during the initialization phase
+ */
+#define BTRFS_QGROUP_STATUS_FLAG_SCANNING (1ULL << 1)
+/*
+ * Some qgroup entries are known to be out of date,
+ * either because the configuration has changed in a way that
+ * makes a rescan necessary, or because the fs has been mounted
+ * with a non-qgroup-aware version.
+ * Turning qouta off and on again makes it inconsistent, too.
+ */
+#define BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT (1ULL << 2)
+
+#define BTRFS_QGROUP_STATUS_VERSION 1
+
+struct btrfs_qgroup_status_item {
+ __le64 version;
+ /*
+ * the generation is updated during every commit. As older
+ * versions of btrfs are not aware of qgroups, it will be
+ * possible to detect inconsistencies by checking the
+ * generation on mount time
+ */
+ __le64 generation;
+
+ /* flag definitions see above */
+ __le64 flags;
+
+ /*
+ * only used during scanning to record the progress
+ * of the scan. It contains a logical address
+ */
+ __le64 scan;
+} __attribute__ ((__packed__));
+
+struct btrfs_qgroup_info_item {
+ __le64 generation;
+ __le64 rfer;
+ __le64 rfer_cmpr;
+ __le64 excl;
+ __le64 excl_cmpr;
+} __attribute__ ((__packed__));
+
+/* flags definition for qgroup limits */
+#define BTRFS_QGROUP_LIMIT_MAX_RFER (1ULL << 0)
+#define BTRFS_QGROUP_LIMIT_MAX_EXCL (1ULL << 1)
+#define BTRFS_QGROUP_LIMIT_RSV_RFER (1ULL << 2)
+#define BTRFS_QGROUP_LIMIT_RSV_EXCL (1ULL << 3)
+#define BTRFS_QGROUP_LIMIT_RFER_CMPR (1ULL << 4)
+#define BTRFS_QGROUP_LIMIT_EXCL_CMPR (1ULL << 5)
+
+struct btrfs_qgroup_limit_item {
+ /*
+ * only updated when any of the other values change
+ */
+ __le64 flags;
+ __le64 max_rfer;
+ __le64 max_excl;
+ __le64 rsv_rfer;
+ __le64 rsv_excl;
+} __attribute__ ((__packed__));
+
struct btrfs_space_info {
u64 flags;
@@ -1060,6 +1129,13 @@ struct btrfs_block_group_cache {
struct list_head cluster_list;
};
+/* delayed seq elem */
+struct seq_list {
+ struct list_head list;
+ u64 seq;
+};
+
+/* fs_info */
struct reloc_control;
struct btrfs_device;
struct btrfs_fs_devices;
@@ -1074,6 +1150,7 @@ struct btrfs_fs_info {
struct btrfs_root *dev_root;
struct btrfs_root *fs_root;
struct btrfs_root *csum_root;
+ struct btrfs_root *quota_root;
/* the log root tree is a directory of all the other log roots */
struct btrfs_root *log_root_tree;
@@ -1174,6 +1251,8 @@ struct btrfs_fs_info {
spinlock_t tree_mod_seq_lock;
atomic_t tree_mod_seq;
struct list_head tree_mod_seq_list;
+ struct seq_list tree_mod_seq_elem;
+ wait_queue_head_t tree_mod_seq_wait;
/* this protects tree_mod_log */
rwlock_t tree_mod_log_lock;
@@ -1270,6 +1349,8 @@ struct btrfs_fs_info {
*/
struct list_head space_info;
+ struct btrfs_space_info *data_sinfo;
+
struct reloc_control *reloc_ctl;
spinlock_t delalloc_lock;
@@ -1326,6 +1407,29 @@ struct btrfs_fs_info {
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
u32 check_integrity_print_mask;
#endif
+ /*
+ * quota information
+ */
+ unsigned int quota_enabled:1;
+
+ /*
+ * quota_enabled only changes state after a commit. This holds the
+ * next state.
+ */
+ unsigned int pending_quota_state:1;
+
+ /* is qgroup tracking in a consistent state? */
+ u64 qgroup_flags;
+
+ /* holds configuration and tracking. Protected by qgroup_lock */
+ struct rb_root qgroup_tree;
+ spinlock_t qgroup_lock;
+
+ /* list of dirty qgroups to be written at next commit */
+ struct list_head dirty_qgroups;
+
+ /* used by btrfs_qgroup_record_ref for an efficient tree traversal */
+ u64 qgroup_seq;
/* filesystem state */
u64 fs_state;
@@ -1557,6 +1661,30 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_DEV_ITEM_KEY 216
#define BTRFS_CHUNK_ITEM_KEY 228
+/*
+ * Records the overall state of the qgroups.
+ * There's only one instance of this key present,
+ * (0, BTRFS_QGROUP_STATUS_KEY, 0)
+ */
+#define BTRFS_QGROUP_STATUS_KEY 240
+/*
+ * Records the currently used space of the qgroup.
+ * One key per qgroup, (0, BTRFS_QGROUP_INFO_KEY, qgroupid).
+ */
+#define BTRFS_QGROUP_INFO_KEY 242
+/*
+ * Contains the user configured limits for the qgroup.
+ * One key per qgroup, (0, BTRFS_QGROUP_LIMIT_KEY, qgroupid).
+ */
+#define BTRFS_QGROUP_LIMIT_KEY 244
+/*
+ * Records the child-parent relationship of qgroups. For
+ * each relation, 2 keys are present:
+ * (childid, BTRFS_QGROUP_RELATION_KEY, parentid)
+ * (parentid, BTRFS_QGROUP_RELATION_KEY, childid)
+ */
+#define BTRFS_QGROUP_RELATION_KEY 246
+
#define BTRFS_BALANCE_ITEM_KEY 248
/*
@@ -1653,13 +1781,54 @@ static inline void btrfs_init_map_token (struct btrfs_map_token *token)
offsetof(type, member), \
sizeof(((type *)0)->member)))
-#ifndef BTRFS_SETGET_FUNCS
+#define DECLARE_BTRFS_SETGET_BITS(bits) \
+u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \
+ unsigned long off, \
+ struct btrfs_map_token *token); \
+void btrfs_set_token_##bits(struct extent_buffer *eb, void *ptr, \
+ unsigned long off, u##bits val, \
+ struct btrfs_map_token *token); \
+static inline u##bits btrfs_get_##bits(struct extent_buffer *eb, void *ptr, \
+ unsigned long off) \
+{ \
+ return btrfs_get_token_##bits(eb, ptr, off, NULL); \
+} \
+static inline void btrfs_set_##bits(struct extent_buffer *eb, void *ptr, \
+ unsigned long off, u##bits val) \
+{ \
+ btrfs_set_token_##bits(eb, ptr, off, val, NULL); \
+}
+
+DECLARE_BTRFS_SETGET_BITS(8)
+DECLARE_BTRFS_SETGET_BITS(16)
+DECLARE_BTRFS_SETGET_BITS(32)
+DECLARE_BTRFS_SETGET_BITS(64)
+
#define BTRFS_SETGET_FUNCS(name, type, member, bits) \
-u##bits btrfs_##name(struct extent_buffer *eb, type *s); \
-u##bits btrfs_token_##name(struct extent_buffer *eb, type *s, struct btrfs_map_token *token); \
-void btrfs_set_token_##name(struct extent_buffer *eb, type *s, u##bits val, struct btrfs_map_token *token);\
-void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val);
-#endif
+static inline u##bits btrfs_##name(struct extent_buffer *eb, type *s) \
+{ \
+ BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
+ return btrfs_get_##bits(eb, s, offsetof(type, member)); \
+} \
+static inline void btrfs_set_##name(struct extent_buffer *eb, type *s, \
+ u##bits val) \
+{ \
+ BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
+ btrfs_set_##bits(eb, s, offsetof(type, member), val); \
+} \
+static inline u##bits btrfs_token_##name(struct extent_buffer *eb, type *s, \
+ struct btrfs_map_token *token) \
+{ \
+ BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
+ return btrfs_get_token_##bits(eb, s, offsetof(type, member), token); \
+} \
+static inline void btrfs_set_token_##name(struct extent_buffer *eb, \
+ type *s, u##bits val, \
+ struct btrfs_map_token *token) \
+{ \
+ BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
+ btrfs_set_token_##bits(eb, s, offsetof(type, member), val, token); \
+}
#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \
static inline u##bits btrfs_##name(struct extent_buffer *eb) \
@@ -2507,6 +2676,49 @@ static inline void btrfs_set_dev_stats_value(struct extent_buffer *eb,
sizeof(val));
}
+/* btrfs_qgroup_status_item */
+BTRFS_SETGET_FUNCS(qgroup_status_generation, struct btrfs_qgroup_status_item,
+ generation, 64);
+BTRFS_SETGET_FUNCS(qgroup_status_version, struct btrfs_qgroup_status_item,
+ version, 64);
+BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item,
+ flags, 64);
+BTRFS_SETGET_FUNCS(qgroup_status_scan, struct btrfs_qgroup_status_item,
+ scan, 64);
+
+/* btrfs_qgroup_info_item */
+BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item,
+ generation, 64);
+BTRFS_SETGET_FUNCS(qgroup_info_rfer, struct btrfs_qgroup_info_item, rfer, 64);
+BTRFS_SETGET_FUNCS(qgroup_info_rfer_cmpr, struct btrfs_qgroup_info_item,
+ rfer_cmpr, 64);
+BTRFS_SETGET_FUNCS(qgroup_info_excl, struct btrfs_qgroup_info_item, excl, 64);
+BTRFS_SETGET_FUNCS(qgroup_info_excl_cmpr, struct btrfs_qgroup_info_item,
+ excl_cmpr, 64);
+
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_generation,
+ struct btrfs_qgroup_info_item, generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_rfer, struct btrfs_qgroup_info_item,
+ rfer, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_rfer_cmpr,
+ struct btrfs_qgroup_info_item, rfer_cmpr, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_excl, struct btrfs_qgroup_info_item,
+ excl, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_excl_cmpr,
+ struct btrfs_qgroup_info_item, excl_cmpr, 64);
+
+/* btrfs_qgroup_limit_item */
+BTRFS_SETGET_FUNCS(qgroup_limit_flags, struct btrfs_qgroup_limit_item,
+ flags, 64);
+BTRFS_SETGET_FUNCS(qgroup_limit_max_rfer, struct btrfs_qgroup_limit_item,
+ max_rfer, 64);
+BTRFS_SETGET_FUNCS(qgroup_limit_max_excl, struct btrfs_qgroup_limit_item,
+ max_excl, 64);
+BTRFS_SETGET_FUNCS(qgroup_limit_rsv_rfer, struct btrfs_qgroup_limit_item,
+ rsv_rfer, 64);
+BTRFS_SETGET_FUNCS(qgroup_limit_rsv_excl, struct btrfs_qgroup_limit_item,
+ rsv_excl, 64);
+
static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
{
return sb->s_fs_info;
@@ -2649,7 +2861,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 group_start);
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
-void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
@@ -2703,6 +2914,8 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range);
int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
+int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot);
@@ -2853,11 +3066,22 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
kfree(fs_info->chunk_root);
kfree(fs_info->dev_root);
kfree(fs_info->csum_root);
+ kfree(fs_info->quota_root);
kfree(fs_info->super_copy);
kfree(fs_info->super_for_commit);
kfree(fs_info);
}
+/* tree mod log functions from ctree.c */
+u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
+ struct seq_list *elem);
+void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
+ struct seq_list *elem);
+static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
+{
+ return atomic_inc_return(&fs_info->tree_mod_seq);
+}
+
/* root-item.c */
int btrfs_find_root_ref(struct btrfs_root *tree_root,
struct btrfs_path *path,
@@ -3126,6 +3350,23 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root, const char *function,
unsigned int line, int errno);
+#define btrfs_set_fs_incompat(__fs_info, opt) \
+ __btrfs_set_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt)
+
+static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info,
+ u64 flag)
+{
+ struct btrfs_super_block *disk_super;
+ u64 features;
+
+ disk_super = fs_info->super_copy;
+ features = btrfs_super_incompat_flags(disk_super);
+ if (!(features & flag)) {
+ features |= flag;
+ btrfs_set_super_incompat_flags(disk_super, features);
+ }
+}
+
#define btrfs_abort_transaction(trans, root, errno) \
do { \
__btrfs_abort_transaction(trans, root, __func__, \
@@ -3221,17 +3462,49 @@ void btrfs_reada_detach(void *handle);
int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
u64 start, int err);
-/* delayed seq elem */
-struct seq_list {
+/* qgroup.c */
+struct qgroup_update {
struct list_head list;
- u64 seq;
- u32 flags;
+ struct btrfs_delayed_ref_node *node;
+ struct btrfs_delayed_extent_op *extent_op;
};
-void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
- struct seq_list *elem);
-void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
- struct seq_list *elem);
+int btrfs_quota_enable(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info);
+int btrfs_quota_disable(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info);
+int btrfs_quota_rescan(struct btrfs_fs_info *fs_info);
+int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 src, u64 dst);
+int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 src, u64 dst);
+int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 qgroupid,
+ char *name);
+int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 qgroupid);
+int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 qgroupid,
+ struct btrfs_qgroup_limit *limit);
+int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
+void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
+struct btrfs_delayed_extent_op;
+int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_node *node,
+ struct btrfs_delayed_extent_op *extent_op);
+int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_node *node,
+ struct btrfs_delayed_extent_op *extent_op);
+int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info);
+int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
+ struct btrfs_qgroup_inherit *inherit);
+int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes);
+void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes);
+
+void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);
static inline int is_fstree(u64 rootid)
{
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 2399f4086915..335605c8ceab 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -62,6 +62,7 @@ static inline void btrfs_init_delayed_node(
INIT_LIST_HEAD(&delayed_node->n_list);
INIT_LIST_HEAD(&delayed_node->p_list);
delayed_node->bytes_reserved = 0;
+ memset(&delayed_node->inode_item, 0, sizeof(delayed_node->inode_item));
}
static inline int btrfs_is_continuous_delayed_item(
@@ -1113,8 +1114,8 @@ static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
* Returns < 0 on error and returns with an aborted transaction with any
* outstanding delayed items cleaned up.
*/
-int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
+static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, int nr)
{
struct btrfs_root *curr_root = root;
struct btrfs_delayed_root *delayed_root;
@@ -1122,6 +1123,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_path *path;
struct btrfs_block_rsv *block_rsv;
int ret = 0;
+ bool count = (nr > 0);
if (trans->aborted)
return -EIO;
@@ -1137,7 +1139,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
delayed_root = btrfs_get_delayed_root(root);
curr_node = btrfs_first_delayed_node(delayed_root);
- while (curr_node) {
+ while (curr_node && (!count || (count && nr--))) {
curr_root = curr_node->root;
ret = btrfs_insert_delayed_items(trans, path, curr_root,
curr_node);
@@ -1149,6 +1151,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
path, curr_node);
if (ret) {
btrfs_release_delayed_node(curr_node);
+ curr_node = NULL;
btrfs_abort_transaction(trans, root, ret);
break;
}
@@ -1158,12 +1161,26 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
btrfs_release_delayed_node(prev_node);
}
+ if (curr_node)
+ btrfs_release_delayed_node(curr_node);
btrfs_free_path(path);
trans->block_rsv = block_rsv;
return ret;
}
+int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{
+ return __btrfs_run_delayed_items(trans, root, -1);
+}
+
+int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, int nr)
+{
+ return __btrfs_run_delayed_items(trans, root, nr);
+}
+
static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_delayed_node *node)
{
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index f5aa4023d3e1..4f808e1baeed 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -107,6 +107,8 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode);
int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
+int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, int nr);
void btrfs_balance_delayed_items(struct btrfs_root *root);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 13ae7b04790e..da7419ed01bb 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -233,22 +233,26 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
return 0;
}
-int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
+int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_root *delayed_refs,
u64 seq)
{
struct seq_list *elem;
-
- assert_spin_locked(&delayed_refs->lock);
- if (list_empty(&delayed_refs->seq_head))
- return 0;
-
- elem = list_first_entry(&delayed_refs->seq_head, struct seq_list, list);
- if (seq >= elem->seq) {
- pr_debug("holding back delayed_ref %llu, lowest is %llu (%p)\n",
- seq, elem->seq, delayed_refs);
- return 1;
+ int ret = 0;
+
+ spin_lock(&fs_info->tree_mod_seq_lock);
+ if (!list_empty(&fs_info->tree_mod_seq_list)) {
+ elem = list_first_entry(&fs_info->tree_mod_seq_list,
+ struct seq_list, list);
+ if (seq >= elem->seq) {
+ pr_debug("holding back delayed_ref %llu, lowest is "
+ "%llu (%p)\n", seq, elem->seq, delayed_refs);
+ ret = 1;
+ }
}
- return 0;
+
+ spin_unlock(&fs_info->tree_mod_seq_lock);
+ return ret;
}
int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
@@ -525,8 +529,8 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
ref->is_head = 0;
ref->in_tree = 1;
- if (is_fstree(ref_root))
- seq = inc_delayed_seq(delayed_refs);
+ if (need_ref_seq(for_cow, ref_root))
+ seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
ref->seq = seq;
full_ref = btrfs_delayed_node_to_tree_ref(ref);
@@ -584,8 +588,8 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
ref->is_head = 0;
ref->in_tree = 1;
- if (is_fstree(ref_root))
- seq = inc_delayed_seq(delayed_refs);
+ if (need_ref_seq(for_cow, ref_root))
+ seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
ref->seq = seq;
full_ref = btrfs_delayed_node_to_data_ref(ref);
@@ -658,10 +662,12 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
num_bytes, parent, ref_root, level, action,
for_cow);
- if (!is_fstree(ref_root) &&
- waitqueue_active(&delayed_refs->seq_wait))
- wake_up(&delayed_refs->seq_wait);
+ if (!need_ref_seq(for_cow, ref_root) &&
+ waitqueue_active(&fs_info->tree_mod_seq_wait))
+ wake_up(&fs_info->tree_mod_seq_wait);
spin_unlock(&delayed_refs->lock);
+ if (need_ref_seq(for_cow, ref_root))
+ btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
return 0;
}
@@ -707,10 +713,12 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
num_bytes, parent, ref_root, owner, offset,
action, for_cow);
- if (!is_fstree(ref_root) &&
- waitqueue_active(&delayed_refs->seq_wait))
- wake_up(&delayed_refs->seq_wait);
+ if (!need_ref_seq(for_cow, ref_root) &&
+ waitqueue_active(&fs_info->tree_mod_seq_wait))
+ wake_up(&fs_info->tree_mod_seq_wait);
spin_unlock(&delayed_refs->lock);
+ if (need_ref_seq(for_cow, ref_root))
+ btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
return 0;
}
@@ -736,8 +744,8 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
extent_op->is_data);
- if (waitqueue_active(&delayed_refs->seq_wait))
- wake_up(&delayed_refs->seq_wait);
+ if (waitqueue_active(&fs_info->tree_mod_seq_wait))
+ wake_up(&fs_info->tree_mod_seq_wait);
spin_unlock(&delayed_refs->lock);
return 0;
}
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 413927fb9957..0d7c90c366b6 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -139,26 +139,6 @@ struct btrfs_delayed_ref_root {
int flushing;
u64 run_delayed_start;
-
- /*
- * seq number of delayed refs. We need to know if a backref was being
- * added before the currently processed ref or afterwards.
- */
- u64 seq;
-
- /*
- * seq_list holds a list of all seq numbers that are currently being
- * added to the list. While walking backrefs (btrfs_find_all_roots,
- * qgroups), which might take some time, no newer ref must be processed,
- * as it might influence the outcome of the walk.
- */
- struct list_head seq_head;
-
- /*
- * when the only refs we have in the list must not be processed, we want
- * to wait for more refs to show up or for the end of backref walking.
- */
- wait_queue_head_t seq_wait;
};
static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
@@ -195,34 +175,28 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
struct list_head *cluster, u64 search_start);
-static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs)
-{
- assert_spin_locked(&delayed_refs->lock);
- ++delayed_refs->seq;
- return delayed_refs->seq;
-}
+int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ u64 seq);
-static inline void
-btrfs_get_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
- struct seq_list *elem)
+/*
+ * delayed refs with a ref_seq > 0 must be held back during backref walking.
+ * this only applies to items in one of the fs-trees. for_cow items never need
+ * to be held back, so they won't get a ref_seq number.
+ */
+static inline int need_ref_seq(int for_cow, u64 rootid)
{
- assert_spin_locked(&delayed_refs->lock);
- elem->seq = delayed_refs->seq;
- list_add_tail(&elem->list, &delayed_refs->seq_head);
-}
+ if (for_cow)
+ return 0;
-static inline void
-btrfs_put_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
- struct seq_list *elem)
-{
- spin_lock(&delayed_refs->lock);
- list_del(&elem->list);
- wake_up(&delayed_refs->seq_wait);
- spin_unlock(&delayed_refs->lock);
-}
+ if (rootid == BTRFS_FS_TREE_OBJECTID)
+ return 1;
-int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
- u64 seq);
+ if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID)
+ return 1;
+
+ return 0;
+}
/*
* a node might live in a head or a regular ref, this lets you
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c39eb71fae31..502b20c56e84 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -407,7 +407,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
break;
}
- if (failed && !ret)
+ if (failed && !ret && failed_mirror)
repair_eb_io_failure(root, eb, failed_mirror);
return ret;
@@ -1227,6 +1227,82 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info)
return root;
}
+struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ u64 objectid)
+{
+ struct extent_buffer *leaf;
+ struct btrfs_root *tree_root = fs_info->tree_root;
+ struct btrfs_root *root;
+ struct btrfs_key key;
+ int ret = 0;
+ u64 bytenr;
+
+ root = btrfs_alloc_root(fs_info);
+ if (!root)
+ return ERR_PTR(-ENOMEM);
+
+ __setup_root(tree_root->nodesize, tree_root->leafsize,
+ tree_root->sectorsize, tree_root->stripesize,
+ root, fs_info, objectid);
+ root->root_key.objectid = objectid;
+ root->root_key.type = BTRFS_ROOT_ITEM_KEY;
+ root->root_key.offset = 0;
+
+ leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
+ 0, objectid, NULL, 0, 0, 0);
+ if (IS_ERR(leaf)) {
+ ret = PTR_ERR(leaf);
+ goto fail;
+ }
+
+ bytenr = leaf->start;
+ memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
+ btrfs_set_header_bytenr(leaf, leaf->start);
+ btrfs_set_header_generation(leaf, trans->transid);
+ btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
+ btrfs_set_header_owner(leaf, objectid);
+ root->node = leaf;
+
+ write_extent_buffer(leaf, fs_info->fsid,
+ (unsigned long)btrfs_header_fsid(leaf),
+ BTRFS_FSID_SIZE);
+ write_extent_buffer(leaf, fs_info->chunk_tree_uuid,
+ (unsigned long)btrfs_header_chunk_tree_uuid(leaf),
+ BTRFS_UUID_SIZE);
+ btrfs_mark_buffer_dirty(leaf);
+
+ root->commit_root = btrfs_root_node(root);
+ root->track_dirty = 1;
+
+
+ root->root_item.flags = 0;
+ root->root_item.byte_limit = 0;
+ btrfs_set_root_bytenr(&root->root_item, leaf->start);
+ btrfs_set_root_generation(&root->root_item, trans->transid);
+ btrfs_set_root_level(&root->root_item, 0);
+ btrfs_set_root_refs(&root->root_item, 1);
+ btrfs_set_root_used(&root->root_item, leaf->len);
+ btrfs_set_root_last_snapshot(&root->root_item, 0);
+ btrfs_set_root_dirid(&root->root_item, 0);
+ root->root_item.drop_level = 0;
+
+ key.objectid = objectid;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = 0;
+ ret = btrfs_insert_root(trans, tree_root, &key, &root->root_item);
+ if (ret)
+ goto fail;
+
+ btrfs_tree_unlock(leaf);
+
+fail:
+ if (ret)
+ return ERR_PTR(ret);
+
+ return root;
+}
+
static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
@@ -1398,6 +1474,9 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
return fs_info->dev_root;
if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
return fs_info->csum_root;
+ if (location->objectid == BTRFS_QUOTA_TREE_OBJECTID)
+ return fs_info->quota_root ? fs_info->quota_root :
+ ERR_PTR(-ENOENT);
again:
spin_lock(&fs_info->fs_roots_radix_lock);
root = radix_tree_lookup(&fs_info->fs_roots_radix,
@@ -1825,6 +1904,10 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
free_extent_buffer(info->extent_root->commit_root);
free_extent_buffer(info->csum_root->node);
free_extent_buffer(info->csum_root->commit_root);
+ if (info->quota_root) {
+ free_extent_buffer(info->quota_root->node);
+ free_extent_buffer(info->quota_root->commit_root);
+ }
info->tree_root->node = NULL;
info->tree_root->commit_root = NULL;
@@ -1834,6 +1917,10 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
info->extent_root->commit_root = NULL;
info->csum_root->node = NULL;
info->csum_root->commit_root = NULL;
+ if (info->quota_root) {
+ info->quota_root->node = NULL;
+ info->quota_root->commit_root = NULL;
+ }
if (chunk_root) {
free_extent_buffer(info->chunk_root->node);
@@ -1864,6 +1951,7 @@ int open_ctree(struct super_block *sb,
struct btrfs_root *csum_root;
struct btrfs_root *chunk_root;
struct btrfs_root *dev_root;
+ struct btrfs_root *quota_root;
struct btrfs_root *log_tree_root;
int ret;
int err = -EINVAL;
@@ -1875,9 +1963,10 @@ int open_ctree(struct super_block *sb,
csum_root = fs_info->csum_root = btrfs_alloc_root(fs_info);
chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info);
dev_root = fs_info->dev_root = btrfs_alloc_root(fs_info);
+ quota_root = fs_info->quota_root = btrfs_alloc_root(fs_info);
if (!tree_root || !extent_root || !csum_root ||
- !chunk_root || !dev_root) {
+ !chunk_root || !dev_root || !quota_root) {
err = -ENOMEM;
goto fail;
}
@@ -1946,6 +2035,8 @@ int open_ctree(struct super_block *sb,
fs_info->free_chunk_space = 0;
fs_info->tree_mod_log = RB_ROOT;
+ init_waitqueue_head(&fs_info->tree_mod_seq_wait);
+
/* readahead state */
INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
spin_lock_init(&fs_info->reada_lock);
@@ -2034,6 +2125,13 @@ int open_ctree(struct super_block *sb,
init_rwsem(&fs_info->cleanup_work_sem);
init_rwsem(&fs_info->subvol_sem);
+ spin_lock_init(&fs_info->qgroup_lock);
+ fs_info->qgroup_tree = RB_ROOT;
+ INIT_LIST_HEAD(&fs_info->dirty_qgroups);
+ fs_info->qgroup_seq = 1;
+ fs_info->quota_enabled = 0;
+ fs_info->pending_quota_state = 0;
+
btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
@@ -2246,7 +2344,7 @@ int open_ctree(struct super_block *sb,
ret |= btrfs_start_workers(&fs_info->caching_workers);
ret |= btrfs_start_workers(&fs_info->readahead_workers);
if (ret) {
- ret = -ENOMEM;
+ err = -ENOMEM;
goto fail_sb_buffer;
}
@@ -2358,6 +2456,17 @@ retry_root_backup:
goto recovery_tree_root;
csum_root->track_dirty = 1;
+ ret = find_and_setup_root(tree_root, fs_info,
+ BTRFS_QUOTA_TREE_OBJECTID, quota_root);
+ if (ret) {
+ kfree(quota_root);
+ quota_root = fs_info->quota_root = NULL;
+ } else {
+ quota_root->track_dirty = 1;
+ fs_info->quota_enabled = 1;
+ fs_info->pending_quota_state = 1;
+ }
+
fs_info->generation = generation;
fs_info->last_trans_committed = generation;
@@ -2417,6 +2526,9 @@ retry_root_backup:
" integrity check module %s\n", sb->s_id);
}
#endif
+ ret = btrfs_read_qgroup_config(fs_info);
+ if (ret)
+ goto fail_trans_kthread;
/* do not make disk changes in broken FS */
if (btrfs_super_log_root(disk_super) != 0 &&
@@ -2427,7 +2539,7 @@ retry_root_backup:
printk(KERN_WARNING "Btrfs log replay required "
"on RO media\n");
err = -EIO;
- goto fail_trans_kthread;
+ goto fail_qgroup;
}
blocksize =
btrfs_level_size(tree_root,
@@ -2436,7 +2548,7 @@ retry_root_backup:
log_tree_root = btrfs_alloc_root(fs_info);
if (!log_tree_root) {
err = -ENOMEM;
- goto fail_trans_kthread;
+ goto fail_qgroup;
}
__setup_root(nodesize, leafsize, sectorsize, stripesize,
@@ -2468,15 +2580,15 @@ retry_root_backup:
if (!(sb->s_flags & MS_RDONLY)) {
ret = btrfs_cleanup_fs_roots(fs_info);
- if (ret) {
- }
+ if (ret)
+ goto fail_trans_kthread;
ret = btrfs_recover_relocation(tree_root);
if (ret < 0) {
printk(KERN_WARNING
"btrfs: failed to recover relocation\n");
err = -EINVAL;
- goto fail_trans_kthread;
+ goto fail_qgroup;
}
}
@@ -2486,10 +2598,10 @@ retry_root_backup:
fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
if (!fs_info->fs_root)
- goto fail_trans_kthread;
+ goto fail_qgroup;
if (IS_ERR(fs_info->fs_root)) {
err = PTR_ERR(fs_info->fs_root);
- goto fail_trans_kthread;
+ goto fail_qgroup;
}
if (sb->s_flags & MS_RDONLY)
@@ -2513,6 +2625,8 @@ retry_root_backup:
return 0;
+fail_qgroup:
+ btrfs_free_qgroup_config(fs_info);
fail_trans_kthread:
kthread_stop(fs_info->transaction_kthread);
fail_cleaner:
@@ -3111,6 +3225,8 @@ int close_ctree(struct btrfs_root *root)
fs_info->closing = 2;
smp_mb();
+ btrfs_free_qgroup_config(root->fs_info);
+
if (fs_info->delalloc_bytes) {
printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
(unsigned long long)fs_info->delalloc_bytes);
@@ -3130,6 +3246,10 @@ int close_ctree(struct btrfs_root *root)
free_extent_buffer(fs_info->dev_root->commit_root);
free_extent_buffer(fs_info->csum_root->node);
free_extent_buffer(fs_info->csum_root->commit_root);
+ if (fs_info->quota_root) {
+ free_extent_buffer(fs_info->quota_root->node);
+ free_extent_buffer(fs_info->quota_root->commit_root);
+ }
btrfs_free_block_groups(fs_info);
@@ -3260,7 +3380,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
return btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
}
-static int btree_lock_page_hook(struct page *page, void *data,
+int btree_lock_page_hook(struct page *page, void *data,
void (*flush_fn)(void *))
{
struct inode *inode = page->mapping->host;
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 05b3fab39f7e..95e147eea239 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -89,6 +89,12 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
int btrfs_cleanup_transaction(struct btrfs_root *root);
void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans,
struct btrfs_root *root);
+void btrfs_abort_devices(struct btrfs_root *root);
+struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ u64 objectid);
+int btree_lock_page_hook(struct page *page, void *data,
+ void (*flush_fn)(void *));
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void btrfs_init_lockdep(void);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 6e1d36702ff7..4e1b153b7c47 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -34,6 +34,8 @@
#include "locking.h"
#include "free-space-cache.h"
+#undef SCRAMBLE_DELAYED_REFS
+
/*
* control flags for do_chunk_alloc's force field
* CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
@@ -2217,6 +2219,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref;
struct btrfs_delayed_ref_head *locked_ref = NULL;
struct btrfs_delayed_extent_op *extent_op;
+ struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
int count = 0;
int must_insert_reserved = 0;
@@ -2255,7 +2258,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
ref = select_delayed_ref(locked_ref);
if (ref && ref->seq &&
- btrfs_check_delayed_seq(delayed_refs, ref->seq)) {
+ btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
/*
* there are still refs with lower seq numbers in the
* process of being added. Don't run this ref yet.
@@ -2337,7 +2340,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
}
next:
- do_chunk_alloc(trans, root->fs_info->extent_root,
+ do_chunk_alloc(trans, fs_info->extent_root,
2 * 1024 * 1024,
btrfs_get_alloc_profile(root, 0),
CHUNK_ALLOC_NO_FORCE);
@@ -2347,21 +2350,99 @@ next:
return count;
}
-static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
+static void wait_for_more_refs(struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_root *delayed_refs,
unsigned long num_refs,
struct list_head *first_seq)
{
spin_unlock(&delayed_refs->lock);
pr_debug("waiting for more refs (num %ld, first %p)\n",
num_refs, first_seq);
- wait_event(delayed_refs->seq_wait,
+ wait_event(fs_info->tree_mod_seq_wait,
num_refs != delayed_refs->num_entries ||
- delayed_refs->seq_head.next != first_seq);
+ fs_info->tree_mod_seq_list.next != first_seq);
pr_debug("done waiting for more refs (num %ld, first %p)\n",
- delayed_refs->num_entries, delayed_refs->seq_head.next);
+ delayed_refs->num_entries, fs_info->tree_mod_seq_list.next);
spin_lock(&delayed_refs->lock);
}
+#ifdef SCRAMBLE_DELAYED_REFS
+/*
+ * Normally delayed refs get processed in ascending bytenr order. This
+ * correlates in most cases to the order added. To expose dependencies on this
+ * order, we start to process the tree in the middle instead of the beginning
+ */
+static u64 find_middle(struct rb_root *root)
+{
+ struct rb_node *n = root->rb_node;
+ struct btrfs_delayed_ref_node *entry;
+ int alt = 1;
+ u64 middle;
+ u64 first = 0, last = 0;
+
+ n = rb_first(root);
+ if (n) {
+ entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
+ first = entry->bytenr;
+ }
+ n = rb_last(root);
+ if (n) {
+ entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
+ last = entry->bytenr;
+ }
+ n = root->rb_node;
+
+ while (n) {
+ entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
+ WARN_ON(!entry->in_tree);
+
+ middle = entry->bytenr;
+
+ if (alt)
+ n = n->rb_left;
+ else
+ n = n->rb_right;
+
+ alt = 1 - alt;
+ }
+ return middle;
+}
+#endif
+
+int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info)
+{
+ struct qgroup_update *qgroup_update;
+ int ret = 0;
+
+ if (list_empty(&trans->qgroup_ref_list) !=
+ !trans->delayed_ref_elem.seq) {
+ /* list without seq or seq without list */
+ printk(KERN_ERR "btrfs: qgroup accounting update error, list is%s empty, seq is %llu\n",
+ list_empty(&trans->qgroup_ref_list) ? "" : " not",
+ trans->delayed_ref_elem.seq);
+ BUG();
+ }
+
+ if (!trans->delayed_ref_elem.seq)
+ return 0;
+
+ while (!list_empty(&trans->qgroup_ref_list)) {
+ qgroup_update = list_first_entry(&trans->qgroup_ref_list,
+ struct qgroup_update, list);
+ list_del(&qgroup_update->list);
+ if (!ret)
+ ret = btrfs_qgroup_account_ref(
+ trans, fs_info, qgroup_update->node,
+ qgroup_update->extent_op);
+ kfree(qgroup_update);
+ }
+
+ btrfs_put_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
+
+ return ret;
+}
+
/*
* this starts processing the delayed reference count updates and
* extent insertions we have queued up so far. count can be
@@ -2398,11 +2479,18 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2 * 1024 * 1024, btrfs_get_alloc_profile(root, 0),
CHUNK_ALLOC_NO_FORCE);
+ btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
+
delayed_refs = &trans->transaction->delayed_refs;
INIT_LIST_HEAD(&cluster);
again:
consider_waiting = 0;
spin_lock(&delayed_refs->lock);
+
+#ifdef SCRAMBLE_DELAYED_REFS
+ delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
+#endif
+
if (count == 0) {
count = delayed_refs->num_entries * 2;
run_most = 1;
@@ -2437,7 +2525,7 @@ again:
num_refs = delayed_refs->num_entries;
first_seq = root->fs_info->tree_mod_seq_list.next;
} else {
- wait_for_more_refs(delayed_refs,
+ wait_for_more_refs(root->fs_info, delayed_refs,
num_refs, first_seq);
/*
* after waiting, things have changed. we
@@ -2502,6 +2590,7 @@ again:
}
out:
spin_unlock(&delayed_refs->lock);
+ assert_qgroups_uptodate(trans);
return 0;
}
@@ -2581,8 +2670,10 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
node = rb_prev(node);
if (node) {
+ int seq = ref->seq;
+
ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
- if (ref->bytenr == bytenr)
+ if (ref->bytenr == bytenr && ref->seq == seq)
goto out_unlock;
}
@@ -2903,8 +2994,13 @@ again:
}
spin_lock(&block_group->lock);
- if (block_group->cached != BTRFS_CACHE_FINISHED) {
- /* We're not cached, don't bother trying to write stuff out */
+ if (block_group->cached != BTRFS_CACHE_FINISHED ||
+ !btrfs_test_opt(root, SPACE_CACHE)) {
+ /*
+ * don't bother trying to write stuff out _if_
+ * a) we're not cached,
+ * b) we're with nospace_cache mount option.
+ */
dcs = BTRFS_DC_WRITTEN;
spin_unlock(&block_group->lock);
goto out_put;
@@ -3134,6 +3230,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
init_waitqueue_head(&found->wait);
*space_info = found;
list_add_rcu(&found->list, &info->space_info);
+ if (flags & BTRFS_BLOCK_GROUP_DATA)
+ info->data_sinfo = found;
return 0;
}
@@ -3263,12 +3361,6 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
return get_alloc_profile(root, flags);
}
-void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
-{
- BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
- BTRFS_BLOCK_GROUP_DATA);
-}
-
/*
* This will check the space that the inode allocates from to make sure we have
* enough space for bytes.
@@ -3277,6 +3369,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
{
struct btrfs_space_info *data_sinfo;
struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
u64 used;
int ret = 0, committed = 0, alloc_chunk = 1;
@@ -3289,7 +3382,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
committed = 1;
}
- data_sinfo = BTRFS_I(inode)->space_info;
+ data_sinfo = fs_info->data_sinfo;
if (!data_sinfo)
goto alloc;
@@ -3330,10 +3423,9 @@ alloc:
goto commit_trans;
}
- if (!data_sinfo) {
- btrfs_set_inode_space_info(root, inode);
- data_sinfo = BTRFS_I(inode)->space_info;
- }
+ if (!data_sinfo)
+ data_sinfo = fs_info->data_sinfo;
+
goto again;
}
@@ -3380,7 +3472,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
/* make sure bytes are sectorsize aligned */
bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
- data_sinfo = BTRFS_I(inode)->space_info;
+ data_sinfo = root->fs_info->data_sinfo;
spin_lock(&data_sinfo->lock);
data_sinfo->bytes_may_use -= bytes;
trace_btrfs_space_reservation(root->fs_info, "space_info",
@@ -3586,89 +3678,58 @@ out:
/*
* shrink metadata reservation for delalloc
*/
-static int shrink_delalloc(struct btrfs_root *root, u64 to_reclaim,
- bool wait_ordered)
+static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
+ bool wait_ordered)
{
struct btrfs_block_rsv *block_rsv;
struct btrfs_space_info *space_info;
struct btrfs_trans_handle *trans;
- u64 reserved;
+ u64 delalloc_bytes;
u64 max_reclaim;
- u64 reclaimed = 0;
long time_left;
unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
int loops = 0;
- unsigned long progress;
trans = (struct btrfs_trans_handle *)current->journal_info;
block_rsv = &root->fs_info->delalloc_block_rsv;
space_info = block_rsv->space_info;
smp_mb();
- reserved = space_info->bytes_may_use;
- progress = space_info->reservation_progress;
-
- if (reserved == 0)
- return 0;
-
- smp_mb();
- if (root->fs_info->delalloc_bytes == 0) {
+ delalloc_bytes = root->fs_info->delalloc_bytes;
+ if (delalloc_bytes == 0) {
if (trans)
- return 0;
+ return;
btrfs_wait_ordered_extents(root, 0, 0);
- return 0;
+ return;
}
- max_reclaim = min(reserved, to_reclaim);
- nr_pages = max_t(unsigned long, nr_pages,
- max_reclaim >> PAGE_CACHE_SHIFT);
- while (loops < 1024) {
- /* have the flusher threads jump in and do some IO */
- smp_mb();
- nr_pages = min_t(unsigned long, nr_pages,
- root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
+ while (delalloc_bytes && loops < 3) {
+ max_reclaim = min(delalloc_bytes, to_reclaim);
+ nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages,
- WB_REASON_FS_FREE_SPACE);
+ WB_REASON_FS_FREE_SPACE);
spin_lock(&space_info->lock);
- if (reserved > space_info->bytes_may_use)
- reclaimed += reserved - space_info->bytes_may_use;
- reserved = space_info->bytes_may_use;
+ if (space_info->bytes_used + space_info->bytes_reserved +
+ space_info->bytes_pinned + space_info->bytes_readonly +
+ space_info->bytes_may_use + orig <=
+ space_info->total_bytes) {
+ spin_unlock(&space_info->lock);
+ break;
+ }
spin_unlock(&space_info->lock);
loops++;
-
- if (reserved == 0 || reclaimed >= max_reclaim)
- break;
-
- if (trans && trans->transaction->blocked)
- return -EAGAIN;
-
if (wait_ordered && !trans) {
btrfs_wait_ordered_extents(root, 0, 0);
} else {
- time_left = schedule_timeout_interruptible(1);
-
- /* We were interrupted, exit */
+ time_left = schedule_timeout_killable(1);
if (time_left)
break;
}
-
- /* we've kicked the IO a few times, if anything has been freed,
- * exit. There is no sense in looping here for a long time
- * when we really need to commit the transaction, or there are
- * just too many writers without enough free space
- */
-
- if (loops > 3) {
- smp_mb();
- if (progress != space_info->reservation_progress)
- break;
- }
-
+ smp_mb();
+ delalloc_bytes = root->fs_info->delalloc_bytes;
}
-
- return reclaimed >= to_reclaim;
}
/**
@@ -3728,6 +3789,58 @@ commit:
return btrfs_commit_transaction(trans, root);
}
+enum flush_state {
+ FLUSH_DELALLOC = 1,
+ FLUSH_DELALLOC_WAIT = 2,
+ FLUSH_DELAYED_ITEMS_NR = 3,
+ FLUSH_DELAYED_ITEMS = 4,
+ COMMIT_TRANS = 5,
+};
+
+static int flush_space(struct btrfs_root *root,
+ struct btrfs_space_info *space_info, u64 num_bytes,
+ u64 orig_bytes, int state)
+{
+ struct btrfs_trans_handle *trans;
+ int nr;
+ int ret = 0;
+
+ switch (state) {
+ case FLUSH_DELALLOC:
+ case FLUSH_DELALLOC_WAIT:
+ shrink_delalloc(root, num_bytes, orig_bytes,
+ state == FLUSH_DELALLOC_WAIT);
+ break;
+ case FLUSH_DELAYED_ITEMS_NR:
+ case FLUSH_DELAYED_ITEMS:
+ if (state == FLUSH_DELAYED_ITEMS_NR) {
+ u64 bytes = btrfs_calc_trans_metadata_size(root, 1);
+
+ nr = (int)div64_u64(num_bytes, bytes);
+ if (!nr)
+ nr = 1;
+ nr *= 2;
+ } else {
+ nr = -1;
+ }
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ break;
+ }
+ ret = btrfs_run_delayed_items_nr(trans, root, nr);
+ btrfs_end_transaction(trans, root);
+ break;
+ case COMMIT_TRANS:
+ ret = may_commit_transaction(root, space_info, orig_bytes, 0);
+ break;
+ default:
+ ret = -ENOSPC;
+ break;
+ }
+
+ return ret;
+}
/**
* reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
* @root - the root we're allocating for
@@ -3749,11 +3862,10 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
struct btrfs_space_info *space_info = block_rsv->space_info;
u64 used;
u64 num_bytes = orig_bytes;
- int retries = 0;
+ int flush_state = FLUSH_DELALLOC;
int ret = 0;
- bool committed = false;
bool flushing = false;
- bool wait_ordered = false;
+ bool committed = false;
again:
ret = 0;
@@ -3812,9 +3924,8 @@ again:
* amount plus the amount of bytes that we need for this
* reservation.
*/
- wait_ordered = true;
num_bytes = used - space_info->total_bytes +
- (orig_bytes * (retries + 1));
+ (orig_bytes * 2);
}
if (ret) {
@@ -3867,8 +3978,6 @@ again:
trace_btrfs_space_reservation(root->fs_info,
"space_info", space_info->flags, orig_bytes, 1);
ret = 0;
- } else {
- wait_ordered = true;
}
}
@@ -3887,36 +3996,13 @@ again:
if (!ret || !flush)
goto out;
- /*
- * We do synchronous shrinking since we don't actually unreserve
- * metadata until after the IO is completed.
- */
- ret = shrink_delalloc(root, num_bytes, wait_ordered);
- if (ret < 0)
- goto out;
-
- ret = 0;
-
- /*
- * So if we were overcommitted it's possible that somebody else flushed
- * out enough space and we simply didn't have enough space to reclaim,
- * so go back around and try again.
- */
- if (retries < 2) {
- wait_ordered = true;
- retries++;
+ ret = flush_space(root, space_info, num_bytes, orig_bytes,
+ flush_state);
+ flush_state++;
+ if (!ret)
goto again;
- }
-
- ret = -ENOSPC;
- if (committed)
- goto out;
-
- ret = may_commit_transaction(root, space_info, orig_bytes, 0);
- if (!ret) {
- committed = true;
+ else if (flush_state <= COMMIT_TRANS)
goto again;
- }
out:
if (flushing) {
@@ -3934,7 +4020,10 @@ static struct btrfs_block_rsv *get_block_rsv(
{
struct btrfs_block_rsv *block_rsv = NULL;
- if (root->ref_cows || root == root->fs_info->csum_root)
+ if (root->ref_cows)
+ block_rsv = trans->block_rsv;
+
+ if (root == root->fs_info->csum_root && trans->adding_csums)
block_rsv = trans->block_rsv;
if (!block_rsv)
@@ -4286,6 +4375,9 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
+ if (!trans->block_rsv)
+ return;
+
if (!trans->bytes_reserved)
return;
@@ -4444,7 +4536,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
int ret;
/* Need to be holding the i_mutex here if we aren't free space cache */
- if (btrfs_is_free_space_inode(root, inode))
+ if (btrfs_is_free_space_inode(inode))
flush = 0;
if (flush && btrfs_transaction_in_commit(root->fs_info))
@@ -4476,6 +4568,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
csum_bytes = BTRFS_I(inode)->csum_bytes;
spin_unlock(&BTRFS_I(inode)->lock);
+ if (root->fs_info->quota_enabled) {
+ ret = btrfs_qgroup_reserve(root, num_bytes +
+ nr_extents * root->leafsize);
+ if (ret)
+ return ret;
+ }
+
ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
if (ret) {
u64 to_free = 0;
@@ -4554,6 +4653,11 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
trace_btrfs_space_reservation(root->fs_info, "delalloc",
btrfs_ino(inode), to_free, 0);
+ if (root->fs_info->quota_enabled) {
+ btrfs_qgroup_free(root, num_bytes +
+ dropped * root->leafsize);
+ }
+
btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
to_free);
}
@@ -5190,8 +5294,9 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
rb_erase(&head->node.rb_node, &delayed_refs->root);
delayed_refs->num_entries--;
- if (waitqueue_active(&delayed_refs->seq_wait))
- wake_up(&delayed_refs->seq_wait);
+ smp_mb();
+ if (waitqueue_active(&root->fs_info->tree_mod_seq_wait))
+ wake_up(&root->fs_info->tree_mod_seq_wait);
/*
* we don't take a ref on the node because we're removing it from the
@@ -5748,7 +5853,11 @@ loop:
ret = do_chunk_alloc(trans, root, num_bytes +
2 * 1024 * 1024, data,
CHUNK_ALLOC_LIMITED);
- if (ret < 0) {
+ /*
+ * Do not bail out on ENOSPC since we
+ * can do more things.
+ */
+ if (ret < 0 && ret != -ENOSPC) {
btrfs_abort_transaction(trans,
root, ret);
goto out;
@@ -5816,13 +5925,13 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
again:
list_for_each_entry(cache, &info->block_groups[index], list) {
spin_lock(&cache->lock);
- printk(KERN_INFO "block group %llu has %llu bytes, %llu used "
- "%llu pinned %llu reserved\n",
+ printk(KERN_INFO "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s\n",
(unsigned long long)cache->key.objectid,
(unsigned long long)cache->key.offset,
(unsigned long long)btrfs_block_group_used(&cache->item),
(unsigned long long)cache->pinned,
- (unsigned long long)cache->reserved);
+ (unsigned long long)cache->reserved,
+ cache->ro ? "[readonly]" : "");
btrfs_dump_free_space(cache, bytes);
spin_unlock(&cache->lock);
}
@@ -7610,8 +7719,21 @@ int btrfs_read_block_groups(struct btrfs_root *root)
INIT_LIST_HEAD(&cache->list);
INIT_LIST_HEAD(&cache->cluster_list);
- if (need_clear)
+ if (need_clear) {
+ /*
+ * When we mount with old space cache, we need to
+ * set BTRFS_DC_CLEAR and set dirty flag.
+ *
+ * a) Setting 'BTRFS_DC_CLEAR' makes sure that we
+ * truncate the old free space cache inode and
+ * setup a new one.
+ * b) Setting 'dirty flag' makes sure that we flush
+ * the new space cache info onto disk.
+ */
cache->disk_cache_state = BTRFS_DC_CLEAR;
+ if (btrfs_test_opt(root, SPACE_CACHE))
+ cache->dirty = 1;
+ }
read_extent_buffer(leaf, &cache->item,
btrfs_item_ptr_offset(leaf, path->slots[0]),
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 01c21b6c6d43..3e7c9ed6505b 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1918,7 +1918,7 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
return -EIO;
}
- printk_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu "
+ printk_ratelimited_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu "
"(dev %s sector %llu)\n", page->mapping->host->i_ino,
start, rcu_str_deref(dev->name), sector);
@@ -3077,8 +3077,15 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb,
}
}
+ /*
+ * We need to do this to prevent races in people who check if the eb is
+ * under IO since we can end up having no IO bits set for a short period
+ * of time.
+ */
+ spin_lock(&eb->refs_lock);
if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
+ spin_unlock(&eb->refs_lock);
btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
spin_lock(&fs_info->delalloc_lock);
if (fs_info->dirty_metadata_bytes >= eb->len)
@@ -3087,6 +3094,8 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb,
WARN_ON(1);
spin_unlock(&fs_info->delalloc_lock);
ret = 1;
+ } else {
+ spin_unlock(&eb->refs_lock);
}
btrfs_tree_unlock(eb);
@@ -3557,19 +3566,38 @@ int extent_readpages(struct extent_io_tree *tree,
struct bio *bio = NULL;
unsigned page_idx;
unsigned long bio_flags = 0;
+ struct page *pagepool[16];
+ struct page *page;
+ int i = 0;
+ int nr = 0;
for (page_idx = 0; page_idx < nr_pages; page_idx++) {
- struct page *page = list_entry(pages->prev, struct page, lru);
+ page = list_entry(pages->prev, struct page, lru);
prefetchw(&page->flags);
list_del(&page->lru);
- if (!add_to_page_cache_lru(page, mapping,
+ if (add_to_page_cache_lru(page, mapping,
page->index, GFP_NOFS)) {
- __extent_read_full_page(tree, page, get_extent,
- &bio, 0, &bio_flags);
+ page_cache_release(page);
+ continue;
}
- page_cache_release(page);
+
+ pagepool[nr++] = page;
+ if (nr < ARRAY_SIZE(pagepool))
+ continue;
+ for (i = 0; i < nr; i++) {
+ __extent_read_full_page(tree, pagepool[i], get_extent,
+ &bio, 0, &bio_flags);
+ page_cache_release(pagepool[i]);
+ }
+ nr = 0;
+ }
+ for (i = 0; i < nr; i++) {
+ __extent_read_full_page(tree, pagepool[i], get_extent,
+ &bio, 0, &bio_flags);
+ page_cache_release(pagepool[i]);
}
+
BUG_ON(!list_empty(pages));
if (bio)
return submit_one_bio(READ, bio, 0, bio_flags);
@@ -4123,11 +4151,10 @@ static void check_buffer_tree_ref(struct extent_buffer *eb)
* So bump the ref count first, then set the bit. If someone
* beat us to it, drop the ref we added.
*/
- if (!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
+ spin_lock(&eb->refs_lock);
+ if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
atomic_inc(&eb->refs);
- if (test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
- atomic_dec(&eb->refs);
- }
+ spin_unlock(&eb->refs_lock);
}
static void mark_extent_buffer_accessed(struct extent_buffer *eb)
@@ -4239,9 +4266,7 @@ again:
goto free_eb;
}
/* add one reference for the tree */
- spin_lock(&eb->refs_lock);
check_buffer_tree_ref(eb);
- spin_unlock(&eb->refs_lock);
spin_unlock(&tree->buffer_lock);
radix_tree_preload_end();
@@ -4300,7 +4325,7 @@ static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
}
/* Expects to have eb->eb_lock already held */
-static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
+static int release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
{
WARN_ON(atomic_read(&eb->refs) == 0);
if (atomic_dec_and_test(&eb->refs)) {
@@ -4321,9 +4346,11 @@ static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
btrfs_release_extent_buffer_page(eb, 0);
call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
- return;
+ return 1;
}
spin_unlock(&eb->refs_lock);
+
+ return 0;
}
void free_extent_buffer(struct extent_buffer *eb)
@@ -4962,7 +4989,6 @@ int try_release_extent_buffer(struct page *page, gfp_t mask)
spin_unlock(&eb->refs_lock);
return 0;
}
- release_extent_buffer(eb, mask);
- return 1;
+ return release_extent_buffer(eb, mask);
}
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 5d158d320233..b45b9de0c21d 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -183,7 +183,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
* read from the commit root and sidestep a nasty deadlock
* between reading the free space cache and updating the csum tree.
*/
- if (btrfs_is_free_space_inode(root, inode)) {
+ if (btrfs_is_free_space_inode(inode)) {
path->search_commit_root = 1;
path->skip_locking = 1;
}
@@ -690,6 +690,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
return -ENOMEM;
sector_sum = sums->sums;
+ trans->adding_csums = 1;
again:
next_offset = (u64)-1;
found_next = 0;
@@ -853,6 +854,7 @@ next_sector:
goto again;
}
out:
+ trans->adding_csums = 0;
btrfs_free_path(path);
return ret;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 6c4e2baa9290..6b10acfc2f5c 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1968,7 +1968,7 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
info = rb_entry(n, struct btrfs_free_space, offset_index);
- if (info->bytes >= bytes)
+ if (info->bytes >= bytes && !block_group->ro)
count++;
printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
(unsigned long long)info->offset,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 4ffc87389545..dac1fc21d809 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -825,7 +825,7 @@ static noinline int cow_file_range(struct inode *inode,
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
int ret = 0;
- BUG_ON(btrfs_is_free_space_inode(root, inode));
+ BUG_ON(btrfs_is_free_space_inode(inode));
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
extent_clear_unlock_delalloc(inode,
@@ -1010,7 +1010,7 @@ static noinline void async_cow_submit(struct btrfs_work *work)
atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages);
if (atomic_read(&root->fs_info->async_delalloc_pages) <
- 5 * 1042 * 1024 &&
+ 5 * 1024 * 1024 &&
waitqueue_active(&root->fs_info->async_submit_wait))
wake_up(&root->fs_info->async_submit_wait);
@@ -1035,7 +1035,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
struct btrfs_root *root = BTRFS_I(inode)->root;
unsigned long nr_pages;
u64 cur_end;
- int limit = 10 * 1024 * 1042;
+ int limit = 10 * 1024 * 1024;
clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
1, 0, NULL, GFP_NOFS);
@@ -1153,7 +1153,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
return -ENOMEM;
}
- nolock = btrfs_is_free_space_inode(root, inode);
+ nolock = btrfs_is_free_space_inode(inode);
if (nolock)
trans = btrfs_join_transaction_nolock(root);
@@ -1466,7 +1466,7 @@ static void btrfs_set_bit_hook(struct inode *inode,
if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
- bool do_list = !btrfs_is_free_space_inode(root, inode);
+ bool do_list = !btrfs_is_free_space_inode(inode);
if (*bits & EXTENT_FIRST_DELALLOC) {
*bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1501,7 +1501,7 @@ static void btrfs_clear_bit_hook(struct inode *inode,
if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
- bool do_list = !btrfs_is_free_space_inode(root, inode);
+ bool do_list = !btrfs_is_free_space_inode(inode);
if (*bits & EXTENT_FIRST_DELALLOC) {
*bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1612,7 +1612,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
- if (btrfs_is_free_space_inode(root, inode))
+ if (btrfs_is_free_space_inode(inode))
metadata = 2;
if (!(rw & REQ_WRITE)) {
@@ -1869,7 +1869,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
int ret;
bool nolock;
- nolock = btrfs_is_free_space_inode(root, inode);
+ nolock = btrfs_is_free_space_inode(inode);
if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
ret = -EIO;
@@ -2007,7 +2007,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
ordered_extent->work.func = finish_ordered_fn;
ordered_extent->work.flags = 0;
- if (btrfs_is_free_space_inode(root, inode))
+ if (btrfs_is_free_space_inode(inode))
workers = &root->fs_info->endio_freespace_worker;
else
workers = &root->fs_info->endio_write_workers;
@@ -2732,7 +2732,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
* The data relocation inode should also be directly updated
* without delay
*/
- if (!btrfs_is_free_space_inode(root, inode)
+ if (!btrfs_is_free_space_inode(inode)
&& root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
btrfs_update_root_times(trans, root);
@@ -2835,7 +2835,7 @@ err:
inode_inc_iversion(inode);
inode_inc_iversion(dir);
inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME;
- btrfs_update_inode(trans, root, dir);
+ ret = btrfs_update_inode(trans, root, dir);
out:
return ret;
}
@@ -3745,7 +3745,7 @@ void btrfs_evict_inode(struct inode *inode)
truncate_inode_pages(&inode->i_data, 0);
if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
- btrfs_is_free_space_inode(root, inode)))
+ btrfs_is_free_space_inode(inode)))
goto no_delete;
if (is_bad_inode(inode)) {
@@ -4084,7 +4084,6 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
struct btrfs_iget_args *args = p;
inode->i_ino = args->ino;
BTRFS_I(inode)->root = args->root;
- btrfs_set_inode_space_info(args->root, inode);
return 0;
}
@@ -4459,7 +4458,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
return 0;
- if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode))
+ if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(inode))
nolock = true;
if (wbc->sync_mode == WB_SYNC_ALL) {
@@ -4520,6 +4519,11 @@ int btrfs_dirty_inode(struct inode *inode)
static int btrfs_update_time(struct inode *inode, struct timespec *now,
int flags)
{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+
+ if (btrfs_root_readonly(root))
+ return -EROFS;
+
if (flags & S_VERSION)
inode_inc_iversion(inode);
if (flags & S_CTIME)
@@ -4664,7 +4668,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
BTRFS_I(inode)->root = root;
BTRFS_I(inode)->generation = trans->transid;
inode->i_generation = BTRFS_I(inode)->generation;
- btrfs_set_inode_space_info(root, inode);
if (S_ISDIR(mode))
owner = 0;
@@ -4692,6 +4695,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
+ memset_extent_buffer(path->nodes[0], 0, (unsigned long)inode_item,
+ sizeof(*inode_item));
fill_inode_item(trans, path->nodes[0], inode_item, inode);
ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
@@ -6943,7 +6948,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
return NULL;
ei->root = NULL;
- ei->space_info = NULL;
ei->generation = 0;
ei->last_trans = 0;
ei->last_sub_trans = 0;
@@ -7050,7 +7054,7 @@ int btrfs_drop_inode(struct inode *inode)
struct btrfs_root *root = BTRFS_I(inode)->root;
if (btrfs_root_refs(&root->root_item) == 0 &&
- !btrfs_is_free_space_inode(root, inode))
+ !btrfs_is_free_space_inode(inode))
return 1;
else
return generic_drop_inode(inode);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index bca6997fdb80..43f0012016e3 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -338,7 +338,8 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
static noinline int create_subvol(struct btrfs_root *root,
struct dentry *dentry,
char *name, int namelen,
- u64 *async_transid)
+ u64 *async_transid,
+ struct btrfs_qgroup_inherit **inherit)
{
struct btrfs_trans_handle *trans;
struct btrfs_key key;
@@ -372,6 +373,11 @@ static noinline int create_subvol(struct btrfs_root *root,
if (IS_ERR(trans))
return PTR_ERR(trans);
+ ret = btrfs_qgroup_inherit(trans, root->fs_info, 0, objectid,
+ inherit ? *inherit : NULL);
+ if (ret)
+ goto fail;
+
leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
0, objectid, NULL, 0, 0, 0);
if (IS_ERR(leaf)) {
@@ -496,7 +502,7 @@ fail:
static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
char *name, int namelen, u64 *async_transid,
- bool readonly)
+ bool readonly, struct btrfs_qgroup_inherit **inherit)
{
struct inode *inode;
struct btrfs_pending_snapshot *pending_snapshot;
@@ -514,6 +520,10 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
pending_snapshot->dentry = dentry;
pending_snapshot->root = root;
pending_snapshot->readonly = readonly;
+ if (inherit) {
+ pending_snapshot->inherit = *inherit;
+ *inherit = NULL; /* take responsibility to free it */
+ }
trans = btrfs_start_transaction(root->fs_info->extent_root, 5);
if (IS_ERR(trans)) {
@@ -647,7 +657,8 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
static noinline int btrfs_mksubvol(struct path *parent,
char *name, int namelen,
struct btrfs_root *snap_src,
- u64 *async_transid, bool readonly)
+ u64 *async_transid, bool readonly,
+ struct btrfs_qgroup_inherit **inherit)
{
struct inode *dir = parent->dentry->d_inode;
struct dentry *dentry;
@@ -664,13 +675,9 @@ static noinline int btrfs_mksubvol(struct path *parent,
if (dentry->d_inode)
goto out_dput;
- error = mnt_want_write(parent->mnt);
- if (error)
- goto out_dput;
-
error = btrfs_may_create(dir, dentry);
if (error)
- goto out_drop_write;
+ goto out_dput;
down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);
@@ -678,18 +685,16 @@ static noinline int btrfs_mksubvol(struct path *parent,
goto out_up_read;
if (snap_src) {
- error = create_snapshot(snap_src, dentry,
- name, namelen, async_transid, readonly);
+ error = create_snapshot(snap_src, dentry, name, namelen,
+ async_transid, readonly, inherit);
} else {
error = create_subvol(BTRFS_I(dir)->root, dentry,
- name, namelen, async_transid);
+ name, namelen, async_transid, inherit);
}
if (!error)
fsnotify_mkdir(dir, dentry);
out_up_read:
up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);
-out_drop_write:
- mnt_drop_write(parent->mnt);
out_dput:
dput(dentry);
out_unlock:
@@ -844,7 +849,8 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em)
}
static int should_defrag_range(struct inode *inode, u64 start, int thresh,
- u64 *last_len, u64 *skip, u64 *defrag_end)
+ u64 *last_len, u64 *skip, u64 *defrag_end,
+ int compress)
{
struct extent_map *em;
int ret = 1;
@@ -875,7 +881,7 @@ static int should_defrag_range(struct inode *inode, u64 start, int thresh,
* we hit a real extent, if it is big or the next extent is not a
* real extent, don't bother defragging it
*/
- if ((*last_len == 0 || *last_len >= thresh) &&
+ if (!compress && (*last_len == 0 || *last_len >= thresh) &&
(em->len >= thresh || !next_mergeable))
ret = 0;
out:
@@ -1059,11 +1065,9 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
u64 newer_than, unsigned long max_to_defrag)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_super_block *disk_super;
struct file_ra_state *ra = NULL;
unsigned long last_index;
u64 isize = i_size_read(inode);
- u64 features;
u64 last_len = 0;
u64 skip = 0;
u64 defrag_end = 0;
@@ -1157,7 +1161,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
extent_thresh, &last_len, &skip,
- &defrag_end)) {
+ &defrag_end, range->flags &
+ BTRFS_DEFRAG_RANGE_COMPRESS)) {
unsigned long next;
/*
* the should_defrag function tells us how much to skip
@@ -1249,11 +1254,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
mutex_unlock(&inode->i_mutex);
}
- disk_super = root->fs_info->super_copy;
- features = btrfs_super_incompat_flags(disk_super);
if (range->compress_type == BTRFS_COMPRESS_LZO) {
- features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
- btrfs_set_super_incompat_flags(disk_super, features);
+ btrfs_set_fs_incompat(root->fs_info, COMPRESS_LZO);
}
ret = defrag_count;
@@ -1391,41 +1393,39 @@ out:
}
static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
- char *name,
- unsigned long fd,
- int subvol,
- u64 *transid,
- bool readonly)
+ char *name, unsigned long fd, int subvol,
+ u64 *transid, bool readonly,
+ struct btrfs_qgroup_inherit **inherit)
{
- struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
struct file *src_file;
int namelen;
int ret = 0;
- if (root->fs_info->sb->s_flags & MS_RDONLY)
- return -EROFS;
+ ret = mnt_want_write_file(file);
+ if (ret)
+ goto out;
namelen = strlen(name);
if (strchr(name, '/')) {
ret = -EINVAL;
- goto out;
+ goto out_drop_write;
}
if (name[0] == '.' &&
(namelen == 1 || (name[1] == '.' && namelen == 2))) {
ret = -EEXIST;
- goto out;
+ goto out_drop_write;
}
if (subvol) {
ret = btrfs_mksubvol(&file->f_path, name, namelen,
- NULL, transid, readonly);
+ NULL, transid, readonly, inherit);
} else {
struct inode *src_inode;
src_file = fget(fd);
if (!src_file) {
ret = -EINVAL;
- goto out;
+ goto out_drop_write;
}
src_inode = src_file->f_path.dentry->d_inode;
@@ -1434,13 +1434,15 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
"another FS\n");
ret = -EINVAL;
fput(src_file);
- goto out;
+ goto out_drop_write;
}
ret = btrfs_mksubvol(&file->f_path, name, namelen,
BTRFS_I(src_inode)->root,
- transid, readonly);
+ transid, readonly, inherit);
fput(src_file);
}
+out_drop_write:
+ mnt_drop_write_file(file);
out:
return ret;
}
@@ -1458,7 +1460,7 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
vol_args->fd, subvol,
- NULL, false);
+ NULL, false, NULL);
kfree(vol_args);
return ret;
@@ -1472,6 +1474,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
u64 transid = 0;
u64 *ptr = NULL;
bool readonly = false;
+ struct btrfs_qgroup_inherit *inherit = NULL;
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
@@ -1479,7 +1482,8 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
if (vol_args->flags &
- ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY)) {
+ ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY |
+ BTRFS_SUBVOL_QGROUP_INHERIT)) {
ret = -EOPNOTSUPP;
goto out;
}
@@ -1488,10 +1492,21 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
ptr = &transid;
if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
readonly = true;
+ if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
+ if (vol_args->size > PAGE_CACHE_SIZE) {
+ ret = -EINVAL;
+ goto out;
+ }
+ inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size);
+ if (IS_ERR(inherit)) {
+ ret = PTR_ERR(inherit);
+ goto out;
+ }
+ }
ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
- vol_args->fd, subvol,
- ptr, readonly);
+ vol_args->fd, subvol, ptr,
+ readonly, &inherit);
if (ret == 0 && ptr &&
copy_to_user(arg +
@@ -1500,6 +1515,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
ret = -EFAULT;
out:
kfree(vol_args);
+ kfree(inherit);
return ret;
}
@@ -1535,29 +1551,40 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
u64 flags;
int ret = 0;
- if (root->fs_info->sb->s_flags & MS_RDONLY)
- return -EROFS;
+ ret = mnt_want_write_file(file);
+ if (ret)
+ goto out;
- if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID)
- return -EINVAL;
+ if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
+ ret = -EINVAL;
+ goto out_drop_write;
+ }
- if (copy_from_user(&flags, arg, sizeof(flags)))
- return -EFAULT;
+ if (copy_from_user(&flags, arg, sizeof(flags))) {
+ ret = -EFAULT;
+ goto out_drop_write;
+ }
- if (flags & BTRFS_SUBVOL_CREATE_ASYNC)
- return -EINVAL;
+ if (flags & BTRFS_SUBVOL_CREATE_ASYNC) {
+ ret = -EINVAL;
+ goto out_drop_write;
+ }
- if (flags & ~BTRFS_SUBVOL_RDONLY)
- return -EOPNOTSUPP;
+ if (flags & ~BTRFS_SUBVOL_RDONLY) {
+ ret = -EOPNOTSUPP;
+ goto out_drop_write;
+ }
- if (!inode_owner_or_capable(inode))
- return -EACCES;
+ if (!inode_owner_or_capable(inode)) {
+ ret = -EACCES;
+ goto out_drop_write;
+ }
down_write(&root->fs_info->subvol_sem);
/* nothing to do */
if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root))
- goto out;
+ goto out_drop_sem;
root_flags = btrfs_root_flags(&root->root_item);
if (flags & BTRFS_SUBVOL_RDONLY)
@@ -1580,8 +1607,11 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
out_reset:
if (ret)
btrfs_set_root_flags(&root->root_item, root_flags);
-out:
+out_drop_sem:
up_write(&root->fs_info->subvol_sem);
+out_drop_write:
+ mnt_drop_write_file(file);
+out:
return ret;
}
@@ -2766,8 +2796,6 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
struct btrfs_path *path;
struct btrfs_key location;
struct btrfs_disk_key disk_key;
- struct btrfs_super_block *disk_super;
- u64 features;
u64 objectid = 0;
u64 dir_id;
@@ -2818,12 +2846,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_free_path(path);
- disk_super = root->fs_info->super_copy;
- features = btrfs_super_incompat_flags(disk_super);
- if (!(features & BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)) {
- features |= BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL;
- btrfs_set_super_incompat_flags(disk_super, features);
- }
+ btrfs_set_fs_incompat(root->fs_info, DEFAULT_SUBVOL);
btrfs_end_transaction(trans, root);
return 0;
@@ -3080,19 +3103,21 @@ static long btrfs_ioctl_scrub_progress(struct btrfs_root *root,
}
static long btrfs_ioctl_get_dev_stats(struct btrfs_root *root,
- void __user *arg, int reset_after_read)
+ void __user *arg)
{
struct btrfs_ioctl_get_dev_stats *sa;
int ret;
- if (reset_after_read && !capable(CAP_SYS_ADMIN))
- return -EPERM;
-
sa = memdup_user(arg, sizeof(*sa));
if (IS_ERR(sa))
return PTR_ERR(sa);
- ret = btrfs_get_dev_stats(root, sa, reset_after_read);
+ if ((sa->flags & BTRFS_DEV_STATS_RESET) && !capable(CAP_SYS_ADMIN)) {
+ kfree(sa);
+ return -EPERM;
+ }
+
+ ret = btrfs_get_dev_stats(root, sa);
if (copy_to_user(arg, sa, sizeof(*sa)))
ret = -EFAULT;
@@ -3282,10 +3307,7 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (fs_info->sb->s_flags & MS_RDONLY)
- return -EROFS;
-
- ret = mnt_want_write(file->f_path.mnt);
+ ret = mnt_want_write_file(file);
if (ret)
return ret;
@@ -3355,7 +3377,7 @@ out_bargs:
out:
mutex_unlock(&fs_info->balance_mutex);
mutex_unlock(&fs_info->volume_mutex);
- mnt_drop_write(file->f_path.mnt);
+ mnt_drop_write_file(file);
return ret;
}
@@ -3407,6 +3429,183 @@ out:
return ret;
}
+static long btrfs_ioctl_quota_ctl(struct btrfs_root *root, void __user *arg)
+{
+ struct btrfs_ioctl_quota_ctl_args *sa;
+ struct btrfs_trans_handle *trans = NULL;
+ int ret;
+ int err;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (root->fs_info->sb->s_flags & MS_RDONLY)
+ return -EROFS;
+
+ sa = memdup_user(arg, sizeof(*sa));
+ if (IS_ERR(sa))
+ return PTR_ERR(sa);
+
+ if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) {
+ trans = btrfs_start_transaction(root, 2);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out;
+ }
+ }
+
+ switch (sa->cmd) {
+ case BTRFS_QUOTA_CTL_ENABLE:
+ ret = btrfs_quota_enable(trans, root->fs_info);
+ break;
+ case BTRFS_QUOTA_CTL_DISABLE:
+ ret = btrfs_quota_disable(trans, root->fs_info);
+ break;
+ case BTRFS_QUOTA_CTL_RESCAN:
+ ret = btrfs_quota_rescan(root->fs_info);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ if (copy_to_user(arg, sa, sizeof(*sa)))
+ ret = -EFAULT;
+
+ if (trans) {
+ err = btrfs_commit_transaction(trans, root);
+ if (err && !ret)
+ ret = err;
+ }
+
+out:
+ kfree(sa);
+ return ret;
+}
+
+static long btrfs_ioctl_qgroup_assign(struct btrfs_root *root, void __user *arg)
+{
+ struct btrfs_ioctl_qgroup_assign_args *sa;
+ struct btrfs_trans_handle *trans;
+ int ret;
+ int err;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (root->fs_info->sb->s_flags & MS_RDONLY)
+ return -EROFS;
+
+ sa = memdup_user(arg, sizeof(*sa));
+ if (IS_ERR(sa))
+ return PTR_ERR(sa);
+
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out;
+ }
+
+ /* FIXME: check if the IDs really exist */
+ if (sa->assign) {
+ ret = btrfs_add_qgroup_relation(trans, root->fs_info,
+ sa->src, sa->dst);
+ } else {
+ ret = btrfs_del_qgroup_relation(trans, root->fs_info,
+ sa->src, sa->dst);
+ }
+
+ err = btrfs_end_transaction(trans, root);
+ if (err && !ret)
+ ret = err;
+
+out:
+ kfree(sa);
+ return ret;
+}
+
+static long btrfs_ioctl_qgroup_create(struct btrfs_root *root, void __user *arg)
+{
+ struct btrfs_ioctl_qgroup_create_args *sa;
+ struct btrfs_trans_handle *trans;
+ int ret;
+ int err;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (root->fs_info->sb->s_flags & MS_RDONLY)
+ return -EROFS;
+
+ sa = memdup_user(arg, sizeof(*sa));
+ if (IS_ERR(sa))
+ return PTR_ERR(sa);
+
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out;
+ }
+
+ /* FIXME: check if the IDs really exist */
+ if (sa->create) {
+ ret = btrfs_create_qgroup(trans, root->fs_info, sa->qgroupid,
+ NULL);
+ } else {
+ ret = btrfs_remove_qgroup(trans, root->fs_info, sa->qgroupid);
+ }
+
+ err = btrfs_end_transaction(trans, root);
+ if (err && !ret)
+ ret = err;
+
+out:
+ kfree(sa);
+ return ret;
+}
+
+static long btrfs_ioctl_qgroup_limit(struct btrfs_root *root, void __user *arg)
+{
+ struct btrfs_ioctl_qgroup_limit_args *sa;
+ struct btrfs_trans_handle *trans;
+ int ret;
+ int err;
+ u64 qgroupid;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (root->fs_info->sb->s_flags & MS_RDONLY)
+ return -EROFS;
+
+ sa = memdup_user(arg, sizeof(*sa));
+ if (IS_ERR(sa))
+ return PTR_ERR(sa);
+
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out;
+ }
+
+ qgroupid = sa->qgroupid;
+ if (!qgroupid) {
+ /* take the current subvol as qgroup */
+ qgroupid = root->root_key.objectid;
+ }
+
+ /* FIXME: check if the IDs really exist */
+ ret = btrfs_limit_qgroup(trans, root->fs_info, qgroupid, &sa->lim);
+
+ err = btrfs_end_transaction(trans, root);
+ if (err && !ret)
+ ret = err;
+
+out:
+ kfree(sa);
+ return ret;
+}
+
static long btrfs_ioctl_set_received_subvol(struct file *file,
void __user *arg)
{
@@ -3509,6 +3708,8 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_snap_create_v2(file, argp, 0);
case BTRFS_IOC_SUBVOL_CREATE:
return btrfs_ioctl_snap_create(file, argp, 1);
+ case BTRFS_IOC_SUBVOL_CREATE_V2:
+ return btrfs_ioctl_snap_create_v2(file, argp, 1);
case BTRFS_IOC_SNAP_DESTROY:
return btrfs_ioctl_snap_destroy(file, argp);
case BTRFS_IOC_SUBVOL_GETFLAGS:
@@ -3575,9 +3776,15 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_SEND:
return btrfs_ioctl_send(file, argp);
case BTRFS_IOC_GET_DEV_STATS:
- return btrfs_ioctl_get_dev_stats(root, argp, 0);
- case BTRFS_IOC_GET_AND_RESET_DEV_STATS:
- return btrfs_ioctl_get_dev_stats(root, argp, 1);
+ return btrfs_ioctl_get_dev_stats(root, argp);
+ case BTRFS_IOC_QUOTA_CTL:
+ return btrfs_ioctl_quota_ctl(root, argp);
+ case BTRFS_IOC_QGROUP_ASSIGN:
+ return btrfs_ioctl_qgroup_assign(root, argp);
+ case BTRFS_IOC_QGROUP_CREATE:
+ return btrfs_ioctl_qgroup_create(root, argp);
+ case BTRFS_IOC_QGROUP_LIMIT:
+ return btrfs_ioctl_qgroup_limit(root, argp);
}
return -ENOTTY;
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 27097e8bfa39..731e2875ab93 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -32,15 +32,46 @@ struct btrfs_ioctl_vol_args {
#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
#define BTRFS_SUBVOL_RDONLY (1ULL << 1)
+#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2)
#define BTRFS_FSID_SIZE 16
#define BTRFS_UUID_SIZE 16
+#define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0)
+
+struct btrfs_qgroup_limit {
+ __u64 flags;
+ __u64 max_rfer;
+ __u64 max_excl;
+ __u64 rsv_rfer;
+ __u64 rsv_excl;
+};
+
+struct btrfs_qgroup_inherit {
+ __u64 flags;
+ __u64 num_qgroups;
+ __u64 num_ref_copies;
+ __u64 num_excl_copies;
+ struct btrfs_qgroup_limit lim;
+ __u64 qgroups[0];
+};
+
+struct btrfs_ioctl_qgroup_limit_args {
+ __u64 qgroupid;
+ struct btrfs_qgroup_limit lim;
+};
+
#define BTRFS_SUBVOL_NAME_MAX 4039
struct btrfs_ioctl_vol_args_v2 {
__s64 fd;
__u64 transid;
__u64 flags;
- __u64 unused[4];
+ union {
+ struct {
+ __u64 size;
+ struct btrfs_qgroup_inherit __user *qgroup_inherit;
+ };
+ __u64 unused[4];
+ };
char name[BTRFS_SUBVOL_NAME_MAX + 1];
};
@@ -285,9 +316,13 @@ enum btrfs_dev_stat_values {
BTRFS_DEV_STAT_VALUES_MAX
};
+/* Reset statistics after reading; needs SYS_ADMIN capability */
+#define BTRFS_DEV_STATS_RESET (1ULL << 0)
+
struct btrfs_ioctl_get_dev_stats {
__u64 devid; /* in */
__u64 nr_items; /* in/out */
+ __u64 flags; /* in/out */
/* out values: */
__u64 values[BTRFS_DEV_STAT_VALUES_MAX];
@@ -295,6 +330,24 @@ struct btrfs_ioctl_get_dev_stats {
__u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */
};
+#define BTRFS_QUOTA_CTL_ENABLE 1
+#define BTRFS_QUOTA_CTL_DISABLE 2
+#define BTRFS_QUOTA_CTL_RESCAN 3
+struct btrfs_ioctl_quota_ctl_args {
+ __u64 cmd;
+ __u64 status;
+};
+
+struct btrfs_ioctl_qgroup_assign_args {
+ __u64 assign;
+ __u64 src;
+ __u64 dst;
+};
+
+struct btrfs_ioctl_qgroup_create_args {
+ __u64 create;
+ __u64 qgroupid;
+};
struct btrfs_ioctl_timespec {
__u64 sec;
__u32 nsec;
@@ -363,6 +416,8 @@ struct btrfs_ioctl_send_args {
#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
struct btrfs_ioctl_vol_args_v2)
+#define BTRFS_IOC_SUBVOL_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 24, \
+ struct btrfs_ioctl_vol_args_v2)
#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64)
#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \
@@ -386,9 +441,16 @@ struct btrfs_ioctl_send_args {
#define BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \
struct btrfs_ioctl_received_subvol_args)
#define BTRFS_IOC_SEND _IOW(BTRFS_IOCTL_MAGIC, 38, struct btrfs_ioctl_send_args)
+#define BTRFS_IOC_DEVICES_READY _IOR(BTRFS_IOCTL_MAGIC, 39, \
+ struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_QUOTA_CTL _IOWR(BTRFS_IOCTL_MAGIC, 40, \
+ struct btrfs_ioctl_quota_ctl_args)
+#define BTRFS_IOC_QGROUP_ASSIGN _IOW(BTRFS_IOCTL_MAGIC, 41, \
+ struct btrfs_ioctl_qgroup_assign_args)
+#define BTRFS_IOC_QGROUP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 42, \
+ struct btrfs_ioctl_qgroup_create_args)
+#define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \
+ struct btrfs_ioctl_qgroup_limit_args)
#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \
struct btrfs_ioctl_get_dev_stats)
-#define BTRFS_IOC_GET_AND_RESET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 53, \
- struct btrfs_ioctl_get_dev_stats)
-
#endif
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 272f911203ff..a44eff074805 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -78,13 +78,15 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
write_lock(&eb->lock);
WARN_ON(atomic_read(&eb->spinning_writers));
atomic_inc(&eb->spinning_writers);
- if (atomic_dec_and_test(&eb->blocking_writers))
+ if (atomic_dec_and_test(&eb->blocking_writers) &&
+ waitqueue_active(&eb->write_lock_wq))
wake_up(&eb->write_lock_wq);
} else if (rw == BTRFS_READ_LOCK_BLOCKING) {
BUG_ON(atomic_read(&eb->blocking_readers) == 0);
read_lock(&eb->lock);
atomic_inc(&eb->spinning_readers);
- if (atomic_dec_and_test(&eb->blocking_readers))
+ if (atomic_dec_and_test(&eb->blocking_readers) &&
+ waitqueue_active(&eb->read_lock_wq))
wake_up(&eb->read_lock_wq);
}
return;
@@ -199,7 +201,8 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
}
btrfs_assert_tree_read_locked(eb);
WARN_ON(atomic_read(&eb->blocking_readers) == 0);
- if (atomic_dec_and_test(&eb->blocking_readers))
+ if (atomic_dec_and_test(&eb->blocking_readers) &&
+ waitqueue_active(&eb->read_lock_wq))
wake_up(&eb->read_lock_wq);
atomic_dec(&eb->read_locks);
}
@@ -247,8 +250,9 @@ void btrfs_tree_unlock(struct extent_buffer *eb)
if (blockers) {
WARN_ON(atomic_read(&eb->spinning_writers));
atomic_dec(&eb->blocking_writers);
- smp_wmb();
- wake_up(&eb->write_lock_wq);
+ smp_mb();
+ if (waitqueue_active(&eb->write_lock_wq))
+ wake_up(&eb->write_lock_wq);
} else {
WARN_ON(atomic_read(&eb->spinning_writers) != 1);
atomic_dec(&eb->spinning_writers);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
new file mode 100644
index 000000000000..bc424ae5a81a
--- /dev/null
+++ b/fs/btrfs/qgroup.c
@@ -0,0 +1,1571 @@
+/*
+ * Copyright (C) 2011 STRATO. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/sched.h>
+#include <linux/pagemap.h>
+#include <linux/writeback.h>
+#include <linux/blkdev.h>
+#include <linux/rbtree.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+
+#include "ctree.h"
+#include "transaction.h"
+#include "disk-io.h"
+#include "locking.h"
+#include "ulist.h"
+#include "ioctl.h"
+#include "backref.h"
+
+/* TODO XXX FIXME
+ * - subvol delete -> delete when ref goes to 0? delete limits also?
+ * - reorganize keys
+ * - compressed
+ * - sync
+ * - rescan
+ * - copy also limits on subvol creation
+ * - limit
+ * - caches fuer ulists
+ * - performance benchmarks
+ * - check all ioctl parameters
+ */
+
+/*
+ * one struct for each qgroup, organized in fs_info->qgroup_tree.
+ */
+struct btrfs_qgroup {
+ u64 qgroupid;
+
+ /*
+ * state
+ */
+ u64 rfer; /* referenced */
+ u64 rfer_cmpr; /* referenced compressed */
+ u64 excl; /* exclusive */
+ u64 excl_cmpr; /* exclusive compressed */
+
+ /*
+ * limits
+ */
+ u64 lim_flags; /* which limits are set */
+ u64 max_rfer;
+ u64 max_excl;
+ u64 rsv_rfer;
+ u64 rsv_excl;
+
+ /*
+ * reservation tracking
+ */
+ u64 reserved;
+
+ /*
+ * lists
+ */
+ struct list_head groups; /* groups this group is member of */
+ struct list_head members; /* groups that are members of this group */
+ struct list_head dirty; /* dirty groups */
+ struct rb_node node; /* tree of qgroups */
+
+ /*
+ * temp variables for accounting operations
+ */
+ u64 tag;
+ u64 refcnt;
+};
+
+/*
+ * glue structure to represent the relations between qgroups.
+ */
+struct btrfs_qgroup_list {
+ struct list_head next_group;
+ struct list_head next_member;
+ struct btrfs_qgroup *group;
+ struct btrfs_qgroup *member;
+};
+
+/* must be called with qgroup_lock held */
+static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
+ u64 qgroupid)
+{
+ struct rb_node *n = fs_info->qgroup_tree.rb_node;
+ struct btrfs_qgroup *qgroup;
+
+ while (n) {
+ qgroup = rb_entry(n, struct btrfs_qgroup, node);
+ if (qgroup->qgroupid < qgroupid)
+ n = n->rb_left;
+ else if (qgroup->qgroupid > qgroupid)
+ n = n->rb_right;
+ else
+ return qgroup;
+ }
+ return NULL;
+}
+
+/* must be called with qgroup_lock held */
+static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
+ u64 qgroupid)
+{
+ struct rb_node **p = &fs_info->qgroup_tree.rb_node;
+ struct rb_node *parent = NULL;
+ struct btrfs_qgroup *qgroup;
+
+ while (*p) {
+ parent = *p;
+ qgroup = rb_entry(parent, struct btrfs_qgroup, node);
+
+ if (qgroup->qgroupid < qgroupid)
+ p = &(*p)->rb_left;
+ else if (qgroup->qgroupid > qgroupid)
+ p = &(*p)->rb_right;
+ else
+ return qgroup;
+ }
+
+ qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
+ if (!qgroup)
+ return ERR_PTR(-ENOMEM);
+
+ qgroup->qgroupid = qgroupid;
+ INIT_LIST_HEAD(&qgroup->groups);
+ INIT_LIST_HEAD(&qgroup->members);
+ INIT_LIST_HEAD(&qgroup->dirty);
+
+ rb_link_node(&qgroup->node, parent, p);
+ rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
+
+ return qgroup;
+}
+
+/* must be called with qgroup_lock held */
+static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
+{
+ struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
+ struct btrfs_qgroup_list *list;
+
+ if (!qgroup)
+ return -ENOENT;
+
+ rb_erase(&qgroup->node, &fs_info->qgroup_tree);
+ list_del(&qgroup->dirty);
+
+ while (!list_empty(&qgroup->groups)) {
+ list = list_first_entry(&qgroup->groups,
+ struct btrfs_qgroup_list, next_group);
+ list_del(&list->next_group);
+ list_del(&list->next_member);
+ kfree(list);
+ }
+
+ while (!list_empty(&qgroup->members)) {
+ list = list_first_entry(&qgroup->members,
+ struct btrfs_qgroup_list, next_member);
+ list_del(&list->next_group);
+ list_del(&list->next_member);
+ kfree(list);
+ }
+ kfree(qgroup);
+
+ return 0;
+}
+
+/* must be called with qgroup_lock held */
+static int add_relation_rb(struct btrfs_fs_info *fs_info,
+ u64 memberid, u64 parentid)
+{
+ struct btrfs_qgroup *member;
+ struct btrfs_qgroup *parent;
+ struct btrfs_qgroup_list *list;
+
+ member = find_qgroup_rb(fs_info, memberid);
+ parent = find_qgroup_rb(fs_info, parentid);
+ if (!member || !parent)
+ return -ENOENT;
+
+ list = kzalloc(sizeof(*list), GFP_ATOMIC);
+ if (!list)
+ return -ENOMEM;
+
+ list->group = parent;
+ list->member = member;
+ list_add_tail(&list->next_group, &member->groups);
+ list_add_tail(&list->next_member, &parent->members);
+
+ return 0;
+}
+
+/* must be called with qgroup_lock held */
+static int del_relation_rb(struct btrfs_fs_info *fs_info,
+ u64 memberid, u64 parentid)
+{
+ struct btrfs_qgroup *member;
+ struct btrfs_qgroup *parent;
+ struct btrfs_qgroup_list *list;
+
+ member = find_qgroup_rb(fs_info, memberid);
+ parent = find_qgroup_rb(fs_info, parentid);
+ if (!member || !parent)
+ return -ENOENT;
+
+ list_for_each_entry(list, &member->groups, next_group) {
+ if (list->group == parent) {
+ list_del(&list->next_group);
+ list_del(&list->next_member);
+ kfree(list);
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+/*
+ * The full config is read in one go, only called from open_ctree()
+ * It doesn't use any locking, as at this point we're still single-threaded
+ */
+int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_key key;
+ struct btrfs_key found_key;
+ struct btrfs_root *quota_root = fs_info->quota_root;
+ struct btrfs_path *path = NULL;
+ struct extent_buffer *l;
+ int slot;
+ int ret = 0;
+ u64 flags = 0;
+
+ if (!fs_info->quota_enabled)
+ return 0;
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* default this to quota off, in case no status key is found */
+ fs_info->qgroup_flags = 0;
+
+ /*
+ * pass 1: read status, all qgroup infos and limits
+ */
+ key.objectid = 0;
+ key.type = 0;
+ key.offset = 0;
+ ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
+ if (ret)
+ goto out;
+
+ while (1) {
+ struct btrfs_qgroup *qgroup;
+
+ slot = path->slots[0];
+ l = path->nodes[0];
+ btrfs_item_key_to_cpu(l, &found_key, slot);
+
+ if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
+ struct btrfs_qgroup_status_item *ptr;
+
+ ptr = btrfs_item_ptr(l, slot,
+ struct btrfs_qgroup_status_item);
+
+ if (btrfs_qgroup_status_version(l, ptr) !=
+ BTRFS_QGROUP_STATUS_VERSION) {
+ printk(KERN_ERR
+ "btrfs: old qgroup version, quota disabled\n");
+ goto out;
+ }
+ if (btrfs_qgroup_status_generation(l, ptr) !=
+ fs_info->generation) {
+ flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+ printk(KERN_ERR
+ "btrfs: qgroup generation mismatch, "
+ "marked as inconsistent\n");
+ }
+ fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
+ ptr);
+ /* FIXME read scan element */
+ goto next1;
+ }
+
+ if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
+ found_key.type != BTRFS_QGROUP_LIMIT_KEY)
+ goto next1;
+
+ qgroup = find_qgroup_rb(fs_info, found_key.offset);
+ if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
+ (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
+ printk(KERN_ERR "btrfs: inconsitent qgroup config\n");
+ flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+ }
+ if (!qgroup) {
+ qgroup = add_qgroup_rb(fs_info, found_key.offset);
+ if (IS_ERR(qgroup)) {
+ ret = PTR_ERR(qgroup);
+ goto out;
+ }
+ }
+ switch (found_key.type) {
+ case BTRFS_QGROUP_INFO_KEY: {
+ struct btrfs_qgroup_info_item *ptr;
+
+ ptr = btrfs_item_ptr(l, slot,
+ struct btrfs_qgroup_info_item);
+ qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
+ qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
+ qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
+ qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
+ /* generation currently unused */
+ break;
+ }
+ case BTRFS_QGROUP_LIMIT_KEY: {
+ struct btrfs_qgroup_limit_item *ptr;
+
+ ptr = btrfs_item_ptr(l, slot,
+ struct btrfs_qgroup_limit_item);
+ qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
+ qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
+ qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
+ qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
+ qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
+ break;
+ }
+ }
+next1:
+ ret = btrfs_next_item(quota_root, path);
+ if (ret < 0)
+ goto out;
+ if (ret)
+ break;
+ }
+ btrfs_release_path(path);
+
+ /*
+ * pass 2: read all qgroup relations
+ */
+ key.objectid = 0;
+ key.type = BTRFS_QGROUP_RELATION_KEY;
+ key.offset = 0;
+ ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
+ if (ret)
+ goto out;
+ while (1) {
+ slot = path->slots[0];
+ l = path->nodes[0];
+ btrfs_item_key_to_cpu(l, &found_key, slot);
+
+ if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
+ goto next2;
+
+ if (found_key.objectid > found_key.offset) {
+ /* parent <- member, not needed to build config */
+ /* FIXME should we omit the key completely? */
+ goto next2;
+ }
+
+ ret = add_relation_rb(fs_info, found_key.objectid,
+ found_key.offset);
+ if (ret)
+ goto out;
+next2:
+ ret = btrfs_next_item(quota_root, path);
+ if (ret < 0)
+ goto out;
+ if (ret)
+ break;
+ }
+out:
+ fs_info->qgroup_flags |= flags;
+ if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) {
+ fs_info->quota_enabled = 0;
+ fs_info->pending_quota_state = 0;
+ }
+ btrfs_free_path(path);
+
+ return ret < 0 ? ret : 0;
+}
+
+/*
+ * This is only called from close_ctree() or open_ctree(), both in single-
+ * treaded paths. Clean up the in-memory structures. No locking needed.
+ */
+void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
+{
+ struct rb_node *n;
+ struct btrfs_qgroup *qgroup;
+ struct btrfs_qgroup_list *list;
+
+ while ((n = rb_first(&fs_info->qgroup_tree))) {
+ qgroup = rb_entry(n, struct btrfs_qgroup, node);
+ rb_erase(n, &fs_info->qgroup_tree);
+
+ WARN_ON(!list_empty(&qgroup->dirty));
+
+ while (!list_empty(&qgroup->groups)) {
+ list = list_first_entry(&qgroup->groups,
+ struct btrfs_qgroup_list,
+ next_group);
+ list_del(&list->next_group);
+ list_del(&list->next_member);
+ kfree(list);
+ }
+
+ while (!list_empty(&qgroup->members)) {
+ list = list_first_entry(&qgroup->members,
+ struct btrfs_qgroup_list,
+ next_member);
+ list_del(&list->next_group);
+ list_del(&list->next_member);
+ kfree(list);
+ }
+ kfree(qgroup);
+ }
+}
+
+static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *quota_root,
+ u64 src, u64 dst)
+{
+ int ret;
+ struct btrfs_path *path;
+ struct btrfs_key key;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = src;
+ key.type = BTRFS_QGROUP_RELATION_KEY;
+ key.offset = dst;
+
+ ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
+
+ btrfs_mark_buffer_dirty(path->nodes[0]);
+
+ btrfs_free_path(path);
+ return ret;
+}
+
+static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *quota_root,
+ u64 src, u64 dst)
+{
+ int ret;
+ struct btrfs_path *path;
+ struct btrfs_key key;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = src;
+ key.type = BTRFS_QGROUP_RELATION_KEY;
+ key.offset = dst;
+
+ ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
+ if (ret < 0)
+ goto out;
+
+ if (ret > 0) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ ret = btrfs_del_item(trans, quota_root, path);
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+static int add_qgroup_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *quota_root, u64 qgroupid)
+{
+ int ret;
+ struct btrfs_path *path;
+ struct btrfs_qgroup_info_item *qgroup_info;
+ struct btrfs_qgroup_limit_item *qgroup_limit;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = 0;
+ key.type = BTRFS_QGROUP_INFO_KEY;
+ key.offset = qgroupid;
+
+ ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
+ sizeof(*qgroup_info));
+ if (ret)
+ goto out;
+
+ leaf = path->nodes[0];
+ qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_qgroup_info_item);
+ btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
+ btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
+ btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
+ btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
+ btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);
+
+ btrfs_mark_buffer_dirty(leaf);
+
+ btrfs_release_path(path);
+
+ key.type = BTRFS_QGROUP_LIMIT_KEY;
+ ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
+ sizeof(*qgroup_limit));
+ if (ret)
+ goto out;
+
+ leaf = path->nodes[0];
+ qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_qgroup_limit_item);
+ btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
+ btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
+ btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
+ btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
+ btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);
+
+ btrfs_mark_buffer_dirty(leaf);
+
+ ret = 0;
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+static int del_qgroup_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *quota_root, u64 qgroupid)
+{
+ int ret;
+ struct btrfs_path *path;
+ struct btrfs_key key;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = 0;
+ key.type = BTRFS_QGROUP_INFO_KEY;
+ key.offset = qgroupid;
+ ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
+ if (ret < 0)
+ goto out;
+
+ if (ret > 0) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ ret = btrfs_del_item(trans, quota_root, path);
+ if (ret)
+ goto out;
+
+ btrfs_release_path(path);
+
+ key.type = BTRFS_QGROUP_LIMIT_KEY;
+ ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
+ if (ret < 0)
+ goto out;
+
+ if (ret > 0) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ ret = btrfs_del_item(trans, quota_root, path);
+
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 qgroupid,
+ u64 flags, u64 max_rfer, u64 max_excl,
+ u64 rsv_rfer, u64 rsv_excl)
+{
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct extent_buffer *l;
+ struct btrfs_qgroup_limit_item *qgroup_limit;
+ int ret;
+ int slot;
+
+ key.objectid = 0;
+ key.type = BTRFS_QGROUP_LIMIT_KEY;
+ key.offset = qgroupid;
+
+ path = btrfs_alloc_path();
+ BUG_ON(!path);
+ ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+ if (ret > 0)
+ ret = -ENOENT;
+
+ if (ret)
+ goto out;
+
+ l = path->nodes[0];
+ slot = path->slots[0];
+ qgroup_limit = btrfs_item_ptr(l, path->slots[0],
+ struct btrfs_qgroup_limit_item);
+ btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags);
+ btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer);
+ btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl);
+ btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer);
+ btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl);
+
+ btrfs_mark_buffer_dirty(l);
+
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_qgroup *qgroup)
+{
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct extent_buffer *l;
+ struct btrfs_qgroup_info_item *qgroup_info;
+ int ret;
+ int slot;
+
+ key.objectid = 0;
+ key.type = BTRFS_QGROUP_INFO_KEY;
+ key.offset = qgroup->qgroupid;
+
+ path = btrfs_alloc_path();
+ BUG_ON(!path);
+ ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+ if (ret > 0)
+ ret = -ENOENT;
+
+ if (ret)
+ goto out;
+
+ l = path->nodes[0];
+ slot = path->slots[0];
+ qgroup_info = btrfs_item_ptr(l, path->slots[0],
+ struct btrfs_qgroup_info_item);
+ btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
+ btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
+ btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
+ btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
+ btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);
+
+ btrfs_mark_buffer_dirty(l);
+
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_root *root)
+{
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct extent_buffer *l;
+ struct btrfs_qgroup_status_item *ptr;
+ int ret;
+ int slot;
+
+ key.objectid = 0;
+ key.type = BTRFS_QGROUP_STATUS_KEY;
+ key.offset = 0;
+
+ path = btrfs_alloc_path();
+ BUG_ON(!path);
+ ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+ if (ret > 0)
+ ret = -ENOENT;
+
+ if (ret)
+ goto out;
+
+ l = path->nodes[0];
+ slot = path->slots[0];
+ ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
+ btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
+ btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
+ /* XXX scan */
+
+ btrfs_mark_buffer_dirty(l);
+
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+/*
+ * called with qgroup_lock held
+ */
+static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ int ret;
+
+ if (!root)
+ return -EINVAL;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ while (1) {
+ key.objectid = 0;
+ key.offset = 0;
+ key.type = 0;
+
+ path->leave_spinning = 1;
+ ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+ if (ret > 0) {
+ if (path->slots[0] == 0)
+ break;
+ path->slots[0]--;
+ } else if (ret < 0) {
+ break;
+ }
+
+ ret = btrfs_del_item(trans, root, path);
+ if (ret)
+ goto out;
+ btrfs_release_path(path);
+ }
+ ret = 0;
+out:
+ root->fs_info->pending_quota_state = 0;
+ btrfs_free_path(path);
+ return ret;
+}
+
+int btrfs_quota_enable(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_root *quota_root;
+ struct btrfs_path *path = NULL;
+ struct btrfs_qgroup_status_item *ptr;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+ int ret = 0;
+
+ spin_lock(&fs_info->qgroup_lock);
+ if (fs_info->quota_root) {
+ fs_info->pending_quota_state = 1;
+ spin_unlock(&fs_info->qgroup_lock);
+ goto out;
+ }
+ spin_unlock(&fs_info->qgroup_lock);
+
+ /*
+ * initially create the quota tree
+ */
+ quota_root = btrfs_create_tree(trans, fs_info,
+ BTRFS_QUOTA_TREE_OBJECTID);
+ if (IS_ERR(quota_root)) {
+ ret = PTR_ERR(quota_root);
+ goto out;
+ }
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = 0;
+ key.type = BTRFS_QGROUP_STATUS_KEY;
+ key.offset = 0;
+
+ ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
+ sizeof(*ptr));
+ if (ret)
+ goto out;
+
+ leaf = path->nodes[0];
+ ptr = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_qgroup_status_item);
+ btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
+ btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
+ fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
+ BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+ btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
+ btrfs_set_qgroup_status_scan(leaf, ptr, 0);
+
+ btrfs_mark_buffer_dirty(leaf);
+
+ spin_lock(&fs_info->qgroup_lock);
+ fs_info->quota_root = quota_root;
+ fs_info->pending_quota_state = 1;
+ spin_unlock(&fs_info->qgroup_lock);
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+int btrfs_quota_disable(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_root *tree_root = fs_info->tree_root;
+ struct btrfs_root *quota_root;
+ int ret = 0;
+
+ spin_lock(&fs_info->qgroup_lock);
+ fs_info->quota_enabled = 0;
+ fs_info->pending_quota_state = 0;
+ quota_root = fs_info->quota_root;
+ fs_info->quota_root = NULL;
+ btrfs_free_qgroup_config(fs_info);
+ spin_unlock(&fs_info->qgroup_lock);
+
+ if (!quota_root)
+ return -EINVAL;
+
+ ret = btrfs_clean_quota_tree(trans, quota_root);
+ if (ret)
+ goto out;
+
+ ret = btrfs_del_root(trans, tree_root, &quota_root->root_key);
+ if (ret)
+ goto out;
+
+ list_del(&quota_root->dirty_list);
+
+ btrfs_tree_lock(quota_root->node);
+ clean_tree_block(trans, tree_root, quota_root->node);
+ btrfs_tree_unlock(quota_root->node);
+ btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
+
+ free_extent_buffer(quota_root->node);
+ free_extent_buffer(quota_root->commit_root);
+ kfree(quota_root);
+out:
+ return ret;
+}
+
+int btrfs_quota_rescan(struct btrfs_fs_info *fs_info)
+{
+ /* FIXME */
+ return 0;
+}
+
+int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 src, u64 dst)
+{
+ struct btrfs_root *quota_root;
+ int ret = 0;
+
+ quota_root = fs_info->quota_root;
+ if (!quota_root)
+ return -EINVAL;
+
+ ret = add_qgroup_relation_item(trans, quota_root, src, dst);
+ if (ret)
+ return ret;
+
+ ret = add_qgroup_relation_item(trans, quota_root, dst, src);
+ if (ret) {
+ del_qgroup_relation_item(trans, quota_root, src, dst);
+ return ret;
+ }
+
+ spin_lock(&fs_info->qgroup_lock);
+ ret = add_relation_rb(quota_root->fs_info, src, dst);
+ spin_unlock(&fs_info->qgroup_lock);
+
+ return ret;
+}
+
+int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 src, u64 dst)
+{
+ struct btrfs_root *quota_root;
+ int ret = 0;
+ int err;
+
+ quota_root = fs_info->quota_root;
+ if (!quota_root)
+ return -EINVAL;
+
+ ret = del_qgroup_relation_item(trans, quota_root, src, dst);
+ err = del_qgroup_relation_item(trans, quota_root, dst, src);
+ if (err && !ret)
+ ret = err;
+
+ spin_lock(&fs_info->qgroup_lock);
+ del_relation_rb(fs_info, src, dst);
+
+ spin_unlock(&fs_info->qgroup_lock);
+
+ return ret;
+}
+
+int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 qgroupid, char *name)
+{
+ struct btrfs_root *quota_root;
+ struct btrfs_qgroup *qgroup;
+ int ret = 0;
+
+ quota_root = fs_info->quota_root;
+ if (!quota_root)
+ return -EINVAL;
+
+ ret = add_qgroup_item(trans, quota_root, qgroupid);
+
+ spin_lock(&fs_info->qgroup_lock);
+ qgroup = add_qgroup_rb(fs_info, qgroupid);
+ spin_unlock(&fs_info->qgroup_lock);
+
+ if (IS_ERR(qgroup))
+ ret = PTR_ERR(qgroup);
+
+ return ret;
+}
+
+int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 qgroupid)
+{
+ struct btrfs_root *quota_root;
+ int ret = 0;
+
+ quota_root = fs_info->quota_root;
+ if (!quota_root)
+ return -EINVAL;
+
+ ret = del_qgroup_item(trans, quota_root, qgroupid);
+
+ spin_lock(&fs_info->qgroup_lock);
+ del_qgroup_rb(quota_root->fs_info, qgroupid);
+
+ spin_unlock(&fs_info->qgroup_lock);
+
+ return ret;
+}
+
+int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 qgroupid,
+ struct btrfs_qgroup_limit *limit)
+{
+ struct btrfs_root *quota_root = fs_info->quota_root;
+ struct btrfs_qgroup *qgroup;
+ int ret = 0;
+
+ if (!quota_root)
+ return -EINVAL;
+
+ ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
+ limit->flags, limit->max_rfer,
+ limit->max_excl, limit->rsv_rfer,
+ limit->rsv_excl);
+ if (ret) {
+ fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+ printk(KERN_INFO "unable to update quota limit for %llu\n",
+ (unsigned long long)qgroupid);
+ }
+
+ spin_lock(&fs_info->qgroup_lock);
+
+ qgroup = find_qgroup_rb(fs_info, qgroupid);
+ if (!qgroup) {
+ ret = -ENOENT;
+ goto unlock;
+ }
+ qgroup->lim_flags = limit->flags;
+ qgroup->max_rfer = limit->max_rfer;
+ qgroup->max_excl = limit->max_excl;
+ qgroup->rsv_rfer = limit->rsv_rfer;
+ qgroup->rsv_excl = limit->rsv_excl;
+
+unlock:
+ spin_unlock(&fs_info->qgroup_lock);
+
+ return ret;
+}
+
+static void qgroup_dirty(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup *qgroup)
+{
+ if (list_empty(&qgroup->dirty))
+ list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
+}
+
+/*
+ * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts
+ * the modification into a list that's later used by btrfs_end_transaction to
+ * pass the recorded modifications on to btrfs_qgroup_account_ref.
+ */
+int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_node *node,
+ struct btrfs_delayed_extent_op *extent_op)
+{
+ struct qgroup_update *u;
+
+ BUG_ON(!trans->delayed_ref_elem.seq);
+ u = kmalloc(sizeof(*u), GFP_NOFS);
+ if (!u)
+ return -ENOMEM;
+
+ u->node = node;
+ u->extent_op = extent_op;
+ list_add_tail(&u->list, &trans->qgroup_ref_list);
+
+ return 0;
+}
+
+/*
+ * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
+ * from the fs. First, all roots referencing the extent are searched, and
+ * then the space is accounted accordingly to the different roots. The
+ * accounting algorithm works in 3 steps documented inline.
+ */
+int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_node *node,
+ struct btrfs_delayed_extent_op *extent_op)
+{
+ struct btrfs_key ins;
+ struct btrfs_root *quota_root;
+ u64 ref_root;
+ struct btrfs_qgroup *qgroup;
+ struct ulist_node *unode;
+ struct ulist *roots = NULL;
+ struct ulist *tmp = NULL;
+ struct ulist_iterator uiter;
+ u64 seq;
+ int ret = 0;
+ int sgn;
+
+ if (!fs_info->quota_enabled)
+ return 0;
+
+ BUG_ON(!fs_info->quota_root);
+
+ ins.objectid = node->bytenr;
+ ins.offset = node->num_bytes;
+ ins.type = BTRFS_EXTENT_ITEM_KEY;
+
+ if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
+ node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
+ struct btrfs_delayed_tree_ref *ref;
+ ref = btrfs_delayed_node_to_tree_ref(node);
+ ref_root = ref->root;
+ } else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
+ node->type == BTRFS_SHARED_DATA_REF_KEY) {
+ struct btrfs_delayed_data_ref *ref;
+ ref = btrfs_delayed_node_to_data_ref(node);
+ ref_root = ref->root;
+ } else {
+ BUG();
+ }
+
+ if (!is_fstree(ref_root)) {
+ /*
+ * non-fs-trees are not being accounted
+ */
+ return 0;
+ }
+
+ switch (node->action) {
+ case BTRFS_ADD_DELAYED_REF:
+ case BTRFS_ADD_DELAYED_EXTENT:
+ sgn = 1;
+ break;
+ case BTRFS_DROP_DELAYED_REF:
+ sgn = -1;
+ break;
+ case BTRFS_UPDATE_DELAYED_HEAD:
+ return 0;
+ default:
+ BUG();
+ }
+
+ /*
+ * the delayed ref sequence number we pass depends on the direction of
+ * the operation. for add operations, we pass (node->seq - 1) to skip
+ * the delayed ref's current sequence number, because we need the state
+ * of the tree before the add operation. for delete operations, we pass
+ * (node->seq) to include the delayed ref's current sequence number,
+ * because we need the state of the tree after the delete operation.
+ */
+ ret = btrfs_find_all_roots(trans, fs_info, node->bytenr,
+ sgn > 0 ? node->seq - 1 : node->seq, &roots);
+ if (ret < 0)
+ goto out;
+
+ spin_lock(&fs_info->qgroup_lock);
+ quota_root = fs_info->quota_root;
+ if (!quota_root)
+ goto unlock;
+
+ qgroup = find_qgroup_rb(fs_info, ref_root);
+ if (!qgroup)
+ goto unlock;
+
+ /*
+ * step 1: for each old ref, visit all nodes once and inc refcnt
+ */
+ tmp = ulist_alloc(GFP_ATOMIC);
+ if (!tmp) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+ seq = fs_info->qgroup_seq;
+ fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
+
+ ULIST_ITER_INIT(&uiter);
+ while ((unode = ulist_next(roots, &uiter))) {
+ struct ulist_node *tmp_unode;
+ struct ulist_iterator tmp_uiter;
+ struct btrfs_qgroup *qg;
+
+ qg = find_qgroup_rb(fs_info, unode->val);
+ if (!qg)
+ continue;
+
+ ulist_reinit(tmp);
+ /* XXX id not needed */
+ ulist_add(tmp, qg->qgroupid, (unsigned long)qg, GFP_ATOMIC);
+ ULIST_ITER_INIT(&tmp_uiter);
+ while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
+ struct btrfs_qgroup_list *glist;
+
+ qg = (struct btrfs_qgroup *)tmp_unode->aux;
+ if (qg->refcnt < seq)
+ qg->refcnt = seq + 1;
+ else
+ ++qg->refcnt;
+
+ list_for_each_entry(glist, &qg->groups, next_group) {
+ ulist_add(tmp, glist->group->qgroupid,
+ (unsigned long)glist->group,
+ GFP_ATOMIC);
+ }
+ }
+ }
+
+ /*
+ * step 2: walk from the new root
+ */
+ ulist_reinit(tmp);
+ ulist_add(tmp, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC);
+ ULIST_ITER_INIT(&uiter);
+ while ((unode = ulist_next(tmp, &uiter))) {
+ struct btrfs_qgroup *qg;
+ struct btrfs_qgroup_list *glist;
+
+ qg = (struct btrfs_qgroup *)unode->aux;
+ if (qg->refcnt < seq) {
+ /* not visited by step 1 */
+ qg->rfer += sgn * node->num_bytes;
+ qg->rfer_cmpr += sgn * node->num_bytes;
+ if (roots->nnodes == 0) {
+ qg->excl += sgn * node->num_bytes;
+ qg->excl_cmpr += sgn * node->num_bytes;
+ }
+ qgroup_dirty(fs_info, qg);
+ }
+ WARN_ON(qg->tag >= seq);
+ qg->tag = seq;
+
+ list_for_each_entry(glist, &qg->groups, next_group) {
+ ulist_add(tmp, glist->group->qgroupid,
+ (unsigned long)glist->group, GFP_ATOMIC);
+ }
+ }
+
+ /*
+ * step 3: walk again from old refs
+ */
+ ULIST_ITER_INIT(&uiter);
+ while ((unode = ulist_next(roots, &uiter))) {
+ struct btrfs_qgroup *qg;
+ struct ulist_node *tmp_unode;
+ struct ulist_iterator tmp_uiter;
+
+ qg = find_qgroup_rb(fs_info, unode->val);
+ if (!qg)
+ continue;
+
+ ulist_reinit(tmp);
+ ulist_add(tmp, qg->qgroupid, (unsigned long)qg, GFP_ATOMIC);
+ ULIST_ITER_INIT(&tmp_uiter);
+ while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
+ struct btrfs_qgroup_list *glist;
+
+ qg = (struct btrfs_qgroup *)tmp_unode->aux;
+ if (qg->tag == seq)
+ continue;
+
+ if (qg->refcnt - seq == roots->nnodes) {
+ qg->excl -= sgn * node->num_bytes;
+ qg->excl_cmpr -= sgn * node->num_bytes;
+ qgroup_dirty(fs_info, qg);
+ }
+
+ list_for_each_entry(glist, &qg->groups, next_group) {
+ ulist_add(tmp, glist->group->qgroupid,
+ (unsigned long)glist->group,
+ GFP_ATOMIC);
+ }
+ }
+ }
+ ret = 0;
+unlock:
+ spin_unlock(&fs_info->qgroup_lock);
+out:
+ ulist_free(roots);
+ ulist_free(tmp);
+
+ return ret;
+}
+
+/*
+ * called from commit_transaction. Writes all changed qgroups to disk.
+ */
+int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_root *quota_root = fs_info->quota_root;
+ int ret = 0;
+
+ if (!quota_root)
+ goto out;
+
+ fs_info->quota_enabled = fs_info->pending_quota_state;
+
+ spin_lock(&fs_info->qgroup_lock);
+ while (!list_empty(&fs_info->dirty_qgroups)) {
+ struct btrfs_qgroup *qgroup;
+ qgroup = list_first_entry(&fs_info->dirty_qgroups,
+ struct btrfs_qgroup, dirty);
+ list_del_init(&qgroup->dirty);
+ spin_unlock(&fs_info->qgroup_lock);
+ ret = update_qgroup_info_item(trans, quota_root, qgroup);
+ if (ret)
+ fs_info->qgroup_flags |=
+ BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+ spin_lock(&fs_info->qgroup_lock);
+ }
+ if (fs_info->quota_enabled)
+ fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
+ else
+ fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
+ spin_unlock(&fs_info->qgroup_lock);
+
+ ret = update_qgroup_status_item(trans, fs_info, quota_root);
+ if (ret)
+ fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+
+out:
+
+ return ret;
+}
+
+/*
+ * copy the acounting information between qgroups. This is necessary when a
+ * snapshot or a subvolume is created
+ */
+int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
+ struct btrfs_qgroup_inherit *inherit)
+{
+ int ret = 0;
+ int i;
+ u64 *i_qgroups;
+ struct btrfs_root *quota_root = fs_info->quota_root;
+ struct btrfs_qgroup *srcgroup;
+ struct btrfs_qgroup *dstgroup;
+ u32 level_size = 0;
+
+ if (!fs_info->quota_enabled)
+ return 0;
+
+ if (!quota_root)
+ return -EINVAL;
+
+ /*
+ * create a tracking group for the subvol itself
+ */
+ ret = add_qgroup_item(trans, quota_root, objectid);
+ if (ret)
+ goto out;
+
+ if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
+ ret = update_qgroup_limit_item(trans, quota_root, objectid,
+ inherit->lim.flags,
+ inherit->lim.max_rfer,
+ inherit->lim.max_excl,
+ inherit->lim.rsv_rfer,
+ inherit->lim.rsv_excl);
+ if (ret)
+ goto out;
+ }
+
+ if (srcid) {
+ struct btrfs_root *srcroot;
+ struct btrfs_key srckey;
+ int srcroot_level;
+
+ srckey.objectid = srcid;
+ srckey.type = BTRFS_ROOT_ITEM_KEY;
+ srckey.offset = (u64)-1;
+ srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
+ if (IS_ERR(srcroot)) {
+ ret = PTR_ERR(srcroot);
+ goto out;
+ }
+
+ rcu_read_lock();
+ srcroot_level = btrfs_header_level(srcroot->node);
+ level_size = btrfs_level_size(srcroot, srcroot_level);
+ rcu_read_unlock();
+ }
+
+ /*
+ * add qgroup to all inherited groups
+ */
+ if (inherit) {
+ i_qgroups = (u64 *)(inherit + 1);
+ for (i = 0; i < inherit->num_qgroups; ++i) {
+ ret = add_qgroup_relation_item(trans, quota_root,
+ objectid, *i_qgroups);
+ if (ret)
+ goto out;
+ ret = add_qgroup_relation_item(trans, quota_root,
+ *i_qgroups, objectid);
+ if (ret)
+ goto out;
+ ++i_qgroups;
+ }
+ }
+
+
+ spin_lock(&fs_info->qgroup_lock);
+
+ dstgroup = add_qgroup_rb(fs_info, objectid);
+ if (!dstgroup)
+ goto unlock;
+
+ if (srcid) {
+ srcgroup = find_qgroup_rb(fs_info, srcid);
+ if (!srcgroup)
+ goto unlock;
+ dstgroup->rfer = srcgroup->rfer - level_size;
+ dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size;
+ srcgroup->excl = level_size;
+ srcgroup->excl_cmpr = level_size;
+ qgroup_dirty(fs_info, dstgroup);
+ qgroup_dirty(fs_info, srcgroup);
+ }
+
+ if (!inherit)
+ goto unlock;
+
+ i_qgroups = (u64 *)(inherit + 1);
+ for (i = 0; i < inherit->num_qgroups; ++i) {
+ ret = add_relation_rb(quota_root->fs_info, objectid,
+ *i_qgroups);
+ if (ret)
+ goto unlock;
+ ++i_qgroups;
+ }
+
+ for (i = 0; i < inherit->num_ref_copies; ++i) {
+ struct btrfs_qgroup *src;
+ struct btrfs_qgroup *dst;
+
+ src = find_qgroup_rb(fs_info, i_qgroups[0]);
+ dst = find_qgroup_rb(fs_info, i_qgroups[1]);
+
+ if (!src || !dst) {
+ ret = -EINVAL;
+ goto unlock;
+ }
+
+ dst->rfer = src->rfer - level_size;
+ dst->rfer_cmpr = src->rfer_cmpr - level_size;
+ i_qgroups += 2;
+ }
+ for (i = 0; i < inherit->num_excl_copies; ++i) {
+ struct btrfs_qgroup *src;
+ struct btrfs_qgroup *dst;
+
+ src = find_qgroup_rb(fs_info, i_qgroups[0]);
+ dst = find_qgroup_rb(fs_info, i_qgroups[1]);
+
+ if (!src || !dst) {
+ ret = -EINVAL;
+ goto unlock;
+ }
+
+ dst->excl = src->excl + level_size;
+ dst->excl_cmpr = src->excl_cmpr + level_size;
+ i_qgroups += 2;
+ }
+
+unlock:
+ spin_unlock(&fs_info->qgroup_lock);
+out:
+ return ret;
+}
+
+/*
+ * reserve some space for a qgroup and all its parents. The reservation takes
+ * place with start_transaction or dealloc_reserve, similar to ENOSPC
+ * accounting. If not enough space is available, EDQUOT is returned.
+ * We assume that the requested space is new for all qgroups.
+ */
+int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
+{
+ struct btrfs_root *quota_root;
+ struct btrfs_qgroup *qgroup;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ u64 ref_root = root->root_key.objectid;
+ int ret = 0;
+ struct ulist *ulist = NULL;
+ struct ulist_node *unode;
+ struct ulist_iterator uiter;
+
+ if (!is_fstree(ref_root))
+ return 0;
+
+ if (num_bytes == 0)
+ return 0;
+
+ spin_lock(&fs_info->qgroup_lock);
+ quota_root = fs_info->quota_root;
+ if (!quota_root)
+ goto out;
+
+ qgroup = find_qgroup_rb(fs_info, ref_root);
+ if (!qgroup)
+ goto out;
+
+ /*
+ * in a first step, we check all affected qgroups if any limits would
+ * be exceeded
+ */
+ ulist = ulist_alloc(GFP_ATOMIC);
+ ulist_add(ulist, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC);
+ ULIST_ITER_INIT(&uiter);
+ while ((unode = ulist_next(ulist, &uiter))) {
+ struct btrfs_qgroup *qg;
+ struct btrfs_qgroup_list *glist;
+
+ qg = (struct btrfs_qgroup *)unode->aux;
+
+ if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
+ qg->reserved + qg->rfer + num_bytes >
+ qg->max_rfer)
+ ret = -EDQUOT;
+
+ if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
+ qg->reserved + qg->excl + num_bytes >
+ qg->max_excl)
+ ret = -EDQUOT;
+
+ list_for_each_entry(glist, &qg->groups, next_group) {
+ ulist_add(ulist, glist->group->qgroupid,
+ (unsigned long)glist->group, GFP_ATOMIC);
+ }
+ }
+ if (ret)
+ goto out;
+
+ /*
+ * no limits exceeded, now record the reservation into all qgroups
+ */
+ ULIST_ITER_INIT(&uiter);
+ while ((unode = ulist_next(ulist, &uiter))) {
+ struct btrfs_qgroup *qg;
+
+ qg = (struct btrfs_qgroup *)unode->aux;
+
+ qg->reserved += num_bytes;
+ }
+
+out:
+ spin_unlock(&fs_info->qgroup_lock);
+ ulist_free(ulist);
+
+ return ret;
+}
+
+void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
+{
+ struct btrfs_root *quota_root;
+ struct btrfs_qgroup *qgroup;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct ulist *ulist = NULL;
+ struct ulist_node *unode;
+ struct ulist_iterator uiter;
+ u64 ref_root = root->root_key.objectid;
+
+ if (!is_fstree(ref_root))
+ return;
+
+ if (num_bytes == 0)
+ return;
+
+ spin_lock(&fs_info->qgroup_lock);
+
+ quota_root = fs_info->quota_root;
+ if (!quota_root)
+ goto out;
+
+ qgroup = find_qgroup_rb(fs_info, ref_root);
+ if (!qgroup)
+ goto out;
+
+ ulist = ulist_alloc(GFP_ATOMIC);
+ ulist_add(ulist, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC);
+ ULIST_ITER_INIT(&uiter);
+ while ((unode = ulist_next(ulist, &uiter))) {
+ struct btrfs_qgroup *qg;
+ struct btrfs_qgroup_list *glist;
+
+ qg = (struct btrfs_qgroup *)unode->aux;
+
+ qg->reserved -= num_bytes;
+
+ list_for_each_entry(glist, &qg->groups, next_group) {
+ ulist_add(ulist, glist->group->qgroupid,
+ (unsigned long)glist->group, GFP_ATOMIC);
+ }
+ }
+
+out:
+ spin_unlock(&fs_info->qgroup_lock);
+ ulist_free(ulist);
+}
+
+void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
+{
+ if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
+ return;
+ printk(KERN_ERR "btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %llu\n",
+ trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
+ trans->delayed_ref_elem.seq);
+ BUG();
+}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 646ee21bb035..c5dbd9149679 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1239,10 +1239,11 @@ static int __must_check __add_reloc_root(struct btrfs_root *root)
node->bytenr, &node->rb_node);
spin_unlock(&rc->reloc_root_tree.lock);
if (rb_node) {
- kfree(node);
btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found "
"for start=%llu while inserting into relocation "
"tree\n");
+ kfree(node);
+ return -EEXIST;
}
list_add_tail(&root->root_list, &rc->reloc_roots);
diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c
index c6ffa5812419..b976597b0721 100644
--- a/fs/btrfs/struct-funcs.c
+++ b/fs/btrfs/struct-funcs.c
@@ -17,15 +17,27 @@
*/
#include <linux/highmem.h>
+#include <asm/unaligned.h>
-/* this is some deeply nasty code. ctree.h has a different
- * definition for this BTRFS_SETGET_FUNCS macro, behind a #ifndef
+#include "ctree.h"
+
+static inline u8 get_unaligned_le8(const void *p)
+{
+ return *(u8 *)p;
+}
+
+static inline void put_unaligned_le8(u8 val, void *p)
+{
+ *(u8 *)p = val;
+}
+
+/*
+ * this is some deeply nasty code.
*
* The end result is that anyone who #includes ctree.h gets a
- * declaration for the btrfs_set_foo functions and btrfs_foo functions
- *
- * This file declares the macros and then #includes ctree.h, which results
- * in cpp creating the function here based on the template below.
+ * declaration for the btrfs_set_foo functions and btrfs_foo functions,
+ * which are wappers of btrfs_set_token_#bits functions and
+ * btrfs_get_token_#bits functions, which are defined in this file.
*
* These setget functions do all the extent_buffer related mapping
* required to efficiently read and write specific fields in the extent
@@ -33,103 +45,93 @@
* an unsigned long offset into the extent buffer which has been
* cast to a specific type. This gives us all the gcc type checking.
*
- * The extent buffer api is used to do all the kmapping and page
- * spanning work required to get extent buffers in highmem and have
- * a metadata blocksize different from the page size.
- *
- * The macro starts with a simple function prototype declaration so that
- * sparse won't complain about it being static.
+ * The extent buffer api is used to do the page spanning work required to
+ * have a metadata blocksize different from the page size.
*/
-#define BTRFS_SETGET_FUNCS(name, type, member, bits) \
-u##bits btrfs_##name(struct extent_buffer *eb, type *s); \
-void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); \
-void btrfs_set_token_##name(struct extent_buffer *eb, type *s, u##bits val, struct btrfs_map_token *token); \
-u##bits btrfs_token_##name(struct extent_buffer *eb, \
- type *s, struct btrfs_map_token *token) \
+#define DEFINE_BTRFS_SETGET_BITS(bits) \
+u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \
+ unsigned long off, \
+ struct btrfs_map_token *token) \
{ \
- unsigned long part_offset = (unsigned long)s; \
- unsigned long offset = part_offset + offsetof(type, member); \
- type *p; \
- int err; \
- char *kaddr; \
- unsigned long map_start; \
- unsigned long map_len; \
- unsigned long mem_len = sizeof(((type *)0)->member); \
- u##bits res; \
- if (token && token->kaddr && token->offset <= offset && \
- token->eb == eb && \
- (token->offset + PAGE_CACHE_SIZE >= offset + mem_len)) { \
- kaddr = token->kaddr; \
- p = (type *)(kaddr + part_offset - token->offset); \
- res = le##bits##_to_cpu(p->member); \
- return res; \
- } \
- err = map_private_extent_buffer(eb, offset, \
- mem_len, \
- &kaddr, &map_start, &map_len); \
- if (err) { \
- __le##bits leres; \
- read_eb_member(eb, s, type, member, &leres); \
- return le##bits##_to_cpu(leres); \
- } \
- p = (type *)(kaddr + part_offset - map_start); \
- res = le##bits##_to_cpu(p->member); \
- if (token) { \
- token->kaddr = kaddr; \
- token->offset = map_start; \
- token->eb = eb; \
- } \
- return res; \
+ unsigned long part_offset = (unsigned long)ptr; \
+ unsigned long offset = part_offset + off; \
+ void *p; \
+ int err; \
+ char *kaddr; \
+ unsigned long map_start; \
+ unsigned long map_len; \
+ int size = sizeof(u##bits); \
+ u##bits res; \
+ \
+ if (token && token->kaddr && token->offset <= offset && \
+ token->eb == eb && \
+ (token->offset + PAGE_CACHE_SIZE >= offset + size)) { \
+ kaddr = token->kaddr; \
+ p = kaddr + part_offset - token->offset; \
+ res = get_unaligned_le##bits(p + off); \
+ return res; \
+ } \
+ err = map_private_extent_buffer(eb, offset, size, \
+ &kaddr, &map_start, &map_len); \
+ if (err) { \
+ __le##bits leres; \
+ \
+ read_extent_buffer(eb, &leres, offset, size); \
+ return le##bits##_to_cpu(leres); \
+ } \
+ p = kaddr + part_offset - map_start; \
+ res = get_unaligned_le##bits(p + off); \
+ if (token) { \
+ token->kaddr = kaddr; \
+ token->offset = map_start; \
+ token->eb = eb; \
+ } \
+ return res; \
} \
-void btrfs_set_token_##name(struct extent_buffer *eb, \
- type *s, u##bits val, struct btrfs_map_token *token) \
+void btrfs_set_token_##bits(struct extent_buffer *eb, \
+ void *ptr, unsigned long off, u##bits val, \
+ struct btrfs_map_token *token) \
{ \
- unsigned long part_offset = (unsigned long)s; \
- unsigned long offset = part_offset + offsetof(type, member); \
- type *p; \
- int err; \
- char *kaddr; \
- unsigned long map_start; \
- unsigned long map_len; \
- unsigned long mem_len = sizeof(((type *)0)->member); \
- if (token && token->kaddr && token->offset <= offset && \
- token->eb == eb && \
- (token->offset + PAGE_CACHE_SIZE >= offset + mem_len)) { \
- kaddr = token->kaddr; \
- p = (type *)(kaddr + part_offset - token->offset); \
- p->member = cpu_to_le##bits(val); \
- return; \
- } \
- err = map_private_extent_buffer(eb, offset, \
- mem_len, \
- &kaddr, &map_start, &map_len); \
- if (err) { \
- __le##bits val2; \
- val2 = cpu_to_le##bits(val); \
- write_eb_member(eb, s, type, member, &val2); \
- return; \
- } \
- p = (type *)(kaddr + part_offset - map_start); \
- p->member = cpu_to_le##bits(val); \
- if (token) { \
- token->kaddr = kaddr; \
- token->offset = map_start; \
- token->eb = eb; \
- } \
-} \
-void btrfs_set_##name(struct extent_buffer *eb, \
- type *s, u##bits val) \
-{ \
- btrfs_set_token_##name(eb, s, val, NULL); \
-} \
-u##bits btrfs_##name(struct extent_buffer *eb, \
- type *s) \
-{ \
- return btrfs_token_##name(eb, s, NULL); \
-} \
+ unsigned long part_offset = (unsigned long)ptr; \
+ unsigned long offset = part_offset + off; \
+ void *p; \
+ int err; \
+ char *kaddr; \
+ unsigned long map_start; \
+ unsigned long map_len; \
+ int size = sizeof(u##bits); \
+ \
+ if (token && token->kaddr && token->offset <= offset && \
+ token->eb == eb && \
+ (token->offset + PAGE_CACHE_SIZE >= offset + size)) { \
+ kaddr = token->kaddr; \
+ p = kaddr + part_offset - token->offset; \
+ put_unaligned_le##bits(val, p + off); \
+ return; \
+ } \
+ err = map_private_extent_buffer(eb, offset, size, \
+ &kaddr, &map_start, &map_len); \
+ if (err) { \
+ __le##bits val2; \
+ \
+ val2 = cpu_to_le##bits(val); \
+ write_extent_buffer(eb, &val2, offset, size); \
+ return; \
+ } \
+ p = kaddr + part_offset - map_start; \
+ put_unaligned_le##bits(val, p + off); \
+ if (token) { \
+ token->kaddr = kaddr; \
+ token->offset = map_start; \
+ token->eb = eb; \
+ } \
+}
-#include "ctree.h"
+DEFINE_BTRFS_SETGET_BITS(8)
+DEFINE_BTRFS_SETGET_BITS(16)
+DEFINE_BTRFS_SETGET_BITS(32)
+DEFINE_BTRFS_SETGET_BITS(64)
void btrfs_node_key(struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr)
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index e23991574fdf..75ee2c7791f0 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -396,15 +396,23 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
strcmp(args[0].from, "zlib") == 0) {
compress_type = "zlib";
info->compress_type = BTRFS_COMPRESS_ZLIB;
+ btrfs_set_opt(info->mount_opt, COMPRESS);
} else if (strcmp(args[0].from, "lzo") == 0) {
compress_type = "lzo";
info->compress_type = BTRFS_COMPRESS_LZO;
+ btrfs_set_opt(info->mount_opt, COMPRESS);
+ btrfs_set_fs_incompat(info, COMPRESS_LZO);
+ } else if (strncmp(args[0].from, "no", 2) == 0) {
+ compress_type = "no";
+ info->compress_type = BTRFS_COMPRESS_NONE;
+ btrfs_clear_opt(info->mount_opt, COMPRESS);
+ btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
+ compress_force = false;
} else {
ret = -EINVAL;
goto out;
}
- btrfs_set_opt(info->mount_opt, COMPRESS);
if (compress_force) {
btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
pr_info("btrfs: force %s compression\n",
@@ -1455,6 +1463,13 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
ret = btrfs_scan_one_device(vol->name, FMODE_READ,
&btrfs_fs_type, &fs_devices);
break;
+ case BTRFS_IOC_DEVICES_READY:
+ ret = btrfs_scan_one_device(vol->name, FMODE_READ,
+ &btrfs_fs_type, &fs_devices);
+ if (ret)
+ break;
+ ret = !(fs_devices->num_devices == fs_devices->total_devices);
+ break;
}
kfree(vol);
@@ -1477,16 +1492,6 @@ static int btrfs_unfreeze(struct super_block *sb)
return 0;
}
-static void btrfs_fs_dirty_inode(struct inode *inode, int flags)
-{
- int ret;
-
- ret = btrfs_dirty_inode(inode);
- if (ret)
- printk_ratelimited(KERN_ERR "btrfs: fail to dirty inode %Lu "
- "error %d\n", btrfs_ino(inode), ret);
-}
-
static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
{
struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
@@ -1526,7 +1531,6 @@ static const struct super_operations btrfs_super_ops = {
.show_options = btrfs_show_options,
.show_devname = btrfs_show_devname,
.write_inode = btrfs_write_inode,
- .dirty_inode = btrfs_fs_dirty_inode,
.alloc_inode = btrfs_alloc_inode,
.destroy_inode = btrfs_destroy_inode,
.statfs = btrfs_statfs,
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index a21f3085a334..7ac7cdcc294e 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -39,7 +39,6 @@ void put_transaction(struct btrfs_transaction *transaction)
if (atomic_dec_and_test(&transaction->use_count)) {
BUG_ON(!list_empty(&transaction->list));
WARN_ON(transaction->delayed_refs.root.rb_node);
- WARN_ON(!list_empty(&transaction->delayed_refs.seq_head));
memset(transaction, 0, sizeof(*transaction));
kmem_cache_free(btrfs_transaction_cachep, transaction);
}
@@ -101,8 +100,8 @@ loop:
kmem_cache_free(btrfs_transaction_cachep, cur_trans);
cur_trans = fs_info->running_transaction;
goto loop;
- } else if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
- spin_unlock(&root->fs_info->trans_lock);
+ } else if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+ spin_unlock(&fs_info->trans_lock);
kmem_cache_free(btrfs_transaction_cachep, cur_trans);
return -EROFS;
}
@@ -127,7 +126,6 @@ loop:
cur_trans->delayed_refs.num_heads = 0;
cur_trans->delayed_refs.flushing = 0;
cur_trans->delayed_refs.run_delayed_start = 0;
- cur_trans->delayed_refs.seq = 1;
/*
* although the tree mod log is per file system and not per transaction,
@@ -146,10 +144,8 @@ loop:
}
atomic_set(&fs_info->tree_mod_seq, 0);
- init_waitqueue_head(&cur_trans->delayed_refs.seq_wait);
spin_lock_init(&cur_trans->commit_lock);
spin_lock_init(&cur_trans->delayed_refs.lock);
- INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head);
INIT_LIST_HEAD(&cur_trans->pending_snapshots);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
@@ -300,6 +296,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
struct btrfs_transaction *cur_trans;
u64 num_bytes = 0;
int ret;
+ u64 qgroup_reserved = 0;
if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
return ERR_PTR(-EROFS);
@@ -318,6 +315,14 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
* the appropriate flushing if need be.
*/
if (num_items > 0 && root != root->fs_info->chunk_root) {
+ if (root->fs_info->quota_enabled &&
+ is_fstree(root->root_key.objectid)) {
+ qgroup_reserved = num_items * root->leafsize;
+ ret = btrfs_qgroup_reserve(root, qgroup_reserved);
+ if (ret)
+ return ERR_PTR(ret);
+ }
+
num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
ret = btrfs_block_rsv_add(root,
&root->fs_info->trans_block_rsv,
@@ -350,11 +355,16 @@ again:
h->transaction = cur_trans;
h->blocks_used = 0;
h->bytes_reserved = 0;
+ h->root = root;
h->delayed_ref_updates = 0;
h->use_count = 1;
+ h->adding_csums = 0;
h->block_rsv = NULL;
h->orig_rsv = NULL;
h->aborted = 0;
+ h->qgroup_reserved = qgroup_reserved;
+ h->delayed_ref_elem.seq = 0;
+ INIT_LIST_HEAD(&h->qgroup_ref_list);
smp_mb();
if (cur_trans->blocked && may_wait_transaction(root, type)) {
@@ -474,7 +484,6 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_transaction *cur_trans = trans->transaction;
- struct btrfs_block_rsv *rsv = trans->block_rsv;
int updates;
int err;
@@ -482,12 +491,6 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
if (cur_trans->blocked || cur_trans->delayed_refs.flushing)
return 1;
- /*
- * We need to do this in case we're deleting csums so the global block
- * rsv get's used instead of the csum block rsv.
- */
- trans->block_rsv = NULL;
-
updates = trans->delayed_ref_updates;
trans->delayed_ref_updates = 0;
if (updates) {
@@ -496,8 +499,6 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
return err;
}
- trans->block_rsv = rsv;
-
return should_end_transaction(trans, root);
}
@@ -514,8 +515,24 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
return 0;
}
+ /*
+ * do the qgroup accounting as early as possible
+ */
+ err = btrfs_delayed_refs_qgroup_accounting(trans, info);
+
btrfs_trans_release_metadata(trans, root);
trans->block_rsv = NULL;
+ /*
+ * the same root has to be passed to start_transaction and
+ * end_transaction. Subvolume quota depends on this.
+ */
+ WARN_ON(trans->root != root);
+
+ if (trans->qgroup_reserved) {
+ btrfs_qgroup_free(root, trans->qgroup_reserved);
+ trans->qgroup_reserved = 0;
+ }
+
while (count < 2) {
unsigned long cur = trans->delayed_ref_updates;
trans->delayed_ref_updates = 0;
@@ -528,6 +545,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
}
count++;
}
+ btrfs_trans_release_metadata(trans, root);
+ trans->block_rsv = NULL;
if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
should_end_transaction(trans, root)) {
@@ -568,6 +587,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
err = -EIO;
}
+ assert_qgroups_uptodate(trans);
memset(trans, 0, sizeof(*trans));
kmem_cache_free(btrfs_trans_handle_cachep, trans);
@@ -786,6 +806,13 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
ret = btrfs_run_dev_stats(trans, root->fs_info);
BUG_ON(ret);
+ ret = btrfs_run_qgroups(trans, root->fs_info);
+ BUG_ON(ret);
+
+ /* run_qgroups might have added some more refs */
+ ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
+ BUG_ON(ret);
+
while (!list_empty(&fs_info->dirty_cowonly_roots)) {
next = fs_info->dirty_cowonly_roots.next;
list_del_init(next);
@@ -960,6 +987,14 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
}
}
+ ret = btrfs_qgroup_inherit(trans, fs_info, root->root_key.objectid,
+ objectid, pending->inherit);
+ kfree(pending->inherit);
+ if (ret) {
+ pending->error = ret;
+ goto fail;
+ }
+
key.objectid = objectid;
key.offset = (u64)-1;
key.type = BTRFS_ROOT_ITEM_KEY;
@@ -1286,9 +1321,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
btrfs_run_ordered_operations(root, 0);
- btrfs_trans_release_metadata(trans, root);
- trans->block_rsv = NULL;
-
if (cur_trans->aborted)
goto cleanup_transaction;
@@ -1299,6 +1331,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
if (ret)
goto cleanup_transaction;
+ btrfs_trans_release_metadata(trans, root);
+ trans->block_rsv = NULL;
+
cur_trans = trans->transaction;
/*
@@ -1347,7 +1382,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
spin_unlock(&root->fs_info->trans_lock);
}
- if (now < cur_trans->start_time || now - cur_trans->start_time < 1)
+ if (!btrfs_test_opt(root, SSD) &&
+ (now < cur_trans->start_time || now - cur_trans->start_time < 1))
should_grow = 1;
do {
@@ -1369,6 +1405,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
goto cleanup_transaction;
/*
+ * running the delayed items may have added new refs. account
+ * them now so that they hinder processing of more delayed refs
+ * as little as possible.
+ */
+ btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
+
+ /*
* rename don't use btrfs_join_transaction, so, once we
* set the transaction to blocked above, we aren't going
* to get any new ordered operations. We can safely run
@@ -1480,6 +1523,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
root->fs_info->chunk_root->node);
switch_commit_root(root->fs_info->chunk_root);
+ assert_qgroups_uptodate(trans);
update_super_roots(root);
if (!root->fs_info->log_root_recovering) {
@@ -1549,6 +1593,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
return ret;
cleanup_transaction:
+ btrfs_trans_release_metadata(trans, root);
+ trans->block_rsv = NULL;
btrfs_printk(root->fs_info, "Skipping commit of aborted transaction.\n");
// WARN_ON(1);
if (current->journal_info == trans)
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index fe27379e368b..e8b8416c688b 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -20,6 +20,7 @@
#define __BTRFS_TRANSACTION__
#include "btrfs_inode.h"
#include "delayed-ref.h"
+#include "ctree.h"
struct btrfs_transaction {
u64 transid;
@@ -49,6 +50,7 @@ struct btrfs_transaction {
struct btrfs_trans_handle {
u64 transid;
u64 bytes_reserved;
+ u64 qgroup_reserved;
unsigned long use_count;
unsigned long blocks_reserved;
unsigned long blocks_used;
@@ -57,12 +59,22 @@ struct btrfs_trans_handle {
struct btrfs_block_rsv *block_rsv;
struct btrfs_block_rsv *orig_rsv;
int aborted;
+ int adding_csums;
+ /*
+ * this root is only needed to validate that the root passed to
+ * start_transaction is the same as the one passed to end_transaction.
+ * Subvolume quota depends on this
+ */
+ struct btrfs_root *root;
+ struct seq_list delayed_ref_elem;
+ struct list_head qgroup_ref_list;
};
struct btrfs_pending_snapshot {
struct dentry *dentry;
struct btrfs_root *root;
struct btrfs_root *snap;
+ struct btrfs_qgroup_inherit *inherit;
/* block reservation for the operation */
struct btrfs_block_rsv block_rsv;
/* extra metadata reseration for relocation */
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 8abeae4224f9..c86670f4f285 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -637,7 +637,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
}
inode_set_bytes(inode, saved_nbytes);
- btrfs_update_inode(trans, root, inode);
+ ret = btrfs_update_inode(trans, root, inode);
out:
if (inode)
iput(inode);
@@ -1133,7 +1133,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
if (ret == 0) {
btrfs_inc_nlink(inode);
- btrfs_update_inode(trans, root, inode);
+ ret = btrfs_update_inode(trans, root, inode);
} else if (ret == -EEXIST) {
ret = 0;
} else {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index ecaad40e7ef4..b8708f994e67 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -429,6 +429,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
mutex_init(&fs_devices->device_list_mutex);
fs_devices->latest_devid = orig->latest_devid;
fs_devices->latest_trans = orig->latest_trans;
+ fs_devices->total_devices = orig->total_devices;
memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid));
/* We have held the volume lock, it is safe to get the devices. */
@@ -739,6 +740,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
int ret;
u64 devid;
u64 transid;
+ u64 total_devices;
flags |= FMODE_EXCL;
bdev = blkdev_get_by_path(path, flags, holder);
@@ -760,6 +762,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
disk_super = (struct btrfs_super_block *)bh->b_data;
devid = btrfs_stack_device_id(&disk_super->dev_item);
transid = btrfs_super_generation(disk_super);
+ total_devices = btrfs_super_num_devices(disk_super);
if (disk_super->label[0])
printk(KERN_INFO "device label %s ", disk_super->label);
else
@@ -767,7 +770,8 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
printk(KERN_CONT "devid %llu transid %llu %s\n",
(unsigned long long)devid, (unsigned long long)transid, path);
ret = device_list_add(path, disk_super, devid, fs_devices_ret);
-
+ if (!ret && fs_devices_ret)
+ (*fs_devices_ret)->total_devices = total_devices;
brelse(bh);
error_close:
mutex_unlock(&uuid_mutex);
@@ -1433,6 +1437,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
list_del_rcu(&device->dev_list);
device->fs_devices->num_devices--;
+ device->fs_devices->total_devices--;
if (device->missing)
root->fs_info->fs_devices->missing_devices--;
@@ -1550,6 +1555,7 @@ static int btrfs_prepare_sprout(struct btrfs_root *root)
fs_devices->seeding = 0;
fs_devices->num_devices = 0;
fs_devices->open_devices = 0;
+ fs_devices->total_devices = 0;
fs_devices->seed = seed_devices;
generate_random_uuid(fs_devices->fsid);
@@ -1749,6 +1755,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
root->fs_info->fs_devices->num_devices++;
root->fs_info->fs_devices->open_devices++;
root->fs_info->fs_devices->rw_devices++;
+ root->fs_info->fs_devices->total_devices++;
if (device->can_discard)
root->fs_info->fs_devices->num_can_discard++;
root->fs_info->fs_devices->total_rw_bytes += device->total_bytes;
@@ -4736,9 +4743,6 @@ int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
key.offset = device->devid;
ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
if (ret) {
- printk_in_rcu(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n",
- rcu_str_deref(device->name),
- (unsigned long long)device->devid);
__btrfs_reset_dev_stats(device);
device->dev_stats_valid = 1;
btrfs_release_path(path);
@@ -4880,6 +4884,14 @@ void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
{
+ int i;
+
+ for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
+ if (btrfs_dev_stat_read(dev, i) != 0)
+ break;
+ if (i == BTRFS_DEV_STAT_VALUES_MAX)
+ return; /* all values == 0, suppress message */
+
printk_in_rcu(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
rcu_str_deref(dev->name),
btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
@@ -4890,8 +4902,7 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
}
int btrfs_get_dev_stats(struct btrfs_root *root,
- struct btrfs_ioctl_get_dev_stats *stats,
- int reset_after_read)
+ struct btrfs_ioctl_get_dev_stats *stats)
{
struct btrfs_device *dev;
struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
@@ -4909,7 +4920,7 @@ int btrfs_get_dev_stats(struct btrfs_root *root,
printk(KERN_WARNING
"btrfs: get dev_stats failed, not yet valid\n");
return -ENODEV;
- } else if (reset_after_read) {
+ } else if (stats->flags & BTRFS_DEV_STATS_RESET) {
for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
if (stats->nr_items > i)
stats->values[i] =
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 95f6637614db..5479325987b3 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -126,6 +126,7 @@ struct btrfs_fs_devices {
u64 missing_devices;
u64 total_rw_bytes;
u64 num_can_discard;
+ u64 total_devices;
struct block_device *latest_bdev;
/* all of the devices in the FS, protected by a mutex
@@ -293,8 +294,7 @@ struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
void btrfs_dev_stat_print_on_error(struct btrfs_device *device);
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
int btrfs_get_dev_stats(struct btrfs_root *root,
- struct btrfs_ioctl_get_dev_stats *stats,
- int reset_after_read);
+ struct btrfs_ioctl_get_dev_stats *stats);
int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info);
int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);