From 31e818fe7375d60de9953051f7bd1615cebc3681 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 20 Feb 2015 18:00:26 +0100 Subject: btrfs: cleanup, use kmalloc_array/kcalloc array helpers Convert kmalloc(nr * size, ..) to kmalloc_array that does additional overflow checks, the zeroing variant is kcalloc. Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8564d8ce03de..21e4affafa3e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -463,7 +463,7 @@ again: */ if (inode_need_compress(inode)) { WARN_ON(pages); - pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); + pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS); if (!pages) { /* just bail out to the uncompressed code */ goto cont; -- cgit v1.2.3 From 4a3d1caf8a2c16c55424a0768eade54ee0922341 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 13 Mar 2015 01:58:46 -0700 Subject: fs: btrfs: Add missing include file Building alpha:allmodconfig fails with fs/btrfs/inode.c: In function 'check_direct_IO': fs/btrfs/inode.c:8050:2: error: implicit declaration of function 'iov_iter_alignment' due to a missing include file. 
Fixes: 3737c63e1fb0 ("fs: move struct kiocb to fs.h") Cc: Christoph Hellwig Signed-off-by: Guenter Roeck Acked-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bb088bd8f7f0..97bc1ffc9c7b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -43,6 +43,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" -- cgit v1.2.3 From 28ed1345a50491d78e1454ad4005dc5d3557a69e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 17 Dec 2014 09:41:04 -0800 Subject: btrfs: actively run the delayed refs while deleting large files When we are deleting large files with large extents, we are building up a huge set of delayed refs for processing. Truncate isn't checking often enough to see if we need to back off and process those, or let a commit proceed. The end result is long stalls after the rm, and very long commit times. During the commits, other processes back up waiting to start new transactions and we get into trouble. Signed-off-by: Chris Mason --- fs/btrfs/free-space-cache.c | 2 ++ fs/btrfs/inode.c | 45 +++++++++++++++++++++++++++++++++++++++++++-- fs/btrfs/transaction.c | 2 +- fs/btrfs/tree-log.c | 8 ++++++-- 4 files changed, 52 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 764528a4f6fd..c51482031edf 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -235,6 +235,8 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, /* * We don't need an orphan item because truncating the free space cache * will never be split across transactions. 
+ * We don't need to check for -EAGAIN because we're a free space + * cache inode */ ret = btrfs_truncate_inode_items(trans, root, inode, 0, BTRFS_EXTENT_DATA_KEY); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 97bc1ffc9c7b..e3fe137fb826 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4197,10 +4197,20 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, int extent_type = -1; int ret; int err = 0; + int be_nice = 0; u64 ino = btrfs_ino(inode); + u64 bytes_deleted = 0; BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); + /* + * for non-free space inodes and ref cows, we want to back off from + * time to time + */ + if (!btrfs_is_free_space_inode(inode) && + test_bit(BTRFS_ROOT_REF_COWS, &root->state)) + be_nice = 1; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -4230,6 +4240,19 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, key.type = (u8)-1; search_again: + /* + * with a 16K leaf size and 128MB extents, you can actually queue + * up a huge file in a single leaf. 
Most of the time that + * bytes_deleted is > 0, it will be huge by the time we get here + */ + if (be_nice && bytes_deleted > 32 * 1024 * 1024) { + if (btrfs_should_end_transaction(trans, root)) { + err = -EAGAIN; + goto error; + } + } + + path->leave_spinning = 1; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) { @@ -4376,11 +4399,18 @@ delete: (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || root == root->fs_info->tree_root)) { btrfs_set_path_blocking(path); + bytes_deleted += extent_num_bytes; ret = btrfs_free_extent(trans, root, extent_start, extent_num_bytes, 0, btrfs_header_owner(leaf), ino, extent_offset, 0); BUG_ON(ret); + if (be_nice && pending_del_nr && + (pending_del_nr % 16 == 0) && + bytes_deleted > 1024 * 1024) { + btrfs_async_run_delayed_refs(root, + trans->delayed_ref_updates * 2, 0); + } } if (found_type == BTRFS_INODE_ITEM_KEY) @@ -4416,7 +4446,18 @@ error: if (last_size != (u64)-1 && root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) btrfs_ordered_update_i_size(inode, last_size, NULL); + btrfs_free_path(path); + + if (be_nice && bytes_deleted > 32 * 1024 * 1024) { + unsigned long updates = trans->delayed_ref_updates; + if (updates) { + trans->delayed_ref_updates = 0; + ret = btrfs_run_delayed_refs(trans, root, updates * 2); + if (ret && !err) + err = ret; + } + } return err; } @@ -5013,7 +5054,7 @@ void btrfs_evict_inode(struct inode *inode) trans->block_rsv = rsv; ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); - if (ret != -ENOSPC) + if (ret != -ENOSPC && ret != -EAGAIN) break; trans->block_rsv = &root->fs_info->trans_block_rsv; @@ -8582,7 +8623,7 @@ static int btrfs_truncate(struct inode *inode) ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, BTRFS_EXTENT_DATA_KEY); - if (ret != -ENOSPC) { + if (ret != -ENOSPC && ret != -EAGAIN) { err = ret; break; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 91c303ac40b6..ba831ee41891 100644 --- a/fs/btrfs/transaction.c +++ 
b/fs/btrfs/transaction.c @@ -718,7 +718,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, updates = trans->delayed_ref_updates; trans->delayed_ref_updates = 0; if (updates) { - err = btrfs_run_delayed_refs(trans, root, updates); + err = btrfs_run_delayed_refs(trans, root, updates * 2); if (err) /* Error code will also eval true */ return err; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 016c90fc85db..a089b5944efc 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4251,8 +4251,12 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, &BTRFS_I(inode)->runtime_flags); clear_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); - ret = btrfs_truncate_inode_items(trans, log, - inode, 0, 0); + while(1) { + ret = btrfs_truncate_inode_items(trans, + log, inode, 0, 0); + if (ret != -EAGAIN) + break; + } } } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags) || -- cgit v1.2.3 From 1262133b8d6f10f5ca7621cd4cf65ddf6254126a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 3 Feb 2015 07:50:16 -0800 Subject: Btrfs: account for crcs in delayed ref processing As we delete large extents, we end up doing huge amounts of COW in order to delete the corresponding crcs. This adds accounting so that we keep track of that space and flushing of delayed refs so that we don't build up too much delayed crc work. This helps limit the delayed work that must be done at commit time and tries to avoid ENOSPC aborts because the crcs eat all the global reserves. 
Signed-off-by: Chris Mason --- fs/btrfs/delayed-ref.c | 22 ++++++++++++++++++++-- fs/btrfs/delayed-ref.h | 10 ++++++++++ fs/btrfs/extent-tree.c | 46 +++++++++++++++++++++++++++++++--------------- fs/btrfs/inode.c | 25 ++++++++++++++++++------- fs/btrfs/transaction.c | 4 ++++ 5 files changed, 83 insertions(+), 24 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 6d16bea94e1c..8f8ed7d20bac 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -489,11 +489,13 @@ update_existing_ref(struct btrfs_trans_handle *trans, * existing and update must have the same bytenr */ static noinline void -update_existing_head_ref(struct btrfs_delayed_ref_node *existing, +update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_node *existing, struct btrfs_delayed_ref_node *update) { struct btrfs_delayed_ref_head *existing_ref; struct btrfs_delayed_ref_head *ref; + int old_ref_mod; existing_ref = btrfs_delayed_node_to_head(existing); ref = btrfs_delayed_node_to_head(update); @@ -541,7 +543,20 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing, * only need the lock for this case cause we could be processing it * currently, for refs we just added we know we're a-ok. */ + old_ref_mod = existing_ref->total_ref_mod; existing->ref_mod += update->ref_mod; + existing_ref->total_ref_mod += update->ref_mod; + + /* + * If we are going from a positive ref mod to a negative or vice + * versa we need to make sure to adjust pending_csums accordingly.
+ */ + if (existing_ref->is_data) { + if (existing_ref->total_ref_mod >= 0 && old_ref_mod < 0) + delayed_refs->pending_csums -= existing->num_bytes; + if (existing_ref->total_ref_mod < 0 && old_ref_mod >= 0) + delayed_refs->pending_csums += existing->num_bytes; + } spin_unlock(&existing_ref->lock); } @@ -605,6 +620,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, head_ref->is_data = is_data; head_ref->ref_root = RB_ROOT; head_ref->processing = 0; + head_ref->total_ref_mod = count_mod; spin_lock_init(&head_ref->lock); mutex_init(&head_ref->mutex); @@ -614,7 +630,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, existing = htree_insert(&delayed_refs->href_root, &head_ref->href_node); if (existing) { - update_existing_head_ref(&existing->node, ref); + update_existing_head_ref(delayed_refs, &existing->node, ref); /* * we've updated the existing ref, free the newly * allocated ref @@ -622,6 +638,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref); head_ref = existing; } else { + if (is_data && count_mod < 0) + delayed_refs->pending_csums += num_bytes; delayed_refs->num_heads++; delayed_refs->num_heads_ready++; atomic_inc(&delayed_refs->num_entries); diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index a764e2340d48..5eb0892396d0 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -88,6 +88,14 @@ struct btrfs_delayed_ref_head { struct rb_node href_node; struct btrfs_delayed_extent_op *extent_op; + + /* + * This is used to track the final ref_mod from all the refs associated + * with this head ref, this is not adjusted as delayed refs are run, + * this is meant to track if we need to do the csum accounting or not. 
+ */ + int total_ref_mod; + /* * when a new extent is allocated, it is just reserved in memory * The actual extent isn't inserted into the extent allocation tree @@ -138,6 +146,8 @@ struct btrfs_delayed_ref_root { /* total number of head nodes ready for processing */ unsigned long num_heads_ready; + u64 pending_csums; + /* * set when the tree is flushing before a transaction commit, * used by the throttling code to decide if new updates need diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 41e5812c131f..a6f88eb57b39 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2538,6 +2538,12 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, * list before we release it. */ if (btrfs_delayed_ref_is_head(ref)) { + if (locked_ref->is_data && + locked_ref->total_ref_mod < 0) { + spin_lock(&delayed_refs->lock); + delayed_refs->pending_csums -= ref->num_bytes; + spin_unlock(&delayed_refs->lock); + } btrfs_delayed_ref_unlock(locked_ref); locked_ref = NULL; } @@ -2626,11 +2632,31 @@ static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads) return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root)); } +/* + * Takes the number of bytes to be csumm'ed and figures out how many leaves it + * would require to store the csums for that many bytes. 
+ */ +static u64 csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes) +{ + u64 csum_size; + u64 num_csums_per_leaf; + u64 num_csums; + + csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item); + num_csums_per_leaf = div64_u64(csum_size, + (u64)btrfs_super_csum_size(root->fs_info->super_copy)); + num_csums = div64_u64(csum_bytes, root->sectorsize); + num_csums += num_csums_per_leaf - 1; + num_csums = div64_u64(num_csums, num_csums_per_leaf); + return num_csums; +} + int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_block_rsv *global_rsv; u64 num_heads = trans->transaction->delayed_refs.num_heads_ready; + u64 csum_bytes = trans->transaction->delayed_refs.pending_csums; u64 num_bytes; int ret = 0; @@ -2639,6 +2665,7 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans, if (num_heads > 1) num_bytes += (num_heads - 1) * root->nodesize; num_bytes <<= 1; + num_bytes += csum_bytes_to_leaves(root, csum_bytes) * root->nodesize; global_rsv = &root->fs_info->global_block_rsv; /* @@ -5065,30 +5092,19 @@ static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes, int reserve) { struct btrfs_root *root = BTRFS_I(inode)->root; - u64 csum_size; - int num_csums_per_leaf; - int num_csums; - int old_csums; + u64 old_csums, num_csums; if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM && BTRFS_I(inode)->csum_bytes == 0) return 0; - old_csums = (int)div_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize); + old_csums = csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes); + if (reserve) BTRFS_I(inode)->csum_bytes += num_bytes; else BTRFS_I(inode)->csum_bytes -= num_bytes; - csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item); - num_csums_per_leaf = (int)div_u64(csum_size, - sizeof(struct btrfs_csum_item) + - sizeof(struct btrfs_disk_key)); - num_csums = (int)div_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize); - num_csums = num_csums + num_csums_per_leaf - 
1; - num_csums = num_csums / num_csums_per_leaf; - - old_csums = old_csums + num_csums_per_leaf - 1; - old_csums = old_csums / num_csums_per_leaf; + num_csums = csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes); /* No change, no need to reserve more */ if (old_csums == num_csums) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e3fe137fb826..cec23cf812ee 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4197,9 +4197,10 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, int extent_type = -1; int ret; int err = 0; - int be_nice = 0; u64 ino = btrfs_ino(inode); u64 bytes_deleted = 0; + bool be_nice = 0; + bool should_throttle = 0; BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); @@ -4405,19 +4406,20 @@ delete: btrfs_header_owner(leaf), ino, extent_offset, 0); BUG_ON(ret); - if (be_nice && pending_del_nr && - (pending_del_nr % 16 == 0) && - bytes_deleted > 1024 * 1024) { + if (btrfs_should_throttle_delayed_refs(trans, root)) btrfs_async_run_delayed_refs(root, trans->delayed_ref_updates * 2, 0); - } } if (found_type == BTRFS_INODE_ITEM_KEY) break; + should_throttle = + btrfs_should_throttle_delayed_refs(trans, root); + if (path->slots[0] == 0 || - path->slots[0] != pending_del_slot) { + path->slots[0] != pending_del_slot || + (be_nice && should_throttle)) { if (pending_del_nr) { ret = btrfs_del_items(trans, root, path, pending_del_slot, @@ -4430,6 +4432,15 @@ delete: pending_del_nr = 0; } btrfs_release_path(path); + if (be_nice && should_throttle) { + unsigned long updates = trans->delayed_ref_updates; + if (updates) { + trans->delayed_ref_updates = 0; + ret = btrfs_run_delayed_refs(trans, root, updates * 2); + if (ret && !err) + err = ret; + } + } goto search_again; } else { path->slots[0]--; @@ -4449,7 +4460,7 @@ error: btrfs_free_path(path); - if (be_nice && bytes_deleted > 32 * 1024 * 1024) { + if (be_nice && btrfs_should_throttle_delayed_refs(trans, root)) { unsigned long updates = trans->delayed_ref_updates; if 
(updates) { trans->delayed_ref_updates = 0; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index ba831ee41891..8b9eea8f2406 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -64,6 +64,9 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction) if (atomic_dec_and_test(&transaction->use_count)) { BUG_ON(!list_empty(&transaction->list)); WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root)); + if (transaction->delayed_refs.pending_csums) + printk(KERN_ERR "pending csums is %llu\n", + transaction->delayed_refs.pending_csums); while (!list_empty(&transaction->pending_chunks)) { struct extent_map *em; @@ -223,6 +226,7 @@ loop: cur_trans->delayed_refs.href_root = RB_ROOT; atomic_set(&cur_trans->delayed_refs.num_entries, 0); cur_trans->delayed_refs.num_heads_ready = 0; + cur_trans->delayed_refs.pending_csums = 0; cur_trans->delayed_refs.num_heads = 0; cur_trans->delayed_refs.flushing = 0; cur_trans->delayed_refs.run_delayed_start = 0; -- cgit v1.2.3 From 28f75a0e6cdfbce8115487ecbc0968a2c4e01806 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 4 Feb 2015 06:59:29 -0800 Subject: Btrfs: refill block reserves during truncate When truncate starts, it allocates some space in the block reserves so that we'll have enough to update metadata along the way. For very large files, we can easily go through all of that space as we loop through the extents. This changes truncate to refill the space reservation as it progresses through the file. 
Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 +++ fs/btrfs/extent-tree.c | 9 ++++----- fs/btrfs/inode.c | 45 +++++++++++++++++++++++++++++++++++++++------ 3 files changed, 46 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 95944b81ed5c..6bf16d5134c5 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3297,6 +3297,9 @@ static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping) } /* extent-tree.c */ + +u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes); + static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, unsigned num_items) { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a6f88eb57b39..75f4bed6e6db 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2636,7 +2636,7 @@ static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads) * Takes the number of bytes to be csumm'ed and figures out how many leaves it * would require to store the csums for that many bytes. 
*/ -static u64 csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes) +u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes) { u64 csum_size; u64 num_csums_per_leaf; @@ -2665,7 +2665,7 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans, if (num_heads > 1) num_bytes += (num_heads - 1) * root->nodesize; num_bytes <<= 1; - num_bytes += csum_bytes_to_leaves(root, csum_bytes) * root->nodesize; + num_bytes += btrfs_csum_bytes_to_leaves(root, csum_bytes) * root->nodesize; global_rsv = &root->fs_info->global_block_rsv; /* @@ -5098,13 +5098,12 @@ static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes, BTRFS_I(inode)->csum_bytes == 0) return 0; - old_csums = csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes); - + old_csums = btrfs_csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes); if (reserve) BTRFS_I(inode)->csum_bytes += num_bytes; else BTRFS_I(inode)->csum_bytes -= num_bytes; - num_csums = csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes); + num_csums = btrfs_csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes); /* No change, no need to reserve more */ if (old_csums == num_csums) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index cec23cf812ee..88537c52e114 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4163,6 +4163,21 @@ out: return err; } +static int truncate_space_check(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytes_deleted) +{ + int ret; + + bytes_deleted = btrfs_csum_bytes_to_leaves(root, bytes_deleted); + ret = btrfs_block_rsv_add(root, &root->fs_info->trans_block_rsv, + bytes_deleted, BTRFS_RESERVE_NO_FLUSH); + if (!ret) + trans->bytes_reserved += bytes_deleted; + return ret; + +} + /* * this can truncate away extent items, csum items and directory items. 
* It starts at a high offset and removes keys until it can't find @@ -4201,6 +4216,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, u64 bytes_deleted = 0; bool be_nice = 0; bool should_throttle = 0; + bool should_end = 0; BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); @@ -4396,6 +4412,8 @@ delete: } else { break; } + should_throttle = 0; + if (found_extent && (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || root == root->fs_info->tree_root)) { @@ -4409,17 +4427,24 @@ delete: if (btrfs_should_throttle_delayed_refs(trans, root)) btrfs_async_run_delayed_refs(root, trans->delayed_ref_updates * 2, 0); + if (be_nice) { + if (truncate_space_check(trans, root, + extent_num_bytes)) { + should_end = 1; + } + if (btrfs_should_throttle_delayed_refs(trans, + root)) { + should_throttle = 1; + } + } } if (found_type == BTRFS_INODE_ITEM_KEY) break; - should_throttle = - btrfs_should_throttle_delayed_refs(trans, root); - if (path->slots[0] == 0 || path->slots[0] != pending_del_slot || - (be_nice && should_throttle)) { + should_throttle || should_end) { if (pending_del_nr) { ret = btrfs_del_items(trans, root, path, pending_del_slot, @@ -4432,7 +4457,7 @@ delete: pending_del_nr = 0; } btrfs_release_path(path); - if (be_nice && should_throttle) { + if (should_throttle) { unsigned long updates = trans->delayed_ref_updates; if (updates) { trans->delayed_ref_updates = 0; @@ -4441,6 +4466,14 @@ delete: err = ret; } } + /* + * if we failed to refill our space rsv, bail out + * and let the transaction restart + */ + if (should_end) { + err = -EAGAIN; + goto error; + } goto search_again; } else { path->slots[0]--; @@ -4460,7 +4493,7 @@ error: btrfs_free_path(path); - if (be_nice && btrfs_should_throttle_delayed_refs(trans, root)) { + if (be_nice && bytes_deleted > 32 * 1024 * 1024) { unsigned long updates = trans->delayed_ref_updates; if (updates) { trans->delayed_ref_updates = 0; -- cgit v1.2.3 From 3bce876fd58a745b8a1bc0bd8325c3e5b4cebeb0 Mon Sep 17 00:00:00 
2001 From: Josef Bacik Date: Tue, 24 Feb 2015 12:35:51 -0800 Subject: Btrfs: don't steal from the global reserve if we don't have the space btrfs_evict_inode() needs to be more careful about stealing from the global_rsv. We don't want to end up aborting commit with ENOSPC just because the evict_inode code was too greedy. Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 88537c52e114..141df0ce6f93 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5010,6 +5010,7 @@ void btrfs_evict_inode(struct inode *inode) struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_block_rsv *rsv, *global_rsv; + int steal_from_global = 0; u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); int ret; @@ -5077,9 +5078,20 @@ void btrfs_evict_inode(struct inode *inode) * hard as possible to get this to work. */ if (ret) - ret = btrfs_block_rsv_migrate(global_rsv, rsv, min_size); + steal_from_global++; + else + steal_from_global = 0; + ret = 0; - if (ret) { + /* + * steal_from_global == 0: we reserved stuff, hooray! + * steal_from_global == 1: we didn't reserve stuff, boo! + * steal_from_global == 2: we've committed, still not a lot of + * room but maybe we'll have room in the global reserve this + * time. + * steal_from_global == 3: abandon all hope! + */ + if (steal_from_global > 2) { btrfs_warn(root->fs_info, "Could not get space for a delete, will truncate on mount %d", ret); @@ -5095,6 +5107,36 @@ void btrfs_evict_inode(struct inode *inode) goto no_delete; } + /* + * We can't just steal from the global reserve, we need to make + * sure there is room to do it, if not we need to commit and try + * again.
+ */ + if (steal_from_global) { + if (!btrfs_check_space_for_delayed_refs(trans, root)) + ret = btrfs_block_rsv_migrate(global_rsv, rsv, + min_size); + else + ret = -ENOSPC; + } + + /* + * Couldn't steal from the global reserve, we have too much + * pending stuff built up, commit the transaction and try it + * again. + */ + if (ret) { + ret = btrfs_commit_transaction(trans, root); + if (ret) { + btrfs_orphan_del(NULL, inode); + btrfs_free_block_rsv(root, rsv); + goto no_delete; + } + continue; + } else { + steal_from_global = 0; + } + trans->block_rsv = rsv; ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); -- cgit v1.2.3 From d7c151717a1efe289aec29fb9f94485f64262c0b Mon Sep 17 00:00:00 2001 From: Zhao Lei Date: Thu, 26 Feb 2015 10:49:20 +0800 Subject: btrfs: Fix NO_SPACE bug caused by delayed-iput Steps to reproduce: while true; do dd if=/dev/zero of=/btrfs_dir/file count=[fs_size * 75%] rm /btrfs_dir/file sync done And we'll see dd failed because btrfs return NO_SPACE. Reason: Normally, btrfs_commit_transaction() call btrfs_run_delayed_iputs() in end to free fs space for next write, but sometimes it hadn't done work on time, because btrfs-cleaner thread get delayed-iputs from list before, but do iput() after next write. This is log: [ 2569.050776] comm=btrfs-cleaner func=btrfs_evict_inode() begin [ 2569.084280] comm=sync func=btrfs_commit_transaction() call btrfs_run_delayed_iputs() [ 2569.085418] comm=sync func=btrfs_commit_transaction() done btrfs_run_delayed_iputs() [ 2569.087554] comm=sync func=btrfs_commit_transaction() end [ 2569.191081] comm=dd begin [ 2569.790112] comm=dd func=__btrfs_buffered_write() ret=-28 [ 2569.847479] comm=btrfs-cleaner func=add_pinned_bytes() 0 + 32677888 = 32677888 [ 2569.849530] comm=btrfs-cleaner func=add_pinned_bytes() 32677888 + 23834624 = 56512512 ... 
[ 2569.903893] comm=btrfs-cleaner func=add_pinned_bytes() 943976448 + 21762048 = 965738496 [ 2569.908270] comm=btrfs-cleaner func=btrfs_evict_inode() end Fix: Make btrfs_commit_transaction() wait current running btrfs-cleaner's delayed-iputs() done in end. Test: Use script similar to above(more complex), before patch: 7 failed in 100 * 20 loop. after patch: 0 failed in 100 * 20 loop. Signed-off-by: Zhao Lei Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 3 ++- fs/btrfs/extent-tree.c | 6 ++++++ fs/btrfs/inode.c | 4 ++++ 4 files changed, 13 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 10b6a75ab7e6..d48b22f31182 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1538,6 +1538,7 @@ struct btrfs_fs_info { spinlock_t delayed_iput_lock; struct list_head delayed_iputs; + struct rw_semaphore delayed_iput_sem; /* this protects tree_mod_seq_list */ spinlock_t tree_mod_seq_lock; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 62cd3b63190d..2ef9a4b72d06 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2487,11 +2487,12 @@ int open_ctree(struct super_block *sb, spin_lock_init(&fs_info->qgroup_op_lock); spin_lock_init(&fs_info->buffer_lock); spin_lock_init(&fs_info->unused_bgs_lock); - mutex_init(&fs_info->unused_bg_unpin_mutex); rwlock_init(&fs_info->tree_mod_log_lock); + mutex_init(&fs_info->unused_bg_unpin_mutex); mutex_init(&fs_info->reloc_mutex); mutex_init(&fs_info->delalloc_root_mutex); seqlock_init(&fs_info->profiles_lock); + init_rwsem(&fs_info->delayed_iput_sem); init_completion(&fs_info->kobj_unregister); INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index be4a79a69ed1..46cb1d414912 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3950,6 +3950,12 @@ commit_trans: ret = btrfs_commit_transaction(trans, root); if (ret) return ret; + /* + * make sure that all running 
delayed iput are + * done + */ + down_write(&root->fs_info->delayed_iput_sem); + up_write(&root->fs_info->delayed_iput_sem); goto again; } else { btrfs_end_transaction(trans, root); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 141df0ce6f93..6ef97c184c7b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3111,6 +3111,8 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) if (empty) return; + down_read(&fs_info->delayed_iput_sem); + spin_lock(&fs_info->delayed_iput_lock); list_splice_init(&fs_info->delayed_iputs, &list); spin_unlock(&fs_info->delayed_iput_lock); @@ -3121,6 +3123,8 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) iput(delayed->inode); kfree(delayed); } + + up_read(&root->fs_info->delayed_iput_sem); } /* -- cgit v1.2.3 From 31193213f1f9c13f6485007ef1e233b119e46910 Mon Sep 17 00:00:00 2001 From: Dongsheng Yang Date: Fri, 12 Dec 2014 16:44:35 +0800 Subject: Btrfs: qgroup: Introduce a may_use to account space_info->bytes_may_use. Currently, for pre_alloc or delay_alloc, the bytes will be accounted in space_info by the three guys. space_info->bytes_may_use --- space_info->reserved --- space_info->used. But on the other hand, in qgroup, there are only two counters to account the bytes, qgroup->reserved and qgroup->excl. And qg->reserved accounts bytes in space_info->bytes_may_use and qg->excl accounts bytes in space_info->used. So the bytes in space_info->reserved is not accounted in qgroup. If so, there is a window we can exceed the quota limit when bytes is in space_info->reserved. Example: # btrfs quota enable /mnt # btrfs qgroup limit -e 10M /mnt # for((i=0;i<20;i++));do fallocate -l 1M /mnt/data$i; done # sync # btrfs qgroup show -pcre /mnt qgroupid rfer excl max_rfer max_excl parent child -------- ---- ---- -------- -------- ------ ----- 0/5 20987904 20987904 0 10485760 --- --- qg->excl is 20987904 larger than max_excl 10485760. 
This patch introduce a new counter named may_use to qgroup, then there are three counters in qgroup to account bytes in space_info as below. space_info->bytes_may_use --- space_info->reserved --- space_info->used. qgroup->may_use --- qgroup->reserved --- qgroup->excl With this patch applied: # btrfs quota enable /mnt # btrfs qgroup limit -e 10M /mnt # for((i=0;i<20;i++));do fallocate -l 1M /mnt/data$i; done fallocate: /mnt/data9: fallocate failed: Disk quota exceeded fallocate: /mnt/data10: fallocate failed: Disk quota exceeded fallocate: /mnt/data11: fallocate failed: Disk quota exceeded fallocate: /mnt/data12: fallocate failed: Disk quota exceeded fallocate: /mnt/data13: fallocate failed: Disk quota exceeded fallocate: /mnt/data14: fallocate failed: Disk quota exceeded fallocate: /mnt/data15: fallocate failed: Disk quota exceeded fallocate: /mnt/data16: fallocate failed: Disk quota exceeded fallocate: /mnt/data17: fallocate failed: Disk quota exceeded fallocate: /mnt/data18: fallocate failed: Disk quota exceeded fallocate: /mnt/data19: fallocate failed: Disk quota exceeded # sync # btrfs qgroup show -pcre /mnt qgroupid rfer excl max_rfer max_excl parent child -------- ---- ---- -------- -------- ------ ----- 0/5 9453568 9453568 0 10485760 --- --- Reported-by: Cyril SCETBON Signed-off-by: Dongsheng Yang Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 20 ++++++++++++++- fs/btrfs/inode.c | 18 ++++++++++++- fs/btrfs/qgroup.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++--- fs/btrfs/qgroup.h | 4 +++ 4 files changed, 104 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 695d5110e020..3113e0b79b99 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5725,8 +5725,12 @@ static int pin_down_extent(struct btrfs_root *root, set_extent_dirty(root->fs_info->pinned_extents, bytenr, bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); - if (reserved) + if (reserved) { + 
btrfs_qgroup_update_reserved_bytes(root->fs_info, + root->root_key.objectid, + num_bytes, -1); trace_btrfs_reserved_extent_free(root, bytenr, num_bytes); + } return 0; } @@ -6464,6 +6468,9 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, btrfs_put_block_group(cache); trace_btrfs_reserved_extent_free(root, buf->start, buf->len); pin = 0; + btrfs_qgroup_update_reserved_bytes(root->fs_info, + root->root_key.objectid, + buf->len, -1); } out: if (pin) @@ -7196,7 +7203,11 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root, ret = btrfs_discard_extent(root, start, len, NULL); btrfs_add_free_space(cache, start, len); btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc); + btrfs_qgroup_update_reserved_bytes(root->fs_info, + root->root_key.objectid, + len, -1); } + btrfs_put_block_group(cache); trace_btrfs_reserved_extent_free(root, start, len); @@ -7433,6 +7444,9 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, BUG_ON(ret); /* logic error */ ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 0, owner, offset, ins, 1); + btrfs_qgroup_update_reserved_bytes(root->fs_info, + root->root_key.objectid, + ins->offset, 1); btrfs_put_block_group(block_group); return ret; } @@ -7579,6 +7593,10 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, return ERR_PTR(ret); } + btrfs_qgroup_update_reserved_bytes(root->fs_info, + root_objectid, + ins.offset, 1); + buf = btrfs_init_new_buffer(trans, root, ins.objectid, level); BUG_ON(IS_ERR(buf)); /* -ENOMEM */ diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6ef97c184c7b..a9f69a0d4b08 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -60,6 +60,7 @@ #include "backref.h" #include "hash.h" #include "props.h" +#include "qgroup.h" struct btrfs_iget_args { struct btrfs_key *location; @@ -753,7 +754,9 @@ retry: } goto out_free; } - + btrfs_qgroup_update_reserved_bytes(root->fs_info, + root->root_key.objectid, + ins.offset, 1); /* * 
here we're doing allocation and writeback of the * compressed pages @@ -978,6 +981,10 @@ static noinline int cow_file_range(struct inode *inode, if (ret < 0) goto out_unlock; + btrfs_qgroup_update_reserved_bytes(root->fs_info, + root->root_key.objectid, + ins.offset, 1); + em = alloc_extent_map(); if (!em) { ret = -ENOMEM; @@ -7030,6 +7037,10 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, return ERR_PTR(ret); } + btrfs_qgroup_update_reserved_bytes(root->fs_info, + root->root_key.objectid, + ins.offset, 1); + return em; } @@ -9583,6 +9594,11 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, btrfs_end_transaction(trans, root); break; } + + btrfs_qgroup_update_reserved_bytes(root->fs_info, + root->root_key.objectid, + ins.offset, 1); + btrfs_drop_extent_cache(inode, cur_offset, cur_offset + ins.offset -1, 0); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index de321c90130c..cd291733dc3e 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -72,6 +72,7 @@ struct btrfs_qgroup { /* * reservation tracking */ + u64 may_use; u64 reserved; /* @@ -1417,6 +1418,8 @@ static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info, WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes); qgroup->excl += sign * oper->num_bytes; qgroup->excl_cmpr += sign * oper->num_bytes; + if (sign > 0) + qgroup->reserved -= oper->num_bytes; qgroup_dirty(fs_info, qgroup); @@ -1436,6 +1439,8 @@ static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info, qgroup->rfer_cmpr += sign * oper->num_bytes; WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes); qgroup->excl += sign * oper->num_bytes; + if (sign > 0) + qgroup->reserved -= oper->num_bytes; qgroup->excl_cmpr += sign * oper->num_bytes; qgroup_dirty(fs_info, qgroup); @@ -2378,6 +2383,61 @@ out: return ret; } +int btrfs_qgroup_update_reserved_bytes(struct btrfs_fs_info *fs_info, + u64 ref_root, + u64 num_bytes, + int sign) +{ + struct btrfs_root *quota_root; + struct btrfs_qgroup *qgroup; 
+ int ret = 0; + struct ulist_node *unode; + struct ulist_iterator uiter; + + if (!is_fstree(ref_root) || !fs_info->quota_enabled) + return 0; + + if (num_bytes == 0) + return 0; + + spin_lock(&fs_info->qgroup_lock); + quota_root = fs_info->quota_root; + if (!quota_root) + goto out; + + qgroup = find_qgroup_rb(fs_info, ref_root); + if (!qgroup) + goto out; + + ulist_reinit(fs_info->qgroup_ulist); + ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, + (uintptr_t)qgroup, GFP_ATOMIC); + if (ret < 0) + goto out; + + ULIST_ITER_INIT(&uiter); + while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { + struct btrfs_qgroup *qg; + struct btrfs_qgroup_list *glist; + + qg = u64_to_ptr(unode->aux); + + qg->reserved += sign * num_bytes; + + list_for_each_entry(glist, &qg->groups, next_group) { + ret = ulist_add(fs_info->qgroup_ulist, + glist->group->qgroupid, + (uintptr_t)glist->group, GFP_ATOMIC); + if (ret < 0) + goto out; + } + } + +out: + spin_unlock(&fs_info->qgroup_lock); + return ret; +} + /* * reserve some space for a qgroup and all its parents. 
The reservation takes * place with start_transaction or dealloc_reserve, similar to ENOSPC @@ -2426,14 +2486,14 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) qg = u64_to_ptr(unode->aux); if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && - qg->reserved + (s64)qg->rfer + num_bytes > + qg->reserved + qg->may_use + (s64)qg->rfer + num_bytes > qg->max_rfer) { ret = -EDQUOT; goto out; } if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) && - qg->reserved + (s64)qg->excl + num_bytes > + qg->reserved + qg->may_use + (s64)qg->excl + num_bytes > qg->max_excl) { ret = -EDQUOT; goto out; @@ -2457,7 +2517,7 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) qg = u64_to_ptr(unode->aux); - qg->reserved += num_bytes; + qg->may_use += num_bytes; } out: @@ -2503,7 +2563,7 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes) qg = u64_to_ptr(unode->aux); - qg->reserved -= num_bytes; + qg->may_use -= num_bytes; list_for_each_entry(glist, &qg->groups, next_group) { ret = ulist_add(fs_info->qgroup_ulist, diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index c5242aa9a4b2..64d49b8482b3 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -94,6 +94,10 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans, int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, struct btrfs_qgroup_inherit *inherit); +int btrfs_qgroup_update_reserved_bytes(struct btrfs_fs_info *fs_info, + u64 ref_root, + u64 num_bytes, + int sign); int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes); void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes); -- cgit v1.2.3 From e2d1f92399afb6ec518b68867ed10db2585b283a Mon Sep 17 00:00:00 2001 From: Dongsheng Yang Date: Fri, 6 Feb 2015 10:26:52 -0500 Subject: btrfs: qgroup: do a reservation in a higher level. There are two problems in qgroup: a). The PAGE_CACHE is 4K, even when we are writing a data of 1K, qgroup will reserve a 4K size. 
As a result, the last 3K in a qgroup is not available to the user. b). When a user writes inline data, qgroup does not reserve space for it, which leaves a window in which we can exceed the limit of a qgroup. The main idea of this patch is to reserve the data size of write_bytes rather than reserve_bytes. This means qgroup does not care about the amount of data btrfs reserves for the user, but only about the amount of data the user is going to write. Space is reserved when the user wants to write and released when the transaction is committed. In this way, qgroup is decoupled from the complex procedure in btrfs: it only reserves when the user wants to write and accounts when the data is written in commit_transaction(). Signed-off-by: Dongsheng Yang Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 38 +++++---------------------- fs/btrfs/file.c | 4 +-- fs/btrfs/inode.c | 15 ----------- fs/btrfs/qgroup.c | 70 +++----------------------------------------------- fs/btrfs/qgroup.h | 4 --- fs/btrfs/relocation.c | 2 +- 7 files changed, 14 insertions(+), 121 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d48b22f31182..851f2355f3c8 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3448,7 +3448,7 @@ enum btrfs_reserve_flush_enum { BTRFS_RESERVE_FLUSH_ALL, }; -int btrfs_check_data_free_space(struct inode *inode, u64 bytes); +int btrfs_check_data_free_space(struct inode *inode, u64 bytes, u64 write_bytes); void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, struct btrfs_root *root); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4d3774605a85..1eef4ee01d1a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3331,7 +3331,7 @@ again: num_pages *= 16; num_pages *= PAGE_CACHE_SIZE; - ret = btrfs_check_data_free_space(inode, num_pages); + ret = btrfs_check_data_free_space(inode, num_pages, num_pages); if
(ret) goto out_put; @@ -3851,7 +3851,7 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) * This will check the space that the inode allocates from to make sure we have * enough space for bytes. */ -int btrfs_check_data_free_space(struct inode *inode, u64 bytes) +int btrfs_check_data_free_space(struct inode *inode, u64 bytes, u64 write_bytes) { struct btrfs_space_info *data_sinfo; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -3969,7 +3969,7 @@ commit_trans: data_sinfo->flags, bytes, 1); return -ENOSPC; } - ret = btrfs_qgroup_reserve(root, bytes); + ret = btrfs_qgroup_reserve(root, write_bytes); if (ret) goto out; data_sinfo->bytes_may_use += bytes; @@ -3995,7 +3995,6 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) data_sinfo = root->fs_info->data_sinfo; spin_lock(&data_sinfo->lock); WARN_ON(data_sinfo->bytes_may_use < bytes); - btrfs_qgroup_free(root, bytes); data_sinfo->bytes_may_use -= bytes; trace_btrfs_space_reservation(root->fs_info, "space_info", data_sinfo->flags, bytes, 0); @@ -5243,8 +5242,6 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root, u64 qgroup_reserved) { btrfs_block_rsv_release(root, rsv, (u64)-1); - if (qgroup_reserved) - btrfs_qgroup_free(root, qgroup_reserved); } /** @@ -5478,11 +5475,8 @@ out_fail: to_free = 0; } spin_unlock(&BTRFS_I(inode)->lock); - if (dropped) { - if (root->fs_info->quota_enabled) - btrfs_qgroup_free(root, dropped * root->nodesize); + if (dropped) to_free += btrfs_calc_trans_metadata_size(root, dropped); - } if (to_free) { btrfs_block_rsv_release(root, block_rsv, to_free); @@ -5524,9 +5518,6 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) trace_btrfs_space_reservation(root->fs_info, "delalloc", btrfs_ino(inode), to_free, 0); - if (root->fs_info->quota_enabled) { - btrfs_qgroup_free(root, dropped * root->nodesize); - } btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, to_free); @@ -5551,7 +5542,7 @@ int 
btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes) { int ret; - ret = btrfs_check_data_free_space(inode, num_bytes); + ret = btrfs_check_data_free_space(inode, num_bytes, num_bytes); if (ret) return ret; @@ -5727,12 +5718,8 @@ static int pin_down_extent(struct btrfs_root *root, set_extent_dirty(root->fs_info->pinned_extents, bytenr, bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); - if (reserved) { - btrfs_qgroup_update_reserved_bytes(root->fs_info, - root->root_key.objectid, - num_bytes, -1); + if (reserved) trace_btrfs_reserved_extent_free(root, bytenr, num_bytes); - } return 0; } @@ -6470,9 +6457,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, btrfs_put_block_group(cache); trace_btrfs_reserved_extent_free(root, buf->start, buf->len); pin = 0; - btrfs_qgroup_update_reserved_bytes(root->fs_info, - root->root_key.objectid, - buf->len, -1); } out: if (pin) @@ -7205,9 +7189,6 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root, ret = btrfs_discard_extent(root, start, len, NULL); btrfs_add_free_space(cache, start, len); btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc); - btrfs_qgroup_update_reserved_bytes(root->fs_info, - root->root_key.objectid, - len, -1); } btrfs_put_block_group(cache); @@ -7446,9 +7427,6 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, BUG_ON(ret); /* logic error */ ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 0, owner, offset, ins, 1); - btrfs_qgroup_update_reserved_bytes(root->fs_info, - root->root_key.objectid, - ins->offset, 1); btrfs_put_block_group(block_group); return ret; } @@ -7595,10 +7573,6 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, return ERR_PTR(ret); } - btrfs_qgroup_update_reserved_bytes(root->fs_info, - root_objectid, - ins.offset, 1); - buf = btrfs_init_new_buffer(trans, root, ins.objectid, level); BUG_ON(IS_ERR(buf)); /* -ENOMEM */ diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 
faef1d64394d..23b6e03f8465 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1510,7 +1510,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, } reserve_bytes = num_pages << PAGE_CACHE_SHIFT; - ret = btrfs_check_data_free_space(inode, reserve_bytes); + ret = btrfs_check_data_free_space(inode, reserve_bytes, write_bytes); if (ret == -ENOSPC && (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC))) { @@ -2573,7 +2573,7 @@ static long btrfs_fallocate(struct file *file, int mode, * Make sure we have enough space before we do the * allocation. */ - ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); + ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start, alloc_end - alloc_start); if (ret) return ret; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a9f69a0d4b08..27b59b8362f9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -754,9 +754,6 @@ retry: } goto out_free; } - btrfs_qgroup_update_reserved_bytes(root->fs_info, - root->root_key.objectid, - ins.offset, 1); /* * here we're doing allocation and writeback of the * compressed pages @@ -981,10 +978,6 @@ static noinline int cow_file_range(struct inode *inode, if (ret < 0) goto out_unlock; - btrfs_qgroup_update_reserved_bytes(root->fs_info, - root->root_key.objectid, - ins.offset, 1); - em = alloc_extent_map(); if (!em) { ret = -ENOMEM; @@ -7037,10 +7030,6 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, return ERR_PTR(ret); } - btrfs_qgroup_update_reserved_bytes(root->fs_info, - root->root_key.objectid, - ins.offset, 1); - return em; } @@ -9595,10 +9584,6 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, break; } - btrfs_qgroup_update_reserved_bytes(root->fs_info, - root->root_key.objectid, - ins.offset, 1); - btrfs_drop_extent_cache(inode, cur_offset, cur_offset + ins.offset -1, 0); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index cd291733dc3e..17881ad8ed96 100644 --- 
a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -72,7 +72,6 @@ struct btrfs_qgroup { /* * reservation tracking */ - u64 may_use; u64 reserved; /* @@ -2383,67 +2382,6 @@ out: return ret; } -int btrfs_qgroup_update_reserved_bytes(struct btrfs_fs_info *fs_info, - u64 ref_root, - u64 num_bytes, - int sign) -{ - struct btrfs_root *quota_root; - struct btrfs_qgroup *qgroup; - int ret = 0; - struct ulist_node *unode; - struct ulist_iterator uiter; - - if (!is_fstree(ref_root) || !fs_info->quota_enabled) - return 0; - - if (num_bytes == 0) - return 0; - - spin_lock(&fs_info->qgroup_lock); - quota_root = fs_info->quota_root; - if (!quota_root) - goto out; - - qgroup = find_qgroup_rb(fs_info, ref_root); - if (!qgroup) - goto out; - - ulist_reinit(fs_info->qgroup_ulist); - ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, - (uintptr_t)qgroup, GFP_ATOMIC); - if (ret < 0) - goto out; - - ULIST_ITER_INIT(&uiter); - while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { - struct btrfs_qgroup *qg; - struct btrfs_qgroup_list *glist; - - qg = u64_to_ptr(unode->aux); - - qg->reserved += sign * num_bytes; - - list_for_each_entry(glist, &qg->groups, next_group) { - ret = ulist_add(fs_info->qgroup_ulist, - glist->group->qgroupid, - (uintptr_t)glist->group, GFP_ATOMIC); - if (ret < 0) - goto out; - } - } - -out: - spin_unlock(&fs_info->qgroup_lock); - return ret; -} - -/* - * reserve some space for a qgroup and all its parents. The reservation takes - * place with start_transaction or dealloc_reserve, similar to ENOSPC - * accounting. If not enough space is available, EDQUOT is returned. - * We assume that the requested space is new for all qgroups. 
- */ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) { struct btrfs_root *quota_root; @@ -2486,14 +2424,14 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) qg = u64_to_ptr(unode->aux); if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && - qg->reserved + qg->may_use + (s64)qg->rfer + num_bytes > + qg->reserved + (s64)qg->rfer + num_bytes > qg->max_rfer) { ret = -EDQUOT; goto out; } if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) && - qg->reserved + qg->may_use + (s64)qg->excl + num_bytes > + qg->reserved + (s64)qg->excl + num_bytes > qg->max_excl) { ret = -EDQUOT; goto out; @@ -2517,7 +2455,7 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) qg = u64_to_ptr(unode->aux); - qg->may_use += num_bytes; + qg->reserved += num_bytes; } out: @@ -2563,7 +2501,7 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes) qg = u64_to_ptr(unode->aux); - qg->may_use -= num_bytes; + qg->reserved -= num_bytes; list_for_each_entry(glist, &qg->groups, next_group) { ret = ulist_add(fs_info->qgroup_ulist, diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index 64d49b8482b3..c5242aa9a4b2 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -94,10 +94,6 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans, int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, struct btrfs_qgroup_inherit *inherit); -int btrfs_qgroup_update_reserved_bytes(struct btrfs_fs_info *fs_info, - u64 ref_root, - u64 num_bytes, - int sign); int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes); void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 840a4eb0f396..74b24b01d574 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3027,7 +3027,7 @@ int prealloc_file_extent_cluster(struct inode *inode, mutex_lock(&inode->i_mutex); ret = btrfs_check_data_free_space(inode, cluster->end + - 1 - 
cluster->start); + 1 - cluster->start, 0); if (ret) goto out; -- cgit v1.2.3