diff options
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/block-group.c | 1 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 5 | ||||
-rw-r--r-- | fs/btrfs/delalloc-space.c | 7 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 6 | ||||
-rw-r--r-- | fs/btrfs/file.c | 56 | ||||
-rw-r--r-- | fs/btrfs/inode-map.c | 4 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 15 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 6 | ||||
-rw-r--r-- | fs/btrfs/qgroup.c | 4 | ||||
-rw-r--r-- | fs/btrfs/ref-verify.c | 2 | ||||
-rw-r--r-- | fs/btrfs/relocation.c | 9 | ||||
-rw-r--r-- | fs/btrfs/send.c | 2 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 36 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 6 |
14 files changed, 90 insertions, 69 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index bf7e3f23bba7..670700cb1110 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1761,6 +1761,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) btrfs_err(info, "bg %llu is a mixed block group but filesystem hasn't enabled mixed block groups", cache->key.objectid); + btrfs_put_block_group(cache); ret = -EINVAL; goto error; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 19d669d12ca1..fe2b8765d9e6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -734,8 +734,6 @@ struct btrfs_fs_info { struct btrfs_workqueue *fixup_workers; struct btrfs_workqueue *delayed_workers; - /* the extent workers do delayed refs on the extent allocation tree */ - struct btrfs_workqueue *extent_workers; struct task_struct *transaction_kthread; struct task_struct *cleaner_kthread; u32 thread_pool_size; @@ -2489,8 +2487,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, int nitems, bool use_global_rsv); void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *rsv); -void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes, - bool qgroup_free); +void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes); int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes); u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c index d949d7d2abed..db9f2c58eb4a 100644 --- a/fs/btrfs/delalloc-space.c +++ b/fs/btrfs/delalloc-space.c @@ -381,7 +381,6 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) out_qgroup: btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve); out_fail: - btrfs_inode_rsv_release(inode, true); if (delalloc_lock) mutex_unlock(&inode->delalloc_mutex); return ret; @@ -418,7 +417,6 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes, * btrfs_delalloc_release_extents - release our outstanding_extents * @inode: the inode to balance the reservation for. * @num_bytes: the number of bytes we originally reserved with - * @qgroup_free: do we need to free qgroup meta reservation or convert them. * * When we reserve space we increase outstanding_extents for the extents we may * add. Once we've set the range as delalloc or created our ordered extents we @@ -426,8 +424,7 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes, * temporarily tracked outstanding_extents. This _must_ be used in conjunction * with btrfs_delalloc_reserve_metadata. */ -void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes, - bool qgroup_free) +void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes) { struct btrfs_fs_info *fs_info = inode->root->fs_info; unsigned num_extents; @@ -441,7 +438,7 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes, if (btrfs_is_testing(fs_info)) return; - btrfs_inode_rsv_release(inode, qgroup_free); + btrfs_inode_rsv_release(inode, true); } /** diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 044981cf6df9..402b61bf345c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2008,7 +2008,6 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) btrfs_destroy_workqueue(fs_info->readahead_workers); btrfs_destroy_workqueue(fs_info->flush_workers); btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers); - btrfs_destroy_workqueue(fs_info->extent_workers); /* * Now that all other work queues are destroyed, we can safely destroy * the queues used for metadata I/O, since tasks from those other work @@ -2214,10 +2213,6 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info, max_active, 2); fs_info->qgroup_rescan_workers = btrfs_alloc_workqueue(fs_info, "qgroup-rescan", flags, 1, 0); - fs_info->extent_workers = - btrfs_alloc_workqueue(fs_info, "extent-refs", flags, - min_t(u64, fs_devices->num_devices, - max_active), 8); if (!(fs_info->workers && fs_info->delalloc_workers && fs_info->submit_workers && fs_info->flush_workers && @@ -2228,7 +2223,6 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info, fs_info->endio_freespace_worker && fs_info->rmw_workers && fs_info->caching_workers && fs_info->readahead_workers && fs_info->fixup_workers && fs_info->delayed_workers && - fs_info->extent_workers && fs_info->qgroup_rescan_workers)) { return -ENOMEM; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8fe4eb7e5045..435a502a3226 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1591,7 +1591,6 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_root *root = BTRFS_I(inode)->root; struct page **pages = NULL; - struct extent_state *cached_state = NULL; struct extent_changeset *data_reserved = NULL; u64 release_bytes = 0; u64 lockstart; @@ -1611,6 +1610,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, return -ENOMEM; while (iov_iter_count(i) > 0) { + struct extent_state *cached_state = NULL; size_t offset = offset_in_page(pos); size_t sector_offset; size_t write_bytes = min(iov_iter_count(i), @@ -1692,7 +1692,7 @@ again: force_page_uptodate); if (ret) { btrfs_delalloc_release_extents(BTRFS_I(inode), - reserve_bytes, true); + reserve_bytes); break; } @@ -1704,7 +1704,7 @@ again: if (extents_locked == -EAGAIN) goto again; btrfs_delalloc_release_extents(BTRFS_I(inode), - reserve_bytes, true); + reserve_bytes); ret = extents_locked; break; } @@ -1758,11 +1758,21 @@ again: if (copied > 0) ret = btrfs_dirty_pages(inode, pages, dirty_pages, pos, copied, &cached_state); + + /* + * If we have not locked the extent range, because the range's + * start offset is >= i_size, we might still have a non-NULL + * cached extent state, acquired while marking the extent range + * as delalloc through btrfs_dirty_pages(). Therefore free any + * possible cached extent state to avoid a memory leak. + */ if (extents_locked) unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, &cached_state); - btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes, - true); + else + free_extent_state(cached_state); + + btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes); if (ret) { btrfs_drop_pages(pages, num_pages); break; @@ -2057,25 +2067,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) struct btrfs_trans_handle *trans; struct btrfs_log_ctx ctx; int ret = 0, err; - u64 len; - /* - * If the inode needs a full sync, make sure we use a full range to - * avoid log tree corruption, due to hole detection racing with ordered - * extent completion for adjacent ranges, and assertion failures during - * hole detection. - */ - if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, - &BTRFS_I(inode)->runtime_flags)) { - start = 0; - end = LLONG_MAX; - } - - /* - * The range length can be represented by u64, we have to do the typecasts - * to avoid signed overflow if it's [0, LLONG_MAX] eg. from fsync() - */ - len = (u64)end - (u64)start + 1; trace_btrfs_sync_file(file, datasync); btrfs_init_log_ctx(&ctx, inode); @@ -2102,6 +2094,19 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) atomic_inc(&root->log_batch); /* + * If the inode needs a full sync, make sure we use a full range to + * avoid log tree corruption, due to hole detection racing with ordered + * extent completion for adjacent ranges, and assertion failures during + * hole detection. Do this while holding the inode lock, to avoid races + * with other tasks. + */ + if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, + &BTRFS_I(inode)->runtime_flags)) { + start = 0; + end = LLONG_MAX; + } + + /* * Before we acquired the inode's lock, someone may have dirtied more * pages in the target range. We need to make sure that writeback for * any such pages does not start while we are logging the inode, because @@ -2128,8 +2133,11 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) /* * We have to do this here to avoid the priority inversion of waiting on * IO of a lower priority task while holding a transaction open. + * + * Also, the range length can be represented by u64, we have to do the + * typecasts to avoid signed overflow if it's [0, LLONG_MAX]. */ - ret = btrfs_wait_ordered_range(inode, start, len); + ret = btrfs_wait_ordered_range(inode, start, (u64)end - (u64)start + 1); if (ret) { up_write(&BTRFS_I(inode)->dio_sem); inode_unlock(inode); diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 63cad7865d75..37345fb6191d 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -501,13 +501,13 @@ again: ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, prealloc, prealloc, &alloc_hint); if (ret) { - btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc, true); + btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc); btrfs_delalloc_release_metadata(BTRFS_I(inode), prealloc, true); goto out_put; } ret = btrfs_write_out_ino_cache(root, trans, path, inode); - btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc, false); + btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc); out_put: iput(inode); out_release: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a0546401bc0a..c3f386b7cc0b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2206,7 +2206,7 @@ again: ClearPageChecked(page); set_page_dirty(page); - btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, false); + btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); out: unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end, &cached_state); @@ -4951,7 +4951,7 @@ again: if (!page) { btrfs_delalloc_release_space(inode, data_reserved, block_start, blocksize, true); - btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, true); + btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize); ret = -ENOMEM; goto out; } @@ -5018,7 +5018,7 @@ out_unlock: if (ret) btrfs_delalloc_release_space(inode, data_reserved, block_start, blocksize, true); - btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, (ret != 0)); + btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize); unlock_page(page); put_page(page); out: @@ -6305,13 +6305,16 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, u32 sizes[2]; int nitems = name ? 2 : 1; unsigned long ptr; + unsigned int nofs_flag; int ret; path = btrfs_alloc_path(); if (!path) return ERR_PTR(-ENOMEM); + nofs_flag = memalloc_nofs_save(); inode = new_inode(fs_info->sb); + memalloc_nofs_restore(nofs_flag); if (!inode) { btrfs_free_path(path); return ERR_PTR(-ENOMEM); @@ -8706,7 +8709,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) } else if (ret >= 0 && (size_t)ret < count) btrfs_delalloc_release_space(inode, data_reserved, offset, count - (size_t)ret, true); - btrfs_delalloc_release_extents(BTRFS_I(inode), count, false); + btrfs_delalloc_release_extents(BTRFS_I(inode), count); } out: if (wakeup) @@ -9056,7 +9059,7 @@ again: unlock_extent_cached(io_tree, page_start, page_end, &cached_state); if (!ret2) { - btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, true); + btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); sb_end_pagefault(inode->i_sb); extent_changeset_free(data_reserved); return VM_FAULT_LOCKED; @@ -9065,7 +9068,7 @@ again: out_unlock: unlock_page(page); out: - btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, (ret != 0)); + btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); btrfs_delalloc_release_space(inode, data_reserved, page_start, reserved_space, (ret != 0)); out_noreserve: diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index de730e56d3f5..7c145a41decd 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1360,8 +1360,7 @@ again: unlock_page(pages[i]); put_page(pages[i]); } - btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT, - false); + btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT); extent_changeset_free(data_reserved); return i_done; out: @@ -1372,8 +1371,7 @@ out: btrfs_delalloc_release_space(inode, data_reserved, start_index << PAGE_SHIFT, page_cnt << PAGE_SHIFT, true); - btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT, - true); + btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT); extent_changeset_free(data_reserved); return ret; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index c4bb69941c77..3ad151655eb8 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -3629,7 +3629,7 @@ int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, return 0; BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); - trace_qgroup_meta_reserve(root, type, (s64)num_bytes); + trace_qgroup_meta_reserve(root, (s64)num_bytes, type); ret = qgroup_reserve(root, num_bytes, enforce, type); if (ret < 0) return ret; @@ -3676,7 +3676,7 @@ void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes, */ num_bytes = sub_root_meta_rsv(root, num_bytes, type); BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); - trace_qgroup_meta_reserve(root, type, -(s64)num_bytes); + trace_qgroup_meta_reserve(root, -(s64)num_bytes, type); btrfs_qgroup_free_refroot(fs_info, root->root_key.objectid, num_bytes, type); } diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index e87cbdad02a3..b57f3618e58e 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -500,7 +500,7 @@ static int process_leaf(struct btrfs_root *root, struct btrfs_extent_data_ref *dref; struct btrfs_shared_data_ref *sref; u32 count; - int i = 0, tree_block_level = 0, ret; + int i = 0, tree_block_level = 0, ret = 0; struct btrfs_key key; int nritems = btrfs_header_nritems(leaf); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 00504657b602..5cd42b66818c 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3277,6 +3277,8 @@ static int relocate_file_extent_cluster(struct inode *inode, if (!page) { btrfs_delalloc_release_metadata(BTRFS_I(inode), PAGE_SIZE, true); + btrfs_delalloc_release_extents(BTRFS_I(inode), + PAGE_SIZE); ret = -ENOMEM; goto out; } @@ -3297,7 +3299,7 @@ static int relocate_file_extent_cluster(struct inode *inode, btrfs_delalloc_release_metadata(BTRFS_I(inode), PAGE_SIZE, true); btrfs_delalloc_release_extents(BTRFS_I(inode), - PAGE_SIZE, true); + PAGE_SIZE); ret = -EIO; goto out; } @@ -3326,7 +3328,7 @@ static int relocate_file_extent_cluster(struct inode *inode, btrfs_delalloc_release_metadata(BTRFS_I(inode), PAGE_SIZE, true); btrfs_delalloc_release_extents(BTRFS_I(inode), - PAGE_SIZE, true); + PAGE_SIZE); clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, @@ -3342,8 +3344,7 @@ static int relocate_file_extent_cluster(struct inode *inode, put_page(page); index++; - btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, - false); + btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); balance_dirty_pages_ratelimited(inode->i_mapping); btrfs_throttle(fs_info); } diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index f3215028235c..123ac54af071 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -5085,7 +5085,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *path; struct btrfs_key key; int ret; - u64 clone_src_i_size; + u64 clone_src_i_size = 0; /* * Prevent cloning from a zero offset with a length matching the sector diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 29b82a795522..8a6cc600bf18 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2932,7 +2932,8 @@ out: * in the tree of log roots */ static int update_log_root(struct btrfs_trans_handle *trans, - struct btrfs_root *log) + struct btrfs_root *log, + struct btrfs_root_item *root_item) { struct btrfs_fs_info *fs_info = log->fs_info; int ret; @@ -2940,10 +2941,10 @@ static int update_log_root(struct btrfs_trans_handle *trans, if (log->log_transid == 1) { /* insert root item on the first sync */ ret = btrfs_insert_root(trans, fs_info->log_root_tree, - &log->root_key, &log->root_item); + &log->root_key, root_item); } else { ret = btrfs_update_root(trans, fs_info->log_root_tree, - &log->root_key, &log->root_item); + &log->root_key, root_item); } return ret; } @@ -3041,6 +3042,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_root *log = root->log_root; struct btrfs_root *log_root_tree = fs_info->log_root_tree; + struct btrfs_root_item new_root_item; int log_transid = 0; struct btrfs_log_ctx root_log_ctx; struct blk_plug plug; @@ -3104,18 +3106,26 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, goto out; } + /* + * We _must_ update under the root->log_mutex in order to make sure we + * have a consistent view of the log root we are trying to commit at + * this moment. + * + * We _must_ copy this into a local copy, because we are not holding the + * log_root_tree->log_mutex yet. This is important because when we + * commit the log_root_tree we must have a consistent view of the + * log_root_tree when we update the super block to point at the + * log_root_tree bytenr. If we update the log_root_tree here we'll race + * with the commit and possibly point at the new block which we may not + * have written out. + */ btrfs_set_root_node(&log->root_item, log->node); + memcpy(&new_root_item, &log->root_item, sizeof(new_root_item)); root->log_transid++; log->log_transid = root->log_transid; root->log_start_pid = 0; /* - * Update or create log root item under the root's log_mutex to prevent - * races with concurrent log syncs that can lead to failure to update - * log root item because it was not created yet. - */ - ret = update_log_root(trans, log); - /* * IO has been started, blocks of the log tree have WRITTEN flag set * in their headers. new modifications of the log will be written to * new positions. so it's safe to allow log writers to go in. @@ -3135,6 +3145,14 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, mutex_unlock(&log_root_tree->log_mutex); mutex_lock(&log_root_tree->log_mutex); + + /* + * Now we are safe to update the log_root_tree because we're under the + * log_mutex, and we're a current writer so we're holding the commit + * open until we drop the log_mutex. + */ + ret = update_log_root(trans, log, &new_root_item); + if (atomic_dec_and_test(&log_root_tree->log_writers)) { /* atomic_dec_and_test implies a barrier */ cond_wake_up_nomb(&log_root_tree->log_writer_wait); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index cdd7af424033..bdfe4493e43a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3845,7 +3845,11 @@ static int alloc_profile_is_valid(u64 flags, int extended) return !extended; /* "0" is valid for usual profiles */ /* true if exactly one bit set */ - return is_power_of_2(flags); + /* + * Don't use is_power_of_2(unsigned long) because it won't work + * for the single profile (1ULL << 48) on 32-bit CPUs. + */ + return flags != 0 && (flags & (flags - 1)) == 0; } static inline int balance_need_close(struct btrfs_fs_info *fs_info) |