From 41a2ee75aab0290a5899677437736ec715dcd1b6 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 17 Jan 2020 09:02:21 -0500 Subject: btrfs: introduce per-inode file extent tree In order to keep track of where we have file extents on disk, and thus where it is safe to adjust the i_size to, we need to have a tree in place to keep track of the contiguous areas we have file extents for. Add helpers to use this tree, as it's not required for NO_HOLES file systems. We will use this by setting DIRTY for areas we know we have file extent items set, and clearing it when we remove file extent items for truncation. Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 17088a112ed0..16ade35e8170 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -88,6 +88,7 @@ TRACE_DEFINE_ENUM(COMMIT_TRANS); { IO_TREE_RELOC_BLOCKS, "RELOC_BLOCKS" }, \ { IO_TREE_TRANS_DIRTY_PAGES, "TRANS_DIRTY_PAGES" }, \ { IO_TREE_ROOT_DIRTY_LOG_PAGES, "ROOT_DIRTY_LOG_PAGES" }, \ + { IO_TREE_INODE_FILE_EXTENT, "INODE_FILE_EXTENT" }, \ { IO_TREE_SELFTEST, "SELFTEST" }) #define BTRFS_GROUP_FLAGS \ -- cgit v1.2.3 From 3f1c64ce04387773d2b0d8ef6a7e573ff80e4436 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 17 Jan 2020 09:02:24 -0500 Subject: btrfs: delete the ordered isize update code Now that we have a safe way to update the isize, remove all of this code as it's no longer needed. 
Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ordered-data.c | 128 ------------------------------------------- fs/btrfs/ordered-data.h | 7 --- include/trace/events/btrfs.h | 1 - 3 files changed, 136 deletions(-) (limited to 'include') diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index a65f189a5b94..66170c8cea6f 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -785,134 +785,6 @@ out: return entry; } -/* - * After an extent is done, call this to conditionally update the on disk - * i_size. i_size is updated to cover any fully written part of the file. - */ -int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, - struct btrfs_ordered_extent *ordered) -{ - struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; - u64 disk_i_size; - u64 new_i_size; - u64 i_size = i_size_read(inode); - struct rb_node *node; - struct rb_node *prev = NULL; - struct btrfs_ordered_extent *test; - int ret = 1; - u64 orig_offset = offset; - - spin_lock_irq(&tree->lock); - if (ordered) { - offset = entry_end(ordered); - if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags)) - offset = min(offset, - ordered->file_offset + - ordered->truncated_len); - } else { - offset = ALIGN(offset, btrfs_inode_sectorsize(inode)); - } - disk_i_size = BTRFS_I(inode)->disk_i_size; - - /* - * truncate file. - * If ordered is not NULL, then this is called from endio and - * disk_i_size will be updated by either truncate itself or any - * in-flight IOs which are inside the disk_i_size. - * - * Because btrfs_setsize() may set i_size with disk_i_size if truncate - * fails somehow, we need to make sure we have a precise disk_i_size by - * updating it as usual. 
- * - */ - if (!ordered && disk_i_size > i_size) { - BTRFS_I(inode)->disk_i_size = orig_offset; - ret = 0; - goto out; - } - - /* - * if the disk i_size is already at the inode->i_size, or - * this ordered extent is inside the disk i_size, we're done - */ - if (disk_i_size == i_size) - goto out; - - /* - * We still need to update disk_i_size if outstanding_isize is greater - * than disk_i_size. - */ - if (offset <= disk_i_size && - (!ordered || ordered->outstanding_isize <= disk_i_size)) - goto out; - - /* - * walk backward from this ordered extent to disk_i_size. - * if we find an ordered extent then we can't update disk i_size - * yet - */ - if (ordered) { - node = rb_prev(&ordered->rb_node); - } else { - prev = tree_search(tree, offset); - /* - * we insert file extents without involving ordered struct, - * so there should be no ordered struct cover this offset - */ - if (prev) { - test = rb_entry(prev, struct btrfs_ordered_extent, - rb_node); - BUG_ON(offset_in_entry(test, offset)); - } - node = prev; - } - for (; node; node = rb_prev(node)) { - test = rb_entry(node, struct btrfs_ordered_extent, rb_node); - - /* We treat this entry as if it doesn't exist */ - if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags)) - continue; - - if (entry_end(test) <= disk_i_size) - break; - if (test->file_offset >= i_size) - break; - - /* - * We don't update disk_i_size now, so record this undealt - * i_size. Or we will not know the real i_size. - */ - if (test->outstanding_isize < offset) - test->outstanding_isize = offset; - if (ordered && - ordered->outstanding_isize > test->outstanding_isize) - test->outstanding_isize = ordered->outstanding_isize; - goto out; - } - new_i_size = min_t(u64, offset, i_size); - - /* - * Some ordered extents may completed before the current one, and - * we hold the real i_size in ->outstanding_isize. 
- */ - if (ordered && ordered->outstanding_isize > new_i_size) - new_i_size = min_t(u64, ordered->outstanding_isize, i_size); - BTRFS_I(inode)->disk_i_size = new_i_size; - ret = 0; -out: - /* - * We need to do this because we can't remove ordered extents until - * after the i_disk_size has been updated and then the inode has been - * updated to reflect the change, so we need to tell anybody who finds - * this ordered extent that we've already done all the real work, we - * just haven't completed all the other work. - */ - if (ordered) - set_bit(BTRFS_ORDERED_UPDATED_ISIZE, &ordered->flags); - spin_unlock_irq(&tree->lock); - return ret; -} - /* * search the ordered extents for one corresponding to 'offset' and * try to find a checksum. This is used because we allow pages to diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 3beb4da4ab41..a46f319d9ae0 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -52,11 +52,6 @@ enum { BTRFS_ORDERED_DIRECT, /* We had an io error when writing this out */ BTRFS_ORDERED_IOERR, - /* - * indicates whether this ordered extent has done its due diligence in - * updating the isize - */ - BTRFS_ORDERED_UPDATED_ISIZE, /* Set when we have to truncate an extent */ BTRFS_ORDERED_TRUNCATED, /* Regular IO for COW */ @@ -182,8 +177,6 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range( struct btrfs_inode *inode, u64 file_offset, u64 len); -int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, - struct btrfs_ordered_extent *ordered); int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u8 *sum, int len); u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr, diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 16ade35e8170..f1f2b6a04052 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -469,7 +469,6 @@ DEFINE_EVENT( { (1 << BTRFS_ORDERED_PREALLOC), "PREALLOC" }, \ { (1 << BTRFS_ORDERED_DIRECT), "DIRECT" 
}, \ { (1 << BTRFS_ORDERED_IOERR), "IOERR" }, \ - { (1 << BTRFS_ORDERED_UPDATED_ISIZE), "UPDATED_ISIZE" }, \ { (1 << BTRFS_ORDERED_TRUNCATED), "TRUNCATED" }) -- cgit v1.2.3 From fe119a6eeb670585e29dbe3932e00ad29ae8f5f9 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Jan 2020 16:09:18 +0200 Subject: btrfs: switch to per-transaction pinned extents This commit flips the switch to start tracking/processing pinned extents on a per-transaction basis. It mostly replaces all references from btrfs_fs_info::(pinned_extents|freed_extents[]) to btrfs_transaction::pinned_extents. Two notable modifications that warrant explicit mention are changing clean_pinned_extents to get a reference to the previously running transaction. The other one is removal of call to btrfs_destroy_pinned_extent since transactions are going to be cleaned in btrfs_cleanup_one_transaction. Reviewed-by: Josef Bacik Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 38 +++++++++++++++++++++++++------------- fs/btrfs/ctree.h | 4 ++-- fs/btrfs/disk-io.c | 30 +++++------------------------- fs/btrfs/extent-io-tree.h | 4 ++-- fs/btrfs/extent-tree.c | 31 ++++++++----------------------- fs/btrfs/free-space-cache.c | 2 +- fs/btrfs/transaction.c | 2 ++ fs/btrfs/transaction.h | 1 + include/trace/events/btrfs.h | 4 ++-- 9 files changed, 48 insertions(+), 68 deletions(-) (limited to 'include') diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 9fec78a8c759..b8f39a679064 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -460,7 +460,7 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end int ret; while (start < end) { - ret = find_first_extent_bit(info->pinned_extents, start, + ret = find_first_extent_bit(&info->excluded_extents, start, &extent_start, &extent_end, EXTENT_DIRTY | EXTENT_UPTODATE, NULL); @@ -1248,30 +1248,42 @@ out: return ret; } -static bool 
clean_pinned_extents(struct btrfs_block_group *bg) +static bool clean_pinned_extents(struct btrfs_trans_handle *trans, + struct btrfs_block_group *bg) { struct btrfs_fs_info *fs_info = bg->fs_info; + struct btrfs_transaction *prev_trans = NULL; const u64 start = bg->start; const u64 end = start + bg->length - 1; int ret; + spin_lock(&fs_info->trans_lock); + if (trans->transaction->list.prev != &fs_info->trans_list) { + prev_trans = list_last_entry(&trans->transaction->list, + struct btrfs_transaction, list); + refcount_inc(&prev_trans->use_count); + } + spin_unlock(&fs_info->trans_lock); + /* * Hold the unused_bg_unpin_mutex lock to avoid racing with * btrfs_finish_extent_commit(). If we are at transaction N, another * task might be running finish_extent_commit() for the previous * transaction N - 1, and have seen a range belonging to the block - * group in freed_extents[] before we were able to clear the whole - * block group range from freed_extents[]. This means that task can - * lookup for the block group after we unpinned it from freed_extents - * and removed it, leading to a BUG_ON() at unpin_extent_range(). + * group in pinned_extents before we were able to clear the whole block + * group range from pinned_extents. This means that task can lookup for + * the block group after we unpinned it from pinned_extents and removed + * it, leading to a BUG_ON() at unpin_extent_range(). 
*/ mutex_lock(&fs_info->unused_bg_unpin_mutex); - ret = clear_extent_bits(&fs_info->freed_extents[0], start, end, - EXTENT_DIRTY); - if (ret) - goto err; + if (prev_trans) { + ret = clear_extent_bits(&prev_trans->pinned_extents, start, end, + EXTENT_DIRTY); + if (ret) + goto err; + } - ret = clear_extent_bits(&fs_info->freed_extents[1], start, end, + ret = clear_extent_bits(&trans->transaction->pinned_extents, start, end, EXTENT_DIRTY); if (ret) goto err; @@ -1380,7 +1392,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) * We could have pending pinned extents for this block group, * just delete them, we don't care about them anymore. */ - if (!clean_pinned_extents(block_group)) + if (!clean_pinned_extents(trans, block_group)) goto end_trans; /* @@ -2890,7 +2902,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, &cache->space_info->total_bytes_pinned, num_bytes, BTRFS_TOTAL_BYTES_PINNED_BATCH); - set_extent_dirty(info->pinned_extents, + set_extent_dirty(&trans->transaction->pinned_extents, bytenr, bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 22d0cb0019d1..bb237d577725 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -596,8 +596,8 @@ struct btrfs_fs_info { /* keep track of unallocated space */ atomic64_t free_chunk_space; - struct extent_io_tree freed_extents[2]; - struct extent_io_tree *pinned_extents; + /* Track ranges which are used by log trees blocks/logged data extents */ + struct extent_io_tree excluded_extents; /* logical->physical extent mapping */ struct extent_map_tree mapping_tree; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 194c98a61095..e1e111c8b08b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2075,10 +2075,8 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info) btrfs_drop_and_free_fs_root(fs_info, gang[i]); } - if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { + if (test_bit(BTRFS_FS_STATE_ERROR, 
&fs_info->fs_state)) btrfs_free_log_root_tree(NULL, fs_info); - btrfs_destroy_pinned_extent(fs_info, fs_info->pinned_extents); - } } static void btrfs_init_scrub(struct btrfs_fs_info *fs_info) @@ -2749,11 +2747,8 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) fs_info->block_group_cache_tree = RB_ROOT; fs_info->first_logical_byte = (u64)-1; - extent_io_tree_init(fs_info, &fs_info->freed_extents[0], - IO_TREE_FS_INFO_FREED_EXTENTS0, NULL); - extent_io_tree_init(fs_info, &fs_info->freed_extents[1], - IO_TREE_FS_INFO_FREED_EXTENTS1, NULL); - fs_info->pinned_extents = &fs_info->freed_extents[0]; + extent_io_tree_init(fs_info, &fs_info->excluded_extents, + IO_TREE_FS_EXCLUDED_EXTENTS, NULL); set_bit(BTRFS_FS_BARRIER, &fs_info->flags); mutex_init(&fs_info->ordered_operations_mutex); @@ -4434,16 +4429,12 @@ static int btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info, } static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info, - struct extent_io_tree *pinned_extents) + struct extent_io_tree *unpin) { - struct extent_io_tree *unpin; u64 start; u64 end; int ret; - bool loop = true; - unpin = pinned_extents; -again: while (1) { struct extent_state *cached_state = NULL; @@ -4468,15 +4459,6 @@ again: cond_resched(); } - if (loop) { - if (unpin == &fs_info->freed_extents[0]) - unpin = &fs_info->freed_extents[1]; - else - unpin = &fs_info->freed_extents[0]; - loop = false; - goto again; - } - return 0; } @@ -4567,8 +4549,7 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, btrfs_destroy_marked_extents(fs_info, &cur_trans->dirty_pages, EXTENT_DIRTY); - btrfs_destroy_pinned_extent(fs_info, - fs_info->pinned_extents); + btrfs_destroy_pinned_extent(fs_info, &cur_trans->pinned_extents); cur_trans->state =TRANS_STATE_COMPLETED; wake_up(&cur_trans->commit_wait); @@ -4620,7 +4601,6 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info) btrfs_destroy_all_ordered_extents(fs_info); btrfs_destroy_delayed_inodes(fs_info); 
btrfs_assert_delayed_root_empty(fs_info); - btrfs_destroy_pinned_extent(fs_info, fs_info->pinned_extents); btrfs_destroy_all_delalloc_inodes(fs_info); mutex_unlock(&fs_info->transaction_kthread_mutex); diff --git a/fs/btrfs/extent-io-tree.h b/fs/btrfs/extent-io-tree.h index cc3037f9765e..b4a7bad3e82e 100644 --- a/fs/btrfs/extent-io-tree.h +++ b/fs/btrfs/extent-io-tree.h @@ -36,8 +36,8 @@ struct io_failure_record; #define CHUNK_TRIMMED EXTENT_DEFRAG enum { - IO_TREE_FS_INFO_FREED_EXTENTS0, - IO_TREE_FS_INFO_FREED_EXTENTS1, + IO_TREE_FS_PINNED_EXTENTS, + IO_TREE_FS_EXCLUDED_EXTENTS, IO_TREE_INODE_IO, IO_TREE_INODE_IO_FAILURE, IO_TREE_RELOC_BLOCKS, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f97e631aaca5..136fffb76428 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -64,10 +64,8 @@ int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info, u64 start, u64 num_bytes) { u64 end = start + num_bytes - 1; - set_extent_bits(&fs_info->freed_extents[0], - start, end, EXTENT_UPTODATE); - set_extent_bits(&fs_info->freed_extents[1], - start, end, EXTENT_UPTODATE); + set_extent_bits(&fs_info->excluded_extents, start, end, + EXTENT_UPTODATE); return 0; } @@ -79,10 +77,8 @@ void btrfs_free_excluded_extents(struct btrfs_block_group *cache) start = cache->start; end = start + cache->length - 1; - clear_extent_bits(&fs_info->freed_extents[0], - start, end, EXTENT_UPTODATE); - clear_extent_bits(&fs_info->freed_extents[1], - start, end, EXTENT_UPTODATE); + clear_extent_bits(&fs_info->excluded_extents, start, end, + EXTENT_UPTODATE); } static u64 generic_ref_to_space_flags(struct btrfs_ref *ref) @@ -2605,7 +2601,7 @@ static int pin_down_extent(struct btrfs_trans_handle *trans, percpu_counter_add_batch(&cache->space_info->total_bytes_pinned, num_bytes, BTRFS_TOTAL_BYTES_PINNED_BATCH); - set_extent_dirty(fs_info->pinned_extents, bytenr, + set_extent_dirty(&trans->transaction->pinned_extents, bytenr, bytenr + num_bytes - 1, GFP_NOFS | 
__GFP_NOFAIL); return 0; } @@ -2761,11 +2757,6 @@ void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info) } } - if (fs_info->pinned_extents == &fs_info->freed_extents[0]) - fs_info->pinned_extents = &fs_info->freed_extents[1]; - else - fs_info->pinned_extents = &fs_info->freed_extents[0]; - up_write(&fs_info->commit_root_sem); btrfs_update_global_block_rsv(fs_info); @@ -2906,10 +2897,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans) u64 end; int ret; - if (fs_info->pinned_extents == &fs_info->freed_extents[0]) - unpin = &fs_info->freed_extents[1]; - else - unpin = &fs_info->freed_extents[0]; + unpin = &trans->transaction->pinned_extents; while (!TRANS_ABORTED(trans)) { struct extent_state *cached_state = NULL; @@ -2921,12 +2909,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans) mutex_unlock(&fs_info->unused_bg_unpin_mutex); break; } - if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) { - clear_extent_bits(&fs_info->freed_extents[0], start, + if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) + clear_extent_bits(&fs_info->excluded_extents, start, end, EXTENT_UPTODATE); - clear_extent_bits(&fs_info->freed_extents[1], start, - end, EXTENT_UPTODATE); - } if (btrfs_test_opt(fs_info, DISCARD_SYNC)) ret = btrfs_discard_extent(fs_info, start, diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 9d6372139547..bd9c4b5da549 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1086,7 +1086,7 @@ static noinline_for_stack int write_pinned_extent_entries( * We shouldn't have switched the pinned extents yet so this is the * right one */ - unpin = block_group->fs_info->pinned_extents; + unpin = &trans->transaction->pinned_extents; start = block_group->start; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 37680351b7c3..fdfdfc426539 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -336,6 +336,8 @@ loop: list_add_tail(&cur_trans->list, 
&fs_info->trans_list); extent_io_tree_init(fs_info, &cur_trans->dirty_pages, IO_TREE_TRANS_DIRTY_PAGES, fs_info->btree_inode); + extent_io_tree_init(fs_info, &cur_trans->pinned_extents, + IO_TREE_FS_PINNED_EXTENTS, NULL); fs_info->generation++; cur_trans->transid = fs_info->generation; fs_info->running_transaction = cur_trans; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 453cea7c7a72..31ae8d273065 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -71,6 +71,7 @@ struct btrfs_transaction { */ struct list_head io_bgs; struct list_head dropped_roots; + struct extent_io_tree pinned_extents; /* * we need to make sure block group deletion doesn't race with diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index f1f2b6a04052..bcbc763b8814 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -81,8 +81,8 @@ TRACE_DEFINE_ENUM(COMMIT_TRANS); #define show_extent_io_tree_owner(owner) \ __print_symbolic(owner, \ - { IO_TREE_FS_INFO_FREED_EXTENTS0, "FREED_EXTENTS0" }, \ - { IO_TREE_FS_INFO_FREED_EXTENTS1, "FREED_EXTENTS1" }, \ + { IO_TREE_FS_PINNED_EXTENTS, "PINNED_EXTENTS" }, \ + { IO_TREE_FS_EXCLUDED_EXTENTS, "EXCLUDED_EXTENTS" }, \ { IO_TREE_INODE_IO, "INODE_IO" }, \ { IO_TREE_INODE_IO_FAILURE, "INODE_IO_FAILURE" }, \ { IO_TREE_RELOC_BLOCKS, "RELOC_BLOCKS" }, \ -- cgit v1.2.3 From eed0269053cd37ea6ec76ad10cdd2bccc4b807a1 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 21 Feb 2020 13:16:33 +0100 Subject: btrfs: define support masks for ioctl volume args v2 The ioctl data for devices or subvolumes can be passed via btrfs_ioctl_vol_args or btrfs_ioctl_vol_args_v2. The latter is more versatile and needs some caution as some of the flags make sense only for some ioctls. As we're going to extend the flags, define support masks for each ioctl class separately. 
Reviewed-by: Marcos Paulo de Souza Reviewed-by: Nikolay Borisov Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 7a8bc8b920f5..49ed71df5e94 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -97,16 +97,26 @@ struct btrfs_ioctl_qgroup_limit_args { }; /* - * flags for subvolumes + * Arguments for specification of subvolumes or devices, supporting by-name or + * by-id and flags * - * Used by: - * struct btrfs_ioctl_vol_args_v2.flags + * The set of supported flags depends on the ioctl * * BTRFS_SUBVOL_RDONLY is also provided/consumed by the following ioctls: * - BTRFS_IOC_SUBVOL_GETFLAGS * - BTRFS_IOC_SUBVOL_SETFLAGS */ +/* Supported flags for BTRFS_IOC_RM_DEV_V2 */ +#define BTRFS_DEVICE_REMOVE_ARGS_MASK \ + (BTRFS_DEVICE_SPEC_BY_ID) + +/* Supported flags for BTRFS_IOC_SNAP_CREATE_V2 and BTRFS_IOC_SUBVOL_CREATE_V2 */ +#define BTRFS_SUBVOL_CREATE_ARGS_MASK \ + (BTRFS_SUBVOL_CREATE_ASYNC | \ + BTRFS_SUBVOL_RDONLY | \ + BTRFS_SUBVOL_QGROUP_INHERIT) + struct btrfs_ioctl_vol_args_v2 { __s64 fd; __u64 transid; -- cgit v1.2.3 From 949964c928430a42e0d4f514d888a58a201b6fcb Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Fri, 7 Feb 2020 10:05:46 -0300 Subject: btrfs: add new BTRFS_IOC_SNAP_DESTROY_V2 ioctl This ioctl will be responsible for deleting a subvolume using its id. This can be used when a system has a file system mounted from a subvolume, rather than the root file system, like below: / @subvol1/ @subvol2/ @subvol_default/ If only @subvol_default is mounted, we have no path to reach @subvol1 and @subvol2, thus no way to delete them. Current subvolume delete ioctl takes a file handle point as argument, and if @subvol_default is mounted, we can't reach @subvol1 and @subvol2 from the same mount point. 
This patch introduces a new ioctl BTRFS_IOC_SNAP_DESTROY_V2 that takes the extended structure with flags to allow to delete subvolume using subvolid. Now, we can use this new ioctl specifying the subvolume id and refer to the same mount point. It doesn't matter which subvolume was mounted, since we can reach to the desired one using the subvolume id, and then delete it. The full path to the subvolume id is resolved internally and access is verified as if the subvolume was accessed by path. The volume args v2 structure is extended to use the existing union for subvolume id specification, that's valid in case the BTRFS_SUBVOL_SPEC_BY_ID is set. Signed-off-by: Marcos Paulo de Souza Reviewed-by: David Sterba [ update changelog ] Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 136 ++++++++++++++++++++++++++++++++++++++------- include/uapi/linux/btrfs.h | 12 +++- 2 files changed, 127 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7a7d5d4753cd..56bd3ea7fb67 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -28,6 +28,7 @@ #include #include "ctree.h" #include "disk-io.h" +#include "export.h" #include "transaction.h" #include "btrfs_inode.h" #include "print-tree.h" @@ -2842,7 +2843,8 @@ out: } static noinline int btrfs_ioctl_snap_destroy(struct file *file, - void __user *arg) + void __user *arg, + bool destroy_v2) { struct dentry *parent = file->f_path.dentry; struct btrfs_fs_info *fs_info = btrfs_sb(parent->d_sb); @@ -2851,34 +2853,120 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, struct inode *inode; struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_root *dest = NULL; - struct btrfs_ioctl_vol_args *vol_args; - int namelen; + struct btrfs_ioctl_vol_args *vol_args = NULL; + struct btrfs_ioctl_vol_args_v2 *vol_args2 = NULL; + char *subvol_name, *subvol_name_ptr = NULL; + int subvol_namelen; int err = 0; + bool destroy_parent = false; - if (!S_ISDIR(dir->i_mode)) - return 
-ENOTDIR; + if (destroy_v2) { + vol_args2 = memdup_user(arg, sizeof(*vol_args2)); + if (IS_ERR(vol_args2)) + return PTR_ERR(vol_args2); - vol_args = memdup_user(arg, sizeof(*vol_args)); - if (IS_ERR(vol_args)) - return PTR_ERR(vol_args); + if (vol_args2->flags & ~BTRFS_SUBVOL_DELETE_ARGS_MASK) { + err = -EOPNOTSUPP; + goto out; + } - vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; - namelen = strlen(vol_args->name); - if (strchr(vol_args->name, '/') || - strncmp(vol_args->name, "..", namelen) == 0) { - err = -EINVAL; - goto out; + /* + * If SPEC_BY_ID is not set, we are looking for the subvolume by + * name, same as v1 currently does. + */ + if (!(vol_args2->flags & BTRFS_SUBVOL_SPEC_BY_ID)) { + vol_args2->name[BTRFS_SUBVOL_NAME_MAX] = 0; + subvol_name = vol_args2->name; + + err = mnt_want_write_file(file); + if (err) + goto out; + } else { + if (vol_args2->subvolid < BTRFS_FIRST_FREE_OBJECTID) { + err = -EINVAL; + goto out; + } + + err = mnt_want_write_file(file); + if (err) + goto out; + + dentry = btrfs_get_dentry(fs_info->sb, + BTRFS_FIRST_FREE_OBJECTID, + vol_args2->subvolid, 0, 0); + if (IS_ERR(dentry)) { + err = PTR_ERR(dentry); + goto out_drop_write; + } + + /* + * Change the default parent since the subvolume being + * deleted can be outside of the current mount point. + */ + parent = btrfs_get_parent(dentry); + + /* + * At this point dentry->d_name can point to '/' if the + * subvolume we want to destroy is outsite of the + * current mount point, so we need to release the + * current dentry and execute the lookup to return a new + * one with ->d_name pointing to the + * /subvol_name. + */ + dput(dentry); + if (IS_ERR(parent)) { + err = PTR_ERR(parent); + goto out_drop_write; + } + dir = d_inode(parent); + + /* + * If v2 was used with SPEC_BY_ID, a new parent was + * allocated since the subvolume can be outside of the + * current mount point. Later on we need to release this + * new parent dentry. 
+ */ + destroy_parent = true; + + subvol_name_ptr = btrfs_get_subvol_name_from_objectid( + fs_info, vol_args2->subvolid); + if (IS_ERR(subvol_name_ptr)) { + err = PTR_ERR(subvol_name_ptr); + goto free_parent; + } + /* subvol_name_ptr is already NULL termined */ + subvol_name = (char *)kbasename(subvol_name_ptr); + } + } else { + vol_args = memdup_user(arg, sizeof(*vol_args)); + if (IS_ERR(vol_args)) + return PTR_ERR(vol_args); + + vol_args->name[BTRFS_PATH_NAME_MAX] = 0; + subvol_name = vol_args->name; + + err = mnt_want_write_file(file); + if (err) + goto out; } - err = mnt_want_write_file(file); - if (err) - goto out; + subvol_namelen = strlen(subvol_name); + if (strchr(subvol_name, '/') || + strncmp(subvol_name, "..", subvol_namelen) == 0) { + err = -EINVAL; + goto free_subvol_name; + } + + if (!S_ISDIR(dir->i_mode)) { + err = -ENOTDIR; + goto free_subvol_name; + } err = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT); if (err == -EINTR) - goto out_drop_write; - dentry = lookup_one_len(vol_args->name, parent, namelen); + goto free_subvol_name; + dentry = lookup_one_len(subvol_name, parent, subvol_namelen); if (IS_ERR(dentry)) { err = PTR_ERR(dentry); goto out_unlock_dir; @@ -2947,9 +3035,15 @@ out_dput: dput(dentry); out_unlock_dir: inode_unlock(dir); +free_subvol_name: + kfree(subvol_name_ptr); +free_parent: + if (destroy_parent) + dput(parent); out_drop_write: mnt_drop_write_file(file); out: + kfree(vol_args2); kfree(vol_args); return err; } @@ -5474,7 +5568,9 @@ long btrfs_ioctl(struct file *file, unsigned int case BTRFS_IOC_SUBVOL_CREATE_V2: return btrfs_ioctl_snap_create_v2(file, argp, 1); case BTRFS_IOC_SNAP_DESTROY: - return btrfs_ioctl_snap_destroy(file, argp); + return btrfs_ioctl_snap_destroy(file, argp, false); + case BTRFS_IOC_SNAP_DESTROY_V2: + return btrfs_ioctl_snap_destroy(file, argp, true); case BTRFS_IOC_SUBVOL_GETFLAGS: return btrfs_ioctl_subvol_getflags(file, argp); case BTRFS_IOC_SUBVOL_SETFLAGS: diff --git 
a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 49ed71df5e94..b5f3ea36d3cb 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -42,11 +42,14 @@ struct btrfs_ioctl_vol_args { #define BTRFS_DEVICE_SPEC_BY_ID (1ULL << 3) +#define BTRFS_SUBVOL_SPEC_BY_ID (1ULL << 4) + #define BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED \ (BTRFS_SUBVOL_CREATE_ASYNC | \ BTRFS_SUBVOL_RDONLY | \ BTRFS_SUBVOL_QGROUP_INHERIT | \ - BTRFS_DEVICE_SPEC_BY_ID) + BTRFS_DEVICE_SPEC_BY_ID | \ + BTRFS_SUBVOL_SPEC_BY_ID) #define BTRFS_FSID_SIZE 16 #define BTRFS_UUID_SIZE 16 @@ -117,6 +120,10 @@ struct btrfs_ioctl_qgroup_limit_args { BTRFS_SUBVOL_RDONLY | \ BTRFS_SUBVOL_QGROUP_INHERIT) +/* Supported flags for BTRFS_IOC_SNAP_DESTROY_V2 */ +#define BTRFS_SUBVOL_DELETE_ARGS_MASK \ + (BTRFS_SUBVOL_SPEC_BY_ID) + struct btrfs_ioctl_vol_args_v2 { __s64 fd; __u64 transid; @@ -131,6 +138,7 @@ struct btrfs_ioctl_vol_args_v2 { union { char name[BTRFS_SUBVOL_NAME_MAX + 1]; __u64 devid; + __u64 subvolid; }; }; @@ -959,5 +967,7 @@ enum btrfs_err_code { struct btrfs_ioctl_get_subvol_rootref_args) #define BTRFS_IOC_INO_LOOKUP_USER _IOWR(BTRFS_IOCTL_MAGIC, 62, \ struct btrfs_ioctl_ino_lookup_user_args) +#define BTRFS_IOC_SNAP_DESTROY_V2 _IOW(BTRFS_IOCTL_MAGIC, 63, \ + struct btrfs_ioctl_vol_args_v2) #endif /* _UAPI_LINUX_BTRFS_H */ -- cgit v1.2.3 From d01cd62400b3a42d012069518614d944c35f9c99 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 24 Feb 2020 17:37:49 +0200 Subject: uuid: Add inline helpers to import / export UUIDs Sometimes we may need to import UUID from or export to the raw buffer, which is provided outside of kernel and can't be declared as UUID type. With current API this operation will require an explicit casting to one of UUID types and length, that is always a constant derived as sizeof the certain UUID type. Provide a helpful set of inline helpers to minimize developer's effort in the cases when raw buffers are involved. 
Suggested-by: David Sterba Acked-by: Christoph Hellwig Signed-off-by: Andy Shevchenko Signed-off-by: David Sterba --- include/linux/uuid.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 0c631e2a73b6..8e4a5000da03 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -43,6 +43,16 @@ static inline void guid_copy(guid_t *dst, const guid_t *src) memcpy(dst, src, sizeof(guid_t)); } +static inline void import_guid(guid_t *dst, const __u8 *src) +{ + memcpy(dst, src, sizeof(guid_t)); +} + +static inline void export_guid(__u8 *dst, const guid_t *src) +{ + memcpy(dst, src, sizeof(guid_t)); +} + static inline bool guid_is_null(const guid_t *guid) { return guid_equal(guid, &guid_null); @@ -58,6 +68,16 @@ static inline void uuid_copy(uuid_t *dst, const uuid_t *src) memcpy(dst, src, sizeof(uuid_t)); } +static inline void import_uuid(uuid_t *dst, const __u8 *src) +{ + memcpy(dst, src, sizeof(uuid_t)); +} + +static inline void export_uuid(__u8 *dst, const uuid_t *src) +{ + memcpy(dst, src, sizeof(uuid_t)); +} + static inline bool uuid_is_null(const uuid_t *uuid) { return uuid_equal(uuid, &uuid_null); -- cgit v1.2.3 From 48a2e88f53aea4dface64883157ad3c428132f75 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 24 Feb 2020 17:37:50 +0200 Subject: uuid: Provide a GUID generator for raw buffer In some cases we would like to generate a GUID and export it. Though it would require either casting to internal kernel types or an intermediate buffer. Instead we may achieve this by supplying a pointer to a raw buffer and making a complementary API to the existing one for UUIDs. 
Reviewed-by: Christoph Hellwig Signed-off-by: Andy Shevchenko Signed-off-by: David Sterba --- include/linux/uuid.h | 1 + lib/uuid.c | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 8e4a5000da03..3780460a9a85 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -84,6 +84,7 @@ static inline bool uuid_is_null(const uuid_t *uuid) } void generate_random_uuid(unsigned char uuid[16]); +void generate_random_guid(unsigned char guid[16]); extern void guid_gen(guid_t *u); extern void uuid_gen(uuid_t *u); diff --git a/lib/uuid.c b/lib/uuid.c index b6a1edb61d87..562d53977cab 100644 --- a/lib/uuid.c +++ b/lib/uuid.c @@ -40,6 +40,16 @@ void generate_random_uuid(unsigned char uuid[16]) } EXPORT_SYMBOL(generate_random_uuid); +void generate_random_guid(unsigned char guid[16]) +{ + get_random_bytes(guid, 16); + /* Set GUID version to 4 --- truly random generation */ + guid[7] = (guid[7] & 0x0F) | 0x40; + /* Set the GUID variant to DCE */ + guid[8] = (guid[8] & 0x3F) | 0x80; +} +EXPORT_SYMBOL(generate_random_guid); + static void __uuid_gen_common(__u8 b[16]) { prandom_bytes(b, 16); -- cgit v1.2.3 From 86eba9d34c41d5c2cffd82febafa34de574da6f7 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 24 Feb 2020 17:37:52 +0200 Subject: uuid: Remove no more needed macro uuid_le_gen() is not used anymore, remove it for good. 
Reviewed-by: Christoph Hellwig Signed-off-by: Andy Shevchenko Signed-off-by: David Sterba --- include/linux/uuid.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 3780460a9a85..d41b0d3e9474 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -98,7 +98,6 @@ int guid_parse(const char *uuid, guid_t *u); int uuid_parse(const char *uuid, uuid_t *u); /* backwards compatibility, don't use in new code */ -#define uuid_le_gen(u) guid_gen(u) #define uuid_le_to_bin(guid, u) guid_parse(guid, u) static inline int uuid_le_cmp(const guid_t u1, const guid_t u2) -- cgit v1.2.3 From 9c1036fdb1d1ff1b09d03c8db60f4dc67cc6614e Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Fri, 13 Mar 2020 17:23:18 +0200 Subject: btrfs: Remove BTRFS_SUBVOL_CREATE_ASYNC support This functionality was deprecated in kernel 5.4. Since no one has complained of the impending removal it's time we did so. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba [ add comment ] Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 25 +------------------------ include/uapi/linux/btrfs.h | 13 ++++++++----- 2 files changed, 9 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f8a73a28022a..7e09985d566e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1818,8 +1818,6 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, { struct btrfs_ioctl_vol_args_v2 *vol_args; int ret; - u64 transid = 0; - u64 *ptr = NULL; bool readonly = false; struct btrfs_qgroup_inherit *inherit = NULL; @@ -1836,15 +1834,6 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, goto free_args; } - if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC) { - struct inode *inode = file_inode(file); - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - - btrfs_warn(fs_info, -"SNAP_CREATE_V2 ioctl with CREATE_ASYNC is deprecated and will be removed in kernel 5.7"); - - 
ptr = &transid; - } if (vol_args->flags & BTRFS_SUBVOL_RDONLY) readonly = true; if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) { @@ -1860,17 +1849,10 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, } ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, - vol_args->fd, subvol, ptr, + vol_args->fd, subvol, NULL, readonly, inherit); if (ret) goto free_inherit; - - if (ptr && copy_to_user(arg + - offsetof(struct btrfs_ioctl_vol_args_v2, - transid), - ptr, sizeof(*ptr))) - ret = -EFAULT; - free_inherit: kfree(inherit); free_args: @@ -1929,11 +1911,6 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file, goto out_drop_write; } - if (flags & BTRFS_SUBVOL_CREATE_ASYNC) { - ret = -EINVAL; - goto out_drop_write; - } - if (flags & ~BTRFS_SUBVOL_RDONLY) { ret = -EOPNOTSUPP; goto out_drop_write; diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index b5f3ea36d3cb..8134924cfc17 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -36,7 +36,12 @@ struct btrfs_ioctl_vol_args { #define BTRFS_DEVICE_PATH_NAME_MAX 1024 #define BTRFS_SUBVOL_NAME_MAX 4039 -#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) +/* + * Deprecated since 5.7: + * + * BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) + */ + #define BTRFS_SUBVOL_RDONLY (1ULL << 1) #define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) @@ -45,8 +50,7 @@ struct btrfs_ioctl_vol_args { #define BTRFS_SUBVOL_SPEC_BY_ID (1ULL << 4) #define BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED \ - (BTRFS_SUBVOL_CREATE_ASYNC | \ - BTRFS_SUBVOL_RDONLY | \ + (BTRFS_SUBVOL_RDONLY | \ BTRFS_SUBVOL_QGROUP_INHERIT | \ BTRFS_DEVICE_SPEC_BY_ID | \ BTRFS_SUBVOL_SPEC_BY_ID) @@ -116,8 +120,7 @@ struct btrfs_ioctl_qgroup_limit_args { /* Supported flags for BTRFS_IOC_SNAP_CREATE_V2 and BTRFS_IOC_SUBVOL_CREATE_V2 */ #define BTRFS_SUBVOL_CREATE_ARGS_MASK \ - (BTRFS_SUBVOL_CREATE_ASYNC | \ - BTRFS_SUBVOL_RDONLY | \ + (BTRFS_SUBVOL_RDONLY | \ BTRFS_SUBVOL_QGROUP_INHERIT) /* Supported flags for 
BTRFS_IOC_SNAP_DESTROY_V2 */ -- cgit v1.2.3