From 2c7d2a230237e7c43fa067d695937b7e484bb92a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 15 Dec 2021 15:39:58 -0500 Subject: btrfs: add definition for EXTENT_TREE_V2 This adds the initial definition of the EXTENT_TREE_V2 incompat feature flag. This also hides the support behind CONFIG_BTRFS_DEBUG. THIS IS A IN DEVELOPMENT FORMAT CHANGE, DO NOT USE UNLESS YOU ARE A DEVELOPER OR A TESTER. The format is in flux and will be added in stages, any fs will need to be re-made between updates to the format. Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 738619994e26..1cb1a3860f1d 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -309,6 +309,7 @@ struct btrfs_ioctl_fs_info_args { #define BTRFS_FEATURE_INCOMPAT_METADATA_UUID (1ULL << 10) #define BTRFS_FEATURE_INCOMPAT_RAID1C34 (1ULL << 11) #define BTRFS_FEATURE_INCOMPAT_ZONED (1ULL << 12) +#define BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2 (1ULL << 13) struct btrfs_ioctl_feature_flags { __u64 compat_flags; -- cgit v1.2.3 From 9c54e80ddc6bd89596a4046d451908700476fd14 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 15 Dec 2021 15:40:07 -0500 Subject: btrfs: add code to support the block group root This code adds the on disk structures for the block group root, which will hold the block group items for extent tree v2. Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 27 ++++++++++++++++++++++- fs/btrfs/disk-io.c | 49 +++++++++++++++++++++++++++++++++++------ fs/btrfs/disk-io.h | 2 ++ fs/btrfs/print-tree.c | 1 + include/trace/events/btrfs.h | 1 + include/uapi/linux/btrfs_tree.h | 3 +++ 6 files changed, 75 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 54424b341132..f460a7bb9ae8 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -274,8 +274,14 @@ struct btrfs_super_block { /* the UUID written into btree blocks */ u8 metadata_uuid[BTRFS_FSID_SIZE]; + /* Extent tree v2 */ + __le64 block_group_root; + __le64 block_group_root_generation; + u8 block_group_root_level; + /* future expansion */ - __le64 reserved[28]; + u8 reserved8[7]; + __le64 reserved[25]; u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS]; @@ -657,6 +663,7 @@ struct btrfs_fs_info { struct btrfs_root *quota_root; struct btrfs_root *uuid_root; struct btrfs_root *data_reloc_root; + struct btrfs_root *block_group_root; /* the log root tree is a directory of all the other log roots */ struct btrfs_root *log_root_tree; @@ -2349,6 +2356,17 @@ BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup, BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup, num_devices, 64); +/* + * For extent tree v2 we overload the extent root with the block group root, as + * we will have multiple extent roots. + */ +BTRFS_SETGET_STACK_FUNCS(backup_block_group_root, struct btrfs_root_backup, + extent_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_block_group_root_gen, struct btrfs_root_backup, + extent_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_block_group_root_level, + struct btrfs_root_backup, extent_root_level, 8); + /* struct btrfs_balance_item */ BTRFS_SETGET_FUNCS(balance_flags, struct btrfs_balance_item, flags, 64); @@ -2483,6 +2501,13 @@ BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block, BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64); BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block, uuid_tree_generation, 64); +BTRFS_SETGET_STACK_FUNCS(super_block_group_root, struct btrfs_super_block, + block_group_root, 64); +BTRFS_SETGET_STACK_FUNCS(super_block_group_root_generation, + struct btrfs_super_block, + block_group_root_generation, 64); +BTRFS_SETGET_STACK_FUNCS(super_block_group_root_level, struct btrfs_super_block, + block_group_root_level, 8); int btrfs_super_csum_size(const struct btrfs_super_block *s); const char *btrfs_super_csum_name(u16 csum_type); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e014cb8413c7..fe1349737edb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1727,6 +1727,7 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info) btrfs_put_root(fs_info->uuid_root); btrfs_put_root(fs_info->fs_root); btrfs_put_root(fs_info->data_reloc_root); + btrfs_put_root(fs_info->block_group_root); btrfs_check_leaked_roots(fs_info); btrfs_extent_buffer_leak_debug_check(fs_info); kfree(fs_info->super_copy); @@ -2095,7 +2096,6 @@ static void backup_super_roots(struct btrfs_fs_info *info) { const int next_backup = info->backup_root_index; struct btrfs_root_backup *root_backup; - struct btrfs_root *extent_root = btrfs_extent_root(info, 0); struct btrfs_root *csum_root = btrfs_csum_root(info, 0); root_backup = info->super_for_commit->super_roots + next_backup; @@ -2121,11 +2121,23 @@ static void backup_super_roots(struct btrfs_fs_info *info) btrfs_set_backup_chunk_root_level(root_backup, btrfs_header_level(info->chunk_root->node)); - btrfs_set_backup_extent_root(root_backup, extent_root->node->start); - btrfs_set_backup_extent_root_gen(root_backup, - btrfs_header_generation(extent_root->node)); - btrfs_set_backup_extent_root_level(root_backup, - btrfs_header_level(extent_root->node)); + if (btrfs_fs_incompat(info, EXTENT_TREE_V2)) { + btrfs_set_backup_block_group_root(root_backup, + info->block_group_root->node->start); + btrfs_set_backup_block_group_root_gen(root_backup, + btrfs_header_generation(info->block_group_root->node)); + btrfs_set_backup_block_group_root_level(root_backup, + btrfs_header_level(info->block_group_root->node)); + } else { + struct btrfs_root *extent_root = btrfs_extent_root(info, 0); + + btrfs_set_backup_extent_root(root_backup, + extent_root->node->start); + btrfs_set_backup_extent_root_gen(root_backup, + btrfs_header_generation(extent_root->node)); + btrfs_set_backup_extent_root_level(root_backup, + btrfs_header_level(extent_root->node)); + } /* * we might commit during log recovery, which happens before we set @@ -2269,6 +2281,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, bool free_chunk_root) free_root_extent_buffers(info->uuid_root); free_root_extent_buffers(info->fs_root); free_root_extent_buffers(info->data_reloc_root); + free_root_extent_buffers(info->block_group_root); if (free_chunk_root) free_root_extent_buffers(info->chunk_root); } @@ -2964,8 +2977,20 @@ static int load_important_roots(struct btrfs_fs_info *fs_info) gen = btrfs_super_generation(sb); level = btrfs_super_root_level(sb); ret = load_super_root(fs_info->tree_root, bytenr, gen, level); - if (ret) + if (ret) { btrfs_warn(fs_info, "couldn't read tree root"); + return ret; + } + + if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) + return 0; + + bytenr = btrfs_super_block_group_root(sb); + gen = btrfs_super_block_group_root_generation(sb); + level = btrfs_super_block_group_root_level(sb); + ret = load_super_root(fs_info->block_group_root, bytenr, gen, level); + if (ret) + btrfs_warn(fs_info, "couldn't read block group root"); return ret; } @@ -2978,6 +3003,16 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info) int ret = 0; int i; + if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) { + struct btrfs_root *root; + + root = btrfs_alloc_root(fs_info, BTRFS_BLOCK_GROUP_TREE_OBJECTID, + GFP_KERNEL); + if (!root) + return -ENOMEM; + fs_info->block_group_root = root; + } + for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) { if (handle_error) { if (!IS_ERR(tree_root->node)) diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 5e8bef4b7563..2e10514ecda8 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -111,6 +111,8 @@ static inline struct btrfs_root *btrfs_grab_root(struct btrfs_root *root) static inline struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info) { + if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) + return fs_info->block_group_root; return btrfs_extent_root(fs_info, 0); } diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 0775ae9f4419..524fdb0ddd74 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -23,6 +23,7 @@ static const struct root_name_map root_map[] = { { BTRFS_QUOTA_TREE_OBJECTID, "QUOTA_TREE" }, { BTRFS_UUID_TREE_OBJECTID, "UUID_TREE" }, { BTRFS_FREE_SPACE_TREE_OBJECTID, "FREE_SPACE_TREE" }, + { BTRFS_BLOCK_GROUP_TREE_OBJECTID, "BLOCK_GROUP_TREE" }, { BTRFS_DATA_RELOC_TREE_OBJECTID, "DATA_RELOC_TREE" }, }; diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 0d729664b4b4..f068ff30d654 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -53,6 +53,7 @@ struct btrfs_space_info; { BTRFS_TREE_RELOC_OBJECTID, "TREE_RELOC" }, \ { BTRFS_UUID_TREE_OBJECTID, "UUID_TREE" }, \ { BTRFS_FREE_SPACE_TREE_OBJECTID, "FREE_SPACE_TREE" }, \ + { BTRFS_BLOCK_GROUP_TREE_OBJECTID, "BLOCK_GROUP_TREE" },\ { BTRFS_DATA_RELOC_TREE_OBJECTID, "DATA_RELOC_TREE" }) #define show_root_type(obj) \ diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index 5416f1f1a77a..b069752a8ecf 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -53,6 +53,9 @@ /* tracks free space in block groups. */ #define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL +/* Holds the block group items for extent tree v2. */ +#define BTRFS_BLOCK_GROUP_TREE_OBJECTID 11ULL + /* device stats in the device tree */ #define BTRFS_DEV_STATS_OBJECTID 0ULL -- cgit v1.2.3 From dcb77a9ae87dc1ae2c54ea2e629da357e694b664 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 16 Aug 2021 15:58:29 -0700 Subject: btrfs: add definitions and documentation for encoded I/O ioctls In order to allow sending and receiving compressed data without decompressing it, we need an interface to write pre-compressed data directly to the filesystem and the matching interface to read compressed data without decompressing it. This adds the definitions for ioctls to do that and detailed explanations of how to use them. Reviewed-by: Nikolay Borisov Signed-off-by: Omar Sandoval Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 132 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 1cb1a3860f1d..d956b2993970 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -869,6 +869,134 @@ struct btrfs_ioctl_get_subvol_rootref_args { __u8 align[7]; }; +/* + * Data and metadata for an encoded read or write. + * + * Encoded I/O bypasses any encoding automatically done by the filesystem (e.g., + * compression). This can be used to read the compressed contents of a file or + * write pre-compressed data directly to a file. + * + * BTRFS_IOC_ENCODED_READ and BTRFS_IOC_ENCODED_WRITE are essentially + * preadv/pwritev with additional metadata about how the data is encoded and the + * size of the unencoded data. + * + * BTRFS_IOC_ENCODED_READ fills the given iovecs with the encoded data, fills + * the metadata fields, and returns the size of the encoded data. It reads one + * extent per call. It can also read data which is not encoded. + * + * BTRFS_IOC_ENCODED_WRITE uses the metadata fields, writes the encoded data + * from the iovecs, and returns the size of the encoded data. Note that the + * encoded data is not validated when it is written; if it is not valid (e.g., + * it cannot be decompressed), then a subsequent read may return an error. + * + * Since the filesystem page cache contains decoded data, encoded I/O bypasses + * the page cache. Encoded I/O requires CAP_SYS_ADMIN. + */ +struct btrfs_ioctl_encoded_io_args { + /* Input parameters for both reads and writes. */ + + /* + * iovecs containing encoded data. + * + * For reads, if the size of the encoded data is larger than the sum of + * iov[n].iov_len for 0 <= n < iovcnt, then the ioctl fails with + * ENOBUFS. + * + * For writes, the size of the encoded data is the sum of iov[n].iov_len + * for 0 <= n < iovcnt. This must be less than 128 KiB (this limit may + * increase in the future). This must also be less than or equal to + * unencoded_len. + */ + const struct iovec __user *iov; + /* Number of iovecs. */ + unsigned long iovcnt; + /* + * Offset in file. + * + * For writes, must be aligned to the sector size of the filesystem. + */ + __s64 offset; + /* Currently must be zero. */ + __u64 flags; + + /* + * For reads, the following members are output parameters that will + * contain the returned metadata for the encoded data. + * For writes, the following members must be set to the metadata for the + * encoded data. + */ + + /* + * Length of the data in the file. + * + * Must be less than or equal to unencoded_len - unencoded_offset. For + * writes, must be aligned to the sector size of the filesystem unless + * the data ends at or beyond the current end of the file. + */ + __u64 len; + /* + * Length of the unencoded (i.e., decrypted and decompressed) data. + * + * For writes, must be no more than 128 KiB (this limit may increase in + * the future). If the unencoded data is actually longer than + * unencoded_len, then it is truncated; if it is shorter, then it is + * extended with zeroes. + */ + __u64 unencoded_len; + /* + * Offset from the first byte of the unencoded data to the first byte of + * logical data in the file. + * + * Must be less than unencoded_len. + */ + __u64 unencoded_offset; + /* + * BTRFS_ENCODED_IO_COMPRESSION_* type. + * + * For writes, must not be BTRFS_ENCODED_IO_COMPRESSION_NONE. + */ + __u32 compression; + /* Currently always BTRFS_ENCODED_IO_ENCRYPTION_NONE. */ + __u32 encryption; + /* + * Reserved for future expansion. + * + * For reads, always returned as zero. Users should check for non-zero + * bytes. If there are any, then the kernel has a newer version of this + * structure with additional information that the user definition is + * missing. + * + * For writes, must be zeroed. + */ + __u8 reserved[64]; +}; + +/* Data is not compressed. */ +#define BTRFS_ENCODED_IO_COMPRESSION_NONE 0 +/* Data is compressed as a single zlib stream. */ +#define BTRFS_ENCODED_IO_COMPRESSION_ZLIB 1 +/* + * Data is compressed as a single zstd frame with the windowLog compression + * parameter set to no more than 17. + */ +#define BTRFS_ENCODED_IO_COMPRESSION_ZSTD 2 +/* + * Data is compressed sector by sector (using the sector size indicated by the + * name of the constant) with LZO1X and wrapped in the format documented in + * fs/btrfs/lzo.c. For writes, the compression sector size must match the + * filesystem sector size. + */ +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_4K 3 +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_8K 4 +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_16K 5 +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_32K 6 +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_64K 7 +#define BTRFS_ENCODED_IO_COMPRESSION_TYPES 8 + +/* Data is not encrypted. */ +#define BTRFS_ENCODED_IO_ENCRYPTION_NONE 0 +#define BTRFS_ENCODED_IO_ENCRYPTION_TYPES 1 + /* Error codes as returned by the kernel */ enum btrfs_err_code { BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1, @@ -997,5 +1125,9 @@ enum btrfs_err_code { struct btrfs_ioctl_ino_lookup_user_args) #define BTRFS_IOC_SNAP_DESTROY_V2 _IOW(BTRFS_IOCTL_MAGIC, 63, \ struct btrfs_ioctl_vol_args_v2) +#define BTRFS_IOC_ENCODED_READ _IOR(BTRFS_IOCTL_MAGIC, 64, \ + struct btrfs_ioctl_encoded_io_args) +#define BTRFS_IOC_ENCODED_WRITE _IOW(BTRFS_IOCTL_MAGIC, 64, \ + struct btrfs_ioctl_encoded_io_args) #endif /* _UAPI_LINUX_BTRFS_H */ -- cgit v1.2.3