From 0e0078f72be81bbb2a02b229fd2cec8ad63e4fb1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 21 Nov 2022 18:47:48 +0100 Subject: btrfs: factor out scratching of one regular super block btrfs_scratch_superblocks open codes scratching super block of a non-zoned super block. Split the code to read, zero and write the superblock for regular devices into a separate helper. Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba [ update changelog ] Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 51 ++++++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index bf0decaac7f3..119dc26ca141 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2014,42 +2014,43 @@ static u64 btrfs_num_devices(struct btrfs_fs_info *fs_info) return num_devices; } +static void btrfs_scratch_superblock(struct btrfs_fs_info *fs_info, + struct block_device *bdev, int copy_num) +{ + struct btrfs_super_block *disk_super; + struct page *page; + int ret; + + disk_super = btrfs_read_dev_one_super(bdev, copy_num, false); + if (IS_ERR(disk_super)) + return; + + memset(&disk_super->magic, 0, sizeof(disk_super->magic)); + page = virt_to_page(disk_super); + set_page_dirty(page); + lock_page(page); + /* write_on_page() unlocks the page */ + ret = write_one_page(page); + if (ret) + btrfs_warn(fs_info, "error clearing superblock number %d (%d)", + copy_num, ret); + btrfs_release_disk_super(disk_super); +} + void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, struct block_device *bdev, const char *device_path) { - struct btrfs_super_block *disk_super; int copy_num; if (!bdev) return; for (copy_num = 0; copy_num < BTRFS_SUPER_MIRROR_MAX; copy_num++) { - struct page *page; - int ret; - - disk_super = btrfs_read_dev_one_super(bdev, copy_num, false); - if (IS_ERR(disk_super)) - continue; - - if (bdev_is_zoned(bdev)) { + if (bdev_is_zoned(bdev)) btrfs_reset_sb_log_zones(bdev, copy_num); - continue; - } - - memset(&disk_super->magic, 0, sizeof(disk_super->magic)); - - page = virt_to_page(disk_super); - set_page_dirty(page); - lock_page(page); - /* write_on_page() unlocks the page */ - ret = write_one_page(page); - if (ret) - btrfs_warn(fs_info, - "error clearing superblock number %d (%d)", - copy_num, ret); - btrfs_release_disk_super(disk_super); - + else + btrfs_scratch_superblock(fs_info, bdev, copy_num); } /* Notify udev that device has changed */ -- cgit v1.2.3 From 26ecf243e407be54807ad67210f7e83b9fad71ea Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 21 Nov 2022 18:47:49 +0100 Subject: btrfs: stop using write_one_page in btrfs_scratch_superblock write_one_page is an awkward interface that expects the page locked and ->writepage to be implemented. Replace that by zeroing the signature bytes and synchronize the block device page using the proper bdev helpers. Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba [ update changelog ] Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 119dc26ca141..bcfef75b97da 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2018,23 +2018,22 @@ static void btrfs_scratch_superblock(struct btrfs_fs_info *fs_info, struct block_device *bdev, int copy_num) { struct btrfs_super_block *disk_super; - struct page *page; + const size_t len = sizeof(disk_super->magic); + const u64 bytenr = btrfs_sb_offset(copy_num); int ret; - disk_super = btrfs_read_dev_one_super(bdev, copy_num, false); + disk_super = btrfs_read_disk_super(bdev, bytenr, bytenr); if (IS_ERR(disk_super)) return; - memset(&disk_super->magic, 0, sizeof(disk_super->magic)); - page = virt_to_page(disk_super); - set_page_dirty(page); - lock_page(page); - /* write_on_page() unlocks the page */ - ret = write_one_page(page); + memset(&disk_super->magic, 0, len); + folio_mark_dirty(virt_to_folio(disk_super)); + btrfs_release_disk_super(disk_super); + + ret = sync_blockdev_range(bdev, bytenr, bytenr + len - 1); if (ret) btrfs_warn(fs_info, "error clearing superblock number %d (%d)", copy_num, ret); - btrfs_release_disk_super(disk_super); } void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, -- cgit v1.2.3 From 1f55ee6d0901d915801618bda0af4e5b937e3db7 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 12 Jan 2023 14:17:20 +0000 Subject: btrfs: fix invalid leaf access due to inline extent during lseek During lseek, for SEEK_DATA and SEEK_HOLE modes, we access the disk_bytenr of an extent without checking its type. However inline extents have their data starting the offset of the disk_bytenr field, so accessing that field when we have an inline extent can result in either of the following: 1) Interpret the inline extent's data as a disk_bytenr value; 2) In case the inline data is less than 8 bytes, we access part of some other item in the leaf, or unused space in the leaf; 3) In case the inline data is less than 8 bytes and the extent item is the first item in the leaf, we can access beyond the leaf's limit. So fix this by not accessing the disk_bytenr field if we have an inline extent. Fixes: b6e833567ea1 ("btrfs: make hole and data seeking a lot more efficient") Reported-by: Matthias Schoepfer Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=216908 Link: https://lore.kernel.org/linux-btrfs/7f25442f-b121-2a3a-5a3d-22bcaae83cd4@leemhuis.info/ CC: stable@vger.kernel.org # 6.1 Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 834bbcb91102..af046d22300e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3541,6 +3541,7 @@ static loff_t find_desired_extent(struct file *file, loff_t offset, int whence) struct extent_buffer *leaf = path->nodes[0]; struct btrfs_file_extent_item *extent; u64 extent_end; + u8 type; if (path->slots[0] >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(root, path); @@ -3596,10 +3597,16 @@ static loff_t find_desired_extent(struct file *file, loff_t offset, int whence) extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); + type = btrfs_file_extent_type(leaf, extent); - if (btrfs_file_extent_disk_bytenr(leaf, extent) == 0 || - btrfs_file_extent_type(leaf, extent) == - BTRFS_FILE_EXTENT_PREALLOC) { + /* + * Can't access the extent's disk_bytenr field if this is an + * inline extent, since at that offset, it's where the extent + * data starts. + */ + if (type == BTRFS_FILE_EXTENT_PREALLOC || + (type == BTRFS_FILE_EXTENT_REG && + btrfs_file_extent_disk_bytenr(leaf, extent) == 0)) { /* * Explicit hole or prealloc extent, search for delalloc. * A prealloc extent is treated like a hole. -- cgit v1.2.3 From b7adbf9ada3513d2092362c8eac5cddc5b651f5c Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 12 Jan 2023 16:31:08 +0000 Subject: btrfs: fix race between quota rescan and disable leading to NULL pointer deref If we have one task trying to start the quota rescan worker while another one is trying to disable quotas, we can end up hitting a race that results in the quota rescan worker doing a NULL pointer dereference. The steps for this are the following: 1) Quotas are enabled; 2) Task A calls the quota rescan ioctl and enters btrfs_qgroup_rescan(). It calls qgroup_rescan_init() which returns 0 (success) and then joins a transaction and commits it; 3) Task B calls the quota disable ioctl and enters btrfs_quota_disable(). It clears the bit BTRFS_FS_QUOTA_ENABLED from fs_info->flags and calls btrfs_qgroup_wait_for_completion(), which returns immediately since the rescan worker is not yet running. Then it starts a transaction and locks fs_info->qgroup_ioctl_lock; 4) Task A queues the rescan worker, by calling btrfs_queue_work(); 5) The rescan worker starts, and calls rescan_should_stop() at the start of its while loop, which results in 0 iterations of the loop, since the flag BTRFS_FS_QUOTA_ENABLED was cleared from fs_info->flags by task B at step 3); 6) Task B sets fs_info->quota_root to NULL; 7) The rescan worker tries to start a transaction and uses fs_info->quota_root as the root argument for btrfs_start_transaction(). This results in a NULL pointer dereference down the call chain of btrfs_start_transaction(). The stack trace is something like the one reported in Link tag below: general protection fault, probably for non-canonical address 0xdffffc0000000041: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000208-0x000000000000020f] CPU: 1 PID: 34 Comm: kworker/u4:2 Not tainted 6.1.0-syzkaller-13872-gb6bb9676f216 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 Workqueue: btrfs-qgroup-rescan btrfs_work_helper RIP: 0010:start_transaction+0x48/0x10f0 fs/btrfs/transaction.c:564 Code: 48 89 fb 48 (...) RSP: 0018:ffffc90000ab7ab0 EFLAGS: 00010206 RAX: 0000000000000041 RBX: 0000000000000208 RCX: ffff88801779ba80 RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000000 RBP: dffffc0000000000 R08: 0000000000000001 R09: fffff52000156f5d R10: fffff52000156f5d R11: 1ffff92000156f5c R12: 0000000000000000 R13: 0000000000000001 R14: 0000000000000001 R15: 0000000000000003 FS: 0000000000000000(0000) GS:ffff8880b9900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f2bea75b718 CR3: 000000001d0cc000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btrfs_qgroup_rescan_worker+0x3bb/0x6a0 fs/btrfs/qgroup.c:3402 btrfs_work_helper+0x312/0x850 fs/btrfs/async-thread.c:280 process_one_work+0x877/0xdb0 kernel/workqueue.c:2289 worker_thread+0xb14/0x1330 kernel/workqueue.c:2436 kthread+0x266/0x300 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308 Modules linked in: So fix this by having the rescan worker function not attempt to start a transaction if it didn't do any rescan work. Reported-by: syzbot+96977faa68092ad382c4@syzkaller.appspotmail.com Link: https://lore.kernel.org/linux-btrfs/000000000000e5454b05f065a803@google.com/ Fixes: e804861bd4e6 ("btrfs: fix deadlock between quota disable and qgroup rescan worker") CC: stable@vger.kernel.org # 5.4+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 00851c86aa8a..af97413abcf4 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -3367,6 +3367,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) int err = -ENOMEM; int ret = 0; bool stopped = false; + bool did_leaf_rescans = false; path = btrfs_alloc_path(); if (!path) @@ -3387,6 +3388,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) } err = qgroup_rescan_leaf(trans, path); + did_leaf_rescans = true; if (err > 0) btrfs_commit_transaction(trans); @@ -3407,16 +3409,23 @@ out: mutex_unlock(&fs_info->qgroup_rescan_lock); /* - * only update status, since the previous part has already updated the - * qgroup info. + * Only update status, since the previous part has already updated the + * qgroup info, and only if we did any actual work. This also prevents + * race with a concurrent quota disable, which has already set + * fs_info->quota_root to NULL and cleared BTRFS_FS_QUOTA_ENABLED at + * btrfs_quota_disable(). */ - trans = btrfs_start_transaction(fs_info->quota_root, 1); - if (IS_ERR(trans)) { - err = PTR_ERR(trans); + if (did_leaf_rescans) { + trans = btrfs_start_transaction(fs_info->quota_root, 1); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + trans = NULL; + btrfs_err(fs_info, + "fail to start transaction for status update: %d", + err); + } + } else { trans = NULL; - btrfs_err(fs_info, - "fail to start transaction for status update: %d", - err); } mutex_lock(&fs_info->qgroup_rescan_lock); -- cgit v1.2.3