From 0e0078f72be81bbb2a02b229fd2cec8ad63e4fb1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 21 Nov 2022 18:47:48 +0100
Subject: btrfs: factor out scratching of one regular super block

btrfs_scratch_superblocks open codes scratching super block of a
non-zoned super block.  Split the code to read, zero and write the
superblock for regular devices into a separate helper.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
[ update changelog ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 51 ++++++++++++++++++++++++++-------------------------
 1 file changed, 26 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index bf0decaac7f3..119dc26ca141 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2014,42 +2014,43 @@ static u64 btrfs_num_devices(struct btrfs_fs_info *fs_info)
 	return num_devices;
 }
 
+static void btrfs_scratch_superblock(struct btrfs_fs_info *fs_info,
+				     struct block_device *bdev, int copy_num)
+{
+	struct btrfs_super_block *disk_super;
+	struct page *page;
+	int ret;
+
+	disk_super = btrfs_read_dev_one_super(bdev, copy_num, false);
+	if (IS_ERR(disk_super))
+		return;
+
+	memset(&disk_super->magic, 0, sizeof(disk_super->magic));
+	page = virt_to_page(disk_super);
+	set_page_dirty(page);
+	lock_page(page);
+	/* write_on_page() unlocks the page */
+	ret = write_one_page(page);
+	if (ret)
+		btrfs_warn(fs_info, "error clearing superblock number %d (%d)",
+			copy_num, ret);
+	btrfs_release_disk_super(disk_super);
+}
+
 void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
 			       struct block_device *bdev,
 			       const char *device_path)
 {
-	struct btrfs_super_block *disk_super;
 	int copy_num;
 
 	if (!bdev)
 		return;
 
 	for (copy_num = 0; copy_num < BTRFS_SUPER_MIRROR_MAX; copy_num++) {
-		struct page *page;
-		int ret;
-
-		disk_super = btrfs_read_dev_one_super(bdev, copy_num, false);
-		if (IS_ERR(disk_super))
-			continue;
-
-		if (bdev_is_zoned(bdev)) {
+		if (bdev_is_zoned(bdev))
 			btrfs_reset_sb_log_zones(bdev, copy_num);
-			continue;
-		}
-
-		memset(&disk_super->magic, 0, sizeof(disk_super->magic));
-
-		page = virt_to_page(disk_super);
-		set_page_dirty(page);
-		lock_page(page);
-		/* write_on_page() unlocks the page */
-		ret = write_one_page(page);
-		if (ret)
-			btrfs_warn(fs_info,
-				"error clearing superblock number %d (%d)",
-				copy_num, ret);
-		btrfs_release_disk_super(disk_super);
-
+		else
+			btrfs_scratch_superblock(fs_info, bdev, copy_num);
 	}
 
 	/* Notify udev that device has changed */
-- 
cgit v1.2.3


From 26ecf243e407be54807ad67210f7e83b9fad71ea Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 21 Nov 2022 18:47:49 +0100
Subject: btrfs: stop using write_one_page in btrfs_scratch_superblock

write_one_page is an awkward interface that expects the page locked and
->writepage to be implemented.  Replace that by zeroing the signature
bytes and synchronize the block device page using the proper bdev
helpers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
[ update changelog ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 119dc26ca141..bcfef75b97da 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2018,23 +2018,22 @@ static void btrfs_scratch_superblock(struct btrfs_fs_info *fs_info,
 				     struct block_device *bdev, int copy_num)
 {
 	struct btrfs_super_block *disk_super;
-	struct page *page;
+	const size_t len = sizeof(disk_super->magic);
+	const u64 bytenr = btrfs_sb_offset(copy_num);
 	int ret;
 
-	disk_super = btrfs_read_dev_one_super(bdev, copy_num, false);
+	disk_super = btrfs_read_disk_super(bdev, bytenr, bytenr);
 	if (IS_ERR(disk_super))
 		return;
 
-	memset(&disk_super->magic, 0, sizeof(disk_super->magic));
-	page = virt_to_page(disk_super);
-	set_page_dirty(page);
-	lock_page(page);
-	/* write_on_page() unlocks the page */
-	ret = write_one_page(page);
+	memset(&disk_super->magic, 0, len);
+	folio_mark_dirty(virt_to_folio(disk_super));
+	btrfs_release_disk_super(disk_super);
+
+	ret = sync_blockdev_range(bdev, bytenr, bytenr + len - 1);
 	if (ret)
 		btrfs_warn(fs_info, "error clearing superblock number %d (%d)",
 			copy_num, ret);
-	btrfs_release_disk_super(disk_super);
 }
 
 void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
-- 
cgit v1.2.3


From 1f55ee6d0901d915801618bda0af4e5b937e3db7 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 12 Jan 2023 14:17:20 +0000
Subject: btrfs: fix invalid leaf access due to inline extent during lseek

During lseek, for SEEK_DATA and SEEK_HOLE modes, we access the disk_bytenr
of an extent without checking its type. However inline extents have their
data starting the offset of the disk_bytenr field, so accessing that field
when we have an inline extent can result in either of the following:

1) Interpret the inline extent's data as a disk_bytenr value;

2) In case the inline data is less than 8 bytes, we access part of some
   other item in the leaf, or unused space in the leaf;

3) In case the inline data is less than 8 bytes and the extent item is
   the first item in the leaf, we can access beyond the leaf's limit.

So fix this by not accessing the disk_bytenr field if we have an inline
extent.

Fixes: b6e833567ea1 ("btrfs: make hole and data seeking a lot more efficient")
Reported-by: Matthias Schoepfer <matthias.schoepfer@googlemail.com>
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=216908
Link: https://lore.kernel.org/linux-btrfs/7f25442f-b121-2a3a-5a3d-22bcaae83cd4@leemhuis.info/
CC: stable@vger.kernel.org # 6.1
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/file.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 834bbcb91102..af046d22300e 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -3541,6 +3541,7 @@ static loff_t find_desired_extent(struct file *file, loff_t offset, int whence)
 		struct extent_buffer *leaf = path->nodes[0];
 		struct btrfs_file_extent_item *extent;
 		u64 extent_end;
+		u8 type;
 
 		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
 			ret = btrfs_next_leaf(root, path);
@@ -3596,10 +3597,16 @@ static loff_t find_desired_extent(struct file *file, loff_t offset, int whence)
 
 		extent = btrfs_item_ptr(leaf, path->slots[0],
 					struct btrfs_file_extent_item);
+		type = btrfs_file_extent_type(leaf, extent);
 
-		if (btrfs_file_extent_disk_bytenr(leaf, extent) == 0 ||
-		    btrfs_file_extent_type(leaf, extent) ==
-		    BTRFS_FILE_EXTENT_PREALLOC) {
+		/*
+		 * Can't access the extent's disk_bytenr field if this is an
+		 * inline extent, since at that offset, it's where the extent
+		 * data starts.
+		 */
+		if (type == BTRFS_FILE_EXTENT_PREALLOC ||
+		    (type == BTRFS_FILE_EXTENT_REG &&
+		     btrfs_file_extent_disk_bytenr(leaf, extent) == 0)) {
 			/*
 			 * Explicit hole or prealloc extent, search for delalloc.
 			 * A prealloc extent is treated like a hole.
-- 
cgit v1.2.3


From b7adbf9ada3513d2092362c8eac5cddc5b651f5c Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 12 Jan 2023 16:31:08 +0000
Subject: btrfs: fix race between quota rescan and disable leading to NULL
 pointer deref

If we have one task trying to start the quota rescan worker while another
one is trying to disable quotas, we can end up hitting a race that results
in the quota rescan worker doing a NULL pointer dereference. The steps for
this are the following:

1) Quotas are enabled;

2) Task A calls the quota rescan ioctl and enters btrfs_qgroup_rescan().
   It calls qgroup_rescan_init() which returns 0 (success) and then joins a
   transaction and commits it;

3) Task B calls the quota disable ioctl and enters btrfs_quota_disable().
   It clears the bit BTRFS_FS_QUOTA_ENABLED from fs_info->flags and calls
   btrfs_qgroup_wait_for_completion(), which returns immediately since the
   rescan worker is not yet running.
   Then it starts a transaction and locks fs_info->qgroup_ioctl_lock;

4) Task A queues the rescan worker, by calling btrfs_queue_work();

5) The rescan worker starts, and calls rescan_should_stop() at the start
   of its while loop, which results in 0 iterations of the loop, since
   the flag BTRFS_FS_QUOTA_ENABLED was cleared from fs_info->flags by
   task B at step 3);

6) Task B sets fs_info->quota_root to NULL;

7) The rescan worker tries to start a transaction and uses
   fs_info->quota_root as the root argument for btrfs_start_transaction().
   This results in a NULL pointer dereference down the call chain of
   btrfs_start_transaction(). The stack trace is something like the one
   reported in Link tag below:

   general protection fault, probably for non-canonical address 0xdffffc0000000041: 0000 [#1] PREEMPT SMP KASAN
   KASAN: null-ptr-deref in range [0x0000000000000208-0x000000000000020f]
   CPU: 1 PID: 34 Comm: kworker/u4:2 Not tainted 6.1.0-syzkaller-13872-gb6bb9676f216 #0
   Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022
   Workqueue: btrfs-qgroup-rescan btrfs_work_helper
   RIP: 0010:start_transaction+0x48/0x10f0 fs/btrfs/transaction.c:564
   Code: 48 89 fb 48 (...)
   RSP: 0018:ffffc90000ab7ab0 EFLAGS: 00010206
   RAX: 0000000000000041 RBX: 0000000000000208 RCX: ffff88801779ba80
   RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000000
   RBP: dffffc0000000000 R08: 0000000000000001 R09: fffff52000156f5d
   R10: fffff52000156f5d R11: 1ffff92000156f5c R12: 0000000000000000
   R13: 0000000000000001 R14: 0000000000000001 R15: 0000000000000003
   FS:  0000000000000000(0000) GS:ffff8880b9900000(0000) knlGS:0000000000000000
   CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
   CR2: 00007f2bea75b718 CR3: 000000001d0cc000 CR4: 00000000003506e0
   DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
   DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
   Call Trace:
    <TASK>
    btrfs_qgroup_rescan_worker+0x3bb/0x6a0 fs/btrfs/qgroup.c:3402
    btrfs_work_helper+0x312/0x850 fs/btrfs/async-thread.c:280
    process_one_work+0x877/0xdb0 kernel/workqueue.c:2289
    worker_thread+0xb14/0x1330 kernel/workqueue.c:2436
    kthread+0x266/0x300 kernel/kthread.c:376
    ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308
    </TASK>
   Modules linked in:

So fix this by having the rescan worker function not attempt to start a
transaction if it didn't do any rescan work.

Reported-by: syzbot+96977faa68092ad382c4@syzkaller.appspotmail.com
Link: https://lore.kernel.org/linux-btrfs/000000000000e5454b05f065a803@google.com/
Fixes: e804861bd4e6 ("btrfs: fix deadlock between quota disable and qgroup rescan worker")
CC: stable@vger.kernel.org # 5.4+
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 00851c86aa8a..af97413abcf4 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3367,6 +3367,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
 	int err = -ENOMEM;
 	int ret = 0;
 	bool stopped = false;
+	bool did_leaf_rescans = false;
 
 	path = btrfs_alloc_path();
 	if (!path)
@@ -3387,6 +3388,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
 		}
 
 		err = qgroup_rescan_leaf(trans, path);
+		did_leaf_rescans = true;
 
 		if (err > 0)
 			btrfs_commit_transaction(trans);
@@ -3407,16 +3409,23 @@ out:
 	mutex_unlock(&fs_info->qgroup_rescan_lock);
 
 	/*
-	 * only update status, since the previous part has already updated the
-	 * qgroup info.
+	 * Only update status, since the previous part has already updated the
+	 * qgroup info, and only if we did any actual work. This also prevents
+	 * race with a concurrent quota disable, which has already set
+	 * fs_info->quota_root to NULL and cleared BTRFS_FS_QUOTA_ENABLED at
+	 * btrfs_quota_disable().
 	 */
-	trans = btrfs_start_transaction(fs_info->quota_root, 1);
-	if (IS_ERR(trans)) {
-		err = PTR_ERR(trans);
+	if (did_leaf_rescans) {
+		trans = btrfs_start_transaction(fs_info->quota_root, 1);
+		if (IS_ERR(trans)) {
+			err = PTR_ERR(trans);
+			trans = NULL;
+			btrfs_err(fs_info,
+				  "fail to start transaction for status update: %d",
+				  err);
+		}
+	} else {
 		trans = NULL;
-		btrfs_err(fs_info,
-			  "fail to start transaction for status update: %d",
-			  err);
 	}
 
 	mutex_lock(&fs_info->qgroup_rescan_lock);
-- 
cgit v1.2.3