summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-07-07 14:10:33 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-07-07 14:10:33 -0700
commitaa27b32b76d0b1b242d43977da0e5358da1c825f (patch)
tree65406172e9b7a11eecd33c7b0c076850d0ac2f1a /fs
parent9e4d7696213f0231f7a54512da4b8514eb316edb (diff)
parent0465337c5599bbe360cdcff452992a1a6b7ed2d4 (diff)
downloadlinux-aa27b32b76d0b1b242d43977da0e5358da1c825f.tar.bz2
Merge tag 'for-5.8-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba: - regression fix of a leak in global block reserve accounting - fix a (hard to hit) race of readahead vs releasepage that could lead to crash - convert all remaining uses of comment fall through annotations to the pseudo keyword - fix crash when mounting a fuzzed image with -o recovery * tag 'for-5.8-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: btrfs: reset tree root pointer after error in init_tree_roots btrfs: fix reclaim_size counter leak after stealing from global reserve btrfs: fix fatal extent_buffer readahead vs releasepage race btrfs: convert comments to fallthrough annotations
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/ctree.c2
-rw-r--r--fs/btrfs/disk-io.c6
-rw-r--r--fs/btrfs/extent_io.c40
-rw-r--r--fs/btrfs/ref-verify.c2
-rw-r--r--fs/btrfs/space-info.c2
-rw-r--r--fs/btrfs/super.c6
-rw-r--r--fs/btrfs/volumes.h2
7 files changed, 35 insertions, 25 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 3a7648bff42c..82ab6e5a386d 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1196,7 +1196,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
switch (tm->op) {
case MOD_LOG_KEY_REMOVE_WHILE_FREEING:
BUG_ON(tm->slot < n);
- /* Fallthrough */
+ fallthrough;
case MOD_LOG_KEY_REMOVE_WHILE_MOVING:
case MOD_LOG_KEY_REMOVE:
btrfs_set_node_key(eb, &tm->key, tm->slot);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7c6f0bbb54a5..b1a148058773 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2593,10 +2593,12 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
!extent_buffer_uptodate(tree_root->node)) {
handle_error = true;
- if (IS_ERR(tree_root->node))
+ if (IS_ERR(tree_root->node)) {
ret = PTR_ERR(tree_root->node);
- else if (!extent_buffer_uptodate(tree_root->node))
+ tree_root->node = NULL;
+ } else if (!extent_buffer_uptodate(tree_root->node)) {
ret = -EUCLEAN;
+ }
btrfs_warn(fs_info, "failed to read tree root");
continue;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 68c96057ad2d..608f93438b29 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5058,25 +5058,28 @@ struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
static void check_buffer_tree_ref(struct extent_buffer *eb)
{
int refs;
- /* the ref bit is tricky. We have to make sure it is set
- * if we have the buffer dirty. Otherwise the
- * code to free a buffer can end up dropping a dirty
- * page
+ /*
+ * The TREE_REF bit is first set when the extent_buffer is added
+ * to the radix tree. It is also reset, if unset, when a new reference
+ * is created by find_extent_buffer.
*
- * Once the ref bit is set, it won't go away while the
- * buffer is dirty or in writeback, and it also won't
- * go away while we have the reference count on the
- * eb bumped.
+ * It is only cleared in two cases: freeing the last non-tree
+ * reference to the extent_buffer when its STALE bit is set or
+ * calling releasepage when the tree reference is the only reference.
*
- * We can't just set the ref bit without bumping the
- * ref on the eb because free_extent_buffer might
- * see the ref bit and try to clear it. If this happens
- * free_extent_buffer might end up dropping our original
- * ref by mistake and freeing the page before we are able
- * to add one more ref.
+ * In both cases, care is taken to ensure that the extent_buffer's
+ * pages are not under io. However, releasepage can be concurrently
+ * called with creating new references, which is prone to race
+ * conditions between the calls to check_buffer_tree_ref in those
+ * codepaths and clearing TREE_REF in try_release_extent_buffer.
*
- * So bump the ref count first, then set the bit. If someone
- * beat us to it, drop the ref we added.
+ * The actual lifetime of the extent_buffer in the radix tree is
+ * adequately protected by the refcount, but the TREE_REF bit and
+ * its corresponding reference are not. To protect against this
+ * class of races, we call check_buffer_tree_ref from the codepaths
+ * which trigger io after they set eb->io_pages. Note that once io is
+ * initiated, TREE_REF can no longer be cleared, so that is the
+ * moment at which any such race is best fixed.
*/
refs = atomic_read(&eb->refs);
if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
@@ -5527,6 +5530,11 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
eb->read_mirror = 0;
atomic_set(&eb->io_pages, num_reads);
+ /*
+ * It is possible for releasepage to clear the TREE_REF bit before we
+ * set io_pages. See check_buffer_tree_ref for a more detailed comment.
+ */
+ check_buffer_tree_ref(eb);
for (i = 0; i < num_pages; i++) {
page = eb->pages[i];
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
index 7887317033c9..af92525dbb16 100644
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -509,7 +509,7 @@ static int process_leaf(struct btrfs_root *root,
switch (key.type) {
case BTRFS_EXTENT_ITEM_KEY:
*num_bytes = key.offset;
- /* fall through */
+ fallthrough;
case BTRFS_METADATA_ITEM_KEY:
*bytenr = key.objectid;
ret = process_extent_item(fs_info, path, &key, i,
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 41ee88633769..c7bd3fdd7792 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -879,8 +879,8 @@ static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info,
return false;
}
global_rsv->reserved -= ticket->bytes;
+ remove_ticket(space_info, ticket);
ticket->bytes = 0;
- list_del_init(&ticket->list);
wake_up(&ticket->wait);
space_info->tickets_id++;
if (global_rsv->reserved < global_rsv->size)
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index bc73fd670702..c3826ae883f0 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -523,7 +523,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
case Opt_compress_force:
case Opt_compress_force_type:
compress_force = true;
- /* Fallthrough */
+ fallthrough;
case Opt_compress:
case Opt_compress_type:
saved_compress_type = btrfs_test_opt(info,
@@ -622,7 +622,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
btrfs_set_opt(info->mount_opt, NOSSD);
btrfs_clear_and_info(info, SSD,
"not using ssd optimizations");
- /* Fallthrough */
+ fallthrough;
case Opt_nossd_spread:
btrfs_clear_and_info(info, SSD_SPREAD,
"not using spread ssd allocation scheme");
@@ -793,7 +793,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
case Opt_recovery:
btrfs_warn(info,
"'recovery' is deprecated, use 'usebackuproot' instead");
- /* fall through */
+ fallthrough;
case Opt_usebackuproot:
btrfs_info(info,
"trying to use backup root at mount time");
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index f067b5934c46..75af2334b2e3 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -408,7 +408,7 @@ static inline enum btrfs_map_op btrfs_op(struct bio *bio)
return BTRFS_MAP_WRITE;
default:
WARN_ON_ONCE(1);
- /* fall through */
+ fallthrough;
case REQ_OP_READ:
return BTRFS_MAP_READ;
}