diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-07-07 14:10:33 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-07-07 14:10:33 -0700 |
commit | aa27b32b76d0b1b242d43977da0e5358da1c825f (patch) | |
tree | 65406172e9b7a11eecd33c7b0c076850d0ac2f1a /fs | |
parent | 9e4d7696213f0231f7a54512da4b8514eb316edb (diff) | |
parent | 0465337c5599bbe360cdcff452992a1a6b7ed2d4 (diff) | |
download | linux-aa27b32b76d0b1b242d43977da0e5358da1c825f.tar.bz2 |
Merge tag 'for-5.8-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:
- regression fix of a leak in global block reserve accounting
- fix a (hard to hit) race of readahead vs releasepage that could lead
to crash
- convert all remaining uses of comment fall through annotations to the
pseudo keyword
- fix crash when mounting a fuzzed image with -o recovery
* tag 'for-5.8-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
btrfs: reset tree root pointer after error in init_tree_roots
btrfs: fix reclaim_size counter leak after stealing from global reserve
btrfs: fix fatal extent_buffer readahead vs releasepage race
btrfs: convert comments to fallthrough annotations
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/ctree.c | 2 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 6 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 40 | ||||
-rw-r--r-- | fs/btrfs/ref-verify.c | 2 | ||||
-rw-r--r-- | fs/btrfs/space-info.c | 2 | ||||
-rw-r--r-- | fs/btrfs/super.c | 6 | ||||
-rw-r--r-- | fs/btrfs/volumes.h | 2 |
7 files changed, 35 insertions, 25 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 3a7648bff42c..82ab6e5a386d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1196,7 +1196,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, switch (tm->op) { case MOD_LOG_KEY_REMOVE_WHILE_FREEING: BUG_ON(tm->slot < n); - /* Fallthrough */ + fallthrough; case MOD_LOG_KEY_REMOVE_WHILE_MOVING: case MOD_LOG_KEY_REMOVE: btrfs_set_node_key(eb, &tm->key, tm->slot); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7c6f0bbb54a5..b1a148058773 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2593,10 +2593,12 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info) !extent_buffer_uptodate(tree_root->node)) { handle_error = true; - if (IS_ERR(tree_root->node)) + if (IS_ERR(tree_root->node)) { ret = PTR_ERR(tree_root->node); - else if (!extent_buffer_uptodate(tree_root->node)) + tree_root->node = NULL; + } else if (!extent_buffer_uptodate(tree_root->node)) { ret = -EUCLEAN; + } btrfs_warn(fs_info, "failed to read tree root"); continue; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 68c96057ad2d..608f93438b29 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -5058,25 +5058,28 @@ struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, static void check_buffer_tree_ref(struct extent_buffer *eb) { int refs; - /* the ref bit is tricky. We have to make sure it is set - * if we have the buffer dirty. Otherwise the - * code to free a buffer can end up dropping a dirty - * page + /* + * The TREE_REF bit is first set when the extent_buffer is added + * to the radix tree. It is also reset, if unset, when a new reference + * is created by find_extent_buffer. * - * Once the ref bit is set, it won't go away while the - * buffer is dirty or in writeback, and it also won't - * go away while we have the reference count on the - * eb bumped. + * It is only cleared in two cases: freeing the last non-tree + * reference to the extent_buffer when its STALE bit is set or + * calling releasepage when the tree reference is the only reference. * - * We can't just set the ref bit without bumping the - * ref on the eb because free_extent_buffer might - * see the ref bit and try to clear it. If this happens - * free_extent_buffer might end up dropping our original - * ref by mistake and freeing the page before we are able - * to add one more ref. + * In both cases, care is taken to ensure that the extent_buffer's + * pages are not under io. However, releasepage can be concurrently + * called with creating new references, which is prone to race + * conditions between the calls to check_buffer_tree_ref in those + * codepaths and clearing TREE_REF in try_release_extent_buffer. * - * So bump the ref count first, then set the bit. If someone - * beat us to it, drop the ref we added. + * The actual lifetime of the extent_buffer in the radix tree is + * adequately protected by the refcount, but the TREE_REF bit and + * its corresponding reference are not. To protect against this + * class of races, we call check_buffer_tree_ref from the codepaths + * which trigger io after they set eb->io_pages. Note that once io is + * initiated, TREE_REF can no longer be cleared, so that is the + * moment at which any such race is best fixed. */ refs = atomic_read(&eb->refs); if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) @@ -5527,6 +5530,11 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num) clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); eb->read_mirror = 0; atomic_set(&eb->io_pages, num_reads); + /* + * It is possible for releasepage to clear the TREE_REF bit before we + * set io_pages. See check_buffer_tree_ref for a more detailed comment. + */ + check_buffer_tree_ref(eb); for (i = 0; i < num_pages; i++) { page = eb->pages[i]; diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index 7887317033c9..af92525dbb16 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -509,7 +509,7 @@ static int process_leaf(struct btrfs_root *root, switch (key.type) { case BTRFS_EXTENT_ITEM_KEY: *num_bytes = key.offset; - /* fall through */ + fallthrough; case BTRFS_METADATA_ITEM_KEY: *bytenr = key.objectid; ret = process_extent_item(fs_info, path, &key, i, diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 41ee88633769..c7bd3fdd7792 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -879,8 +879,8 @@ static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info, return false; } global_rsv->reserved -= ticket->bytes; + remove_ticket(space_info, ticket); ticket->bytes = 0; - list_del_init(&ticket->list); wake_up(&ticket->wait); space_info->tickets_id++; if (global_rsv->reserved < global_rsv->size) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index bc73fd670702..c3826ae883f0 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -523,7 +523,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, case Opt_compress_force: case Opt_compress_force_type: compress_force = true; - /* Fallthrough */ + fallthrough; case Opt_compress: case Opt_compress_type: saved_compress_type = btrfs_test_opt(info, @@ -622,7 +622,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, btrfs_set_opt(info->mount_opt, NOSSD); btrfs_clear_and_info(info, SSD, "not using ssd optimizations"); - /* Fallthrough */ + fallthrough; case Opt_nossd_spread: btrfs_clear_and_info(info, SSD_SPREAD, "not using spread ssd allocation scheme"); @@ -793,7 +793,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, case Opt_recovery: btrfs_warn(info, "'recovery' is deprecated, use 'usebackuproot' instead"); - /* fall through */ + fallthrough; case Opt_usebackuproot: btrfs_info(info, "trying to use backup root at mount time"); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index f067b5934c46..75af2334b2e3 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -408,7 +408,7 @@ static inline enum btrfs_map_op btrfs_op(struct bio *bio) return BTRFS_MAP_WRITE; default: WARN_ON_ONCE(1); - /* fall through */ + fallthrough; case REQ_OP_READ: return BTRFS_MAP_READ; } |