diff options
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 60 |
1 files changed, 59 insertions, 1 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a5e64dda2db9..dbc115a25798 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9005,6 +9005,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size) INIT_LIST_HEAD(&cache->bg_list); INIT_LIST_HEAD(&cache->ro_list); btrfs_init_free_space_ctl(cache); + atomic_set(&cache->trimming, 0); return cache; } @@ -9306,7 +9307,8 @@ static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) } int btrfs_remove_block_group(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 group_start) + struct btrfs_root *root, u64 group_start, + struct extent_map *em) { struct btrfs_path *path; struct btrfs_block_group_cache *block_group; @@ -9319,6 +9321,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, int index; int factor; struct btrfs_caching_control *caching_ctl = NULL; + bool remove_em; root = root->fs_info->extent_root; @@ -9464,6 +9467,61 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, memcpy(&key, &block_group->key, sizeof(key)); + lock_chunks(root); + spin_lock(&block_group->lock); + block_group->removed = 1; + /* + * At this point trimming can't start on this block group, because we + * removed the block group from the tree fs_info->block_group_cache_tree + * so no one can find it anymore and even if someone already got this + * block group before we removed it from the rbtree, they have already + * incremented block_group->trimming - if they didn't, they won't find + * any free space entries because we already removed them all when we + * called btrfs_remove_free_space_cache(). + * + * And we must not remove the extent map from the fs_info->mapping_tree + * to prevent the same logical address range and physical device space + * ranges from being reused for a new block group. 
This is because our + * fs trim operation (btrfs_trim_fs() / btrfs_ioctl_fitrim()) is + * completely transactionless, so while it is trimming a range the + * currently running transaction might finish and a new one start, + * allowing for new block groups to be created that can reuse the same + * physical device locations unless we take this special care. + */ + remove_em = (atomic_read(&block_group->trimming) == 0); + /* + * Make sure a trimmer task always sees the em in the pinned_chunks list + * if it sees block_group->removed == 1 (needs to lock block_group->lock + * before checking block_group->removed). + */ + if (!remove_em) { + /* + * Our em might be in trans->transaction->pending_chunks which + * is protected by fs_info->chunk_mutex ([lock|unlock]_chunks), + * and so is the fs_info->pinned_chunks list. + * + * So at this point we must be holding the chunk_mutex to avoid + * any races with chunk allocation (more specifically at + * volumes.c:contains_pending_extent()), to ensure it always + * sees the em, either in the pending_chunks list or in the + * pinned_chunks list. + */ + list_move_tail(&em->list, &root->fs_info->pinned_chunks); + } + spin_unlock(&block_group->lock); + unlock_chunks(root); + + if (remove_em) { + struct extent_map_tree *em_tree; + + em_tree = &root->fs_info->mapping_tree.map_tree; + write_lock(&em_tree->lock); + remove_extent_mapping(em_tree, em); + write_unlock(&em_tree->lock); + /* once for the tree */ + free_extent_map(em); + } + btrfs_put_block_group(block_group); btrfs_put_block_group(block_group); |