diff options
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- | fs/btrfs/disk-io.c | 162 |
1 files changed, 129 insertions, 33 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 289f1f09481d..c9a3036c23bf 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -42,6 +42,7 @@ #include "discard.h" #include "space-info.h" #include "zoned.h" +#include "subpage.h" #define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN |\ BTRFS_HEADER_FLAG_RELOC |\ @@ -440,6 +441,74 @@ static int btree_read_extent_buffer_pages(struct extent_buffer *eb, return ret; } +static int csum_one_extent_buffer(struct extent_buffer *eb) +{ + struct btrfs_fs_info *fs_info = eb->fs_info; + u8 result[BTRFS_CSUM_SIZE]; + int ret; + + ASSERT(memcmp_extent_buffer(eb, fs_info->fs_devices->metadata_uuid, + offsetof(struct btrfs_header, fsid), + BTRFS_FSID_SIZE) == 0); + csum_tree_block(eb, result); + + if (btrfs_header_level(eb)) + ret = btrfs_check_node(eb); + else + ret = btrfs_check_leaf_full(eb); + + if (ret < 0) { + btrfs_print_tree(eb, 0); + btrfs_err(fs_info, + "block=%llu write time tree block corruption detected", + eb->start); + WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); + return ret; + } + write_extent_buffer(eb, result, 0, fs_info->csum_size); + + return 0; +} + +/* Checksum all dirty extent buffers in one bio_vec */ +static int csum_dirty_subpage_buffers(struct btrfs_fs_info *fs_info, + struct bio_vec *bvec) +{ + struct page *page = bvec->bv_page; + u64 bvec_start = page_offset(page) + bvec->bv_offset; + u64 cur; + int ret = 0; + + for (cur = bvec_start; cur < bvec_start + bvec->bv_len; + cur += fs_info->nodesize) { + struct extent_buffer *eb; + bool uptodate; + + eb = find_extent_buffer(fs_info, cur); + uptodate = btrfs_subpage_test_uptodate(fs_info, page, cur, + fs_info->nodesize); + + /* A dirty eb shouldn't disappear from buffer_radix */ + if (WARN_ON(!eb)) + return -EUCLEAN; + + if (WARN_ON(cur != btrfs_header_bytenr(eb))) { + free_extent_buffer(eb); + return -EUCLEAN; + } + if (WARN_ON(!uptodate)) { + free_extent_buffer(eb); + return -EUCLEAN; + } + + ret = csum_one_extent_buffer(eb); + free_extent_buffer(eb); + if (ret < 0) + return ret; + } + return ret; +} + /* * Checksum a dirty tree block before IO. This has extra checks to make sure * we only fill in the checksum field in the first page of a multi-page block. @@ -450,9 +519,10 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct bio_vec *bvec struct page *page = bvec->bv_page; u64 start = page_offset(page); u64 found_start; - u8 result[BTRFS_CSUM_SIZE]; struct extent_buffer *eb; - int ret; + + if (fs_info->sectorsize < PAGE_SIZE) + return csum_dirty_subpage_buffers(fs_info, bvec); eb = (struct extent_buffer *)page->private; if (page != eb->pages[0]) @@ -474,28 +544,7 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct bio_vec *bvec if (WARN_ON(!PageUptodate(page))) return -EUCLEAN; - ASSERT(memcmp_extent_buffer(eb, fs_info->fs_devices->metadata_uuid, - offsetof(struct btrfs_header, fsid), - BTRFS_FSID_SIZE) == 0); - - csum_tree_block(eb, result); - - if (btrfs_header_level(eb)) - ret = btrfs_check_node(eb); - else - ret = btrfs_check_leaf_full(eb); - - if (ret < 0) { - btrfs_print_tree(eb, 0); - btrfs_err(fs_info, - "block=%llu write time tree block corruption detected", - eb->start); - WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); - return ret; - } - write_extent_buffer(eb, result, 0, fs_info->csum_size); - - return 0; + return csum_one_extent_buffer(eb); } static int check_tree_block_fsid(struct extent_buffer *eb) @@ -992,14 +1041,48 @@ static void btree_invalidatepage(struct page *page, unsigned int offset, static int btree_set_page_dirty(struct page *page) { #ifdef DEBUG + struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb); + struct btrfs_subpage *subpage; struct extent_buffer *eb; + int cur_bit = 0; + u64 page_start = page_offset(page); + + if (fs_info->sectorsize == PAGE_SIZE) { + BUG_ON(!PagePrivate(page)); + eb = (struct extent_buffer *)page->private; + BUG_ON(!eb); + BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); + BUG_ON(!atomic_read(&eb->refs)); + btrfs_assert_tree_locked(eb); + return __set_page_dirty_nobuffers(page); + } + ASSERT(PagePrivate(page) && page->private); + subpage = (struct btrfs_subpage *)page->private; + + ASSERT(subpage->dirty_bitmap); + while (cur_bit < BTRFS_SUBPAGE_BITMAP_SIZE) { + unsigned long flags; + u64 cur; + u16 tmp = (1 << cur_bit); + + spin_lock_irqsave(&subpage->lock, flags); + if (!(tmp & subpage->dirty_bitmap)) { + spin_unlock_irqrestore(&subpage->lock, flags); + cur_bit++; + continue; + } + spin_unlock_irqrestore(&subpage->lock, flags); + cur = page_start + cur_bit * fs_info->sectorsize; - BUG_ON(!PagePrivate(page)); - eb = (struct extent_buffer *)page->private; - BUG_ON(!eb); - BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); - BUG_ON(!atomic_read(&eb->refs)); - btrfs_assert_tree_locked(eb); + eb = find_extent_buffer(fs_info, cur); + ASSERT(eb); + ASSERT(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); + ASSERT(atomic_read(&eb->refs)); + btrfs_assert_tree_locked(eb); + free_extent_buffer(eb); + + cur_bit += (fs_info->nodesize >> fs_info->sectorsize_bits); + } #endif return __set_page_dirty_nobuffers(page); } @@ -1807,14 +1890,21 @@ static int cleaner_kthread(void *arg) btrfs_run_defrag_inodes(fs_info); /* - * Acquires fs_info->delete_unused_bgs_mutex to avoid racing + * Acquires fs_info->reclaim_bgs_lock to avoid racing * with relocation (btrfs_relocate_chunk) and relocation * acquires fs_info->cleaner_mutex (btrfs_relocate_block_group) - * after acquiring fs_info->delete_unused_bgs_mutex. So we + * after acquiring fs_info->reclaim_bgs_lock. So we * can't hold, nor need to, fs_info->cleaner_mutex when deleting * unused block groups. */ btrfs_delete_unused_bgs(fs_info); + + /* + * Reclaim block groups in the reclaim_bgs list after we deleted + * all unused block_groups. This possibly gives us some more free + * space. + */ + btrfs_reclaim_bgs(fs_info); sleep: clear_and_wake_up_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags); if (kthread_should_park()) @@ -2793,7 +2883,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) spin_lock_init(&fs_info->treelog_bg_lock); rwlock_init(&fs_info->tree_mod_log_lock); mutex_init(&fs_info->unused_bg_unpin_mutex); - mutex_init(&fs_info->delete_unused_bgs_mutex); + mutex_init(&fs_info->reclaim_bgs_lock); mutex_init(&fs_info->reloc_mutex); mutex_init(&fs_info->delalloc_root_mutex); mutex_init(&fs_info->zoned_meta_io_lock); @@ -2803,6 +2893,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) INIT_LIST_HEAD(&fs_info->space_info); INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); INIT_LIST_HEAD(&fs_info->unused_bgs); + INIT_LIST_HEAD(&fs_info->reclaim_bgs); #ifdef CONFIG_BTRFS_DEBUG INIT_LIST_HEAD(&fs_info->allocated_roots); INIT_LIST_HEAD(&fs_info->allocated_ebs); @@ -2891,6 +2982,9 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) fs_info->swapfile_pins = RB_ROOT; fs_info->send_in_progress = 0; + + fs_info->bg_reclaim_threshold = BTRFS_DEFAULT_RECLAIM_THRESH; + INIT_WORK(&fs_info->reclaim_bgs_work, btrfs_reclaim_bgs_work); } static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block *sb) @@ -4249,6 +4343,8 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) cancel_work_sync(&fs_info->async_data_reclaim_work); cancel_work_sync(&fs_info->preempt_reclaim_work); + cancel_work_sync(&fs_info->reclaim_bgs_work); + /* Cancel or finish ongoing discard work */ btrfs_discard_cleanup(fs_info); |