diff options
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- | fs/btrfs/inode.c | 170 |
1 files changed, 98 insertions, 72 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 015910079e73..56032c518b26 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -368,6 +368,7 @@ struct async_chunk { u64 end; unsigned int write_flags; struct list_head extents; + struct cgroup_subsys_state *blkcg_css; struct btrfs_work work; atomic_t *pending; }; @@ -712,10 +713,12 @@ cleanup_and_bail_uncompressed: * to our extent and set things up for the async work queue to run * cow_file_range to do the normal delalloc dance. */ - if (page_offset(async_chunk->locked_page) >= start && - page_offset(async_chunk->locked_page) <= end) + if (async_chunk->locked_page && + (page_offset(async_chunk->locked_page) >= start && + page_offset(async_chunk->locked_page)) <= end) { __set_page_dirty_nobuffers(async_chunk->locked_page); /* unlocked later on in the async handlers */ + } if (redirty) extent_range_redirty_for_io(inode, start, end); @@ -795,7 +798,7 @@ retry: async_extent->start + async_extent->ram_size - 1, WB_SYNC_ALL); - else if (ret) + else if (ret && async_chunk->locked_page) unlock_page(async_chunk->locked_page); kfree(async_extent); cond_resched(); @@ -878,7 +881,8 @@ retry: ins.objectid, ins.offset, async_extent->pages, async_extent->nr_pages, - async_chunk->write_flags)) { + async_chunk->write_flags, + async_chunk->blkcg_css)) { struct page *p = async_extent->pages[0]; const u64 start = async_extent->start; const u64 end = start + async_extent->ram_size - 1; @@ -1196,6 +1200,8 @@ static noinline void async_cow_free(struct btrfs_work *work) async_chunk = container_of(work, struct async_chunk, work); if (async_chunk->inode) btrfs_add_delayed_iput(async_chunk->inode); + if (async_chunk->blkcg_css) + css_put(async_chunk->blkcg_css); /* * Since the pointer to 'pending' is at the beginning of the array of * async_chunk's, freeing it ensures the whole array has been freed. @@ -1204,12 +1210,14 @@ static noinline void async_cow_free(struct btrfs_work *work) kvfree(async_chunk->pending); } -static int cow_file_range_async(struct inode *inode, struct page *locked_page, +static int cow_file_range_async(struct inode *inode, + struct writeback_control *wbc, + struct page *locked_page, u64 start, u64 end, int *page_started, - unsigned long *nr_written, - unsigned int write_flags) + unsigned long *nr_written) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct cgroup_subsys_state *blkcg_css = wbc_blkcg_css(wbc); struct async_cow *ctx; struct async_chunk *async_chunk; unsigned long nr_pages; @@ -1218,6 +1226,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, int i; bool should_compress; unsigned nofs_flag; + const unsigned int write_flags = wbc_to_write_flags(wbc); unlock_extent(&BTRFS_I(inode)->io_tree, start, end); @@ -1264,14 +1273,45 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, async_chunk[i].inode = inode; async_chunk[i].start = start; async_chunk[i].end = cur_end; - async_chunk[i].locked_page = locked_page; async_chunk[i].write_flags = write_flags; INIT_LIST_HEAD(&async_chunk[i].extents); - btrfs_init_work(&async_chunk[i].work, - btrfs_delalloc_helper, - async_cow_start, async_cow_submit, - async_cow_free); + /* + * The locked_page comes all the way from writepage and its + * the original page we were actually given. As we spread + * this large delalloc region across multiple async_chunk + * structs, only the first struct needs a pointer to locked_page + * + * This way we don't need racey decisions about who is supposed + * to unlock it. + */ + if (locked_page) { + /* + * Depending on the compressibility, the pages might or + * might not go through async. We want all of them to + * be accounted against wbc once. Let's do it here + * before the paths diverge. wbc accounting is used + * only for foreign writeback detection and doesn't + * need full accuracy. Just account the whole thing + * against the first page. + */ + wbc_account_cgroup_owner(wbc, locked_page, + cur_end - start); + async_chunk[i].locked_page = locked_page; + locked_page = NULL; + } else { + async_chunk[i].locked_page = NULL; + } + + if (blkcg_css != blkcg_root_css) { + css_get(blkcg_css); + async_chunk[i].blkcg_css = blkcg_css; + } else { + async_chunk[i].blkcg_css = NULL; + } + + btrfs_init_work(&async_chunk[i].work, async_cow_start, + async_cow_submit, async_cow_free); nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE); atomic_add(nr_pages, &fs_info->async_delalloc_pages); @@ -1697,7 +1737,6 @@ int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page, { int ret; int force_cow = need_force_cow(inode, start, end); - unsigned int write_flags = wbc_to_write_flags(wbc); if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW && !force_cow) { ret = run_delalloc_nocow(inode, locked_page, start, end, @@ -1712,9 +1751,8 @@ int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page, } else { set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &BTRFS_I(inode)->runtime_flags); - ret = cow_file_range_async(inode, locked_page, start, end, - page_started, nr_written, - write_flags); + ret = cow_file_range_async(inode, wbc, locked_page, start, end, + page_started, nr_written); } if (ret) btrfs_cleanup_ordered_extents(inode, locked_page, start, @@ -2110,7 +2148,7 @@ static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio, } mapit: - ret = btrfs_map_bio(fs_info, bio, mirror_num, 0); + ret = btrfs_map_bio(fs_info, bio, mirror_num); out: if (ret) { @@ -2214,12 +2252,16 @@ again: mapping_set_error(page->mapping, ret); end_extent_writepage(page, ret, page_start, page_end); ClearPageChecked(page); - goto out; + goto out_reserved; } ClearPageChecked(page); set_page_dirty(page); +out_reserved: btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); + if (ret) + btrfs_delalloc_release_space(inode, data_reserved, page_start, + PAGE_SIZE, true); out: unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end, &cached_state); @@ -2260,8 +2302,7 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end) SetPageChecked(page); get_page(page); - btrfs_init_work(&fixup->work, btrfs_fixup_helper, - btrfs_writepage_fixup_worker, NULL, NULL); + btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL); fixup->page = page; btrfs_queue_work(fs_info->fixup_workers, &fixup->work); return -EBUSY; @@ -2675,7 +2716,7 @@ static noinline int relink_extent_backref(struct btrfs_path *path, key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - inode = btrfs_iget(fs_info->sb, &key, root, NULL); + inode = btrfs_iget(fs_info->sb, &key, root); if (IS_ERR(inode)) { srcu_read_unlock(&fs_info->subvol_srcu, index); return 0; @@ -2999,7 +3040,7 @@ out_kfree: static void btrfs_release_delalloc_bytes(struct btrfs_fs_info *fs_info, u64 start, u64 len) { - struct btrfs_block_group_cache *cache; + struct btrfs_block_group *cache; cache = btrfs_lookup_block_group(fs_info, start); ASSERT(cache); @@ -3027,7 +3068,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) int compress_type = 0; int ret = 0; u64 logical_len = ordered_extent->len; - bool nolock; + bool freespace_inode; bool truncated = false; bool range_locked = false; bool clear_new_delalloc_bytes = false; @@ -3038,7 +3079,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) !test_bit(BTRFS_ORDERED_DIRECT, &ordered_extent->flags)) clear_new_delalloc_bytes = true; - nolock = btrfs_is_free_space_inode(BTRFS_I(inode)); + freespace_inode = btrfs_is_free_space_inode(BTRFS_I(inode)); if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) { ret = -EIO; @@ -3069,8 +3110,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset, ordered_extent->len); btrfs_ordered_update_i_size(inode, 0, ordered_extent); - if (nolock) - trans = btrfs_join_transaction_nolock(root); + if (freespace_inode) + trans = btrfs_join_transaction_spacecache(root); else trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { @@ -3104,8 +3145,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) EXTENT_DEFRAG, 0, 0, &cached_state); } - if (nolock) - trans = btrfs_join_transaction_nolock(root); + if (freespace_inode) + trans = btrfs_join_transaction_spacecache(root); else trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { @@ -3254,7 +3295,6 @@ void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start, struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_ordered_extent *ordered_extent = NULL; struct btrfs_workqueue *wq; - btrfs_work_func_t func; trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); @@ -3263,16 +3303,12 @@ void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start, end - start + 1, uptodate)) return; - if (btrfs_is_free_space_inode(BTRFS_I(inode))) { + if (btrfs_is_free_space_inode(BTRFS_I(inode))) wq = fs_info->endio_freespace_worker; - func = btrfs_freespace_write_helper; - } else { + else wq = fs_info->endio_write_workers; - func = btrfs_endio_write_helper; - } - btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL, - NULL); + btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL); btrfs_queue_work(wq, &ordered_extent->work); } @@ -3531,7 +3567,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) found_key.objectid = found_key.offset; found_key.type = BTRFS_INODE_ITEM_KEY; found_key.offset = 0; - inode = btrfs_iget(fs_info->sb, &found_key, root, NULL); + inode = btrfs_iget(fs_info->sb, &found_key, root); ret = PTR_ERR_OR_ZERO(inode); if (ret && ret != -ENOENT) goto out; @@ -5153,7 +5189,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) hole_em->block_len = 0; hole_em->orig_block_len = 0; hole_em->ram_bytes = hole_size; - hole_em->bdev = fs_info->fs_devices->latest_bdev; hole_em->compress_type = BTRFS_COMPRESS_NONE; hole_em->generation = fs_info->generation; @@ -5750,12 +5785,14 @@ static struct inode *btrfs_iget_locked(struct super_block *s, return inode; } -/* Get an inode object given its location and corresponding root. - * Returns in *is_new if the inode was read from disk +/* + * Get an inode object given its location and corresponding root. + * Path can be preallocated to prevent recursing back to iget through + * allocator. NULL is also valid but may require an additional allocation + * later. */ struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location, - struct btrfs_root *root, int *new, - struct btrfs_path *path) + struct btrfs_root *root, struct btrfs_path *path) { struct inode *inode; @@ -5770,8 +5807,6 @@ struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location, if (!ret) { inode_tree_add(inode); unlock_new_inode(inode); - if (new) - *new = 1; } else { iget_failed(inode); /* @@ -5789,9 +5824,9 @@ struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location, } struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, - struct btrfs_root *root, int *new) + struct btrfs_root *root) { - return btrfs_iget_path(s, location, root, new, NULL); + return btrfs_iget_path(s, location, root, NULL); } static struct inode *new_simple_dir(struct super_block *s, @@ -5857,7 +5892,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) return ERR_PTR(ret); if (location.type == BTRFS_INODE_ITEM_KEY) { - inode = btrfs_iget(dir->i_sb, &location, root, NULL); + inode = btrfs_iget(dir->i_sb, &location, root); if (IS_ERR(inode)) return inode; @@ -5882,7 +5917,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) else inode = new_simple_dir(dir->i_sb, &location, sub_root); } else { - inode = btrfs_iget(dir->i_sb, &location, sub_root, NULL); + inode = btrfs_iget(dir->i_sb, &location, sub_root); } srcu_read_unlock(&fs_info->subvol_srcu, index); @@ -6931,8 +6966,6 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); - if (em) - em->bdev = fs_info->fs_devices->latest_bdev; read_unlock(&em_tree->lock); if (em) { @@ -6948,7 +6981,6 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, err = -ENOMEM; goto out; } - em->bdev = fs_info->fs_devices->latest_bdev; em->start = EXTENT_MAP_HOLE; em->orig_start = EXTENT_MAP_HOLE; em->len = (u64)-1; @@ -7207,7 +7239,6 @@ struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode, err = -ENOMEM; goto out; } - em->bdev = NULL; ASSERT(hole_em); /* @@ -7567,7 +7598,6 @@ static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len, { struct extent_map_tree *em_tree; struct extent_map *em; - struct btrfs_root *root = BTRFS_I(inode)->root; int ret; ASSERT(type == BTRFS_ORDERED_PREALLOC || @@ -7585,7 +7615,6 @@ static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len, em->len = len; em->block_len = block_len; em->block_start = block_start; - em->bdev = root->fs_info->fs_devices->latest_bdev; em->orig_block_len = orig_block_len; em->ram_bytes = ram_bytes; em->generation = -1; @@ -7624,6 +7653,8 @@ static int btrfs_get_blocks_direct_read(struct extent_map *em, struct inode *inode, u64 start, u64 len) { + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + if (em->block_start == EXTENT_MAP_HOLE || test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) return -ENOENT; @@ -7633,7 +7664,7 @@ static int btrfs_get_blocks_direct_read(struct extent_map *em, bh_result->b_blocknr = (em->block_start + (start - em->start)) >> inode->i_blkbits; bh_result->b_size = len; - bh_result->b_bdev = em->bdev; + bh_result->b_bdev = fs_info->fs_devices->latest_bdev; set_buffer_mapped(bh_result); return 0; @@ -7716,7 +7747,7 @@ skip_cow: bh_result->b_blocknr = (em->block_start + (start - em->start)) >> inode->i_blkbits; bh_result->b_size = len; - bh_result->b_bdev = em->bdev; + bh_result->b_bdev = fs_info->fs_devices->latest_bdev; set_buffer_mapped(bh_result); if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) @@ -7858,7 +7889,7 @@ static inline blk_status_t submit_dio_repair_bio(struct inode *inode, if (ret) return ret; - ret = btrfs_map_bio(fs_info, bio, mirror_num, 0); + ret = btrfs_map_bio(fs_info, bio, mirror_num); return ret; } @@ -8211,18 +8242,14 @@ static void __endio_write_update_ordered(struct inode *inode, struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_ordered_extent *ordered = NULL; struct btrfs_workqueue *wq; - btrfs_work_func_t func; u64 ordered_offset = offset; u64 ordered_bytes = bytes; u64 last_offset; - if (btrfs_is_free_space_inode(BTRFS_I(inode))) { + if (btrfs_is_free_space_inode(BTRFS_I(inode))) wq = fs_info->endio_freespace_worker; - func = btrfs_freespace_write_helper; - } else { + else wq = fs_info->endio_write_workers; - func = btrfs_endio_write_helper; - } while (ordered_offset < offset + bytes) { last_offset = ordered_offset; @@ -8230,9 +8257,8 @@ static void __endio_write_update_ordered(struct inode *inode, &ordered_offset, ordered_bytes, uptodate)) { - btrfs_init_work(&ordered->work, func, - finish_ordered_fn, - NULL, NULL); + btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, + NULL); btrfs_queue_work(wq, &ordered->work); } /* @@ -8389,7 +8415,7 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio, goto err; } map: - ret = btrfs_map_bio(fs_info, bio, 0, 0); + ret = btrfs_map_bio(fs_info, bio, 0); err: return ret; } @@ -9321,7 +9347,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->io_failure_tree.track_uptodate = true; atomic_set(&ei->sync_writers, 0); mutex_init(&ei->log_mutex); - mutex_init(&ei->delalloc_mutex); btrfs_ordered_inode_tree_init(&ei->ordered_tree); INIT_LIST_HEAD(&ei->delalloc_inodes); INIT_LIST_HEAD(&ei->delayed_iput); @@ -9550,6 +9575,9 @@ static int btrfs_rename_exchange(struct inode *old_dir, goto out_notrans; } + if (dest != root) + btrfs_record_root_in_trans(trans, dest); + /* * We need to find a free sequence number both in the source and * in the destination directory for the exchange. @@ -10116,8 +10144,7 @@ static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode init_completion(&work->completion); INIT_LIST_HEAD(&work->list); work->inode = inode; - btrfs_init_work(&work->work, btrfs_flush_delalloc_helper, - btrfs_run_delalloc_work, NULL, NULL); + btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL); return work; } @@ -10450,7 +10477,6 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, em->block_len = ins.offset; em->orig_block_len = ins.offset; em->ram_bytes = ins.offset; - em->bdev = fs_info->fs_devices->latest_bdev; set_bit(EXTENT_FLAG_PREALLOC, &em->flags); em->generation = trans->transid; @@ -10806,7 +10832,7 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, start = 0; while (start < isize) { u64 logical_block_start, physical_block_start; - struct btrfs_block_group_cache *bg; + struct btrfs_block_group *bg; u64 len = isize - start; em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0); |