diff options
Diffstat (limited to 'fs/f2fs')
-rw-r--r-- | fs/f2fs/Kconfig | 10 | ||||
-rw-r--r-- | fs/f2fs/acl.h | 2 | ||||
-rw-r--r-- | fs/f2fs/checkpoint.c | 52 | ||||
-rw-r--r-- | fs/f2fs/compress.c | 263 | ||||
-rw-r--r-- | fs/f2fs/data.c | 389 | ||||
-rw-r--r-- | fs/f2fs/debug.c | 64 | ||||
-rw-r--r-- | fs/f2fs/dir.c | 376 | ||||
-rw-r--r-- | fs/f2fs/extent_cache.c | 18 | ||||
-rw-r--r-- | fs/f2fs/f2fs.h | 270 | ||||
-rw-r--r-- | fs/f2fs/file.c | 659 | ||||
-rw-r--r-- | fs/f2fs/gc.c | 200 | ||||
-rw-r--r-- | fs/f2fs/gc.h | 2 | ||||
-rw-r--r-- | fs/f2fs/hash.c | 77 | ||||
-rw-r--r-- | fs/f2fs/inline.c | 71 | ||||
-rw-r--r-- | fs/f2fs/inode.c | 4 | ||||
-rw-r--r-- | fs/f2fs/namei.c | 37 | ||||
-rw-r--r-- | fs/f2fs/node.c | 132 | ||||
-rw-r--r-- | fs/f2fs/node.h | 5 | ||||
-rw-r--r-- | fs/f2fs/recovery.c | 63 | ||||
-rw-r--r-- | fs/f2fs/segment.c | 149 | ||||
-rw-r--r-- | fs/f2fs/segment.h | 12 | ||||
-rw-r--r-- | fs/f2fs/super.c | 272 | ||||
-rw-r--r-- | fs/f2fs/sysfs.c | 124 | ||||
-rw-r--r-- | fs/f2fs/trace.h | 2 | ||||
-rw-r--r-- | fs/f2fs/verity.c | 41 | ||||
-rw-r--r-- | fs/f2fs/xattr.c | 4 | ||||
-rw-r--r-- | fs/f2fs/xattr.h | 8 |
27 files changed, 2394 insertions, 912 deletions
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index bb68d21e1f8c..d13c5c6a9787 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -127,3 +127,13 @@ config F2FS_FS_ZSTD default y help Support ZSTD compress algorithm, if unsure, say Y. + +config F2FS_FS_LZORLE + bool "LZO-RLE compression support" + depends on F2FS_FS_COMPRESSION + depends on F2FS_FS_LZO + select LZO_COMPRESS + select LZO_DECOMPRESS + default y + help + Support LZO-RLE compress algorithm, if unsure, say Y. diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h index b96823c59b15..124868c13f80 100644 --- a/fs/f2fs/acl.h +++ b/fs/f2fs/acl.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * fs/f2fs/acl.h * diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 852890b72d6a..ff807e14c891 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -86,6 +86,8 @@ repeat: return ERR_PTR(err); } + f2fs_update_iostat(sbi, FS_META_READ_IO, F2FS_BLKSIZE); + lock_page(page); if (unlikely(page->mapping != mapping)) { f2fs_put_page(page, 1); @@ -220,6 +222,7 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, .is_por = (type == META_POR), }; struct blk_plug plug; + int err; if (unlikely(type == META_POR)) fio.op_flags &= ~REQ_META; @@ -263,8 +266,11 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, } fio.page = page; - f2fs_submit_page_bio(&fio); - f2fs_put_page(page, 0); + err = f2fs_submit_page_bio(&fio); + f2fs_put_page(page, err ? 1 : 0); + + if (!err) + f2fs_update_iostat(sbi, FS_META_READ_IO, F2FS_BLKSIZE); } out: blk_finish_plug(&plug); @@ -517,7 +523,7 @@ void f2fs_remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) __remove_ino_entry(sbi, ino, type); } -/* mode should be APPEND_INO or UPDATE_INO */ +/* mode should be APPEND_INO, UPDATE_INO or TRANS_DIR_INO */ bool f2fs_exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode) { struct inode_management *im = &sbi->im[mode]; @@ -889,8 +895,8 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) int i; int err; - sbi->ckpt = f2fs_kzalloc(sbi, array_size(blk_size, cp_blks), - GFP_KERNEL); + sbi->ckpt = f2fs_kvzalloc(sbi, array_size(blk_size, cp_blks), + GFP_KERNEL); if (!sbi->ckpt) return -ENOMEM; /* @@ -1160,10 +1166,12 @@ static int block_operations(struct f2fs_sb_info *sbi) .nr_to_write = LONG_MAX, .for_reclaim = 0, }; - struct blk_plug plug; int err = 0, cnt = 0; - blk_start_plug(&plug); + /* + * Let's flush inline_data in dirty node pages. + */ + f2fs_flush_inline_data(sbi); retry_flush_quotas: f2fs_lock_all(sbi); @@ -1192,7 +1200,7 @@ retry_flush_dents: f2fs_unlock_all(sbi); err = f2fs_sync_dirty_inodes(sbi, DIR_INODE); if (err) - goto out; + return err; cond_resched(); goto retry_flush_quotas; } @@ -1208,7 +1216,7 @@ retry_flush_dents: f2fs_unlock_all(sbi); err = f2fs_sync_inode_meta(sbi); if (err) - goto out; + return err; cond_resched(); goto retry_flush_quotas; } @@ -1224,7 +1232,7 @@ retry_flush_nodes: if (err) { up_write(&sbi->node_change); f2fs_unlock_all(sbi); - goto out; + return err; } cond_resched(); goto retry_flush_nodes; @@ -1236,8 +1244,6 @@ retry_flush_nodes: */ __prepare_cp_block(sbi); up_write(&sbi->node_change); -out: - blk_finish_plug(&plug); return err; } @@ -1252,14 +1258,19 @@ void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type) DEFINE_WAIT(wait); for (;;) { - prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE); - if (!get_pages(sbi, type)) break; if (unlikely(f2fs_cp_error(sbi))) break; + if (type == F2FS_DIRTY_META) + f2fs_sync_meta_pages(sbi, META, LONG_MAX, + FS_CP_META_IO); + else if (type == F2FS_WB_CP_DATA) + f2fs_submit_merged_write(sbi, DATA); + + prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE); io_schedule_timeout(DEFAULT_IO_TIMEOUT); } finish_wait(&sbi->cp_wait, &wait); @@ -1406,7 +1417,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) curseg_alloc_type(sbi, i + CURSEG_HOT_DATA); } - /* 2 cp + n data seg summary + orphan inode blocks */ + /* 2 cp + n data seg summary + orphan inode blocks */ data_sum_blocks = f2fs_npages_for_summary_flush(sbi, false); spin_lock_irqsave(&sbi->cp_lock, flags); if (data_sum_blocks < NR_CURSEG_DATA_TYPE) @@ -1506,9 +1517,10 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* * invalidate intermediate page cache borrowed from meta inode which are - * used for migration of encrypted or verity inode's blocks. + * used for migration of encrypted, verity or compressed inode's blocks. */ - if (f2fs_sb_has_encrypt(sbi) || f2fs_sb_has_verity(sbi)) + if (f2fs_sb_has_encrypt(sbi) || f2fs_sb_has_verity(sbi) || + f2fs_sb_has_compression(sbi)) invalidate_mapping_pages(META_MAPPING(sbi), MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1); @@ -1553,7 +1565,8 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) return 0; f2fs_warn(sbi, "Start checkpoint disabled!"); } - mutex_lock(&sbi->cp_mutex); + if (cpc->reason != CP_RESIZE) + mutex_lock(&sbi->cp_mutex); if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && ((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) || @@ -1622,7 +1635,8 @@ stop: f2fs_update_time(sbi, CP_TIME); trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); out: - mutex_unlock(&sbi->cp_mutex); + if (cpc->reason != CP_RESIZE) + mutex_unlock(&sbi->cp_mutex); return err; } diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index df7b2d15eacd..1dfb126a0cb2 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -49,6 +49,13 @@ bool f2fs_is_compressed_page(struct page *page) return false; if (IS_ATOMIC_WRITTEN_PAGE(page) || IS_DUMMY_WRITTEN_PAGE(page)) return false; + /* + * page->private may be set with pid. + * pid_max is enough to check if it is traced. + */ + if (IS_IO_TRACED_PAGE(page)) + return false; + f2fs_bug_on(F2FS_M_SB(page->mapping), *((u32 *)page_private(page)) != F2FS_COMPRESSED_PAGE_MAGIC); return true; @@ -65,15 +72,6 @@ static void f2fs_set_compressed_page(struct page *page, page->mapping = inode->i_mapping; } -static void f2fs_put_compressed_page(struct page *page) -{ - set_page_private(page, (unsigned long)NULL); - ClearPagePrivate(page); - page->mapping = NULL; - unlock_page(page); - put_page(page); -} - static void f2fs_drop_rpages(struct compress_ctx *cc, int len, bool unlock) { int i; @@ -98,8 +96,7 @@ static void f2fs_unlock_rpages(struct compress_ctx *cc, int len) f2fs_drop_rpages(cc, len, true); } -static void f2fs_put_rpages_mapping(struct compress_ctx *cc, - struct address_space *mapping, +static void f2fs_put_rpages_mapping(struct address_space *mapping, pgoff_t start, int len) { int i; @@ -236,7 +233,12 @@ static int lz4_init_compress_ctx(struct compress_ctx *cc) if (!cc->private) return -ENOMEM; - cc->clen = LZ4_compressBound(PAGE_SIZE << cc->log_cluster_size); + /* + * we do not change cc->clen to LZ4_compressBound(inputsize) to + * adapt worst compress case, because lz4 compressor can handle + * output budget properly. + */ + cc->clen = cc->rlen - PAGE_SIZE - COMPRESS_HEADER_SIZE; return 0; } @@ -252,11 +254,9 @@ static int lz4_compress_pages(struct compress_ctx *cc) len = LZ4_compress_default(cc->rbuf, cc->cbuf->cdata, cc->rlen, cc->clen, cc->private); - if (!len) { - printk_ratelimited("%sF2FS-fs (%s): lz4 compress failed\n", - KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id); - return -EIO; - } + if (!len) + return -EAGAIN; + cc->clen = len; return 0; } @@ -366,6 +366,13 @@ static int zstd_compress_pages(struct compress_ctx *cc) return -EIO; } + /* + * there is compressed data remained in intermediate buffer due to + * no more space in cbuf.cdata + */ + if (ret) + return -EAGAIN; + cc->clen = outbuf.pos; return 0; } @@ -451,6 +458,31 @@ static const struct f2fs_compress_ops f2fs_zstd_ops = { }; #endif +#ifdef CONFIG_F2FS_FS_LZO +#ifdef CONFIG_F2FS_FS_LZORLE +static int lzorle_compress_pages(struct compress_ctx *cc) +{ + int ret; + + ret = lzorle1x_1_compress(cc->rbuf, cc->rlen, cc->cbuf->cdata, + &cc->clen, cc->private); + if (ret != LZO_E_OK) { + printk_ratelimited("%sF2FS-fs (%s): lzo-rle compress failed, ret:%d\n", + KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, ret); + return -EIO; + } + return 0; +} + +static const struct f2fs_compress_ops f2fs_lzorle_ops = { + .init_compress_ctx = lzo_init_compress_ctx, + .destroy_compress_ctx = lzo_destroy_compress_ctx, + .compress_pages = lzorle_compress_pages, + .decompress_pages = lzo_decompress_pages, +}; +#endif +#endif + static const struct f2fs_compress_ops *f2fs_cops[COMPRESS_MAX] = { #ifdef CONFIG_F2FS_FS_LZO &f2fs_lzo_ops, @@ -467,6 +499,11 @@ static const struct f2fs_compress_ops *f2fs_cops[COMPRESS_MAX] = { #else NULL, #endif +#if defined(CONFIG_F2FS_FS_LZO) && defined(CONFIG_F2FS_FS_LZORLE) + &f2fs_lzorle_ops, +#else + NULL, +#endif }; bool f2fs_is_compress_backend_ready(struct inode *inode) @@ -476,17 +513,47 @@ bool f2fs_is_compress_backend_ready(struct inode *inode) return f2fs_cops[F2FS_I(inode)->i_compress_algorithm]; } -static struct page *f2fs_grab_page(void) +static mempool_t *compress_page_pool; +static int num_compress_pages = 512; +module_param(num_compress_pages, uint, 0444); +MODULE_PARM_DESC(num_compress_pages, + "Number of intermediate compress pages to preallocate"); + +int f2fs_init_compress_mempool(void) +{ + compress_page_pool = mempool_create_page_pool(num_compress_pages, 0); + if (!compress_page_pool) + return -ENOMEM; + + return 0; +} + +void f2fs_destroy_compress_mempool(void) +{ + mempool_destroy(compress_page_pool); +} + +static struct page *f2fs_compress_alloc_page(void) { struct page *page; - page = alloc_page(GFP_NOFS); - if (!page) - return NULL; + page = mempool_alloc(compress_page_pool, GFP_NOFS); lock_page(page); + return page; } +static void f2fs_compress_free_page(struct page *page) +{ + if (!page) + return; + set_page_private(page, (unsigned long)NULL); + ClearPagePrivate(page); + page->mapping = NULL; + unlock_page(page); + mempool_free(page, compress_page_pool); +} + static int f2fs_compress_pages(struct compress_ctx *cc) { struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode); @@ -516,7 +583,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc) } for (i = 0; i < cc->nr_cpages; i++) { - cc->cpages[i] = f2fs_grab_page(); + cc->cpages[i] = f2fs_compress_alloc_page(); if (!cc->cpages[i]) { ret = -ENOMEM; goto out_free_cpages; @@ -561,7 +628,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc) vunmap(cc->rbuf); for (i = nr_cpages; i < cc->nr_cpages; i++) { - f2fs_put_compressed_page(cc->cpages[i]); + f2fs_compress_free_page(cc->cpages[i]); cc->cpages[i] = NULL; } @@ -581,7 +648,7 @@ out_vunmap_rbuf: out_free_cpages: for (i = 0; i < cc->nr_cpages; i++) { if (cc->cpages[i]) - f2fs_put_compressed_page(cc->cpages[i]); + f2fs_compress_free_page(cc->cpages[i]); } kfree(cc->cpages); cc->cpages = NULL; @@ -603,6 +670,7 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity) const struct f2fs_compress_ops *cops = f2fs_cops[fi->i_compress_algorithm]; int ret; + int i; dec_page_count(sbi, F2FS_RD_DATA); @@ -621,6 +689,26 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity) goto out_free_dic; } + dic->tpages = f2fs_kzalloc(sbi, sizeof(struct page *) * + dic->cluster_size, GFP_NOFS); + if (!dic->tpages) { + ret = -ENOMEM; + goto out_free_dic; + } + + for (i = 0; i < dic->cluster_size; i++) { + if (dic->rpages[i]) { + dic->tpages[i] = dic->rpages[i]; + continue; + } + + dic->tpages[i] = f2fs_compress_alloc_page(); + if (!dic->tpages[i]) { + ret = -ENOMEM; + goto out_free_dic; + } + } + if (cops->init_decompress_ctx) { ret = cops->init_decompress_ctx(dic); if (ret) @@ -761,7 +849,7 @@ static int f2fs_compressed_blocks(struct compress_ctx *cc) } /* return # of valid blocks in compressed cluster */ -static int f2fs_cluster_blocks(struct compress_ctx *cc, bool compr) +static int f2fs_cluster_blocks(struct compress_ctx *cc) { return __f2fs_cluster_blocks(cc, false); } @@ -775,7 +863,7 @@ int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index) .cluster_idx = index >> F2FS_I(inode)->i_log_cluster_size, }; - return f2fs_cluster_blocks(&cc, false); + return f2fs_cluster_blocks(&cc); } static bool cluster_may_compress(struct compress_ctx *cc) @@ -788,6 +876,8 @@ static bool cluster_may_compress(struct compress_ctx *cc) return false; if (!f2fs_cluster_is_full(cc)) return false; + if (unlikely(f2fs_cp_error(F2FS_I_SB(cc->inode)))) + return false; return __cluster_may_compress(cc); } @@ -824,7 +914,7 @@ static int prepare_compress_overwrite(struct compress_ctx *cc, bool prealloc; retry: - ret = f2fs_cluster_blocks(cc, false); + ret = f2fs_cluster_blocks(cc); if (ret <= 0) return ret; @@ -879,7 +969,7 @@ retry: if (!PageUptodate(page)) { f2fs_unlock_rpages(cc, i + 1); - f2fs_put_rpages_mapping(cc, mapping, start_idx, + f2fs_put_rpages_mapping(mapping, start_idx, cc->cluster_size); f2fs_destroy_compress_ctx(cc); goto retry; @@ -887,7 +977,7 @@ retry: } if (prealloc) { - __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); + f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); set_new_dnode(&dn, cc->inode, NULL, NULL, 0); @@ -902,7 +992,7 @@ retry: break; } - __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); + f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); } if (likely(!ret)) { @@ -914,7 +1004,7 @@ retry: unlock_pages: f2fs_unlock_rpages(cc, i); release_pages: - f2fs_put_rpages_mapping(cc, mapping, start_idx, i); + f2fs_put_rpages_mapping(mapping, start_idx, i); f2fs_destroy_compress_ctx(cc); return ret; } @@ -954,6 +1044,55 @@ bool f2fs_compress_write_end(struct inode *inode, void *fsdata, return first_index; } +int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock) +{ + void *fsdata = NULL; + struct page *pagep; + int log_cluster_size = F2FS_I(inode)->i_log_cluster_size; + pgoff_t start_idx = from >> (PAGE_SHIFT + log_cluster_size) << + log_cluster_size; + int err; + + err = f2fs_is_compressed_cluster(inode, start_idx); + if (err < 0) + return err; + + /* truncate normal cluster */ + if (!err) + return f2fs_do_truncate_blocks(inode, from, lock); + + /* truncate compressed cluster */ + err = f2fs_prepare_compress_overwrite(inode, &pagep, + start_idx, &fsdata); + + /* should not be a normal cluster */ + f2fs_bug_on(F2FS_I_SB(inode), err == 0); + + if (err <= 0) + return err; + + if (err > 0) { + struct page **rpages = fsdata; + int cluster_size = F2FS_I(inode)->i_cluster_size; + int i; + + for (i = cluster_size - 1; i >= 0; i--) { + loff_t start = rpages[i]->index << PAGE_SHIFT; + + if (from <= start) { + zero_user_segment(rpages[i], 0, PAGE_SIZE); + } else { + zero_user_segment(rpages[i], from - start, + PAGE_SIZE); + break; + } + } + + f2fs_compress_write_end(inode, fsdata, start_idx, true); + } + return 0; +} + static int f2fs_write_compressed_pages(struct compress_ctx *cc, int *submitted, struct writeback_control *wbc, @@ -975,7 +1114,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, .submitted = false, .io_type = io_type, .io_wbc = wbc, - .encrypted = f2fs_encrypted_file(cc->inode), + .encrypted = fscrypt_inode_uses_fs_layer_crypto(cc->inode), }; struct dnode_of_data dn; struct node_info ni; @@ -985,8 +1124,16 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, loff_t psize; int i, err; - if (!f2fs_trylock_op(sbi)) + if (IS_NOQUOTA(inode)) { + /* + * We need to wait for node_write to avoid block allocation during + * checkpoint. This can only happen to quota writes which can cause + * the below discard race condition. + */ + down_read(&sbi->node_write); + } else if (!f2fs_trylock_op(sbi)) { return -EAGAIN; + } set_new_dnode(&dn, cc->inode, NULL, NULL, 0); @@ -1026,6 +1173,13 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, f2fs_set_compressed_page(cc->cpages[i], inode, cc->rpages[i + 1]->index, cic); fio.compressed_page = cc->cpages[i]; + + fio.old_blkaddr = data_blkaddr(dn.inode, dn.node_page, + dn.ofs_in_node + i + 1); + + /* wait for GCed page writeback via META_MAPPING */ + f2fs_wait_on_block_writeback(inode, fio.old_blkaddr); + if (fio.encrypted) { fio.page = cc->rpages[i + 1]; err = f2fs_encrypt_one_page(&fio); @@ -1092,7 +1246,10 @@ unlock_continue: set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); f2fs_put_dnode(&dn); - f2fs_unlock_op(sbi); + if (IS_NOQUOTA(inode)) + up_read(&sbi->node_write); + else + f2fs_unlock_op(sbi); spin_lock(&fi->i_size_lock); if (fi->last_disk_size < psize) @@ -1118,7 +1275,10 @@ out_put_cic: out_put_dnode: f2fs_put_dnode(&dn); out_unlock_op: - f2fs_unlock_op(sbi); + if (IS_NOQUOTA(inode)) + up_read(&sbi->node_write); + else + f2fs_unlock_op(sbi); return -EAGAIN; } @@ -1132,7 +1292,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page) if (unlikely(bio->bi_status)) mapping_set_error(cic->inode->i_mapping, -EIO); - f2fs_put_compressed_page(page); + f2fs_compress_free_page(page); dec_page_count(sbi, F2FS_WB_DATA); @@ -1197,6 +1357,12 @@ retry_write: congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); lock_page(cc->rpages[i]); + + if (!PageDirty(cc->rpages[i])) { + unlock_page(cc->rpages[i]); + continue; + } + clear_page_dirty_for_io(cc->rpages[i]); goto retry_write; } @@ -1240,6 +1406,8 @@ int f2fs_write_multi_pages(struct compress_ctx *cc, err = f2fs_write_compressed_pages(cc, submitted, wbc, io_type); cops->destroy_compress_ctx(cc); + kfree(cc->cpages); + cc->cpages = NULL; if (!err) return 0; f2fs_bug_on(F2FS_I_SB(cc->inode), err != -EAGAIN); @@ -1293,7 +1461,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) for (i = 0; i < dic->nr_cpages; i++) { struct page *page; - page = f2fs_grab_page(); + page = f2fs_compress_alloc_page(); if (!page) goto out_free; @@ -1302,22 +1470,6 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) dic->cpages[i] = page; } - dic->tpages = f2fs_kzalloc(sbi, sizeof(struct page *) * - dic->cluster_size, GFP_NOFS); - if (!dic->tpages) - goto out_free; - - for (i = 0; i < dic->cluster_size; i++) { - if (cc->rpages[i]) { - dic->tpages[i] = cc->rpages[i]; - continue; - } - - dic->tpages[i] = f2fs_grab_page(); - if (!dic->tpages[i]) - goto out_free; - } - return dic; out_free: @@ -1335,8 +1487,7 @@ void f2fs_free_dic(struct decompress_io_ctx *dic) continue; if (!dic->tpages[i]) continue; - unlock_page(dic->tpages[i]); - put_page(dic->tpages[i]); + f2fs_compress_free_page(dic->tpages[i]); } kfree(dic->tpages); } @@ -1345,7 +1496,7 @@ void f2fs_free_dic(struct decompress_io_ctx *dic) for (i = 0; i < dic->nr_cpages; i++) { if (!dic->cpages[i]) continue; - f2fs_put_compressed_page(dic->cpages[i]); + f2fs_compress_free_page(dic->cpages[i]); } kfree(dic->cpages); } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cdf2f626bea7..73683e58a08d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -14,11 +14,13 @@ #include <linux/pagevec.h> #include <linux/blkdev.h> #include <linux/bio.h> +#include <linux/blk-crypto.h> #include <linux/swap.h> #include <linux/prefetch.h> #include <linux/uio.h> #include <linux/cleancache.h> #include <linux/sched/signal.h> +#include <linux/fiemap.h> #include "f2fs.h" #include "node.h" @@ -85,7 +87,7 @@ static bool __is_cp_guaranteed(struct page *page) sbi = F2FS_I_SB(inode); if (inode->i_ino == F2FS_META_INO(sbi) || - inode->i_ino == F2FS_NODE_INO(sbi) || + inode->i_ino == F2FS_NODE_INO(sbi) || S_ISDIR(inode->i_mode) || (S_ISREG(inode->i_mode) && (f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) || @@ -114,7 +116,8 @@ static enum count_type __read_io_type(struct page *page) /* postprocessing steps for read bios */ enum bio_post_read_step { STEP_DECRYPT, - STEP_DECOMPRESS, + STEP_DECOMPRESS_NOWQ, /* handle normal cluster data inplace */ + STEP_DECOMPRESS, /* handle compressed cluster data in workqueue */ STEP_VERITY, }; @@ -457,6 +460,33 @@ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages) return bio; } +static void f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode, + pgoff_t first_idx, + const struct f2fs_io_info *fio, + gfp_t gfp_mask) +{ + /* + * The f2fs garbage collector sets ->encrypted_page when it wants to + * read/write raw data without encryption. + */ + if (!fio || !fio->encrypted_page) + fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask); +} + +static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode, + pgoff_t next_idx, + const struct f2fs_io_info *fio) +{ + /* + * The f2fs garbage collector sets ->encrypted_page when it wants to + * read/write raw data without encryption. + */ + if (fio && fio->encrypted_page) + return !bio_has_crypt_ctx(bio); + + return fscrypt_mergeable_bio(bio, inode, next_idx); +} + static inline void __submit_bio(struct f2fs_sb_info *sbi, struct bio *bio, enum page_type type) { @@ -513,6 +543,34 @@ void f2fs_submit_bio(struct f2fs_sb_info *sbi, __submit_bio(sbi, bio, type); } +static void __attach_io_flag(struct f2fs_io_info *fio) +{ + struct f2fs_sb_info *sbi = fio->sbi; + unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1; + unsigned int io_flag, fua_flag, meta_flag; + + if (fio->type == DATA) + io_flag = sbi->data_io_flag; + else if (fio->type == NODE) + io_flag = sbi->node_io_flag; + else + return; + + fua_flag = io_flag & temp_mask; + meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask; + + /* + * data/node io flag bits per temp: + * REQ_META | REQ_FUA | + * 5 | 4 | 3 | 2 | 1 | 0 | + * Cold | Warm | Hot | Cold | Warm | Hot | + */ + if ((1 << fio->temp) & meta_flag) + fio->op_flags |= REQ_META; + if ((1 << fio->temp) & fua_flag) + fio->op_flags |= REQ_FUA; +} + static void __submit_merged_bio(struct f2fs_bio_info *io) { struct f2fs_io_info *fio = &io->fio; @@ -520,6 +578,7 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) if (!io->bio) return; + __attach_io_flag(fio); bio_set_op_attrs(io->bio, fio->op, fio->op_flags); if (is_read_io(fio->op)) @@ -653,6 +712,9 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) /* Allocate a new bio */ bio = __bio_alloc(fio, 1); + f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, + fio->page->index, fio, GFP_NOIO); + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_put(bio); return -EFAULT; @@ -661,6 +723,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) if (fio->io_wbc && !is_read_io(fio->op)) wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE); + __attach_io_flag(fio); bio_set_op_attrs(bio, fio->op, fio->op_flags); inc_page_count(fio->sbi, is_read_io(fio->op) ? @@ -731,9 +794,10 @@ static void del_bio_entry(struct bio_entry *be) kmem_cache_free(bio_entry_slab, be); } -static int add_ipu_page(struct f2fs_sb_info *sbi, struct bio **bio, +static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio, struct page *page) { + struct f2fs_sb_info *sbi = fio->sbi; enum temp_type temp; bool found = false; int ret = -EAGAIN; @@ -750,13 +814,19 @@ static int add_ipu_page(struct f2fs_sb_info *sbi, struct bio **bio, found = true; - if (bio_add_page(*bio, page, PAGE_SIZE, 0) == - PAGE_SIZE) { + f2fs_bug_on(sbi, !page_is_mergeable(sbi, *bio, + *fio->last_block, + fio->new_blkaddr)); + if (f2fs_crypt_mergeable_bio(*bio, + fio->page->mapping->host, + fio->page->index, fio) && + bio_add_page(*bio, page, PAGE_SIZE, 0) == + PAGE_SIZE) { ret = 0; break; } - /* bio is full */ + /* page can't be merged into bio; submit the bio */ del_bio_entry(be); __submit_bio(sbi, *bio, DATA); break; @@ -847,11 +917,14 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio) alloc_new: if (!bio) { bio = __bio_alloc(fio, BIO_MAX_PAGES); + __attach_io_flag(fio); + f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, + fio->page->index, fio, GFP_NOIO); bio_set_op_attrs(bio, fio->op, fio->op_flags); add_bio_entry(fio->sbi, bio, page, fio->temp); } else { - if (add_ipu_page(fio->sbi, &bio, page)) + if (add_ipu_page(fio, &bio, page)) goto alloc_new; } @@ -903,8 +976,11 @@ next: inc_page_count(sbi, WB_DATA_TYPE(bio_page)); - if (io->bio && !io_is_mergeable(sbi, io->bio, io, fio, - io->last_block_in_bio, fio->new_blkaddr)) + if (io->bio && + (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio, + fio->new_blkaddr) || + !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host, + bio_page->index, fio))) __submit_merged_bio(io); alloc_new: if (io->bio == NULL) { @@ -916,6 +992,8 @@ alloc_new: goto skip; } io->bio = __bio_alloc(fio, BIO_MAX_PAGES); + f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host, + bio_page->index, fio, GFP_NOIO); io->fio = *fio; } @@ -960,14 +1038,17 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, for_write); if (!bio) return ERR_PTR(-ENOMEM); + + f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS); + f2fs_target_device(sbi, blkaddr, bio); bio->bi_end_io = f2fs_read_end_io; bio_set_op_attrs(bio, REQ_OP_READ, op_flag); - if (f2fs_encrypted_file(inode)) + if (fscrypt_inode_uses_fs_layer_crypto(inode)) post_read_steps |= 1 << STEP_DECRYPT; if (f2fs_compressed_file(inode)) - post_read_steps |= 1 << STEP_DECOMPRESS; + post_read_steps |= 1 << STEP_DECOMPRESS_NOWQ; if (f2fs_need_verity(inode, first_idx)) post_read_steps |= 1 << STEP_VERITY; @@ -992,12 +1073,13 @@ static void f2fs_release_read_bio(struct bio *bio) /* This can handle encryption stuffs */ static int f2fs_submit_page_read(struct inode *inode, struct page *page, - block_t blkaddr, bool for_write) + block_t blkaddr, int op_flags, bool for_write) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct bio *bio; - bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0, page->index, for_write); + bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags, + page->index, for_write); if (IS_ERR(bio)) return PTR_ERR(bio); @@ -1010,6 +1092,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, } ClearPageError(page); inc_page_count(sbi, F2FS_RD_DATA); + f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); __submit_bio(sbi, bio, DATA); return 0; } @@ -1111,7 +1194,7 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index) { - struct extent_info ei = {0,0,0}; + struct extent_info ei = {0, 0, 0}; struct inode *inode = dn->inode; if (f2fs_lookup_extent_cache(inode, index, &ei)) { @@ -1183,7 +1266,8 @@ got_it: return page; } - err = f2fs_submit_page_read(inode, page, dn.data_blkaddr, for_write); + err = f2fs_submit_page_read(inode, page, dn.data_blkaddr, + op_flags, for_write); if (err) goto put_err; return page; @@ -1332,7 +1416,7 @@ alloc: set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); old_blkaddr = dn->data_blkaddr; f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr, - &sum, seg_type, NULL, false); + &sum, seg_type, NULL); if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) invalidate_mapping_pages(META_MAPPING(sbi), old_blkaddr, old_blkaddr); @@ -1392,7 +1476,7 @@ map_blocks: return err; } -void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) +void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) { if (flag == F2FS_GET_BLOCK_PRE_AIO) { if (lock) @@ -1457,7 +1541,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, next_dnode: if (map->m_may_create) - __do_map_lock(sbi, flag, true); + f2fs_do_map_lock(sbi, flag, true); /* When reading holes, we need its node page */ set_new_dnode(&dn, inode, NULL, NULL, 0); @@ -1606,7 +1690,7 @@ skip: f2fs_put_dnode(&dn); if (map->m_may_create) { - __do_map_lock(sbi, flag, false); + f2fs_do_map_lock(sbi, flag, false); f2fs_balance_fs(sbi, dn.node_changed); } goto next_dnode; @@ -1632,7 +1716,7 @@ sync_out: f2fs_put_dnode(&dn); unlock_out: if (map->m_may_create) { - __do_map_lock(sbi, flag, false); + f2fs_do_map_lock(sbi, flag, false); f2fs_balance_fs(sbi, dn.node_changed); } out: @@ -1779,6 +1863,7 @@ static int f2fs_xattr_fiemap(struct inode *inode, flags |= FIEMAP_EXTENT_LAST; err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags); + trace_f2fs_fiemap(inode, 0, phys, len, flags, err); if (err || err == 1) return err; } @@ -1802,12 +1887,33 @@ static int f2fs_xattr_fiemap(struct inode *inode, flags = FIEMAP_EXTENT_LAST; } - if (phys) + if (phys) { err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags); + trace_f2fs_fiemap(inode, 0, phys, len, flags, err); + } return (err < 0 ? err : 0); } +static loff_t max_inode_blocks(struct inode *inode) +{ + loff_t result = ADDRS_PER_INODE(inode); + loff_t leaf_count = ADDRS_PER_BLOCK(inode); + + /* two direct node blocks */ + result += (leaf_count * 2); + + /* two indirect node blocks */ + leaf_count *= NIDS_PER_BLOCK; + result += (leaf_count * 2); + + /* one double indirect node block */ + leaf_count *= NIDS_PER_BLOCK; + result += leaf_count; + + return result; +} + int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { @@ -1817,6 +1923,8 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 logical = 0, phys = 0, size = 0; u32 flags = 0; int ret = 0; + bool compr_cluster = false; + unsigned int cluster_size = F2FS_I(inode)->i_cluster_size; if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) { ret = f2fs_precache_extents(inode); @@ -1824,7 +1932,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, return ret; } - ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR); + ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_XATTR); if (ret) return ret; @@ -1851,6 +1959,9 @@ next: memset(&map_bh, 0, sizeof(struct buffer_head)); map_bh.b_size = len; + if (compr_cluster) + map_bh.b_size = blk_to_logical(inode, cluster_size - 1); + ret = get_data_block(inode, start_blk, &map_bh, 0, F2FS_GET_BLOCK_FIEMAP, &next_pgofs); if (ret) @@ -1861,7 +1972,7 @@ next: start_blk = next_pgofs; if (blk_to_logical(inode, start_blk) < blk_to_logical(inode, - F2FS_I_SB(inode)->max_file_blocks)) + max_inode_blocks(inode))) goto prep_next; flags |= FIEMAP_EXTENT_LAST; @@ -1873,11 +1984,39 @@ next: ret = fiemap_fill_next_extent(fieinfo, logical, phys, size, flags); + trace_f2fs_fiemap(inode, logical, phys, size, flags, ret); + if (ret) + goto out; + size = 0; } - if (start_blk > last_blk || ret) + if (start_blk > last_blk) goto out; + if (compr_cluster) { + compr_cluster = false; + + + logical = blk_to_logical(inode, start_blk - 1); + phys = blk_to_logical(inode, map_bh.b_blocknr); + size = blk_to_logical(inode, cluster_size); + + flags |= FIEMAP_EXTENT_ENCODED; + + start_blk += cluster_size - 1; + + if (start_blk > last_blk) + goto out; + + goto prep_next; + } + + if (map_bh.b_blocknr == COMPRESS_ADDR) { + compr_cluster = true; + start_blk++; + goto prep_next; + } + logical = blk_to_logical(inode, start_blk); phys = blk_to_logical(inode, map_bh.b_blocknr); size = map_bh.b_size; @@ -1988,8 +2127,9 @@ zero_out: * This page will go to BIO. Do we need to send this * BIO off first? */ - if (bio && !page_is_mergeable(F2FS_I_SB(inode), bio, - *last_block_in_bio, block_nr)) { + if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio, + *last_block_in_bio, block_nr) || + !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) { submit_and_realloc: __submit_bio(F2FS_I_SB(inode), bio, DATA); bio = NULL; @@ -2015,6 +2155,7 @@ submit_and_realloc: goto submit_and_realloc; inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); + f2fs_update_iostat(F2FS_I_SB(inode), FS_DATA_READ_IO, F2FS_BLKSIZE); ClearPageError(page); *last_block_in_bio = block_nr; goto out; @@ -2078,9 +2219,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, if (ret) goto out; - /* cluster was overwritten as normal cluster */ - if (dn.data_blkaddr != COMPRESS_ADDR) - goto out; + f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR); for (i = 1; i < cc->cluster_size; i++) { block_t blkaddr; @@ -2113,12 +2252,14 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, for (i = 0; i < dic->nr_cpages; i++) { struct page *page = dic->cpages[i]; block_t blkaddr; + struct bio_post_read_ctx *ctx; blkaddr = data_blkaddr(dn.inode, dn.node_page, dn.ofs_in_node + i + 1); - if (bio && !page_is_mergeable(sbi, bio, - *last_block_in_bio, blkaddr)) { + if (bio && (!page_is_mergeable(sbi, bio, + *last_block_in_bio, blkaddr) || + !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) { submit_and_realloc: __submit_bio(sbi, bio, DATA); bio = NULL; @@ -2130,16 +2271,16 @@ submit_and_realloc: page->index, for_write); if (IS_ERR(bio)) { ret = PTR_ERR(bio); - bio = NULL; dic->failed = true; if (refcount_sub_and_test(dic->nr_cpages - i, - &dic->ref)) + &dic->ref)) { f2fs_decompress_end_io(dic->rpages, cc->cluster_size, true, false); - f2fs_free_dic(dic); + f2fs_free_dic(dic); + } f2fs_put_dnode(&dn); - *bio_ret = bio; + *bio_ret = NULL; return ret; } } @@ -2149,7 +2290,14 @@ submit_and_realloc: if (bio_add_page(bio, page, blocksize, 0) < blocksize) goto submit_and_realloc; + /* tag STEP_DECOMPRESS to handle IO in wq */ + ctx = bio->bi_private; + if (!(ctx->enabled_steps & (1 << STEP_DECOMPRESS))) + ctx->enabled_steps |= 1 << STEP_DECOMPRESS; + inc_page_count(sbi, F2FS_RD_DATA); + f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); + f2fs_update_iostat(sbi, FS_CDATA_READ_IO, F2FS_BLKSIZE); ClearPageError(page); *last_block_in_bio = blkaddr; } @@ -2177,13 +2325,11 @@ out: * use ->readpage() or do the necessary surgery to decouple ->readpages() * from read-ahead. */ -int f2fs_mpage_readpages(struct address_space *mapping, - struct list_head *pages, struct page *page, - unsigned nr_pages, bool is_readahead) +static int f2fs_mpage_readpages(struct inode *inode, + struct readahead_control *rac, struct page *page) { struct bio *bio = NULL; sector_t last_block_in_bio = 0; - struct inode *inode = mapping->host; struct f2fs_map_blocks map; #ifdef CONFIG_F2FS_FS_COMPRESSION struct compress_ctx cc = { @@ -2197,8 +2343,10 @@ int f2fs_mpage_readpages(struct address_space *mapping, .nr_cpages = 0, }; #endif + unsigned nr_pages = rac ? readahead_count(rac) : 1; unsigned max_nr_pages = nr_pages; int ret = 0; + bool drop_ra = false; map.m_pblk = 0; map.m_lblk = 0; @@ -2209,16 +2357,26 @@ int f2fs_mpage_readpages(struct address_space *mapping, map.m_seg_type = NO_CHECK_TYPE; map.m_may_create = false; - for (; nr_pages; nr_pages--) { - if (pages) { - page = list_last_entry(pages, struct page, lru); + /* + * Two readahead threads for same address range can cause race condition + * which fragments sequential read IOs. So let's avoid each other. + */ + if (rac && readahead_count(rac)) { + if (READ_ONCE(F2FS_I(inode)->ra_offset) == readahead_index(rac)) + drop_ra = true; + else + WRITE_ONCE(F2FS_I(inode)->ra_offset, + readahead_index(rac)); + } + for (; nr_pages; nr_pages--) { + if (rac) { + page = readahead_page(rac); prefetchw(&page->flags); - list_del(&page->lru); - if (add_to_page_cache_lru(page, mapping, - page_index(page), - readahead_gfp_mask(mapping))) - goto next_page; + if (drop_ra) { + f2fs_put_page(page, 1); + continue; + } } #ifdef CONFIG_F2FS_FS_COMPRESSION @@ -2228,7 +2386,7 @@ int f2fs_mpage_readpages(struct address_space *mapping, ret = f2fs_read_multi_pages(&cc, &bio, max_nr_pages, &last_block_in_bio, - is_readahead, false); + rac != NULL, false); f2fs_destroy_compress_ctx(&cc); if (ret) goto set_error_page; @@ -2251,7 +2409,7 @@ read_single_page: #endif ret = f2fs_read_single_page(inode, page, max_nr_pages, &map, - &bio, &last_block_in_bio, is_readahead); + &bio, &last_block_in_bio, rac); if (ret) { #ifdef CONFIG_F2FS_FS_COMPRESSION set_error_page: @@ -2260,8 +2418,10 @@ set_error_page: zero_user_segment(page, 0, PAGE_SIZE); unlock_page(page); } +#ifdef CONFIG_F2FS_FS_COMPRESSION next_page: - if (pages) +#endif + if (rac) put_page(page); #ifdef CONFIG_F2FS_FS_COMPRESSION @@ -2271,16 +2431,18 @@ next_page: ret = f2fs_read_multi_pages(&cc, &bio, max_nr_pages, &last_block_in_bio, - is_readahead, false); + rac != NULL, false); f2fs_destroy_compress_ctx(&cc); } } #endif } - BUG_ON(pages && !list_empty(pages)); if (bio) __submit_bio(F2FS_I_SB(inode), bio, DATA); - return pages ? 0 : ret; + + if (rac && readahead_count(rac) && !drop_ra) + WRITE_ONCE(F2FS_I(inode)->ra_offset, -1); + return ret; } static int f2fs_read_data_page(struct file *file, struct page *page) @@ -2299,28 +2461,24 @@ static int f2fs_read_data_page(struct file *file, struct page *page) if (f2fs_has_inline_data(inode)) ret = f2fs_read_inline_data(inode, page); if (ret == -EAGAIN) - ret = f2fs_mpage_readpages(page_file_mapping(page), - NULL, page, 1, false); + ret = f2fs_mpage_readpages(inode, NULL, page); return ret; } -static int f2fs_read_data_pages(struct file *file, - struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) +static void f2fs_readahead(struct readahead_control *rac) { - struct inode *inode = mapping->host; - struct page *page = list_last_entry(pages, struct page, lru); + struct inode *inode = rac->mapping->host; - trace_f2fs_readpages(inode, page, nr_pages); + trace_f2fs_readpages(inode, readahead_index(rac), readahead_count(rac)); if (!f2fs_is_compress_backend_ready(inode)) - return 0; + return; /* If the file has inline data, skip readpages */ if (f2fs_has_inline_data(inode)) - return 0; + return; - return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages, true); + f2fs_mpage_readpages(inode, rac, NULL); } int f2fs_encrypt_one_page(struct f2fs_io_info *fio) @@ -2337,6 +2495,9 @@ int f2fs_encrypt_one_page(struct f2fs_io_info *fio) /* wait for GCed page writeback via META_MAPPING */ f2fs_wait_on_block_writeback(inode, fio->old_blkaddr); + if (fscrypt_inode_uses_inline_crypto(inode)) + return 0; + retry_encrypt: fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page, PAGE_SIZE, 0, gfp_flags); @@ -2510,7 +2671,7 @@ got_it: f2fs_unlock_op(fio->sbi); err = f2fs_inplace_write_data(fio); if (err) { - if (f2fs_encrypted_file(inode)) + if (fscrypt_inode_uses_fs_layer_crypto(inode)) fscrypt_finalize_bounce_page(&fio->encrypted_page); if (PageWriteback(page)) end_page_writeback(page); @@ -2633,10 +2794,22 @@ write: f2fs_available_free_memory(sbi, BASE_CHECK)))) goto redirty_out; - /* Dentry blocks are controlled by checkpoint */ - if (S_ISDIR(inode->i_mode)) { + /* Dentry/quota blocks are controlled by checkpoint */ + if (S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) { + /* + * We need to wait for node_write to avoid block allocation during + * checkpoint. This can only happen to quota writes which can cause + * the below discard race condition. + */ + if (IS_NOQUOTA(inode)) + down_read(&sbi->node_write); + fio.need_lock = LOCK_DONE; err = f2fs_do_write_data_page(&fio); + + if (IS_NOQUOTA(inode)) + up_read(&sbi->node_write); + goto done; } @@ -2772,11 +2945,9 @@ static int f2fs_write_cache_pages(struct address_space *mapping, }; #endif int nr_pages; - pgoff_t uninitialized_var(writeback_index); pgoff_t index; pgoff_t end; /* Inclusive */ pgoff_t done_index; - int cycled; int range_whole = 0; xa_mark_t tag; int nwritten = 0; @@ -2792,19 +2963,13 @@ static int f2fs_write_cache_pages(struct address_space *mapping, clear_inode_flag(mapping->host, FI_HOT_DATA); if (wbc->range_cyclic) { - writeback_index = mapping->writeback_index; /* prev offset */ - index = writeback_index; - if (index == 0) - cycled = 1; - else - cycled = 0; + index = mapping->writeback_index; /* prev offset */ end = -1; } else { index = wbc->range_start >> PAGE_SHIFT; end = wbc->range_end >> PAGE_SHIFT; if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) range_whole = 1; - cycled = 1; /* ignore range_cyclic tests */ } if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) tag = PAGECACHE_TAG_TOWRITE; @@ -2969,12 +3134,13 @@ next: } } #endif - if ((!cycled && !done) || retry) { - cycled = 1; + if (retry) { index = 0; - end = writeback_index - 1; + end = -1; goto retry; } + if (wbc->range_cyclic && !done) + done_index = 0; if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = done_index; @@ -3138,7 +3304,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, if (f2fs_has_inline_data(inode) || (pos & PAGE_MASK) >= i_size_read(inode)) { - __do_map_lock(sbi, flag, true); + f2fs_do_map_lock(sbi, flag, true); locked = true; } @@ -3175,7 +3341,7 @@ restart: err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); if (err || dn.data_blkaddr == NULL_ADDR) { f2fs_put_dnode(&dn); - __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, + f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO); locked = true; @@ -3191,7 +3357,7 @@ out: f2fs_put_dnode(&dn); unlock_out: if (locked) - __do_map_lock(sbi, flag, false); + f2fs_do_map_lock(sbi, flag, false); return err; } @@ -3303,7 +3469,7 @@ repeat: err = -EFSCORRUPTED; goto fail; } - err = f2fs_submit_page_read(inode, page, blkaddr, true); + err = f2fs_submit_page_read(inode, page, blkaddr, 0, true); if (err) goto fail; @@ -3353,6 +3519,10 @@ static int f2fs_write_end(struct file *file, if (f2fs_compressed_file(inode) && fsdata) { f2fs_compress_write_end(inode, fsdata, page->index, copied); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + + if (pos + copied > i_size_read(inode) && + !f2fs_verity_in_progress(inode)) + f2fs_i_size_write(inode, pos + copied); return copied; } #endif @@ -3380,6 +3550,9 @@ static int check_direct_IO(struct inode *inode, struct iov_iter *iter, unsigned long align = offset | iov_iter_alignment(iter); struct block_device *bdev = inode->i_sb->s_bdev; + if (iov_iter_rw(iter) == READ && offset >= i_size_read(inode)) + return 1; + if (align & blocksize_mask) { if (bdev) blkbits = blksize_bits(bdev_logical_block_size(bdev)); @@ -3503,6 +3676,9 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) } else if (err < 0) { f2fs_write_failed(mapping, offset + count); } + } else { + if (err > 0) + f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, err); } out: @@ -3586,18 +3762,59 @@ static int f2fs_set_data_page_dirty(struct page *page) return 0; } + +static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block) +{ +#ifdef CONFIG_F2FS_FS_COMPRESSION + struct dnode_of_data dn; + sector_t start_idx, blknr = 0; + int ret; + + start_idx = round_down(block, F2FS_I(inode)->i_cluster_size); + + set_new_dnode(&dn, inode, NULL, NULL, 0); + ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE); + if (ret) + return 0; + + if (dn.data_blkaddr != COMPRESS_ADDR) { + dn.ofs_in_node += block - start_idx; + blknr = f2fs_data_blkaddr(&dn); + if (!__is_valid_data_blkaddr(blknr)) + blknr = 0; + } + + f2fs_put_dnode(&dn); + return blknr; +#else + return 0; +#endif +} + + static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) { struct inode *inode = mapping->host; + struct buffer_head tmp = { + .b_size = i_blocksize(inode), + }; + sector_t blknr = 0; if (f2fs_has_inline_data(inode)) - return 0; + goto out; /* make sure allocating whole blocks */ if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) filemap_write_and_wait(mapping); - return generic_block_bmap(mapping, block, get_data_block_bmap); + if (f2fs_compressed_file(inode)) + blknr = f2fs_bmap_compress(inode, block); + + if (!get_data_block_bmap(inode, block, &tmp, 0)) + blknr = tmp.b_blocknr; +out: + trace_f2fs_bmap(inode, block, blknr); + return blknr; } #ifdef CONFIG_MIGRATION @@ -3805,7 +4022,7 @@ static void f2fs_swap_deactivate(struct file *file) const struct address_space_operations f2fs_dblock_aops = { .readpage = f2fs_read_data_page, - .readpages = f2fs_read_data_pages, + .readahead = f2fs_readahead, .writepage = f2fs_write_data_page, .writepages = f2fs_write_data_pages, .write_begin = f2fs_write_begin, diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 0dbcb0f9c019..4276c0f79beb 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -174,6 +174,26 @@ static void update_general_status(struct f2fs_sb_info *sbi) for (i = META_CP; i < META_MAX; i++) si->meta_count[i] = atomic_read(&sbi->meta_count[i]); + for (i = 0; i < NO_CHECK_TYPE; i++) { + si->dirty_seg[i] = 0; + si->full_seg[i] = 0; + si->valid_blks[i] = 0; + } + + for (i = 0; i < MAIN_SEGS(sbi); i++) { + int blks = get_seg_entry(sbi, i)->valid_blocks; + int type = get_seg_entry(sbi, i)->type; + + if (!blks) + continue; + + if (blks == sbi->blocks_per_seg) + si->full_seg[type]++; + else + si->dirty_seg[type]++; + si->valid_blks[type] += blks; + } + for (i = 0; i < 2; i++) { si->segment_count[i] = sbi->segment_count[i]; si->block_count[i] = sbi->block_count[i]; @@ -329,30 +349,50 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", si->main_area_segs, si->main_area_sections, si->main_area_zones); - seq_printf(s, " - COLD data: %d, %d, %d\n", + seq_printf(s, " TYPE %8s %8s %8s %10s %10s %10s\n", + "segno", "secno", "zoneno", "dirty_seg", "full_seg", "valid_blk"); + seq_printf(s, " - COLD data: %8d %8d %8d %10u %10u %10u\n", si->curseg[CURSEG_COLD_DATA], si->cursec[CURSEG_COLD_DATA], - si->curzone[CURSEG_COLD_DATA]); - seq_printf(s, " - WARM data: %d, %d, %d\n", + si->curzone[CURSEG_COLD_DATA], + si->dirty_seg[CURSEG_COLD_DATA], + si->full_seg[CURSEG_COLD_DATA], + si->valid_blks[CURSEG_COLD_DATA]); + seq_printf(s, " - WARM data: %8d %8d %8d %10u %10u %10u\n", si->curseg[CURSEG_WARM_DATA], si->cursec[CURSEG_WARM_DATA], - si->curzone[CURSEG_WARM_DATA]); - seq_printf(s, " - HOT data: %d, %d, %d\n", + si->curzone[CURSEG_WARM_DATA], + si->dirty_seg[CURSEG_WARM_DATA], + si->full_seg[CURSEG_WARM_DATA], + si->valid_blks[CURSEG_WARM_DATA]); + seq_printf(s, " - HOT data: %8d %8d %8d %10u %10u %10u\n", si->curseg[CURSEG_HOT_DATA], si->cursec[CURSEG_HOT_DATA], - si->curzone[CURSEG_HOT_DATA]); - seq_printf(s, " - Dir dnode: %d, %d, %d\n", + si->curzone[CURSEG_HOT_DATA], + si->dirty_seg[CURSEG_HOT_DATA], + si->full_seg[CURSEG_HOT_DATA], + si->valid_blks[CURSEG_HOT_DATA]); + seq_printf(s, " - Dir dnode: %8d %8d %8d %10u %10u %10u\n", si->curseg[CURSEG_HOT_NODE], si->cursec[CURSEG_HOT_NODE], - si->curzone[CURSEG_HOT_NODE]); - seq_printf(s, " - File dnode: %d, %d, %d\n", + si->curzone[CURSEG_HOT_NODE], + si->dirty_seg[CURSEG_HOT_NODE], + si->full_seg[CURSEG_HOT_NODE], + si->valid_blks[CURSEG_HOT_NODE]); + seq_printf(s, " - File dnode: %8d %8d %8d %10u %10u %10u\n", si->curseg[CURSEG_WARM_NODE], si->cursec[CURSEG_WARM_NODE], - si->curzone[CURSEG_WARM_NODE]); - seq_printf(s, " - Indir nodes: %d, %d, %d\n", + si->curzone[CURSEG_WARM_NODE], + si->dirty_seg[CURSEG_WARM_NODE], + si->full_seg[CURSEG_WARM_NODE], + si->valid_blks[CURSEG_WARM_NODE]); + seq_printf(s, " - Indir nodes: %8d %8d %8d %10u %10u %10u\n", si->curseg[CURSEG_COLD_NODE], si->cursec[CURSEG_COLD_NODE], - si->curzone[CURSEG_COLD_NODE]); + si->curzone[CURSEG_COLD_NODE], + si->dirty_seg[CURSEG_COLD_NODE], + si->full_seg[CURSEG_COLD_NODE], + si->valid_blks[CURSEG_COLD_NODE]); seq_printf(s, "\n - Valid: %d\n - Dirty: %d\n", si->main_area_segs - si->dirty_count - si->prefree_count - si->free_segs, diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 44bfc464df78..069f498af1e3 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -70,6 +70,111 @@ unsigned char f2fs_get_de_type(struct f2fs_dir_entry *de) return DT_UNKNOWN; } +/* If @dir is casefolded, initialize @fname->cf_name from @fname->usr_fname. */ +int f2fs_init_casefolded_name(const struct inode *dir, + struct f2fs_filename *fname) +{ +#ifdef CONFIG_UNICODE + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + + if (IS_CASEFOLDED(dir)) { + fname->cf_name.name = f2fs_kmalloc(sbi, F2FS_NAME_LEN, + GFP_NOFS); + if (!fname->cf_name.name) + return -ENOMEM; + fname->cf_name.len = utf8_casefold(sbi->s_encoding, + fname->usr_fname, + fname->cf_name.name, + F2FS_NAME_LEN); + if ((int)fname->cf_name.len <= 0) { + kfree(fname->cf_name.name); + fname->cf_name.name = NULL; + if (f2fs_has_strict_mode(sbi)) + return -EINVAL; + /* fall back to treating name as opaque byte sequence */ + } + } +#endif + return 0; +} + +static int __f2fs_setup_filename(const struct inode *dir, + const struct fscrypt_name *crypt_name, + struct f2fs_filename *fname) +{ + int err; + + memset(fname, 0, sizeof(*fname)); + + fname->usr_fname = crypt_name->usr_fname; + fname->disk_name = crypt_name->disk_name; +#ifdef CONFIG_FS_ENCRYPTION + fname->crypto_buf = crypt_name->crypto_buf; +#endif + if (crypt_name->is_ciphertext_name) { + /* hash was decoded from the no-key name */ + fname->hash = cpu_to_le32(crypt_name->hash); + } else { + err = f2fs_init_casefolded_name(dir, fname); + if (err) { + f2fs_free_filename(fname); + return err; + } + f2fs_hash_filename(dir, fname); + } + return 0; +} + +/* + * Prepare to search for @iname in @dir. This is similar to + * fscrypt_setup_filename(), but this also handles computing the casefolded name + * and the f2fs dirhash if needed, then packing all the information about this + * filename up into a 'struct f2fs_filename'. + */ +int f2fs_setup_filename(struct inode *dir, const struct qstr *iname, + int lookup, struct f2fs_filename *fname) +{ + struct fscrypt_name crypt_name; + int err; + + err = fscrypt_setup_filename(dir, iname, lookup, &crypt_name); + if (err) + return err; + + return __f2fs_setup_filename(dir, &crypt_name, fname); +} + +/* + * Prepare to look up @dentry in @dir. This is similar to + * fscrypt_prepare_lookup(), but this also handles computing the casefolded name + * and the f2fs dirhash if needed, then packing all the information about this + * filename up into a 'struct f2fs_filename'. + */ +int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry, + struct f2fs_filename *fname) +{ + struct fscrypt_name crypt_name; + int err; + + err = fscrypt_prepare_lookup(dir, dentry, &crypt_name); + if (err) + return err; + + return __f2fs_setup_filename(dir, &crypt_name, fname); +} + +void f2fs_free_filename(struct f2fs_filename *fname) +{ +#ifdef CONFIG_FS_ENCRYPTION + kfree(fname->crypto_buf.name); + fname->crypto_buf.name = NULL; +#endif +#ifdef CONFIG_UNICODE + kfree(fname->cf_name.name); + fname->cf_name.name = NULL; +#endif +} + static unsigned long dir_block_index(unsigned int level, int dir_level, unsigned int idx) { @@ -84,8 +189,7 @@ static unsigned long dir_block_index(unsigned int level, static struct f2fs_dir_entry *find_in_block(struct inode *dir, struct page *dentry_page, - struct fscrypt_name *fname, - f2fs_hash_t namehash, + const struct f2fs_filename *fname, int *max_slots, struct page **res_page) { @@ -96,7 +200,7 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page); make_dentry_ptr_block(dir, &d, dentry_blk); - de = f2fs_find_target_dentry(fname, namehash, max_slots, &d); + de = f2fs_find_target_dentry(&d, fname, max_slots); if (de) *res_page = dentry_page; @@ -107,112 +211,57 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, /* * Test whether a case-insensitive directory entry matches the filename * being searched for. - * - * Returns: 0 if the directory entry matches, more than 0 if it - * doesn't match or less than zero on error. */ -int f2fs_ci_compare(const struct inode *parent, const struct qstr *name, - const struct qstr *entry, bool quick) +static bool f2fs_match_ci_name(const struct inode *dir, const struct qstr *name, + const u8 *de_name, u32 de_name_len) { - const struct f2fs_sb_info *sbi = F2FS_SB(parent->i_sb); + const struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); const struct unicode_map *um = sbi->s_encoding; - int ret; - - if (quick) - ret = utf8_strncasecmp_folded(um, name, entry); - else - ret = utf8_strncasecmp(um, name, entry); + struct qstr entry = QSTR_INIT(de_name, de_name_len); + int res; - if (ret < 0) { - /* Handle invalid character sequence as either an error - * or as an opaque byte sequence. + res = utf8_strncasecmp_folded(um, name, &entry); + if (res < 0) { + /* + * In strict mode, ignore invalid names. In non-strict mode, + * fall back to treating them as opaque byte sequences. */ - if (f2fs_has_strict_mode(sbi)) - return -EINVAL; - - if (name->len != entry->len) - return 1; - - return !!memcmp(name->name, entry->name, name->len); + if (f2fs_has_strict_mode(sbi) || name->len != entry.len) + return false; + return !memcmp(name->name, entry.name, name->len); } - - return ret; + return res == 0; } +#endif /* CONFIG_UNICODE */ -static void f2fs_fname_setup_ci_filename(struct inode *dir, - const struct qstr *iname, - struct fscrypt_str *cf_name) +static inline bool f2fs_match_name(const struct inode *dir, + const struct f2fs_filename *fname, + const u8 *de_name, u32 de_name_len) { - struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - - if (!IS_CASEFOLDED(dir)) { - cf_name->name = NULL; - return; - } + struct fscrypt_name f; - cf_name->name = f2fs_kmalloc(sbi, F2FS_NAME_LEN, GFP_NOFS); - if (!cf_name->name) - return; - - cf_name->len = utf8_casefold(sbi->s_encoding, - iname, cf_name->name, - F2FS_NAME_LEN); - if ((int)cf_name->len <= 0) { - kvfree(cf_name->name); - cf_name->name = NULL; - } -} -#endif - -static inline bool f2fs_match_name(struct f2fs_dentry_ptr *d, - struct f2fs_dir_entry *de, - struct fscrypt_name *fname, - struct fscrypt_str *cf_str, - unsigned long bit_pos, - f2fs_hash_t namehash) -{ #ifdef CONFIG_UNICODE - struct inode *parent = d->inode; - struct f2fs_sb_info *sbi = F2FS_I_SB(parent); - struct qstr entry; -#endif - - if (de->hash_code != namehash) - return false; + if (fname->cf_name.name) { + struct qstr cf = FSTR_TO_QSTR(&fname->cf_name); -#ifdef CONFIG_UNICODE - entry.name = d->filename[bit_pos]; - entry.len = de->name_len; - - if (sbi->s_encoding && IS_CASEFOLDED(parent)) { - if (cf_str->name) { - struct qstr cf = {.name = cf_str->name, - .len = cf_str->len}; - return !f2fs_ci_compare(parent, &cf, &entry, true); - } - return !f2fs_ci_compare(parent, fname->usr_fname, &entry, - false); + return f2fs_match_ci_name(dir, &cf, de_name, de_name_len); } #endif - if (fscrypt_match_name(fname, d->filename[bit_pos], - le16_to_cpu(de->name_len))) - return true; - return false; + f.usr_fname = fname->usr_fname; + f.disk_name = fname->disk_name; +#ifdef CONFIG_FS_ENCRYPTION + f.crypto_buf = fname->crypto_buf; +#endif + return fscrypt_match_name(&f, de_name, de_name_len); } -struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname, - f2fs_hash_t namehash, int *max_slots, - struct f2fs_dentry_ptr *d) +struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, + const struct f2fs_filename *fname, int *max_slots) { struct f2fs_dir_entry *de; - struct fscrypt_str cf_str = { .name = NULL, .len = 0 }; unsigned long bit_pos = 0; int max_len = 0; -#ifdef CONFIG_UNICODE - f2fs_fname_setup_ci_filename(d->inode, fname->usr_fname, &cf_str); -#endif - if (max_slots) *max_slots = 0; while (bit_pos < d->max) { @@ -229,7 +278,9 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname, continue; } - if (f2fs_match_name(d, de, fname, &cf_str, bit_pos, namehash)) + if (de->hash_code == fname->hash && + f2fs_match_name(d->inode, fname, d->filename[bit_pos], + le16_to_cpu(de->name_len))) goto found; if (max_slots && max_len > *max_slots) @@ -243,33 +294,27 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname, found: if (max_slots && max_len > *max_slots) *max_slots = max_len; - -#ifdef CONFIG_UNICODE - kvfree(cf_str.name); -#endif return de; } static struct f2fs_dir_entry *find_in_level(struct inode *dir, unsigned int level, - struct fscrypt_name *fname, + const struct f2fs_filename *fname, struct page **res_page) { - struct qstr name = FSTR_TO_QSTR(&fname->disk_name); - int s = GET_DENTRY_SLOTS(name.len); + int s = GET_DENTRY_SLOTS(fname->disk_name.len); unsigned int nbucket, nblock; unsigned int bidx, end_block; struct page *dentry_page; struct f2fs_dir_entry *de = NULL; bool room = false; int max_slots; - f2fs_hash_t namehash = f2fs_dentry_hash(dir, &name, fname); nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); nblock = bucket_blocks(level); bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level, - le32_to_cpu(namehash) % nbucket); + le32_to_cpu(fname->hash) % nbucket); end_block = bidx + nblock; for (; bidx < end_block; bidx++) { @@ -285,8 +330,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, } } - de = find_in_block(dir, dentry_page, fname, namehash, - &max_slots, res_page); + de = find_in_block(dir, dentry_page, fname, &max_slots, + res_page); if (de) break; @@ -295,8 +340,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, f2fs_put_page(dentry_page, 0); } - if (!de && room && F2FS_I(dir)->chash != namehash) { - F2FS_I(dir)->chash = namehash; + if (!de && room && F2FS_I(dir)->chash != fname->hash) { + F2FS_I(dir)->chash = fname->hash; F2FS_I(dir)->clevel = level; } @@ -304,7 +349,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, } struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, - struct fscrypt_name *fname, struct page **res_page) + const struct f2fs_filename *fname, + struct page **res_page) { unsigned long npages = dir_blocks(dir); struct f2fs_dir_entry *de = NULL; @@ -353,18 +399,10 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, const struct qstr *child, struct page **res_page) { struct f2fs_dir_entry *de = NULL; - struct fscrypt_name fname; + struct f2fs_filename fname; int err; -#ifdef CONFIG_UNICODE - if (f2fs_has_strict_mode(F2FS_I_SB(dir)) && IS_CASEFOLDED(dir) && - utf8_validate(F2FS_I_SB(dir)->s_encoding, child)) { - *res_page = ERR_PTR(-EINVAL); - return NULL; - } -#endif - - err = fscrypt_setup_filename(dir, child, 1, &fname); + err = f2fs_setup_filename(dir, child, 1, &fname); if (err) { if (err == -ENOENT) *res_page = NULL; @@ -375,7 +413,7 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, de = __f2fs_find_entry(dir, &fname, res_page); - fscrypt_free_filename(&fname); + f2fs_free_filename(&fname); return de; } @@ -416,7 +454,8 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, f2fs_put_page(page, 1); } -static void init_dent_inode(const struct qstr *name, struct page *ipage) +static void init_dent_inode(const struct f2fs_filename *fname, + struct page *ipage) { struct f2fs_inode *ri; @@ -424,16 +463,16 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage) /* copy name info. to this inode page */ ri = F2FS_INODE(ipage); - ri->i_namelen = cpu_to_le32(name->len); - memcpy(ri->i_name, name->name, name->len); + ri->i_namelen = cpu_to_le32(fname->disk_name.len); + memcpy(ri->i_name, fname->disk_name.name, fname->disk_name.len); set_page_dirty(ipage); } void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent, struct f2fs_dentry_ptr *d) { - struct qstr dot = QSTR_INIT(".", 1); - struct qstr dotdot = QSTR_INIT("..", 2); + struct fscrypt_str dot = FSTR_INIT(".", 1); + struct fscrypt_str dotdot = FSTR_INIT("..", 2); /* update dirent of "." */ f2fs_update_dentry(inode->i_ino, inode->i_mode, d, &dot, 0, 0); @@ -467,8 +506,7 @@ static int make_empty_dir(struct inode *inode, } struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, - const struct qstr *new_name, const struct qstr *orig_name, - struct page *dpage) + const struct f2fs_filename *fname, struct page *dpage) { struct page *page; int err; @@ -493,7 +531,8 @@ struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, if (err) goto put_error; - err = f2fs_init_security(inode, dir, orig_name, page); + err = f2fs_init_security(inode, dir, + fname ? fname->usr_fname : NULL, page); if (err) goto put_error; @@ -508,8 +547,8 @@ struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, return page; } - if (new_name) { - init_dent_inode(new_name, page); + if (fname) { + init_dent_inode(fname, page); if (IS_ENCRYPTED(dir)) file_set_enc_name(inode); } @@ -577,11 +616,11 @@ next: } bool f2fs_has_enough_room(struct inode *dir, struct page *ipage, - struct fscrypt_name *fname) + const struct f2fs_filename *fname) { struct f2fs_dentry_ptr d; unsigned int bit_pos; - int slots = GET_DENTRY_SLOTS(fname_len(fname)); + int slots = GET_DENTRY_SLOTS(fname->disk_name.len); make_dentry_ptr_inline(dir, &d, inline_data_addr(dir, ipage)); @@ -591,8 +630,8 @@ bool f2fs_has_enough_room(struct inode *dir, struct page *ipage, } void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, - const struct qstr *name, f2fs_hash_t name_hash, - unsigned int bit_pos) + const struct fscrypt_str *name, f2fs_hash_t name_hash, + unsigned int bit_pos) { struct f2fs_dir_entry *de; int slots = GET_DENTRY_SLOTS(name->len); @@ -612,15 +651,13 @@ void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, } } -int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, - const struct qstr *orig_name, - struct inode *inode, nid_t ino, umode_t mode) +int f2fs_add_regular_entry(struct inode *dir, const struct f2fs_filename *fname, + struct inode *inode, nid_t ino, umode_t mode) { unsigned int bit_pos; unsigned int level; unsigned int current_depth; unsigned long bidx, block; - f2fs_hash_t dentry_hash; unsigned int nbucket, nblock; struct page *dentry_page = NULL; struct f2fs_dentry_block *dentry_blk = NULL; @@ -629,11 +666,10 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, int slots, err = 0; level = 0; - slots = GET_DENTRY_SLOTS(new_name->len); - dentry_hash = f2fs_dentry_hash(dir, new_name, NULL); + slots = GET_DENTRY_SLOTS(fname->disk_name.len); current_depth = F2FS_I(dir)->i_current_depth; - if (F2FS_I(dir)->chash == dentry_hash) { + if (F2FS_I(dir)->chash == fname->hash) { level = F2FS_I(dir)->clevel; F2FS_I(dir)->chash = 0; } @@ -655,7 +691,7 @@ start: nblock = bucket_blocks(level); bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level, - (le32_to_cpu(dentry_hash) % nbucket)); + (le32_to_cpu(fname->hash) % nbucket)); for (block = bidx; block <= (bidx + nblock - 1); block++) { dentry_page = f2fs_get_new_data_page(dir, NULL, block, true); @@ -679,8 +715,7 @@ add_dentry: if (inode) { down_write(&F2FS_I(inode)->i_sem); - page = f2fs_init_inode_metadata(inode, dir, new_name, - orig_name, NULL); + page = f2fs_init_inode_metadata(inode, dir, fname, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; @@ -688,7 +723,8 @@ add_dentry: } make_dentry_ptr_block(NULL, &d, dentry_blk); - f2fs_update_dentry(ino, mode, &d, new_name, dentry_hash, bit_pos); + f2fs_update_dentry(ino, mode, &d, &fname->disk_name, fname->hash, + bit_pos); set_page_dirty(dentry_page); @@ -712,21 +748,15 @@ fail: return err; } -int f2fs_add_dentry(struct inode *dir, struct fscrypt_name *fname, - struct inode *inode, nid_t ino, umode_t mode) +int f2fs_add_dentry(struct inode *dir, const struct f2fs_filename *fname, + struct inode *inode, nid_t ino, umode_t mode) { - struct qstr new_name; int err = -EAGAIN; - new_name.name = fname_name(fname); - new_name.len = fname_len(fname); - if (f2fs_has_inline_dentry(dir)) - err = f2fs_add_inline_entry(dir, &new_name, fname->usr_fname, - inode, ino, mode); + err = f2fs_add_inline_entry(dir, fname, inode, ino, mode); if (err == -EAGAIN) - err = f2fs_add_regular_entry(dir, &new_name, fname->usr_fname, - inode, ino, mode); + err = f2fs_add_regular_entry(dir, fname, inode, ino, mode); f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); return err; @@ -739,17 +769,17 @@ int f2fs_add_dentry(struct inode *dir, struct fscrypt_name *fname, int f2fs_do_add_link(struct inode *dir, const struct qstr *name, struct inode *inode, nid_t ino, umode_t mode) { - struct fscrypt_name fname; + struct f2fs_filename fname; struct page *page = NULL; struct f2fs_dir_entry *de = NULL; int err; - err = fscrypt_setup_filename(dir, name, 0, &fname); + err = f2fs_setup_filename(dir, name, 0, &fname); if (err) return err; /* - * An immature stakable filesystem shows a race condition between lookup + * An immature stackable filesystem shows a race condition between lookup * and create. If we have same task when doing lookup and create, it's * definitely fine as expected by VFS normally. Otherwise, let's just * verify on-disk dentry one more time, which guarantees filesystem @@ -767,7 +797,7 @@ int f2fs_do_add_link(struct inode *dir, const struct qstr *name, } else { err = f2fs_add_dentry(dir, &fname, inode, ino, mode); } - fscrypt_free_filename(&fname); + f2fs_free_filename(&fname); return err; } @@ -777,7 +807,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) int err = 0; down_write(&F2FS_I(inode)->i_sem); - page = f2fs_init_inode_metadata(inode, dir, NULL, NULL, NULL); + page = f2fs_init_inode_metadata(inode, dir, NULL, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; @@ -1080,17 +1110,41 @@ const struct file_operations f2fs_dir_operations = { static int f2fs_d_compare(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { - struct qstr qstr = {.name = str, .len = len }; const struct dentry *parent = READ_ONCE(dentry->d_parent); - const struct inode *inode = READ_ONCE(parent->d_inode); + const struct inode *dir = READ_ONCE(parent->d_inode); + const struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + struct qstr entry = QSTR_INIT(str, len); + char strbuf[DNAME_INLINE_LEN]; + int res; + + if (!dir || !IS_CASEFOLDED(dir)) + goto fallback; - if (!inode || !IS_CASEFOLDED(inode)) { - if (len != name->len) - return -1; - return memcmp(str, name->name, len); + /* + * If the dentry name is stored in-line, then it may be concurrently + * modified by a rename. If this happens, the VFS will eventually retry + * the lookup, so it doesn't matter what ->d_compare() returns. + * However, it's unsafe to call utf8_strncasecmp() with an unstable + * string. Therefore, we have to copy the name into a temporary buffer. + */ + if (len <= DNAME_INLINE_LEN - 1) { + memcpy(strbuf, str, len); + strbuf[len] = 0; + entry.name = strbuf; + /* prevent compiler from optimizing out the temporary buffer */ + barrier(); } - return f2fs_ci_compare(inode, name, &qstr, false); + res = utf8_strncasecmp(sbi->s_encoding, name, &entry); + if (res >= 0) + return res; + + if (f2fs_has_strict_mode(sbi)) + return -EINVAL; +fallback: + if (len != name->len) + return 1; + return !!memcmp(str, name->name, len); } static int f2fs_d_hash(const struct dentry *dentry, struct qstr *str) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index e60078460ad1..686c68b98610 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -325,9 +325,10 @@ static void __drop_largest_extent(struct extent_tree *et, } /* return true, if inode page is changed */ -static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) +static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_extent *i_ext = ipage ? &F2FS_INODE(ipage)->i_ext : NULL; struct extent_tree *et; struct extent_node *en; struct extent_info ei; @@ -335,16 +336,18 @@ static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_e if (!f2fs_may_extent_tree(inode)) { /* drop largest extent */ if (i_ext && i_ext->len) { + f2fs_wait_on_page_writeback(ipage, NODE, true, true); i_ext->len = 0; - return true; + set_page_dirty(ipage); + return; } - return false; + return; } et = __grab_extent_tree(inode); if (!i_ext || !i_ext->len) - return false; + return; get_extent_info(&ei, i_ext); @@ -360,17 +363,14 @@ static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_e } out: write_unlock(&et->lock); - return false; } -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) +void f2fs_init_extent_tree(struct inode *inode, struct page *ipage) { - bool ret = __f2fs_init_extent_tree(inode, i_ext); + __f2fs_init_extent_tree(inode, ipage); if (!F2FS_I(inode)->extent_tree) set_inode_flag(inode, FI_NO_EXTENT); - - return ret; } static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ba470d5687fe..d9e52a7f3702 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * fs/f2fs/f2fs.h * @@ -138,7 +138,8 @@ struct f2fs_mount_info { int fsync_mode; /* fsync policy */ int fs_mode; /* fs mode: LFS or ADAPTIVE */ int bggc_mode; /* bggc mode: off, on or sync */ - bool test_dummy_encryption; /* test dummy encryption */ + struct fscrypt_dummy_context dummy_enc_ctx; /* test dummy encryption */ + block_t unusable_cap_perc; /* percentage for cap */ block_t unusable_cap; /* Amount of space allowed to be * unusable when disabling checkpoint */ @@ -194,6 +195,7 @@ enum { #define CP_DISCARD 0x00000010 #define CP_TRIMMED 0x00000020 #define CP_PAUSE 0x00000040 +#define CP_RESIZE 0x00000080 #define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi) #define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */ @@ -400,12 +402,8 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, } /* - * ioctl commands + * f2fs-specific ioctl commands */ -#define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS -#define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS -#define F2FS_IOC_GETVERSION FS_IOC_GETVERSION - #define F2FS_IOCTL_MAGIC 0xf5 #define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) #define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2) @@ -428,13 +426,12 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_IOC_PRECACHE_EXTENTS _IO(F2FS_IOCTL_MAGIC, 15) #define F2FS_IOC_RESIZE_FS _IOW(F2FS_IOCTL_MAGIC, 16, __u64) #define F2FS_IOC_GET_COMPRESS_BLOCKS _IOR(F2FS_IOCTL_MAGIC, 17, __u64) - -#define F2FS_IOC_GET_VOLUME_NAME FS_IOC_GETFSLABEL -#define F2FS_IOC_SET_VOLUME_NAME FS_IOC_SETFSLABEL - -#define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY -#define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY -#define F2FS_IOC_GET_ENCRYPTION_PWSALT FS_IOC_GET_ENCRYPTION_PWSALT +#define F2FS_IOC_RELEASE_COMPRESS_BLOCKS \ + _IOR(F2FS_IOCTL_MAGIC, 18, __u64) +#define F2FS_IOC_RESERVE_COMPRESS_BLOCKS \ + _IOR(F2FS_IOCTL_MAGIC, 19, __u64) +#define F2FS_IOC_SEC_TRIM_FILE _IOW(F2FS_IOCTL_MAGIC, 20, \ + struct f2fs_sectrim_range) /* * should be same as XFS_IOC_GOINGDOWN. @@ -447,17 +444,12 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_GOING_DOWN_METAFLUSH 0x3 /* going down with meta flush */ #define F2FS_GOING_DOWN_NEED_FSCK 0x4 /* going down to trigger fsck */ -#if defined(__KERNEL__) && defined(CONFIG_COMPAT) /* - * ioctl commands in 32 bit emulation + * Flags used by F2FS_IOC_SEC_TRIM_FILE */ -#define F2FS_IOC32_GETFLAGS FS_IOC32_GETFLAGS -#define F2FS_IOC32_SETFLAGS FS_IOC32_SETFLAGS -#define F2FS_IOC32_GETVERSION FS_IOC32_GETVERSION -#endif - -#define F2FS_IOC_FSGETXATTR FS_IOC_FSGETXATTR -#define F2FS_IOC_FSSETXATTR FS_IOC_FSSETXATTR +#define F2FS_TRIM_FILE_DISCARD 0x1 /* send discard command */ +#define F2FS_TRIM_FILE_ZEROOUT 0x2 /* zero out */ +#define F2FS_TRIM_FILE_MASK 0x3 struct f2fs_gc_range { u32 sync; @@ -482,6 +474,12 @@ struct f2fs_flush_device { u32 segments; /* # of segments to flush */ }; +struct f2fs_sectrim_range { + u64 start; + u64 len; + u64 flags; +}; + /* for inline stuff */ #define DEF_INLINE_RESERVED_SIZE 1 static inline int get_extra_isize(struct inode *inode); @@ -506,6 +504,42 @@ static inline int get_inline_xattr_addrs(struct inode *inode); * For INODE and NODE manager */ /* for directory operations */ + +struct f2fs_filename { + /* + * The filename the user specified. This is NULL for some + * filesystem-internal operations, e.g. converting an inline directory + * to a non-inline one, or roll-forward recovering an encrypted dentry. + */ + const struct qstr *usr_fname; + + /* + * The on-disk filename. For encrypted directories, this is encrypted. + * This may be NULL for lookups in an encrypted dir without the key. + */ + struct fscrypt_str disk_name; + + /* The dirhash of this filename */ + f2fs_hash_t hash; + +#ifdef CONFIG_FS_ENCRYPTION + /* + * For lookups in encrypted directories: either the buffer backing + * disk_name, or a buffer that holds the decoded no-key name. + */ + struct fscrypt_str crypto_buf; +#endif +#ifdef CONFIG_UNICODE + /* + * For casefolded directories: the casefolded name, but it's left NULL + * if the original name is not valid Unicode or if the filesystem is + * doing an internal operation where usr_fname is also NULL. In these + * cases we fall back to treating the name as an opaque byte sequence. + */ + struct fscrypt_str cf_name; +#endif +}; + struct f2fs_dentry_ptr { struct inode *inode; void *bitmap; @@ -752,6 +786,7 @@ struct f2fs_inode_info { struct list_head inmem_pages; /* inmemory pages managed by f2fs */ struct task_struct *inmem_task; /* store inmemory task */ struct mutex inmem_lock; /* lock for inmemory pages */ + pgoff_t ra_offset; /* ongoing readahead offset */ struct extent_tree *extent_tree; /* cached extent_tree entry */ /* avoid racing between foreground op and gc */ @@ -1088,8 +1123,9 @@ enum cp_reason_type { }; enum iostat_type { - APP_DIRECT_IO, /* app direct IOs */ - APP_BUFFERED_IO, /* app buffered IOs */ + /* WRITE IO */ + APP_DIRECT_IO, /* app direct write IOs */ + APP_BUFFERED_IO, /* app buffered write IOs */ APP_WRITE_IO, /* app write IOs */ APP_MAPPED_IO, /* app mapped IOs */ FS_DATA_IO, /* data IOs from kworker/fsync/reclaimer */ @@ -1100,6 +1136,19 @@ enum iostat_type { FS_CP_DATA_IO, /* data IOs from checkpoint */ FS_CP_NODE_IO, /* node IOs from checkpoint */ FS_CP_META_IO, /* meta IOs from checkpoint */ + + /* READ IO */ + APP_DIRECT_READ_IO, /* app direct read IOs */ + APP_BUFFERED_READ_IO, /* app buffered read IOs */ + APP_READ_IO, /* app read IOs */ + APP_MAPPED_READ_IO, /* app mapped read IOs */ + FS_DATA_READ_IO, /* data read IOs */ + FS_GDATA_READ_IO, /* data read IOs from background gc */ + FS_CDATA_READ_IO, /* compressed data read IOs */ + FS_NODE_READ_IO, /* node read IOs */ + FS_META_READ_IO, /* meta read IOs */ + + /* other */ FS_DISCARD, /* discard */ NR_IO_TYPE, }; @@ -1211,7 +1260,8 @@ enum { GC_NORMAL, GC_IDLE_CB, GC_IDLE_GREEDY, - GC_URGENT, + GC_URGENT_HIGH, + GC_URGENT_LOW, }; enum { @@ -1257,9 +1307,17 @@ enum fsync_mode { #define IS_DUMMY_WRITTEN_PAGE(page) \ (page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE) +#ifdef CONFIG_F2FS_IO_TRACE +#define IS_IO_TRACED_PAGE(page) \ + (page_private(page) > 0 && \ + page_private(page) < (unsigned long)PID_MAX_LIMIT) +#else +#define IS_IO_TRACED_PAGE(page) (0) +#endif + #ifdef CONFIG_FS_ENCRYPTION #define DUMMY_ENCRYPTION_ENABLED(sbi) \ - (unlikely(F2FS_OPTION(sbi).test_dummy_encryption)) + (unlikely(F2FS_OPTION(sbi).dummy_enc_ctx.ctx != NULL)) #else #define DUMMY_ENCRYPTION_ENABLED(sbi) (0) #endif @@ -1269,6 +1327,7 @@ enum compress_algorithm_type { COMPRESS_LZO, COMPRESS_LZ4, COMPRESS_ZSTD, + COMPRESS_LZORLE, COMPRESS_MAX, }; @@ -1381,7 +1440,7 @@ struct f2fs_sb_info { unsigned long last_time[MAX_TIME]; /* to store time in jiffies */ long interval_time[MAX_TIME]; /* to store thresholds */ - struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ + struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ spinlock_t fsync_node_lock; /* for node entry lock */ struct list_head fsync_node_list; /* node list head */ @@ -1418,7 +1477,6 @@ struct f2fs_sb_info { unsigned int segs_per_sec; /* segments per section */ unsigned int secs_per_zone; /* sections per zone */ unsigned int total_sections; /* total section count */ - struct mutex resize_mutex; /* for resize exclusion */ unsigned int total_node_count; /* total node block count */ unsigned int total_valid_node_count; /* valid node block count */ loff_t max_file_blocks; /* max block index of file */ @@ -1460,8 +1518,9 @@ struct f2fs_sb_info { unsigned int cur_victim_sec; /* current victim section num */ unsigned int gc_mode; /* current GC state */ unsigned int next_victim_seg[2]; /* next segment in victim section */ + /* for skip statistic */ - unsigned int atomic_files; /* # of opened atomic file */ + unsigned int atomic_files; /* # of opened atomic file */ unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */ unsigned long long skipped_gc_rwsem; /* FG_GC only */ @@ -1504,8 +1563,15 @@ struct f2fs_sb_info { /* For app/fs IO statistics */ spinlock_t iostat_lock; - unsigned long long write_iostat[NR_IO_TYPE]; + unsigned long long rw_iostat[NR_IO_TYPE]; + unsigned long long prev_rw_iostat[NR_IO_TYPE]; bool iostat_enable; + unsigned long iostat_next_period; + unsigned int iostat_period_ms; + + /* to attach REQ_META|REQ_FUA flags */ + unsigned int data_io_flag; + unsigned int node_io_flag; /* For sysfs suppport */ struct kobject s_kobj; @@ -2393,7 +2459,7 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, static inline bool is_idle(struct f2fs_sb_info *sbi, int type) { - if (sbi->gc_mode == GC_URGENT) + if (sbi->gc_mode == GC_URGENT_HIGH) return true; if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) || @@ -2411,6 +2477,10 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type) atomic_read(&SM_I(sbi)->fcc_info->queued_flush)) return false; + if (sbi->gc_mode == GC_URGENT_LOW && + (type == DISCARD_TIME || type == GC_TIME)) + return true; + return f2fs_time_over(sbi, type); } @@ -2576,7 +2646,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, case FI_NEW_INODE: if (set) return; - /* fall through */ + fallthrough; case FI_DATA_EXIST: case FI_INLINE_DOTS: case FI_PIN_FILE: @@ -2586,7 +2656,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, static inline void set_inode_flag(struct inode *inode, int flag) { - test_and_set_bit(flag, F2FS_I(inode)->flags); + set_bit(flag, F2FS_I(inode)->flags); __mark_inode_dirty_flag(inode, flag, true); } @@ -2597,7 +2667,7 @@ static inline int is_inode_flag_set(struct inode *inode, int flag) static inline void clear_inode_flag(struct inode *inode, int flag) { - test_and_clear_bit(flag, F2FS_I(inode)->flags); + clear_bit(flag, F2FS_I(inode)->flags); __mark_inode_dirty_flag(inode, flag, false); } @@ -2902,12 +2972,12 @@ static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi) return is_set_ckpt_flags(sbi, CP_ERROR_FLAG); } -static inline bool is_dot_dotdot(const struct qstr *str) +static inline bool is_dot_dotdot(const u8 *name, size_t len) { - if (str->len == 1 && str->name[0] == '.') + if (len == 1 && name[0] == '.') return true; - if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.') + if (len == 2 && name[0] == '.' && name[1] == '.') return true; return false; @@ -2935,18 +3005,12 @@ static inline bool f2fs_may_extent_tree(struct inode *inode) static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { - void *ret; - if (time_to_inject(sbi, FAULT_KMALLOC)) { f2fs_show_injection_info(sbi, FAULT_KMALLOC); return NULL; } - ret = kmalloc(size, flags); - if (ret) - return ret; - - return kvmalloc(size, flags); + return kmalloc(size, flags); } static inline void *f2fs_kzalloc(struct f2fs_sb_info *sbi, @@ -2996,29 +3060,45 @@ static inline int get_inline_xattr_addrs(struct inode *inode) sizeof((f2fs_inode)->field)) \ <= (F2FS_OLD_ATTRIBUTE_SIZE + (extra_isize))) \ +#define DEFAULT_IOSTAT_PERIOD_MS 3000 +#define MIN_IOSTAT_PERIOD_MS 100 +/* maximum period of iostat tracing is 1 day */ +#define MAX_IOSTAT_PERIOD_MS 8640000 + static inline void f2fs_reset_iostat(struct f2fs_sb_info *sbi) { int i; spin_lock(&sbi->iostat_lock); - for (i = 0; i < NR_IO_TYPE; i++) - sbi->write_iostat[i] = 0; + for (i = 0; i < NR_IO_TYPE; i++) { + sbi->rw_iostat[i] = 0; + sbi->prev_rw_iostat[i] = 0; + } spin_unlock(&sbi->iostat_lock); } +extern void f2fs_record_iostat(struct f2fs_sb_info *sbi); + static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, enum iostat_type type, unsigned long long io_bytes) { if (!sbi->iostat_enable) return; spin_lock(&sbi->iostat_lock); - sbi->write_iostat[type] += io_bytes; + sbi->rw_iostat[type] += io_bytes; if (type == APP_WRITE_IO || type == APP_DIRECT_IO) - sbi->write_iostat[APP_BUFFERED_IO] = - sbi->write_iostat[APP_WRITE_IO] - - sbi->write_iostat[APP_DIRECT_IO]; + sbi->rw_iostat[APP_BUFFERED_IO] = + sbi->rw_iostat[APP_WRITE_IO] - + sbi->rw_iostat[APP_DIRECT_IO]; + + if (type == APP_READ_IO || type == APP_DIRECT_READ_IO) + sbi->rw_iostat[APP_BUFFERED_READ_IO] = + sbi->rw_iostat[APP_READ_IO] - + sbi->rw_iostat[APP_DIRECT_READ_IO]; spin_unlock(&sbi->iostat_lock); + + f2fs_record_iostat(sbi); } #define __is_large_section(sbi) ((sbi)->segs_per_sec > 1) @@ -3051,19 +3131,12 @@ static inline void f2fs_set_page_private(struct page *page, if (PagePrivate(page)) return; - get_page(page); - SetPagePrivate(page); - set_page_private(page, data); + attach_page_private(page, (void *)data); } static inline void f2fs_clear_page_private(struct page *page) { - if (!PagePrivate(page)) - return; - - set_page_private(page, 0); - ClearPagePrivate(page); - f2fs_put_page(page, 0); + detach_page_private(page); } /* @@ -3071,6 +3144,7 @@ static inline void f2fs_clear_page_private(struct page *page) */ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); void f2fs_truncate_data_blocks(struct dnode_of_data *dn); +int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock); int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock); int f2fs_truncate(struct inode *inode); int f2fs_getattr(const struct path *path, struct kstat *stat, @@ -3106,31 +3180,32 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, bool hot, bool set); struct dentry *f2fs_get_parent(struct dentry *child); -extern int f2fs_ci_compare(const struct inode *parent, - const struct qstr *name, - const struct qstr *entry, - bool quick); - /* * dir.c */ unsigned char f2fs_get_de_type(struct f2fs_dir_entry *de); -struct f2fs_dir_entry *f2fs_find_target_dentry(struct fscrypt_name *fname, - f2fs_hash_t namehash, int *max_slots, - struct f2fs_dentry_ptr *d); +int f2fs_init_casefolded_name(const struct inode *dir, + struct f2fs_filename *fname); +int f2fs_setup_filename(struct inode *dir, const struct qstr *iname, + int lookup, struct f2fs_filename *fname); +int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry, + struct f2fs_filename *fname); +void f2fs_free_filename(struct f2fs_filename *fname); +struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, + const struct f2fs_filename *fname, int *max_slots); int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, unsigned int start_pos, struct fscrypt_str *fstr); void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent, struct f2fs_dentry_ptr *d); struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, - const struct qstr *new_name, - const struct qstr *orig_name, struct page *dpage); + const struct f2fs_filename *fname, struct page *dpage); void f2fs_update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth); int f2fs_room_for_filename(const void *bitmap, int slots, int max_slots); void f2fs_drop_nlink(struct inode *dir, struct inode *inode); struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, - struct fscrypt_name *fname, struct page **res_page); + const struct f2fs_filename *fname, + struct page **res_page); struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, const struct qstr *child, struct page **res_page); struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p); @@ -3139,14 +3214,13 @@ ino_t f2fs_inode_by_name(struct inode *dir, const struct qstr *qstr, void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, struct page *page, struct inode *inode); bool f2fs_has_enough_room(struct inode *dir, struct page *ipage, - struct fscrypt_name *fname); + const struct f2fs_filename *fname); void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, - const struct qstr *name, f2fs_hash_t name_hash, + const struct fscrypt_str *name, f2fs_hash_t name_hash, unsigned int bit_pos); -int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, - const struct qstr *orig_name, +int f2fs_add_regular_entry(struct inode *dir, const struct f2fs_filename *fname, struct inode *inode, nid_t ino, umode_t mode); -int f2fs_add_dentry(struct inode *dir, struct fscrypt_name *fname, +int f2fs_add_dentry(struct inode *dir, const struct f2fs_filename *fname, struct inode *inode, nid_t ino, umode_t mode); int f2fs_do_add_link(struct inode *dir, const struct qstr *name, struct inode *inode, nid_t ino, umode_t mode); @@ -3176,8 +3250,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi); /* * hash.c */ -f2fs_hash_t f2fs_dentry_hash(const struct inode *dir, - const struct qstr *name_info, struct fscrypt_name *fname); +void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname); /* * node.c @@ -3209,6 +3282,7 @@ void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid); struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid); struct page *f2fs_get_node_page_ra(struct page *parent, int start); int f2fs_move_node_page(struct page *node_page, int gc_type); +void f2fs_flush_inline_data(struct f2fs_sb_info *sbi); int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, struct writeback_control *wbc, bool atomic, unsigned int *seq_id); @@ -3220,7 +3294,7 @@ bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid); void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid); void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid); int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink); -void f2fs_recover_inline_xattr(struct inode *inode, struct page *page); +int f2fs_recover_inline_xattr(struct inode *inode, struct page *page); int f2fs_recover_xattr_data(struct inode *inode, struct page *page); int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page); int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, @@ -3258,9 +3332,10 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi); int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable); void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi); int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); -void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, +void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, unsigned int start, unsigned int end); -void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi, int type); +void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type); +void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc); @@ -3283,7 +3358,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, - struct f2fs_io_info *fio, bool add_list); + struct f2fs_io_info *fio); void f2fs_wait_on_page_writeback(struct page *page, enum page_type type, bool ordered, bool locked); void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr); @@ -3373,9 +3448,6 @@ int f2fs_reserve_new_block(struct dnode_of_data *dn); int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index); int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from); int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index); -int f2fs_mpage_readpages(struct address_space *mapping, - struct list_head *pages, struct page *page, - unsigned nr_pages, bool is_readahead); struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, int op_flags, bool for_write); struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index); @@ -3384,7 +3456,7 @@ struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index, struct page *f2fs_get_new_data_page(struct inode *inode, struct page *ipage, pgoff_t index, bool new_i_size); int f2fs_do_write_data_page(struct f2fs_io_info *fio); -void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock); +void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock); int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int create, int flag); int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, @@ -3472,6 +3544,9 @@ struct f2fs_stat_info { int curseg[NR_CURSEG_TYPE]; int cursec[NR_CURSEG_TYPE]; int curzone[NR_CURSEG_TYPE]; + unsigned int dirty_seg[NR_CURSEG_TYPE]; + unsigned int full_seg[NR_CURSEG_TYPE]; + unsigned int valid_blks[NR_CURSEG_TYPE]; unsigned int meta_count[META_MAX]; unsigned int segment_count[2]; @@ -3655,7 +3730,7 @@ static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } static inline void __init f2fs_create_root_stats(void) { } static inline void f2fs_destroy_root_stats(void) { } -static inline void update_sit_info(struct f2fs_sb_info *sbi) {} +static inline void f2fs_update_sit_info(struct f2fs_sb_info *sbi) {} #endif extern const struct file_operations f2fs_dir_operations; @@ -3686,13 +3761,13 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page); int f2fs_convert_inline_inode(struct inode *inode); int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry); int f2fs_write_inline_data(struct inode *inode, struct page *page); -bool f2fs_recover_inline_data(struct inode *inode, struct page *npage); +int f2fs_recover_inline_data(struct inode *inode, struct page *npage); struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, - struct fscrypt_name *fname, struct page **res_page); + const struct f2fs_filename *fname, + struct page **res_page); int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent, struct page *ipage); -int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, - const struct qstr *orig_name, +int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname, struct inode *inode, nid_t ino, umode_t mode); void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, struct inode *dir, @@ -3731,7 +3806,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root, bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi, struct rb_root_cached *root); unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink); -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext); +void f2fs_init_extent_tree(struct inode *inode, struct page *ipage); void f2fs_drop_extent_tree(struct inode *inode); unsigned int f2fs_destroy_extent_node(struct inode *inode); void f2fs_destroy_extent_tree(struct inode *inode); @@ -3791,8 +3866,11 @@ int f2fs_prepare_compress_overwrite(struct inode *inode, struct page **pagep, pgoff_t index, void **fsdata); bool f2fs_compress_write_end(struct inode *inode, void *fsdata, pgoff_t index, unsigned copied); +int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock); void f2fs_compress_write_end_io(struct bio *bio, struct page *page); bool f2fs_is_compress_backend_ready(struct inode *inode); +int f2fs_init_compress_mempool(void); +void f2fs_destroy_compress_mempool(void); void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity); bool f2fs_cluster_is_empty(struct compress_ctx *cc); bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index); @@ -3826,6 +3904,8 @@ static inline struct page *f2fs_compress_control_page(struct page *page) WARN_ON_ONCE(1); return ERR_PTR(-EINVAL); } +static inline int f2fs_init_compress_mempool(void) { return 0; } +static inline void f2fs_destroy_compress_mempool(void) { } #endif static inline void set_compress_context(struct inode *inode) @@ -3972,6 +4052,10 @@ static inline void f2fs_i_compr_blocks_update(struct inode *inode, { int diff = F2FS_I(inode)->i_cluster_size - blocks; + /* don't update i_compr_blocks if saved blocks were released */ + if (!add && !F2FS_I(inode)->i_compr_blocks) + return; + if (add) { F2FS_I(inode)->i_compr_blocks += diff; stat_add_compr_blocks(inode, diff); @@ -4013,8 +4097,6 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, return true; if (f2fs_is_multi_device(sbi)) return true; - if (f2fs_compressed_file(inode)) - return true; /* * for blkzoned device, fallback direct IO to buffered IO, so * all IOs can be serialized by log-structured write. diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6ab8f621a3c5..8a422400e824 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -21,6 +21,7 @@ #include <linux/uuid.h> #include <linux/file.h> #include <linux/nls.h> +#include <linux/sched/signal.h> #include "f2fs.h" #include "node.h" @@ -40,6 +41,10 @@ static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) ret = filemap_fault(vmf); up_read(&F2FS_I(inode)->i_mmap_sem); + if (!ret) + f2fs_update_iostat(F2FS_I_SB(inode), APP_MAPPED_READ_IO, + F2FS_BLKSIZE); + trace_f2fs_filemap_fault(inode, vmf->pgoff, (unsigned long)ret); return ret; @@ -101,11 +106,11 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) if (need_alloc) { /* block allocation */ - __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); + f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); set_new_dnode(&dn, inode, NULL, NULL, 0); err = f2fs_get_block(&dn, page->index); f2fs_put_dnode(&dn); - __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); + f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); } #ifdef CONFIG_F2FS_FS_COMPRESSION @@ -165,9 +170,11 @@ static int get_parent_ino(struct inode *inode, nid_t *pino) { struct dentry *dentry; - inode = igrab(inode); - dentry = d_find_any_alias(inode); - iput(inode); + /* + * Make sure to get the non-deleted alias. The alias associated with + * the open file descriptor being fsync()'ed may be deleted already. + */ + dentry = d_find_alias(inode); if (!dentry) return 0; @@ -557,6 +564,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) bool compressed_cluster = false; int cluster_index = 0, valid_blocks = 0; int cluster_size = F2FS_I(dn->inode)->i_cluster_size; + bool released = !F2FS_I(dn->inode)->i_compr_blocks; if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode)) base = get_extra_isize(dn->inode); @@ -595,7 +603,9 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN); f2fs_invalidate_blocks(sbi, blkaddr); - nr_free++; + + if (!released || blkaddr != COMPRESS_ADDR) + nr_free++; } if (compressed_cluster) @@ -643,9 +653,6 @@ static int truncate_partial_data_page(struct inode *inode, u64 from, return 0; } - if (f2fs_compressed_file(inode)) - return 0; - page = f2fs_get_lock_data_page(inode, index, true); if (IS_ERR(page)) return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page); @@ -661,7 +668,7 @@ truncate_out: return 0; } -static int do_truncate_blocks(struct inode *inode, u64 from, bool lock) +int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; @@ -729,23 +736,28 @@ free_partial: int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) { u64 free_from = from; + int err; +#ifdef CONFIG_F2FS_FS_COMPRESSION /* * for compressed file, only support cluster size * aligned truncation. */ - if (f2fs_compressed_file(inode)) { - size_t cluster_shift = PAGE_SHIFT + - F2FS_I(inode)->i_log_cluster_size; - size_t cluster_mask = (1 << cluster_shift) - 1; + if (f2fs_compressed_file(inode)) + free_from = round_up(from, + F2FS_I(inode)->i_cluster_size << PAGE_SHIFT); +#endif - free_from = from >> cluster_shift; - if (from & cluster_mask) - free_from++; - free_from <<= cluster_shift; - } + err = f2fs_do_truncate_blocks(inode, free_from, lock); + if (err) + return err; - return do_truncate_blocks(inode, free_from, lock); +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (from != free_from) + err = f2fs_truncate_partial_cluster(inode, from, lock); +#endif + + return err; } int f2fs_truncate(struct inode *inode) @@ -968,9 +980,7 @@ const struct inode_operations f2fs_file_inode_operations = { .setattr = f2fs_setattr, .get_acl = f2fs_get_acl, .set_acl = f2fs_set_acl, -#ifdef CONFIG_F2FS_FS_XATTR .listxattr = f2fs_listxattr, -#endif .fiemap = f2fs_fiemap, }; @@ -1364,8 +1374,6 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) truncate_pagecache(inode, offset); new_size = i_size_read(inode) - len; - truncate_pagecache(inode, new_size); - ret = f2fs_truncate_blocks(inode, new_size, true); up_write(&F2FS_I(inode)->i_mmap_sem); if (!ret) @@ -1649,7 +1657,11 @@ next_alloc: down_write(&sbi->pin_sem); map.m_seg_type = CURSEG_COLD_DATA_PINNED; - f2fs_allocate_new_segments(sbi, CURSEG_COLD_DATA); + + f2fs_lock_op(sbi); + f2fs_allocate_new_segment(sbi, CURSEG_COLD_DATA); + f2fs_unlock_op(sbi); + err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO); up_write(&sbi->pin_sem); @@ -2219,8 +2231,15 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) if (in != F2FS_GOING_DOWN_FULLSYNC) { ret = mnt_want_write_file(filp); - if (ret) + if (ret) { + if (ret == -EROFS) { + ret = 0; + f2fs_stop_checkpoint(sbi, false); + set_sbi_flag(sbi, SBI_IS_SHUTDOWN); + trace_f2fs_shutdown(sbi, in, ret); + } return ret; + } } switch (in) { @@ -2507,6 +2526,11 @@ do_more: } ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start)); + if (ret) { + if (ret == -EBUSY) + ret = -EAGAIN; + goto out; + } range.start += BLKS_PER_SEC(sbi); if (range.start <= end) goto do_more; @@ -3301,7 +3325,6 @@ static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg) { struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp)); __u64 block_count; - int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -3313,9 +3336,7 @@ static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg) sizeof(block_count))) return -EFAULT; - ret = f2fs_resize_fs(sbi, block_count); - - return ret; + return f2fs_resize_fs(sbi, block_count); } static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg) @@ -3342,7 +3363,7 @@ static int f2fs_ioc_measure_verity(struct file *filp, unsigned long arg) return fsverity_ioctl_measure(filp, (void __user *)arg); } -static int f2fs_get_volume_name(struct file *filp, unsigned long arg) +static int f2fs_ioc_getfslabel(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -3368,7 +3389,7 @@ static int f2fs_get_volume_name(struct file *filp, unsigned long arg) return err; } -static int f2fs_set_volume_name(struct file *filp, unsigned long arg) +static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -3419,6 +3440,513 @@ static int f2fs_get_compress_blocks(struct file *filp, unsigned long arg) return put_user(blocks, (u64 __user *)arg); } +static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); + unsigned int released_blocks = 0; + int cluster_size = F2FS_I(dn->inode)->i_cluster_size; + block_t blkaddr; + int i; + + for (i = 0; i < count; i++) { + blkaddr = data_blkaddr(dn->inode, dn->node_page, + dn->ofs_in_node + i); + + if (!__is_valid_data_blkaddr(blkaddr)) + continue; + if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr, + DATA_GENERIC_ENHANCE))) + return -EFSCORRUPTED; + } + + while (count) { + int compr_blocks = 0; + + for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) { + blkaddr = f2fs_data_blkaddr(dn); + + if (i == 0) { + if (blkaddr == COMPRESS_ADDR) + continue; + dn->ofs_in_node += cluster_size; + goto next; + } + + if (__is_valid_data_blkaddr(blkaddr)) + compr_blocks++; + + if (blkaddr != NEW_ADDR) + continue; + + dn->data_blkaddr = NULL_ADDR; + f2fs_set_data_blkaddr(dn); + } + + f2fs_i_compr_blocks_update(dn->inode, compr_blocks, false); + dec_valid_block_count(sbi, dn->inode, + cluster_size - compr_blocks); + + released_blocks += cluster_size - compr_blocks; +next: + count -= cluster_size; + } + + return released_blocks; +} + +static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + pgoff_t page_idx = 0, last_idx; + unsigned int released_blocks = 0; + int ret; + int writecount; + + if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) + return -EOPNOTSUPP; + + if (!f2fs_compressed_file(inode)) + return -EINVAL; + + if (f2fs_readonly(sbi->sb)) + return -EROFS; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + f2fs_balance_fs(F2FS_I_SB(inode), true); + + inode_lock(inode); + + writecount = atomic_read(&inode->i_writecount); + if ((filp->f_mode & FMODE_WRITE && writecount != 1) || writecount) { + ret = -EBUSY; + goto out; + } + + if (IS_IMMUTABLE(inode)) { + ret = -EINVAL; + goto out; + } + + ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); + if (ret) + goto out; + + F2FS_I(inode)->i_flags |= F2FS_IMMUTABLE_FL; + f2fs_set_inode_flags(inode); + inode->i_ctime = current_time(inode); + f2fs_mark_inode_dirty_sync(inode, true); + + if (!F2FS_I(inode)->i_compr_blocks) + goto out; + + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); + + last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + + while (page_idx < last_idx) { + struct dnode_of_data dn; + pgoff_t end_offset, count; + + set_new_dnode(&dn, inode, NULL, NULL, 0); + ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE); + if (ret) { + if (ret == -ENOENT) { + page_idx = f2fs_get_next_page_offset(&dn, + page_idx); + ret = 0; + continue; + } + break; + } + + end_offset = ADDRS_PER_PAGE(dn.node_page, inode); + count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); + count = round_up(count, F2FS_I(inode)->i_cluster_size); + + ret = release_compress_blocks(&dn, count); + + f2fs_put_dnode(&dn); + + if (ret < 0) + break; + + page_idx += count; + released_blocks += ret; + } + + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + up_write(&F2FS_I(inode)->i_mmap_sem); +out: + inode_unlock(inode); + + mnt_drop_write_file(filp); + + if (ret >= 0) { + ret = put_user(released_blocks, (u64 __user *)arg); + } else if (released_blocks && F2FS_I(inode)->i_compr_blocks) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx " + "iblocks=%llu, released=%u, compr_blocks=%llu, " + "run fsck to fix.", + __func__, inode->i_ino, inode->i_blocks, + released_blocks, + F2FS_I(inode)->i_compr_blocks); + } + + return ret; +} + +static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); + unsigned int reserved_blocks = 0; + int cluster_size = F2FS_I(dn->inode)->i_cluster_size; + block_t blkaddr; + int i; + + for (i = 0; i < count; i++) { + blkaddr = data_blkaddr(dn->inode, dn->node_page, + dn->ofs_in_node + i); + + if (!__is_valid_data_blkaddr(blkaddr)) + continue; + if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr, + DATA_GENERIC_ENHANCE))) + return -EFSCORRUPTED; + } + + while (count) { + int compr_blocks = 0; + blkcnt_t reserved; + int ret; + + for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) { + blkaddr = f2fs_data_blkaddr(dn); + + if (i == 0) { + if (blkaddr == COMPRESS_ADDR) + continue; + dn->ofs_in_node += cluster_size; + goto next; + } + + if (__is_valid_data_blkaddr(blkaddr)) { + compr_blocks++; + continue; + } + + dn->data_blkaddr = NEW_ADDR; + f2fs_set_data_blkaddr(dn); + } + + reserved = cluster_size - compr_blocks; + ret = inc_valid_block_count(sbi, dn->inode, &reserved); + if (ret) + return ret; + + if (reserved != cluster_size - compr_blocks) + return -ENOSPC; + + f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true); + + reserved_blocks += reserved; +next: + count -= cluster_size; + } + + return reserved_blocks; +} + +static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + pgoff_t page_idx = 0, last_idx; + unsigned int reserved_blocks = 0; + int ret; + + if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) + return -EOPNOTSUPP; + + if (!f2fs_compressed_file(inode)) + return -EINVAL; + + if (f2fs_readonly(sbi->sb)) + return -EROFS; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + if (F2FS_I(inode)->i_compr_blocks) + goto out; + + f2fs_balance_fs(F2FS_I_SB(inode), true); + + inode_lock(inode); + + if (!IS_IMMUTABLE(inode)) { + ret = -EINVAL; + goto unlock_inode; + } + + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); + + last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + + while (page_idx < last_idx) { + struct dnode_of_data dn; + pgoff_t end_offset, count; + + set_new_dnode(&dn, inode, NULL, NULL, 0); + ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE); + if (ret) { + if (ret == -ENOENT) { + page_idx = f2fs_get_next_page_offset(&dn, + page_idx); + ret = 0; + continue; + } + break; + } + + end_offset = ADDRS_PER_PAGE(dn.node_page, inode); + count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); + count = round_up(count, F2FS_I(inode)->i_cluster_size); + + ret = reserve_compress_blocks(&dn, count); + + f2fs_put_dnode(&dn); + + if (ret < 0) + break; + + page_idx += count; + reserved_blocks += ret; + } + + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + up_write(&F2FS_I(inode)->i_mmap_sem); + + if (ret >= 0) { + F2FS_I(inode)->i_flags &= ~F2FS_IMMUTABLE_FL; + f2fs_set_inode_flags(inode); + inode->i_ctime = current_time(inode); + f2fs_mark_inode_dirty_sync(inode, true); + } +unlock_inode: + inode_unlock(inode); +out: + mnt_drop_write_file(filp); + + if (ret >= 0) { + ret = put_user(reserved_blocks, (u64 __user *)arg); + } else if (reserved_blocks && F2FS_I(inode)->i_compr_blocks) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx " + "iblocks=%llu, reserved=%u, compr_blocks=%llu, " + "run fsck to fix.", + __func__, inode->i_ino, inode->i_blocks, + reserved_blocks, + F2FS_I(inode)->i_compr_blocks); + } + + return ret; +} + +static int f2fs_secure_erase(struct block_device *bdev, struct inode *inode, + pgoff_t off, block_t block, block_t len, u32 flags) +{ + struct request_queue *q = bdev_get_queue(bdev); + sector_t sector = SECTOR_FROM_BLOCK(block); + sector_t nr_sects = SECTOR_FROM_BLOCK(len); + int ret = 0; + + if (!q) + return -ENXIO; + + if (flags & F2FS_TRIM_FILE_DISCARD) + ret = blkdev_issue_discard(bdev, sector, nr_sects, GFP_NOFS, + blk_queue_secure_erase(q) ? + BLKDEV_DISCARD_SECURE : 0); + + if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT)) { + if (IS_ENCRYPTED(inode)) + ret = fscrypt_zeroout_range(inode, off, block, len); + else + ret = blkdev_issue_zeroout(bdev, sector, nr_sects, + GFP_NOFS, 0); + } + + return ret; +} + +static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct address_space *mapping = inode->i_mapping; + struct block_device *prev_bdev = NULL; + struct f2fs_sectrim_range range; + pgoff_t index, pg_end, prev_index = 0; + block_t prev_block = 0, len = 0; + loff_t end_addr; + bool to_end = false; + int ret = 0; + + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + + if (copy_from_user(&range, (struct f2fs_sectrim_range __user *)arg, + sizeof(range))) + return -EFAULT; + + if (range.flags == 0 || (range.flags & ~F2FS_TRIM_FILE_MASK) || + !S_ISREG(inode->i_mode)) + return -EINVAL; + + if (((range.flags & F2FS_TRIM_FILE_DISCARD) && + !f2fs_hw_support_discard(sbi)) || + ((range.flags & F2FS_TRIM_FILE_ZEROOUT) && + IS_ENCRYPTED(inode) && f2fs_is_multi_device(sbi))) + return -EOPNOTSUPP; + + file_start_write(filp); + inode_lock(inode); + + if (f2fs_is_atomic_file(inode) || f2fs_compressed_file(inode) || + range.start >= inode->i_size) { + ret = -EINVAL; + goto err; + } + + if (range.len == 0) + goto err; + + if (inode->i_size - range.start > range.len) { + end_addr = range.start + range.len; + } else { + end_addr = range.len == (u64)-1 ? + sbi->sb->s_maxbytes : inode->i_size; + to_end = true; + } + + if (!IS_ALIGNED(range.start, F2FS_BLKSIZE) || + (!to_end && !IS_ALIGNED(end_addr, F2FS_BLKSIZE))) { + ret = -EINVAL; + goto err; + } + + index = F2FS_BYTES_TO_BLK(range.start); + pg_end = DIV_ROUND_UP(end_addr, F2FS_BLKSIZE); + + ret = f2fs_convert_inline_inode(inode); + if (ret) + goto err; + + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); + + ret = filemap_write_and_wait_range(mapping, range.start, + to_end ? LLONG_MAX : end_addr - 1); + if (ret) + goto out; + + truncate_inode_pages_range(mapping, range.start, + to_end ? -1 : end_addr - 1); + + while (index < pg_end) { + struct dnode_of_data dn; + pgoff_t end_offset, count; + int i; + + set_new_dnode(&dn, inode, NULL, NULL, 0); + ret = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); + if (ret) { + if (ret == -ENOENT) { + index = f2fs_get_next_page_offset(&dn, index); + continue; + } + goto out; + } + + end_offset = ADDRS_PER_PAGE(dn.node_page, inode); + count = min(end_offset - dn.ofs_in_node, pg_end - index); + for (i = 0; i < count; i++, index++, dn.ofs_in_node++) { + struct block_device *cur_bdev; + block_t blkaddr = f2fs_data_blkaddr(&dn); + + if (!__is_valid_data_blkaddr(blkaddr)) + continue; + + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, + DATA_GENERIC_ENHANCE)) { + ret = -EFSCORRUPTED; + f2fs_put_dnode(&dn); + goto out; + } + + cur_bdev = f2fs_target_device(sbi, blkaddr, NULL); + if (f2fs_is_multi_device(sbi)) { + int di = f2fs_target_device_index(sbi, blkaddr); + + blkaddr -= FDEV(di).start_blk; + } + + if (len) { + if (prev_bdev == cur_bdev && + index == prev_index + len && + blkaddr == prev_block + len) { + len++; + } else { + ret = f2fs_secure_erase(prev_bdev, + inode, prev_index, prev_block, + len, range.flags); + if (ret) { + f2fs_put_dnode(&dn); + goto out; + } + + len = 0; + } + } + + if (!len) { + prev_bdev = cur_bdev; + prev_index = index; + prev_block = blkaddr; + len = 1; + } + } + + f2fs_put_dnode(&dn); + + if (fatal_signal_pending(current)) { + ret = -EINTR; + goto out; + } + cond_resched(); + } + + if (len) + ret = f2fs_secure_erase(prev_bdev, inode, prev_index, + prev_block, len, range.flags); +out: + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); +err: + inode_unlock(inode); + file_end_write(filp); + + return ret; +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) @@ -3427,11 +3955,11 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return -ENOSPC; switch (cmd) { - case F2FS_IOC_GETFLAGS: + case FS_IOC_GETFLAGS: return f2fs_ioc_getflags(filp, arg); - case F2FS_IOC_SETFLAGS: + case FS_IOC_SETFLAGS: return f2fs_ioc_setflags(filp, arg); - case F2FS_IOC_GETVERSION: + case FS_IOC_GETVERSION: return f2fs_ioc_getversion(filp, arg); case F2FS_IOC_START_ATOMIC_WRITE: return f2fs_ioc_start_atomic_write(filp); @@ -3447,11 +3975,11 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_shutdown(filp, arg); case FITRIM: return f2fs_ioc_fitrim(filp, arg); - case F2FS_IOC_SET_ENCRYPTION_POLICY: + case FS_IOC_SET_ENCRYPTION_POLICY: return f2fs_ioc_set_encryption_policy(filp, arg); - case F2FS_IOC_GET_ENCRYPTION_POLICY: + case FS_IOC_GET_ENCRYPTION_POLICY: return f2fs_ioc_get_encryption_policy(filp, arg); - case F2FS_IOC_GET_ENCRYPTION_PWSALT: + case FS_IOC_GET_ENCRYPTION_PWSALT: return f2fs_ioc_get_encryption_pwsalt(filp, arg); case FS_IOC_GET_ENCRYPTION_POLICY_EX: return f2fs_ioc_get_encryption_policy_ex(filp, arg); @@ -3479,9 +4007,9 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_flush_device(filp, arg); case F2FS_IOC_GET_FEATURES: return f2fs_ioc_get_features(filp, arg); - case F2FS_IOC_FSGETXATTR: + case FS_IOC_FSGETXATTR: return f2fs_ioc_fsgetxattr(filp, arg); - case F2FS_IOC_FSSETXATTR: + case FS_IOC_FSSETXATTR: return f2fs_ioc_fssetxattr(filp, arg); case F2FS_IOC_GET_PIN_FILE: return f2fs_ioc_get_pin_file(filp, arg); @@ -3495,12 +4023,18 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_enable_verity(filp, arg); case FS_IOC_MEASURE_VERITY: return f2fs_ioc_measure_verity(filp, arg); - case F2FS_IOC_GET_VOLUME_NAME: - return f2fs_get_volume_name(filp, arg); - case F2FS_IOC_SET_VOLUME_NAME: - return f2fs_set_volume_name(filp, arg); + case FS_IOC_GETFSLABEL: + return f2fs_ioc_getfslabel(filp, arg); + case FS_IOC_SETFSLABEL: + return f2fs_ioc_setfslabel(filp, arg); case F2FS_IOC_GET_COMPRESS_BLOCKS: return f2fs_get_compress_blocks(filp, arg); + case F2FS_IOC_RELEASE_COMPRESS_BLOCKS: + return f2fs_release_compress_blocks(filp, arg); + case F2FS_IOC_RESERVE_COMPRESS_BLOCKS: + return f2fs_reserve_compress_blocks(filp, arg); + case F2FS_IOC_SEC_TRIM_FILE: + return f2fs_sec_trim_file(filp, arg); default: return -ENOTTY; } @@ -3510,11 +4044,17 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); + int ret; if (!f2fs_is_compress_backend_ready(inode)) return -EOPNOTSUPP; - return generic_file_read_iter(iocb, iter); + ret = generic_file_read_iter(iocb, iter); + + if (ret > 0) + f2fs_update_iostat(F2FS_I_SB(inode), APP_READ_IO, ret); + + return ret; } static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) @@ -3619,14 +4159,14 @@ out: long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { switch (cmd) { - case F2FS_IOC32_GETFLAGS: - cmd = F2FS_IOC_GETFLAGS; + case FS_IOC32_GETFLAGS: + cmd = FS_IOC_GETFLAGS; break; - case F2FS_IOC32_SETFLAGS: - cmd = F2FS_IOC_SETFLAGS; + case FS_IOC32_SETFLAGS: + cmd = FS_IOC_SETFLAGS; break; - case F2FS_IOC32_GETVERSION: - cmd = F2FS_IOC_GETVERSION; + case FS_IOC32_GETVERSION: + cmd = FS_IOC_GETVERSION; break; case F2FS_IOC_START_ATOMIC_WRITE: case F2FS_IOC_COMMIT_ATOMIC_WRITE: @@ -3635,9 +4175,9 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_ABORT_VOLATILE_WRITE: case F2FS_IOC_SHUTDOWN: case FITRIM: - case F2FS_IOC_SET_ENCRYPTION_POLICY: - case F2FS_IOC_GET_ENCRYPTION_PWSALT: - case F2FS_IOC_GET_ENCRYPTION_POLICY: + case FS_IOC_SET_ENCRYPTION_POLICY: + case FS_IOC_GET_ENCRYPTION_PWSALT: + case FS_IOC_GET_ENCRYPTION_POLICY: case FS_IOC_GET_ENCRYPTION_POLICY_EX: case FS_IOC_ADD_ENCRYPTION_KEY: case FS_IOC_REMOVE_ENCRYPTION_KEY: @@ -3651,17 +4191,20 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_MOVE_RANGE: case F2FS_IOC_FLUSH_DEVICE: case F2FS_IOC_GET_FEATURES: - case F2FS_IOC_FSGETXATTR: - case F2FS_IOC_FSSETXATTR: + case FS_IOC_FSGETXATTR: + case FS_IOC_FSSETXATTR: case F2FS_IOC_GET_PIN_FILE: case F2FS_IOC_SET_PIN_FILE: case F2FS_IOC_PRECACHE_EXTENTS: case F2FS_IOC_RESIZE_FS: case FS_IOC_ENABLE_VERITY: case FS_IOC_MEASURE_VERITY: - case F2FS_IOC_GET_VOLUME_NAME: - case F2FS_IOC_SET_VOLUME_NAME: + case FS_IOC_GETFSLABEL: + case FS_IOC_SETFSLABEL: case F2FS_IOC_GET_COMPRESS_BLOCKS: + case F2FS_IOC_RELEASE_COMPRESS_BLOCKS: + case F2FS_IOC_RESERVE_COMPRESS_BLOCKS: + case F2FS_IOC_SEC_TRIM_FILE: break; default: return -ENOIOCTLCMD; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 26248c8936db..11b4adde9baf 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -13,6 +13,7 @@ #include <linux/kthread.h> #include <linux/delay.h> #include <linux/freezer.h> +#include <linux/sched/signal.h> #include "f2fs.h" #include "node.h" @@ -20,6 +21,9 @@ #include "gc.h" #include <trace/events/f2fs.h> +static unsigned int count_bits(const unsigned long *addr, + unsigned int offset, unsigned int len); + static int gc_thread_func(void *data) { struct f2fs_sb_info *sbi = data; @@ -78,7 +82,7 @@ static int gc_thread_func(void *data) * invalidated soon after by user update or deletion. * So, I'd like to wait some time to collect dirty segments. */ - if (sbi->gc_mode == GC_URGENT) { + if (sbi->gc_mode == GC_URGENT_HIGH) { wait_ms = gc_th->urgent_sleep_time; down_write(&sbi->gc_lock); goto do_gc; @@ -172,7 +176,7 @@ static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type) gc_mode = GC_CB; break; case GC_IDLE_GREEDY: - case GC_URGENT: + case GC_URGENT_HIGH: gc_mode = GC_GREEDY; break; } @@ -186,14 +190,20 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, if (p->alloc_mode == SSR) { p->gc_mode = GC_GREEDY; - p->dirty_segmap = dirty_i->dirty_segmap[type]; + p->dirty_bitmap = dirty_i->dirty_segmap[type]; p->max_search = dirty_i->nr_dirty[type]; p->ofs_unit = 1; } else { p->gc_mode = select_gc_type(sbi, gc_type); - p->dirty_segmap = dirty_i->dirty_segmap[DIRTY]; - p->max_search = dirty_i->nr_dirty[DIRTY]; p->ofs_unit = sbi->segs_per_sec; + if (__is_large_section(sbi)) { + p->dirty_bitmap = dirty_i->dirty_secmap; + p->max_search = count_bits(p->dirty_bitmap, + 0, MAIN_SECS(sbi)); + } else { + p->dirty_bitmap = dirty_i->dirty_segmap[DIRTY]; + p->max_search = dirty_i->nr_dirty[DIRTY]; + } } /* @@ -201,7 +211,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, * foreground GC and urgent GC cases. */ if (gc_type != FG_GC && - (sbi->gc_mode != GC_URGENT) && + (sbi->gc_mode != GC_URGENT_HIGH) && p->max_search > sbi->max_victim_search) p->max_search = sbi->max_victim_search; @@ -320,6 +330,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, unsigned int secno, last_victim; unsigned int last_segment; unsigned int nsearched = 0; + int ret = 0; mutex_lock(&dirty_i->seglist_lock); last_segment = MAIN_SECS(sbi) * sbi->segs_per_sec; @@ -331,12 +342,19 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, p.min_cost = get_max_cost(sbi, &p); if (*result != NULL_SEGNO) { - if (get_valid_blocks(sbi, *result, false) && - !sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result))) + if (!get_valid_blocks(sbi, *result, false)) { + ret = -ENODATA; + goto out; + } + + if (sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result))) + ret = -EBUSY; + else p.min_segno = *result; goto out; } + ret = -ENODATA; if (p.max_search == 0) goto out; @@ -364,10 +382,14 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, } while (1) { - unsigned long cost; - unsigned int segno; - - segno = find_next_bit(p.dirty_segmap, last_segment, p.offset); + unsigned long cost, *dirty_bitmap; + unsigned int unit_no, segno; + + dirty_bitmap = p.dirty_bitmap; + unit_no = find_next_bit(dirty_bitmap, + last_segment / p.ofs_unit, + p.offset / p.ofs_unit); + segno = unit_no * p.ofs_unit; if (segno >= last_segment) { if (sm->last_victim[p.gc_mode]) { last_segment = @@ -380,14 +402,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, } p.offset = segno + p.ofs_unit; - if (p.ofs_unit > 1) { - p.offset -= segno % p.ofs_unit; - nsearched += count_bits(p.dirty_segmap, - p.offset - p.ofs_unit, - p.ofs_unit); - } else { - nsearched++; - } + nsearched++; #ifdef CONFIG_F2FS_CHECK_FS /* @@ -420,9 +435,10 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, next: if (nsearched >= p.max_search) { if (!sm->last_victim[p.gc_mode] && segno <= last_victim) - sm->last_victim[p.gc_mode] = last_victim + 1; + sm->last_victim[p.gc_mode] = + last_victim + p.ofs_unit; else - sm->last_victim[p.gc_mode] = segno + 1; + sm->last_victim[p.gc_mode] = segno + p.ofs_unit; sm->last_victim[p.gc_mode] %= (MAIN_SECS(sbi) * sbi->segs_per_sec); break; @@ -439,6 +455,7 @@ got_result: else set_bit(secno, dirty_i->victim_secmap); } + ret = 0; } out: @@ -448,7 +465,7 @@ out: prefree_segments(sbi), free_segments(sbi)); mutex_unlock(&dirty_i->seglist_lock); - return (p.min_segno == NULL_SEGNO) ? 0 : 1; + return ret; } static const struct victim_selection default_v_ops = { @@ -737,6 +754,10 @@ got_it: goto put_encrypted_page; f2fs_put_page(fio.encrypted_page, 0); f2fs_put_page(page, 1); + + f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); + f2fs_update_iostat(sbi, FS_GDATA_READ_IO, F2FS_BLKSIZE); + return 0; put_encrypted_page: f2fs_put_page(fio.encrypted_page, 1); @@ -828,8 +849,10 @@ static int move_data_block(struct inode *inode, block_t bidx, mpage = f2fs_grab_cache_page(META_MAPPING(fio.sbi), fio.old_blkaddr, false); - if (!mpage) + if (!mpage) { + err = -ENOMEM; goto up_out; + } fio.encrypted_page = mpage; @@ -840,6 +863,10 @@ static int move_data_block(struct inode *inode, block_t bidx, f2fs_put_page(mpage, 1); goto up_out; } + + f2fs_update_iostat(fio.sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); + f2fs_update_iostat(fio.sbi, FS_GDATA_READ_IO, F2FS_BLKSIZE); + lock_page(mpage); if (unlikely(mpage->mapping != META_MAPPING(fio.sbi) || !PageUptodate(mpage))) { @@ -850,7 +877,7 @@ static int move_data_block(struct inode *inode, block_t bidx, } f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr, - &sum, CURSEG_COLD_DATA, NULL, false); + &sum, CURSEG_COLD_DATA, NULL); fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi), newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS); @@ -1324,10 +1351,9 @@ gc_more: ret = -EINVAL; goto stop; } - if (!__get_victim(sbi, &segno, gc_type)) { - ret = -ENODATA; + ret = __get_victim(sbi, &segno, gc_type); + if (ret) goto stop; - } seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type); if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec) @@ -1399,16 +1425,33 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi) GET_SEGNO(sbi, FDEV(0).end_blk) + 1; } -static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start, - unsigned int end) +static int free_segment_range(struct f2fs_sb_info *sbi, + unsigned int secs, bool gc_only) { - int type; - unsigned int segno, next_inuse; + unsigned int segno, next_inuse, start, end; + struct cp_control cpc = { CP_RESIZE, 0, 0, 0 }; + int gc_mode, gc_type; int err = 0; + int type; + + /* Force block allocation for GC */ + MAIN_SECS(sbi) -= secs; + start = MAIN_SECS(sbi) * sbi->segs_per_sec; + end = MAIN_SEGS(sbi) - 1; + + mutex_lock(&DIRTY_I(sbi)->seglist_lock); + for (gc_mode = 0; gc_mode < MAX_GC_POLICY; gc_mode++) + if (SIT_I(sbi)->last_victim[gc_mode] >= start) + SIT_I(sbi)->last_victim[gc_mode] = 0; + + for (gc_type = BG_GC; gc_type <= FG_GC; gc_type++) + if (sbi->next_victim_seg[gc_type] >= start) + sbi->next_victim_seg[gc_type] = NULL_SEGNO; + mutex_unlock(&DIRTY_I(sbi)->seglist_lock); /* Move out cursegs from the target range */ for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++) - allocate_segment_for_resize(sbi, type, start, end); + f2fs_allocate_segment_for_resize(sbi, type, start, end); /* do GC to move out valid blocks in the range */ for (segno = start; segno <= end; segno += sbi->segs_per_sec) { @@ -1417,18 +1460,24 @@ static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start, .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), }; - down_write(&sbi->gc_lock); do_garbage_collect(sbi, segno, &gc_list, FG_GC); - up_write(&sbi->gc_lock); put_gc_inode(&gc_list); - if (get_valid_blocks(sbi, segno, true)) - return -EAGAIN; + if (!gc_only && get_valid_blocks(sbi, segno, true)) { + err = -EAGAIN; + goto out; + } + if (fatal_signal_pending(current)) { + err = -ERESTARTSYS; + goto out; + } } + if (gc_only) + goto out; - err = f2fs_sync_fs(sbi->sb, 1); + err = f2fs_write_checkpoint(sbi, &cpc); if (err) - return err; + goto out; next_inuse = find_next_inuse(FREE_I(sbi), end + 1, start); if (next_inuse <= end) { @@ -1436,6 +1485,8 @@ static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start, next_inuse); f2fs_bug_on(sbi, 1); } +out: + MAIN_SECS(sbi) += secs; return err; } @@ -1481,6 +1532,7 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) SM_I(sbi)->segment_count = (int)SM_I(sbi)->segment_count + segs; MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs; + MAIN_SECS(sbi) += secs; FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs; FREE_I(sbi)->free_segments = (int)FREE_I(sbi)->free_segments + segs; F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count + blks); @@ -1502,8 +1554,8 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) { __u64 old_block_count, shrunk_blocks; + struct cp_control cpc = { CP_RESIZE, 0, 0, 0 }; unsigned int secs; - int gc_mode, gc_type; int err = 0; __u32 rem; @@ -1538,10 +1590,27 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) return -EINVAL; } - freeze_bdev(sbi->sb->s_bdev); - shrunk_blocks = old_block_count - block_count; secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi)); + + /* stop other GC */ + if (!down_write_trylock(&sbi->gc_lock)) + return -EAGAIN; + + /* stop CP to protect MAIN_SEC in free_segment_range */ + f2fs_lock_op(sbi); + err = free_segment_range(sbi, secs, true); + f2fs_unlock_op(sbi); + up_write(&sbi->gc_lock); + if (err) + return err; + + set_sbi_flag(sbi, SBI_IS_RESIZEFS); + + freeze_super(sbi->sb); + down_write(&sbi->gc_lock); + mutex_lock(&sbi->cp_mutex); + spin_lock(&sbi->stat_lock); if (shrunk_blocks + valid_user_blocks(sbi) + sbi->current_reserved_blocks + sbi->unusable_block_count + @@ -1550,69 +1619,44 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) else sbi->user_block_count -= shrunk_blocks; spin_unlock(&sbi->stat_lock); - if (err) { - thaw_bdev(sbi->sb->s_bdev, sbi->sb); - return err; - } - - mutex_lock(&sbi->resize_mutex); - set_sbi_flag(sbi, SBI_IS_RESIZEFS); - - mutex_lock(&DIRTY_I(sbi)->seglist_lock); - - MAIN_SECS(sbi) -= secs; - - for (gc_mode = 0; gc_mode < MAX_GC_POLICY; gc_mode++) - if (SIT_I(sbi)->last_victim[gc_mode] >= - MAIN_SECS(sbi) * sbi->segs_per_sec) - SIT_I(sbi)->last_victim[gc_mode] = 0; - - for (gc_type = BG_GC; gc_type <= FG_GC; gc_type++) - if (sbi->next_victim_seg[gc_type] >= - MAIN_SECS(sbi) * sbi->segs_per_sec) - sbi->next_victim_seg[gc_type] = NULL_SEGNO; - - mutex_unlock(&DIRTY_I(sbi)->seglist_lock); + if (err) + goto out_err; - err = free_segment_range(sbi, MAIN_SECS(sbi) * sbi->segs_per_sec, - MAIN_SEGS(sbi) - 1); + err = free_segment_range(sbi, secs, false); if (err) - goto out; + goto recover_out; update_sb_metadata(sbi, -secs); err = f2fs_commit_super(sbi, false); if (err) { update_sb_metadata(sbi, secs); - goto out; + goto recover_out; } - mutex_lock(&sbi->cp_mutex); update_fs_metadata(sbi, -secs); clear_sbi_flag(sbi, SBI_IS_RESIZEFS); set_sbi_flag(sbi, SBI_IS_DIRTY); - mutex_unlock(&sbi->cp_mutex); - err = f2fs_sync_fs(sbi->sb, 1); + err = f2fs_write_checkpoint(sbi, &cpc); if (err) { - mutex_lock(&sbi->cp_mutex); update_fs_metadata(sbi, secs); - mutex_unlock(&sbi->cp_mutex); update_sb_metadata(sbi, secs); f2fs_commit_super(sbi, false); } -out: +recover_out: if (err) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_err(sbi, "resize_fs failed, should run fsck to repair!"); - MAIN_SECS(sbi) += secs; spin_lock(&sbi->stat_lock); sbi->user_block_count += shrunk_blocks; spin_unlock(&sbi->stat_lock); } +out_err: + mutex_unlock(&sbi->cp_mutex); + up_write(&sbi->gc_lock); + thaw_super(sbi->sb); clear_sbi_flag(sbi, SBI_IS_RESIZEFS); - mutex_unlock(&sbi->resize_mutex); - thaw_bdev(sbi->sb->s_bdev, sbi->sb); return err; } diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index bbac9d3787bd..db3c61046aa4 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * fs/f2fs/gc.h * diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index 5bc4dcd8fc03..de841aaf3c43 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -12,7 +12,6 @@ #include <linux/types.h> #include <linux/fs.h> #include <linux/f2fs_fs.h> -#include <linux/cryptohash.h> #include <linux/pagemap.h> #include <linux/unicode.h> @@ -68,22 +67,9 @@ static void str2hashbuf(const unsigned char *msg, size_t len, *buf++ = pad; } -static f2fs_hash_t __f2fs_dentry_hash(const struct qstr *name_info, - struct fscrypt_name *fname) +static u32 TEA_hash_name(const u8 *p, size_t len) { - __u32 hash; - f2fs_hash_t f2fs_hash; - const unsigned char *p; __u32 in[8], buf[4]; - const unsigned char *name = name_info->name; - size_t len = name_info->len; - - /* encrypted bigname case */ - if (fname && !fname->disk_name.name) - return cpu_to_le32(fname->hash); - - if (is_dot_dotdot(name_info)) - return 0; /* Initialize the default seed for the hash checksum functions */ buf[0] = 0x67452301; @@ -91,7 +77,6 @@ static f2fs_hash_t __f2fs_dentry_hash(const struct qstr *name_info, buf[2] = 0x98badcfe; buf[3] = 0x10325476; - p = name; while (1) { str2hashbuf(p, len, in, 4); TEA_transform(buf, in); @@ -100,41 +85,43 @@ static f2fs_hash_t __f2fs_dentry_hash(const struct qstr *name_info, break; len -= 16; } - hash = buf[0]; - f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT); - return f2fs_hash; + return buf[0] & ~F2FS_HASH_COL_BIT; } -f2fs_hash_t f2fs_dentry_hash(const struct inode *dir, - const struct qstr *name_info, struct fscrypt_name *fname) +/* + * Compute @fname->hash. For all directories, @fname->disk_name must be set. + * For casefolded directories, @fname->usr_fname must be set, and also + * @fname->cf_name if the filename is valid Unicode. + */ +void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname) { -#ifdef CONFIG_UNICODE - struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); - const struct unicode_map *um = sbi->s_encoding; - int r, dlen; - unsigned char *buff; - struct qstr folded; + const u8 *name = fname->disk_name.name; + size_t len = fname->disk_name.len; - if (!name_info->len || !IS_CASEFOLDED(dir)) - goto opaque_seq; + WARN_ON_ONCE(!name); - buff = f2fs_kzalloc(sbi, sizeof(char) * PATH_MAX, GFP_KERNEL); - if (!buff) - return -ENOMEM; - - dlen = utf8_casefold(um, name_info, buff, PATH_MAX); - if (dlen < 0) { - kvfree(buff); - goto opaque_seq; + if (is_dot_dotdot(name, len)) { + fname->hash = 0; + return; } - folded.name = buff; - folded.len = dlen; - r = __f2fs_dentry_hash(&folded, fname); - - kvfree(buff); - return r; -opaque_seq: +#ifdef CONFIG_UNICODE + if (IS_CASEFOLDED(dir)) { + /* + * If the casefolded name is provided, hash it instead of the + * on-disk name. If the casefolded name is *not* provided, that + * should only be because the name wasn't valid Unicode, so fall + * back to treating the name as an opaque byte sequence. + */ + WARN_ON_ONCE(!fname->usr_fname->name); + if (fname->cf_name.name) { + name = fname->cf_name.name; + len = fname->cf_name.len; + } else { + name = fname->usr_fname->name; + len = fname->usr_fname->len; + } + } #endif - return __f2fs_dentry_hash(name_info, fname); + fname->hash = cpu_to_le32(TEA_hash_name(name, len)); } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 4167e5408151..102df444f623 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -8,9 +8,11 @@ #include <linux/fs.h> #include <linux/f2fs_fs.h> +#include <linux/fiemap.h> #include "f2fs.h" #include "node.h" +#include <trace/events/f2fs.h> bool f2fs_may_inline_data(struct inode *inode) { @@ -252,7 +254,7 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) return 0; } -bool f2fs_recover_inline_data(struct inode *inode, struct page *npage) +int f2fs_recover_inline_data(struct inode *inode, struct page *npage) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode *ri = NULL; @@ -274,7 +276,8 @@ bool f2fs_recover_inline_data(struct inode *inode, struct page *npage) ri && (ri->i_inline & F2FS_INLINE_DATA)) { process_inline: ipage = f2fs_get_node_page(sbi, inode->i_ino); - f2fs_bug_on(sbi, IS_ERR(ipage)); + if (IS_ERR(ipage)) + return PTR_ERR(ipage); f2fs_wait_on_page_writeback(ipage, NODE, true, true); @@ -287,33 +290,36 @@ process_inline: set_page_dirty(ipage); f2fs_put_page(ipage, 1); - return true; + return 1; } if (f2fs_has_inline_data(inode)) { ipage = f2fs_get_node_page(sbi, inode->i_ino); - f2fs_bug_on(sbi, IS_ERR(ipage)); + if (IS_ERR(ipage)) + return PTR_ERR(ipage); f2fs_truncate_inline_inode(inode, ipage, 0); clear_inode_flag(inode, FI_INLINE_DATA); f2fs_put_page(ipage, 1); } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { - if (f2fs_truncate_blocks(inode, 0, false)) - return false; + int ret; + + ret = f2fs_truncate_blocks(inode, 0, false); + if (ret) + return ret; goto process_inline; } - return false; + return 0; } struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, - struct fscrypt_name *fname, struct page **res_page) + const struct f2fs_filename *fname, + struct page **res_page) { struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); - struct qstr name = FSTR_TO_QSTR(&fname->disk_name); struct f2fs_dir_entry *de; struct f2fs_dentry_ptr d; struct page *ipage; void *inline_dentry; - f2fs_hash_t namehash; ipage = f2fs_get_node_page(sbi, dir->i_ino); if (IS_ERR(ipage)) { @@ -321,12 +327,10 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, return NULL; } - namehash = f2fs_dentry_hash(dir, &name, fname); - inline_dentry = inline_data_addr(dir, ipage); make_dentry_ptr_inline(dir, &d, inline_dentry); - de = f2fs_find_target_dentry(fname, namehash, NULL, &d); + de = f2fs_find_target_dentry(&d, fname, NULL); unlock_page(ipage); if (de) *res_page = ipage; @@ -443,7 +447,7 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry) while (bit_pos < d.max) { struct f2fs_dir_entry *de; - struct qstr new_name; + struct f2fs_filename fname; nid_t ino; umode_t fake_mode; @@ -459,14 +463,19 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry) continue; } - new_name.name = d.filename[bit_pos]; - new_name.len = le16_to_cpu(de->name_len); + /* + * We only need the disk_name and hash to move the dentry. + * We don't need the original or casefolded filenames. + */ + memset(&fname, 0, sizeof(fname)); + fname.disk_name.name = d.filename[bit_pos]; + fname.disk_name.len = le16_to_cpu(de->name_len); + fname.hash = de->hash_code; ino = le32_to_cpu(de->ino); fake_mode = f2fs_get_de_type(de) << S_SHIFT; - err = f2fs_add_regular_entry(dir, &new_name, NULL, NULL, - ino, fake_mode); + err = f2fs_add_regular_entry(dir, &fname, NULL, ino, fake_mode); if (err) goto punch_dentry_pages; @@ -543,7 +552,7 @@ int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct page *ipage; - struct fscrypt_name fname; + struct f2fs_filename fname; void *inline_dentry = NULL; int err = 0; @@ -552,19 +561,19 @@ int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry) f2fs_lock_op(sbi); - err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &fname); + err = f2fs_setup_filename(dir, &dentry->d_name, 0, &fname); if (err) goto out; ipage = f2fs_get_node_page(sbi, dir->i_ino); if (IS_ERR(ipage)) { err = PTR_ERR(ipage); - goto out; + goto out_fname; } if (f2fs_has_enough_room(dir, ipage, &fname)) { f2fs_put_page(ipage, 1); - goto out; + goto out_fname; } inline_dentry = inline_data_addr(dir, ipage); @@ -572,22 +581,22 @@ int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry) err = do_convert_inline_dir(dir, ipage, inline_dentry); if (!err) f2fs_put_page(ipage, 1); +out_fname: + f2fs_free_filename(&fname); out: f2fs_unlock_op(sbi); return err; } -int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, - const struct qstr *orig_name, - struct inode *inode, nid_t ino, umode_t mode) +int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname, + struct inode *inode, nid_t ino, umode_t mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct page *ipage; unsigned int bit_pos; - f2fs_hash_t name_hash; void *inline_dentry = NULL; struct f2fs_dentry_ptr d; - int slots = GET_DENTRY_SLOTS(new_name->len); + int slots = GET_DENTRY_SLOTS(fname->disk_name.len); struct page *page = NULL; int err = 0; @@ -609,8 +618,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, if (inode) { down_write(&F2FS_I(inode)->i_sem); - page = f2fs_init_inode_metadata(inode, dir, new_name, - orig_name, ipage); + page = f2fs_init_inode_metadata(inode, dir, fname, ipage); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; @@ -619,8 +627,8 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, f2fs_wait_on_page_writeback(ipage, NODE, true, true); - name_hash = f2fs_dentry_hash(dir, new_name, NULL); - f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos); + f2fs_update_dentry(ino, mode, &d, &fname->disk_name, fname->hash, + bit_pos); set_page_dirty(ipage); @@ -774,6 +782,7 @@ int f2fs_inline_data_fiemap(struct inode *inode, byteaddr += (char *)inline_data_addr(inode, ipage) - (char *)F2FS_INODE(ipage); err = fiemap_fill_next_extent(fieinfo, start, byteaddr, ilen, flags); + trace_f2fs_fiemap(inode, start, byteaddr, ilen, flags, err); out: f2fs_put_page(ipage, 1); return err; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 44582a4db513..66969ae852b9 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -367,8 +367,7 @@ static int do_read_inode(struct inode *inode) fi->i_pino = le32_to_cpu(ri->i_pino); fi->i_dir_level = ri->i_dir_level; - if (f2fs_init_extent_tree(inode, &ri->i_ext)) - set_page_dirty(node_page); + f2fs_init_extent_tree(inode, node_page); get_inline_info(inode, ri); @@ -402,6 +401,7 @@ static int do_read_inode(struct inode *inode) /* try to recover cold bit for non-dir inode */ if (!S_ISDIR(inode->i_mode) && !is_cold_node(node_page)) { + f2fs_wait_on_page_writeback(node_page, NODE, true, true); set_cold_node(node_page, false); set_page_dirty(node_page); } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index f54119da2217..84e4bbc1a64d 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -482,7 +482,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, nid_t ino = -1; int err = 0; unsigned int root_ino = F2FS_ROOT_INO(F2FS_I_SB(dir)); - struct fscrypt_name fname; + struct f2fs_filename fname; trace_f2fs_lookup_start(dir, dentry, flags); @@ -491,19 +491,20 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, goto out; } - err = fscrypt_prepare_lookup(dir, dentry, &fname); + err = f2fs_prepare_lookup(dir, dentry, &fname); if (err == -ENOENT) goto out_splice; if (err) goto out; de = __f2fs_find_entry(dir, &fname, &page); - fscrypt_free_filename(&fname); + f2fs_free_filename(&fname); if (!de) { if (IS_ERR(page)) { err = PTR_ERR(page); goto out; } + err = -ENOENT; goto out_splice; } @@ -549,7 +550,7 @@ out_splice: #endif new = d_splice_alias(inode, dentry); err = PTR_ERR_OR_ZERO(new); - trace_f2fs_lookup_end(dir, dentry, ino, err); + trace_f2fs_lookup_end(dir, dentry, ino, !new ? -ENOENT : err); return new; out_iput: iput(inode); @@ -564,19 +565,21 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) struct inode *inode = d_inode(dentry); struct f2fs_dir_entry *de; struct page *page; - int err = -ENOENT; + int err; trace_f2fs_unlink_enter(dir, dentry); - if (unlikely(f2fs_cp_error(sbi))) - return -EIO; + if (unlikely(f2fs_cp_error(sbi))) { + err = -EIO; + goto fail; + } err = dquot_initialize(dir); if (err) - return err; + goto fail; err = dquot_initialize(inode); if (err) - return err; + goto fail; de = f2fs_find_entry(dir, &dentry->d_name, &page); if (!de) { @@ -599,7 +602,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) /* VFS negative dentries are incompatible with Encoding and * Case-insensitiveness. Eventually we'll want avoid * invalidating the dentries here, alongside with returning the - * negative dentries at f2fs_lookup(), when it is better + * negative dentries at f2fs_lookup(), when it is better * supported by the VFS for the CI case. */ if (IS_CASEFOLDED(dir)) @@ -1284,12 +1287,10 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry, } const struct inode_operations f2fs_encrypted_symlink_inode_operations = { - .get_link = f2fs_encrypted_get_link, + .get_link = f2fs_encrypted_get_link, .getattr = f2fs_getattr, .setattr = f2fs_setattr, -#ifdef CONFIG_F2FS_FS_XATTR .listxattr = f2fs_listxattr, -#endif }; const struct inode_operations f2fs_dir_inode_operations = { @@ -1307,27 +1308,21 @@ const struct inode_operations f2fs_dir_inode_operations = { .setattr = f2fs_setattr, .get_acl = f2fs_get_acl, .set_acl = f2fs_set_acl, -#ifdef CONFIG_F2FS_FS_XATTR .listxattr = f2fs_listxattr, -#endif .fiemap = f2fs_fiemap, }; const struct inode_operations f2fs_symlink_inode_operations = { - .get_link = f2fs_get_link, + .get_link = f2fs_get_link, .getattr = f2fs_getattr, .setattr = f2fs_setattr, -#ifdef CONFIG_F2FS_FS_XATTR .listxattr = f2fs_listxattr, -#endif }; const struct inode_operations f2fs_special_inode_operations = { .getattr = f2fs_getattr, - .setattr = f2fs_setattr, + .setattr = f2fs_setattr, .get_acl = f2fs_get_acl, .set_acl = f2fs_set_acl, -#ifdef CONFIG_F2FS_FS_XATTR .listxattr = f2fs_listxattr, -#endif }; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index ecbd6bd14a49..cb1b5b61a1da 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -618,10 +618,10 @@ pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs) switch (dn->max_level) { case 3: base += 2 * indirect_blks; - /* fall through */ + fallthrough; case 2: base += 2 * direct_blks; - /* fall through */ + fallthrough; case 1: base += direct_index; break; @@ -1041,8 +1041,10 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from) trace_f2fs_truncate_inode_blocks_enter(inode, from); level = get_node_path(inode, from, offset, noffset); - if (level < 0) + if (level < 0) { + trace_f2fs_truncate_inode_blocks_exit(inode, level); return level; + } page = f2fs_get_node_page(sbi, inode->i_ino); if (IS_ERR(page)) { @@ -1300,7 +1302,13 @@ static int read_node_page(struct page *page, int op_flags) } fio.new_blkaddr = fio.old_blkaddr = ni.blk_addr; - return f2fs_submit_page_bio(&fio); + + err = f2fs_submit_page_bio(&fio); + + if (!err) + f2fs_update_iostat(sbi, FS_NODE_READ_IO, F2FS_BLKSIZE); + + return err; } /* @@ -1514,8 +1522,15 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, trace_f2fs_writepage(page, NODE); - if (unlikely(f2fs_cp_error(sbi))) + if (unlikely(f2fs_cp_error(sbi))) { + if (is_sbi_flag_set(sbi, SBI_IS_CLOSE)) { + ClearPageUptodate(page); + dec_page_count(sbi, F2FS_DIRTY_NODES); + unlock_page(page); + return 0; + } goto redirty_out; + } if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; @@ -1713,7 +1728,7 @@ continue_unlock: set_dentry_mark(page, f2fs_need_dentry_mark(sbi, ino)); } - /* may be written by other thread */ + /* may be written by other thread */ if (!PageDirty(page)) set_page_dirty(page); } @@ -1801,6 +1816,51 @@ static bool flush_dirty_inode(struct page *page) return true; } +void f2fs_flush_inline_data(struct f2fs_sb_info *sbi) +{ + pgoff_t index = 0; + struct pagevec pvec; + int nr_pages; + + pagevec_init(&pvec); + + while ((nr_pages = pagevec_lookup_tag(&pvec, + NODE_MAPPING(sbi), &index, PAGECACHE_TAG_DIRTY))) { + int i; + + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + if (!IS_DNODE(page)) + continue; + + lock_page(page); + + if (unlikely(page->mapping != NODE_MAPPING(sbi))) { +continue_unlock: + unlock_page(page); + continue; + } + + if (!PageDirty(page)) { + /* someone wrote it for us */ + goto continue_unlock; + } + + /* flush inline_data, if it's async context. */ + if (is_inline_node(page)) { + clear_inline_node(page); + unlock_page(page); + flush_inline_data(sbi, ino_of_node(page)); + continue; + } + unlock_page(page); + } + pagevec_release(&pvec); + cond_resched(); + } +} + int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc, bool do_balance, enum iostat_type io_type) @@ -1864,6 +1924,10 @@ continue_unlock: goto continue_unlock; } + /* flush inline_data/inode, if it's async context. */ + if (!do_balance) + goto write_node; + /* flush inline_data */ if (is_inline_node(page)) { clear_inline_node(page); @@ -1878,7 +1942,7 @@ continue_unlock: if (flush_dirty_inode(page)) goto lock_node; } - +write_node: f2fs_wait_on_page_writeback(page, NODE, true, true); if (!clear_page_dirty_for_io(page)) @@ -2037,7 +2101,7 @@ const struct address_space_operations f2fs_node_aops = { .invalidatepage = f2fs_invalidate_page, .releasepage = f2fs_release_page, #ifdef CONFIG_MIGRATION - .migratepage = f2fs_migrate_page, + .migratepage = f2fs_migrate_page, #endif }; @@ -2048,7 +2112,7 @@ static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i, } static int __insert_free_nid(struct f2fs_sb_info *sbi, - struct free_nid *i, enum nid_state state) + struct free_nid *i) { struct f2fs_nm_info *nm_i = NM_I(sbi); @@ -2056,10 +2120,8 @@ static int __insert_free_nid(struct f2fs_sb_info *sbi, if (err) return err; - f2fs_bug_on(sbi, state != i->state); - nm_i->nid_cnt[state]++; - if (state == FREE_NID) - list_add_tail(&i->list, &nm_i->free_nid_list); + nm_i->nid_cnt[FREE_NID]++; + list_add_tail(&i->list, &nm_i->free_nid_list); return 0; } @@ -2181,7 +2243,7 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, } } ret = true; - err = __insert_free_nid(sbi, i, FREE_NID); + err = __insert_free_nid(sbi, i); err_out: if (update) { update_free_nid_bitmap(sbi, nid, ret, build); @@ -2311,6 +2373,9 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, if (unlikely(nid >= nm_i->max_nid)) nid = 0; + if (unlikely(nid % NAT_ENTRY_PER_BLOCK)) + nid = NAT_BLOCK_OFFSET(nid) * NAT_ENTRY_PER_BLOCK; + /* Enough entries */ if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK) return 0; @@ -2482,7 +2547,6 @@ void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink) { struct f2fs_nm_info *nm_i = NM_I(sbi); - struct free_nid *i, *next; int nr = nr_shrink; if (nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS) @@ -2491,23 +2555,29 @@ int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink) if (!mutex_trylock(&nm_i->build_lock)) return 0; - spin_lock(&nm_i->nid_list_lock); - list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) { - if (nr_shrink <= 0 || - nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS) - break; + while (nr_shrink && nm_i->nid_cnt[FREE_NID] > MAX_FREE_NIDS) { + struct free_nid *i, *next; + unsigned int batch = SHRINK_NID_BATCH_SIZE; - __remove_free_nid(sbi, i, FREE_NID); - kmem_cache_free(free_nid_slab, i); - nr_shrink--; + spin_lock(&nm_i->nid_list_lock); + list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) { + if (!nr_shrink || !batch || + nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS) + break; + __remove_free_nid(sbi, i, FREE_NID); + kmem_cache_free(free_nid_slab, i); + nr_shrink--; + batch--; + } + spin_unlock(&nm_i->nid_list_lock); } - spin_unlock(&nm_i->nid_list_lock); + mutex_unlock(&nm_i->build_lock); return nr - nr_shrink; } -void f2fs_recover_inline_xattr(struct inode *inode, struct page *page) +int f2fs_recover_inline_xattr(struct inode *inode, struct page *page) { void *src_addr, *dst_addr; size_t inline_size; @@ -2515,7 +2585,8 @@ void f2fs_recover_inline_xattr(struct inode *inode, struct page *page) struct f2fs_inode *ri; ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino); - f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(ipage)); + if (IS_ERR(ipage)) + return PTR_ERR(ipage); ri = F2FS_INODE(page); if (ri->i_inline & F2FS_INLINE_XATTR) { @@ -2534,6 +2605,7 @@ void f2fs_recover_inline_xattr(struct inode *inode, struct page *page) update_inode: f2fs_update_inode(inode, ipage); f2fs_put_page(ipage, 1); + return 0; } int f2fs_recover_xattr_data(struct inode *inode, struct page *page) @@ -2928,7 +3000,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) return 0; nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8); - nm_i->nat_bits = f2fs_kzalloc(sbi, + nm_i->nat_bits = f2fs_kvzalloc(sbi, nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, GFP_KERNEL); if (!nm_i->nat_bits) return -ENOMEM; @@ -3061,9 +3133,9 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi) int i; nm_i->free_nid_bitmap = - f2fs_kzalloc(sbi, array_size(sizeof(unsigned char *), - nm_i->nat_blocks), - GFP_KERNEL); + f2fs_kvzalloc(sbi, array_size(sizeof(unsigned char *), + nm_i->nat_blocks), + GFP_KERNEL); if (!nm_i->free_nid_bitmap) return -ENOMEM; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index e05af5df5648..69e5859e993c 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * fs/f2fs/node.h * @@ -15,6 +15,9 @@ #define FREE_NID_PAGES 8 #define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES) +/* size of free nid batch when shrinking */ +#define SHRINK_NID_BATCH_SIZE 8 + #define DEF_RA_NID_PAGES 0 /* # of nid pages to be readaheaded */ /* maximum readahead size for node during getting data blocks */ diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index dd804c07eeb0..4f12ade6410a 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -107,13 +107,51 @@ static void del_fsync_inode(struct fsync_inode_entry *entry, int drop) kmem_cache_free(fsync_entry_slab, entry); } +static int init_recovered_filename(const struct inode *dir, + struct f2fs_inode *raw_inode, + struct f2fs_filename *fname, + struct qstr *usr_fname) +{ + int err; + + memset(fname, 0, sizeof(*fname)); + fname->disk_name.len = le32_to_cpu(raw_inode->i_namelen); + fname->disk_name.name = raw_inode->i_name; + + if (WARN_ON(fname->disk_name.len > F2FS_NAME_LEN)) + return -ENAMETOOLONG; + + if (!IS_ENCRYPTED(dir)) { + usr_fname->name = fname->disk_name.name; + usr_fname->len = fname->disk_name.len; + fname->usr_fname = usr_fname; + } + + /* Compute the hash of the filename */ + if (IS_CASEFOLDED(dir)) { + err = f2fs_init_casefolded_name(dir, fname); + if (err) + return err; + f2fs_hash_filename(dir, fname); +#ifdef CONFIG_UNICODE + /* Case-sensitive match is fine for recovery */ + kfree(fname->cf_name.name); + fname->cf_name.name = NULL; +#endif + } else { + f2fs_hash_filename(dir, fname); + } + return 0; +} + static int recover_dentry(struct inode *inode, struct page *ipage, struct list_head *dir_list) { struct f2fs_inode *raw_inode = F2FS_INODE(ipage); nid_t pino = le32_to_cpu(raw_inode->i_pino); struct f2fs_dir_entry *de; - struct fscrypt_name fname; + struct f2fs_filename fname; + struct qstr usr_fname; struct page *page; struct inode *dir, *einode; struct fsync_inode_entry *entry; @@ -132,16 +170,9 @@ static int recover_dentry(struct inode *inode, struct page *ipage, } dir = entry->inode; - - memset(&fname, 0, sizeof(struct fscrypt_name)); - fname.disk_name.len = le32_to_cpu(raw_inode->i_namelen); - fname.disk_name.name = raw_inode->i_name; - - if (unlikely(fname.disk_name.len > F2FS_NAME_LEN)) { - WARN_ON(1); - err = -ENAMETOOLONG; + err = init_recovered_filename(dir, raw_inode, &fname, &usr_fname); + if (err) goto out; - } retry: de = __f2fs_find_entry(dir, &fname, &page); if (de && inode->i_ino == le32_to_cpu(de->ino)) @@ -513,7 +544,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, /* step 1: recover xattr */ if (IS_INODE(page)) { - f2fs_recover_inline_xattr(inode, page); + err = f2fs_recover_inline_xattr(inode, page); + if (err) + goto out; } else if (f2fs_has_xattr_block(ofs_of_node(page))) { err = f2fs_recover_xattr_data(inode, page); if (!err) @@ -522,8 +555,12 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, } /* step 2: recover inline data */ - if (f2fs_recover_inline_data(inode, page)) + err = f2fs_recover_inline_data(inode, page); + if (err) { + if (err == 1) + err = 0; goto out; + } /* step 3: recover data indices */ start = f2fs_start_bidx_of_node(ofs_of_node(page), inode); @@ -711,7 +748,7 @@ next: f2fs_put_page(page, 1); } if (!err) - f2fs_allocate_new_segments(sbi, NO_CHECK_TYPE); + f2fs_allocate_new_segments(sbi); return err; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b7a9421472a7..e247a5ef3713 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -174,7 +174,7 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi) if (f2fs_lfs_mode(sbi)) return false; - if (sbi->gc_mode == GC_URGENT) + if (sbi->gc_mode == GC_URGENT_HIGH) return true; if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) return true; @@ -796,6 +796,18 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, } if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t])) dirty_i->nr_dirty[t]++; + + if (__is_large_section(sbi)) { + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); + block_t valid_blocks = + get_valid_blocks(sbi, segno, true); + + f2fs_bug_on(sbi, unlikely(!valid_blocks || + valid_blocks == BLKS_PER_SEC(sbi))); + + if (!IS_CURSEC(sbi, secno)) + set_bit(secno, dirty_i->dirty_secmap); + } } } @@ -803,6 +815,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, enum dirty_type dirty_type) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + block_t valid_blocks; if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type])) dirty_i->nr_dirty[dirty_type]--; @@ -814,13 +827,26 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) dirty_i->nr_dirty[t]--; - if (get_valid_blocks(sbi, segno, true) == 0) { + valid_blocks = get_valid_blocks(sbi, segno, true); + if (valid_blocks == 0) { clear_bit(GET_SEC_FROM_SEG(sbi, segno), dirty_i->victim_secmap); #ifdef CONFIG_F2FS_CHECK_FS clear_bit(segno, SIT_I(sbi)->invalid_segmap); #endif } + if (__is_large_section(sbi)) { + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); + + if (!valid_blocks || + valid_blocks == BLKS_PER_SEC(sbi)) { + clear_bit(secno, dirty_i->dirty_secmap); + return; + } + + if (!IS_CURSEC(sbi, secno)) + set_bit(secno, dirty_i->dirty_secmap); + } } } @@ -1029,9 +1055,9 @@ static void f2fs_submit_discard_endio(struct bio *bio) struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private; unsigned long flags; - dc->error = blk_status_to_errno(bio->bi_status); - spin_lock_irqsave(&dc->lock, flags); + if (!dc->error) + dc->error = blk_status_to_errno(bio->bi_status); dc->bio_ref--; if (!dc->bio_ref && dc->state == D_SUBMIT) { dc->state = D_DONE; @@ -1101,7 +1127,6 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, } else if (discard_type == DPOLICY_FSTRIM) { dpolicy->io_aware = false; } else if (discard_type == DPOLICY_UMOUNT) { - dpolicy->max_requests = UINT_MAX; dpolicy->io_aware = false; /* we need to issue all to keep CP_TRIMMED_FLAG */ dpolicy->granularity = 1; @@ -1215,12 +1240,14 @@ submit: len = total_len; } - if (!err && len) + if (!err && len) { + dcc->undiscard_blks -= len; __update_discard_tree_range(sbi, bdev, lstart, start, len); + } return err; } -static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi, +static void __insert_discard_tree(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t lstart, block_t start, block_t len, struct rb_node **insert_p, @@ -1229,7 +1256,6 @@ static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi, struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct rb_node **p; struct rb_node *parent = NULL; - struct discard_cmd *dc = NULL; bool leftmost = true; if (insert_p && insert_parent) { @@ -1241,12 +1267,8 @@ static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi, p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart, &leftmost); do_insert: - dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, + __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, p, leftmost); - if (!dc) - return NULL; - - return dc; } static void __relocate_discard_cmd(struct discard_cmd_control *dcc, @@ -1463,6 +1485,8 @@ next: return issued; } +static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi, + struct discard_policy *dpolicy); static int __issue_discard_cmd(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy) @@ -1471,12 +1495,14 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, struct list_head *pend_list; struct discard_cmd *dc, *tmp; struct blk_plug plug; - int i, issued = 0; + int i, issued; bool io_interrupted = false; if (dpolicy->timeout) f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT); +retry: + issued = 0; for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { if (dpolicy->timeout && f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT)) @@ -1523,6 +1549,11 @@ next: break; } + if (dpolicy->type == DPOLICY_UMOUNT && issued) { + __wait_all_discard_cmd(sbi, dpolicy); + goto retry; + } + if (!issued && io_interrupted) issued = -1; @@ -1728,7 +1759,7 @@ static int issue_discard_thread(void *data) continue; } - if (sbi->gc_mode == GC_URGENT) + if (sbi->gc_mode == GC_URGENT_HIGH) __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1); sb_start_intwrite(sbi->sb); @@ -2135,7 +2166,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) new_vblocks = se->valid_blocks + del; offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); - f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) || + f2fs_bug_on(sbi, (new_vblocks < 0 || (new_vblocks > sbi->blocks_per_seg))); se->valid_blocks = new_vblocks; @@ -2600,7 +2631,7 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) bool reversed = false; /* f2fs_need_SSR() already forces to do this */ - if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) { + if (!v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) { curseg->next_segno = segno; return 1; } @@ -2627,7 +2658,7 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) for (; cnt-- > 0; reversed ? i-- : i++) { if (i == type) continue; - if (v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) { + if (!v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) { curseg->next_segno = segno; return 1; } @@ -2669,7 +2700,7 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, stat_inc_seg_type(sbi, curseg); } -void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, +void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, unsigned int start, unsigned int end) { struct curseg_info *curseg = CURSEG_I(sbi, type); @@ -2702,28 +2733,35 @@ unlock: up_read(&SM_I(sbi)->curseg_lock); } -void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi, int type) +static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type) { - struct curseg_info *curseg; + struct curseg_info *curseg = CURSEG_I(sbi, type); unsigned int old_segno; - int i; - down_write(&SIT_I(sbi)->sentry_lock); + if (!curseg->next_blkoff && + !get_valid_blocks(sbi, curseg->segno, false) && + !get_ckpt_valid_blocks(sbi, curseg->segno)) + return; - for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { - if (type != NO_CHECK_TYPE && i != type) - continue; + old_segno = curseg->segno; + SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true); + locate_dirty_segment(sbi, old_segno); +} - curseg = CURSEG_I(sbi, i); - if (type == NO_CHECK_TYPE || curseg->next_blkoff || - get_valid_blocks(sbi, curseg->segno, false) || - get_ckpt_valid_blocks(sbi, curseg->segno)) { - old_segno = curseg->segno; - SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); - locate_dirty_segment(sbi, old_segno); - } - } +void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type) +{ + down_write(&SIT_I(sbi)->sentry_lock); + __allocate_new_segment(sbi, type); + up_write(&SIT_I(sbi)->sentry_lock); +} + +void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) +{ + int i; + down_write(&SIT_I(sbi)->sentry_lock); + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) + __allocate_new_segment(sbi, i); up_write(&SIT_I(sbi)->sentry_lock); } @@ -3084,7 +3122,7 @@ static int __get_segment_type(struct f2fs_io_info *fio) void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, - struct f2fs_io_info *fio, bool add_list) + struct f2fs_io_info *fio) { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, type); @@ -3152,7 +3190,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, if (F2FS_IO_ALIGNED(sbi)) fio->retry = false; - if (add_list) { + if (fio) { struct f2fs_bio_info *io; INIT_LIST_HEAD(&fio->list); @@ -3201,7 +3239,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) down_read(&fio->sbi->io_order_lock); reallocate: f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, - &fio->new_blkaddr, sum, type, fio, true); + &fio->new_blkaddr, sum, type, fio); if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) invalidate_mapping_pages(META_MAPPING(fio->sbi), fio->old_blkaddr, fio->old_blkaddr); @@ -4277,8 +4315,9 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int segno = 0, offset = 0; - unsigned short valid_blocks; + unsigned int segno = 0, offset = 0, secno; + block_t valid_blocks; + block_t blks_per_sec = BLKS_PER_SEC(sbi); while (1) { /* find dirty segment based on free segmap */ @@ -4297,6 +4336,22 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) __locate_dirty_segment(sbi, segno, DIRTY); mutex_unlock(&dirty_i->seglist_lock); } + + if (!__is_large_section(sbi)) + return; + + mutex_lock(&dirty_i->seglist_lock); + for (segno = 0; segno < MAIN_SECS(sbi); segno += blks_per_sec) { + valid_blocks = get_valid_blocks(sbi, segno, true); + secno = GET_SEC_FROM_SEG(sbi, segno); + + if (!valid_blocks || valid_blocks == blks_per_sec) + continue; + if (IS_CURSEC(sbi, secno)) + continue; + set_bit(secno, dirty_i->dirty_secmap); + } + mutex_unlock(&dirty_i->seglist_lock); } static int init_victim_secmap(struct f2fs_sb_info *sbi) @@ -4333,6 +4388,14 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi) return -ENOMEM; } + if (__is_large_section(sbi)) { + bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); + dirty_i->dirty_secmap = f2fs_kvzalloc(sbi, + bitmap_size, GFP_KERNEL); + if (!dirty_i->dirty_secmap) + return -ENOMEM; + } + init_dirty_segmap(sbi); return init_victim_secmap(sbi); } @@ -4759,6 +4822,12 @@ static void destroy_dirty_segmap(struct f2fs_sb_info *sbi) for (i = 0; i < NR_DIRTY_TYPE; i++) discard_dirty_segmap(sbi, i); + if (__is_large_section(sbi)) { + mutex_lock(&dirty_i->seglist_lock); + kvfree(dirty_i->dirty_secmap); + mutex_unlock(&dirty_i->seglist_lock); + } + destroy_victim_secmap(sbi); SM_I(sbi)->dirty_info = NULL; kvfree(dirty_i); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 7a83bd530812..752b177073b2 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * fs/f2fs/segment.h * @@ -166,8 +166,11 @@ enum { struct victim_sel_policy { int alloc_mode; /* LFS or SSR */ int gc_mode; /* GC_CB or GC_GREEDY */ - unsigned long *dirty_segmap; /* dirty segment bitmap */ - unsigned int max_search; /* maximum # of segments to search */ + unsigned long *dirty_bitmap; /* dirty segment/section bitmap */ + unsigned int max_search; /* + * maximum # of segments/sections + * to search + */ unsigned int offset; /* last scanned bitmap offset */ unsigned int ofs_unit; /* bitmap search unit */ unsigned int min_cost; /* minimum cost */ @@ -184,7 +187,7 @@ struct seg_entry { unsigned char *cur_valid_map_mir; /* mirror of current valid bitmap */ #endif /* - * # of valid blocks and the validity bitmap stored in the the last + * # of valid blocks and the validity bitmap stored in the last * checkpoint pack. This information is used by the SSR mode. */ unsigned char *ckpt_valid_map; /* validity bitmap of blocks last cp */ @@ -266,6 +269,7 @@ enum dirty_type { struct dirty_seglist_info { const struct victim_selection *v_ops; /* victim selction operation */ unsigned long *dirty_segmap[NR_DIRTY_TYPE]; + unsigned long *dirty_secmap; struct mutex seglist_lock; /* lock for segment bitmaps */ int nr_dirty[NR_DIRTY_TYPE]; /* # of dirty segments */ unsigned long *victim_secmap; /* background GC victims */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f2dfc21c6abb..dfa072fa8081 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -138,6 +138,7 @@ enum { Opt_alloc, Opt_fsync, Opt_test_dummy_encryption, + Opt_inlinecrypt, Opt_checkpoint_disable, Opt_checkpoint_disable_cap, Opt_checkpoint_disable_cap_perc, @@ -202,7 +203,9 @@ static match_table_t f2fs_tokens = { {Opt_whint, "whint_mode=%s"}, {Opt_alloc, "alloc_mode=%s"}, {Opt_fsync, "fsync_mode=%s"}, + {Opt_test_dummy_encryption, "test_dummy_encryption=%s"}, {Opt_test_dummy_encryption, "test_dummy_encryption"}, + {Opt_inlinecrypt, "inlinecrypt"}, {Opt_checkpoint_disable, "checkpoint=disable"}, {Opt_checkpoint_disable_cap, "checkpoint=disable:%u"}, {Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"}, @@ -284,6 +287,22 @@ static inline void limit_reserve_root(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).s_resgid)); } +static inline void adjust_unusable_cap_perc(struct f2fs_sb_info *sbi) +{ + if (!F2FS_OPTION(sbi).unusable_cap_perc) + return; + + if (F2FS_OPTION(sbi).unusable_cap_perc == 100) + F2FS_OPTION(sbi).unusable_cap = sbi->user_block_count; + else + F2FS_OPTION(sbi).unusable_cap = (sbi->user_block_count / 100) * + F2FS_OPTION(sbi).unusable_cap_perc; + + f2fs_info(sbi, "Adjust unusable cap for checkpoint=disable = %u / %u%%", + F2FS_OPTION(sbi).unusable_cap, + F2FS_OPTION(sbi).unusable_cap_perc); +} + static void init_once(void *foo) { struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo; @@ -331,7 +350,7 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, set_opt(sbi, QUOTA); return 0; errout: - kvfree(qname); + kfree(qname); return ret; } @@ -343,7 +362,7 @@ static int f2fs_clear_qf_name(struct super_block *sb, int qtype) f2fs_err(sbi, "Cannot change journaled quota options when quota turned on"); return -EINVAL; } - kvfree(F2FS_OPTION(sbi).s_qf_names[qtype]); + kfree(F2FS_OPTION(sbi).s_qf_names[qtype]); F2FS_OPTION(sbi).s_qf_names[qtype] = NULL; return 0; } @@ -394,18 +413,64 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) } #endif -static int parse_options(struct super_block *sb, char *options) +static int f2fs_set_test_dummy_encryption(struct super_block *sb, + const char *opt, + const substring_t *arg, + bool is_remount) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); +#ifdef CONFIG_FS_ENCRYPTION + int err; + + if (!f2fs_sb_has_encrypt(sbi)) { + f2fs_err(sbi, "Encrypt feature is off"); + return -EINVAL; + } + + /* + * This mount option is just for testing, and it's not worthwhile to + * implement the extra complexity (e.g. RCU protection) that would be + * needed to allow it to be set or changed during remount. We do allow + * it to be specified during remount, but only if there is no change. + */ + if (is_remount && !F2FS_OPTION(sbi).dummy_enc_ctx.ctx) { + f2fs_warn(sbi, "Can't set test_dummy_encryption on remount"); + return -EINVAL; + } + err = fscrypt_set_test_dummy_encryption( + sb, arg, &F2FS_OPTION(sbi).dummy_enc_ctx); + if (err) { + if (err == -EEXIST) + f2fs_warn(sbi, + "Can't change test_dummy_encryption on remount"); + else if (err == -EINVAL) + f2fs_warn(sbi, "Value of option \"%s\" is unrecognized", + opt); + else + f2fs_warn(sbi, "Error processing option \"%s\" [%d]", + opt, err); + return -EINVAL; + } + f2fs_warn(sbi, "Test dummy encryption mode enabled"); +#else + f2fs_warn(sbi, "Test dummy encryption mount option ignored"); +#endif + return 0; +} + +static int parse_options(struct super_block *sb, char *options, bool is_remount) { struct f2fs_sb_info *sbi = F2FS_SB(sb); substring_t args[MAX_OPT_ARGS]; +#ifdef CONFIG_F2FS_FS_COMPRESSION unsigned char (*ext)[F2FS_EXTENSION_LEN]; + int ext_cnt; +#endif char *p, *name; - int arg = 0, ext_cnt; + int arg = 0; kuid_t uid; kgid_t gid; -#ifdef CONFIG_QUOTA int ret; -#endif if (!options) return 0; @@ -427,17 +492,17 @@ static int parse_options(struct super_block *sb, char *options) if (!name) return -ENOMEM; - if (strlen(name) == 2 && !strncmp(name, "on", 2)) { + if (!strcmp(name, "on")) { F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; - } else if (strlen(name) == 3 && !strncmp(name, "off", 3)) { + } else if (!strcmp(name, "off")) { F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_OFF; - } else if (strlen(name) == 4 && !strncmp(name, "sync", 4)) { + } else if (!strcmp(name, "sync")) { F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_SYNC; } else { - kvfree(name); + kfree(name); return -EINVAL; } - kvfree(name); + kfree(name); break; case Opt_disable_roll_forward: set_opt(sbi, DISABLE_ROLL_FORWARD); @@ -591,22 +656,20 @@ static int parse_options(struct super_block *sb, char *options) if (!name) return -ENOMEM; - if (strlen(name) == 8 && - !strncmp(name, "adaptive", 8)) { + if (!strcmp(name, "adaptive")) { if (f2fs_sb_has_blkzoned(sbi)) { f2fs_warn(sbi, "adaptive mode is not allowed with zoned block device feature"); - kvfree(name); + kfree(name); return -EINVAL; } F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE; - } else if (strlen(name) == 3 && - !strncmp(name, "lfs", 3)) { + } else if (!strcmp(name, "lfs")) { F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS; } else { - kvfree(name); + kfree(name); return -EINVAL; } - kvfree(name); + kfree(name); break; case Opt_io_size_bits: if (args->from && match_int(args, &arg)) @@ -725,69 +788,61 @@ static int parse_options(struct super_block *sb, char *options) name = match_strdup(&args[0]); if (!name) return -ENOMEM; - if (strlen(name) == 10 && - !strncmp(name, "user-based", 10)) { + if (!strcmp(name, "user-based")) { F2FS_OPTION(sbi).whint_mode = WHINT_MODE_USER; - } else if (strlen(name) == 3 && - !strncmp(name, "off", 3)) { + } else if (!strcmp(name, "off")) { F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF; - } else if (strlen(name) == 8 && - !strncmp(name, "fs-based", 8)) { + } else if (!strcmp(name, "fs-based")) { F2FS_OPTION(sbi).whint_mode = WHINT_MODE_FS; } else { - kvfree(name); + kfree(name); return -EINVAL; } - kvfree(name); + kfree(name); break; case Opt_alloc: name = match_strdup(&args[0]); if (!name) return -ENOMEM; - if (strlen(name) == 7 && - !strncmp(name, "default", 7)) { + if (!strcmp(name, "default")) { F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT; - } else if (strlen(name) == 5 && - !strncmp(name, "reuse", 5)) { + } else if (!strcmp(name, "reuse")) { F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; } else { - kvfree(name); + kfree(name); return -EINVAL; } - kvfree(name); + kfree(name); break; case Opt_fsync: name = match_strdup(&args[0]); if (!name) return -ENOMEM; - if (strlen(name) == 5 && - !strncmp(name, "posix", 5)) { + if (!strcmp(name, "posix")) { F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX; - } else if (strlen(name) == 6 && - !strncmp(name, "strict", 6)) { + } else if (!strcmp(name, "strict")) { F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_STRICT; - } else if (strlen(name) == 9 && - !strncmp(name, "nobarrier", 9)) { + } else if (!strcmp(name, "nobarrier")) { F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_NOBARRIER; } else { - kvfree(name); + kfree(name); return -EINVAL; } - kvfree(name); + kfree(name); break; case Opt_test_dummy_encryption: -#ifdef CONFIG_FS_ENCRYPTION - if (!f2fs_sb_has_encrypt(sbi)) { - f2fs_err(sbi, "Encrypt feature is off"); - return -EINVAL; - } - - F2FS_OPTION(sbi).test_dummy_encryption = true; - f2fs_info(sbi, "Test dummy encryption mode enabled"); + ret = f2fs_set_test_dummy_encryption(sb, p, &args[0], + is_remount); + if (ret) + return ret; + break; + case Opt_inlinecrypt: +#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT + sb->s_flags |= SB_INLINECRYPT; #else - f2fs_info(sbi, "Test dummy encryption mount option ignored"); + f2fs_info(sbi, "inline encryption not supported"); #endif break; case Opt_checkpoint_disable_cap_perc: @@ -795,12 +850,7 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; if (arg < 0 || arg > 100) return -EINVAL; - if (arg == 100) - F2FS_OPTION(sbi).unusable_cap = - sbi->user_block_count; - else - F2FS_OPTION(sbi).unusable_cap = - (sbi->user_block_count / 100) * arg; + F2FS_OPTION(sbi).unusable_cap_perc = arg; set_opt(sbi, DISABLE_CHECKPOINT); break; case Opt_checkpoint_disable_cap: @@ -815,6 +865,7 @@ static int parse_options(struct super_block *sb, char *options) case Opt_checkpoint_enable: clear_opt(sbi, DISABLE_CHECKPOINT); break; +#ifdef CONFIG_F2FS_FS_COMPRESSION case Opt_compress_algorithm: if (!f2fs_sb_has_compression(sbi)) { f2fs_err(sbi, "Compression feature if off"); @@ -823,17 +874,18 @@ static int parse_options(struct super_block *sb, char *options) name = match_strdup(&args[0]); if (!name) return -ENOMEM; - if (strlen(name) == 3 && !strcmp(name, "lzo")) { + if (!strcmp(name, "lzo")) { F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZO; - } else if (strlen(name) == 3 && - !strcmp(name, "lz4")) { + } else if (!strcmp(name, "lz4")) { F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZ4; - } else if (strlen(name) == 4 && - !strcmp(name, "zstd")) { + } else if (!strcmp(name, "zstd")) { F2FS_OPTION(sbi).compress_algorithm = COMPRESS_ZSTD; + } else if (!strcmp(name, "lzo-rle")) { + F2FS_OPTION(sbi).compress_algorithm = + COMPRESS_LZORLE; } else { kfree(name); return -EINVAL; @@ -879,6 +931,13 @@ static int parse_options(struct super_block *sb, char *options) F2FS_OPTION(sbi).compress_ext_cnt++; kfree(name); break; +#else + case Opt_compress_algorithm: + case Opt_compress_log_size: + case Opt_compress_extension: + f2fs_info(sbi, "compression options not supported"); + break; +#endif default: f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", p); @@ -976,6 +1035,8 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) /* Will be used by directory only */ fi->i_dir_level = F2FS_SB(sb)->dir_level; + fi->ra_offset = -1; + return &fi->vfs_inode; } @@ -1134,6 +1195,9 @@ static void f2fs_put_super(struct super_block *sb) int i; bool dropped; + /* unregister procfs/sysfs entries in advance to avoid race case */ + f2fs_unregister_sysfs(sbi); + f2fs_quota_off_umount(sb); /* prevent remaining shrinker jobs */ @@ -1199,27 +1263,26 @@ static void f2fs_put_super(struct super_block *sb) kvfree(sbi->ckpt); - f2fs_unregister_sysfs(sbi); - sb->s_fs_info = NULL; if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); - kvfree(sbi->raw_super); + kfree(sbi->raw_super); destroy_device_list(sbi); f2fs_destroy_xattr_caches(sbi); mempool_destroy(sbi->write_io_dummy); #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) - kvfree(F2FS_OPTION(sbi).s_qf_names[i]); + kfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif + fscrypt_free_dummy_context(&F2FS_OPTION(sbi).dummy_enc_ctx); destroy_percpu_info(sbi); for (i = 0; i < NR_PAGE_TYPE; i++) kvfree(sbi->write_io[i]); #ifdef CONFIG_UNICODE utf8_unload(sbi->s_encoding); #endif - kvfree(sbi); + kfree(sbi); } int f2fs_sync_fs(struct super_block *sb, int sync) @@ -1292,7 +1355,8 @@ static int f2fs_statfs_project(struct super_block *sb, limit >>= sb->s_blocksize_bits; if (limit && buf->f_blocks > limit) { - curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits; + curblock = (dquot->dq_dqb.dqb_curspace + + dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits; buf->f_blocks = limit; buf->f_bfree = buf->f_bavail = (buf->f_blocks > curblock) ? @@ -1427,6 +1491,9 @@ static inline void f2fs_show_compress_options(struct seq_file *seq, case COMPRESS_ZSTD: algtype = "zstd"; break; + case COMPRESS_LZORLE: + algtype = "lzo-rle"; + break; } seq_printf(seq, ",compress_algorithm=%s", algtype); @@ -1543,10 +1610,11 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",whint_mode=%s", "user-based"); else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) seq_printf(seq, ",whint_mode=%s", "fs-based"); -#ifdef CONFIG_FS_ENCRYPTION - if (F2FS_OPTION(sbi).test_dummy_encryption) - seq_puts(seq, ",test_dummy_encryption"); -#endif + + fscrypt_show_test_dummy_encryption(seq, ',', sbi->sb); + + if (sbi->sb->s_flags & SB_INLINECRYPT) + seq_puts(seq, ",inlinecrypt"); if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_DEFAULT) seq_printf(seq, ",alloc_mode=%s", "default"); @@ -1563,7 +1631,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_NOBARRIER) seq_printf(seq, ",fsync_mode=%s", "nobarrier"); +#ifdef CONFIG_F2FS_FS_COMPRESSION f2fs_show_compress_options(seq, sbi->sb); +#endif return 0; } @@ -1575,7 +1645,6 @@ static void default_options(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF; F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT; F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX; - F2FS_OPTION(sbi).test_dummy_encryption = false; F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID); F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID); F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZ4; @@ -1583,6 +1652,8 @@ static void default_options(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).compress_ext_cnt = 0; F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; + sbi->sb->s_flags &= ~SB_INLINECRYPT; + set_opt(sbi, INLINE_XATTR); set_opt(sbi, INLINE_DATA); set_opt(sbi, INLINE_DENTRY); @@ -1713,7 +1784,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) GFP_KERNEL); if (!org_mount_opt.s_qf_names[i]) { for (j = 0; j < i; j++) - kvfree(org_mount_opt.s_qf_names[j]); + kfree(org_mount_opt.s_qf_names[j]); return -ENOMEM; } } else { @@ -1734,7 +1805,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) default_options(sbi); /* parse mount options */ - err = parse_options(sb, data); + err = parse_options(sb, data, true); if (err) goto restore_opts; checkpoint_changed = @@ -1838,13 +1909,14 @@ skip: #ifdef CONFIG_QUOTA /* Release old quota file names */ for (i = 0; i < MAXQUOTAS; i++) - kvfree(org_mount_opt.s_qf_names[i]); + kfree(org_mount_opt.s_qf_names[i]); #endif /* Update the POSIXACL Flag */ sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | (test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0); limit_reserve_root(sbi); + adjust_unusable_cap_perc(sbi); *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME); return 0; restore_gc: @@ -1858,7 +1930,7 @@ restore_opts: #ifdef CONFIG_QUOTA F2FS_OPTION(sbi).s_jquota_fmt = org_mount_opt.s_jquota_fmt; for (i = 0; i < MAXQUOTAS; i++) { - kvfree(F2FS_OPTION(sbi).s_qf_names[i]); + kfree(F2FS_OPTION(sbi).s_qf_names[i]); F2FS_OPTION(sbi).s_qf_names[i] = org_mount_opt.s_qf_names[i]; } #endif @@ -2410,9 +2482,10 @@ static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len, ctx, len, fs_data, XATTR_CREATE); } -static bool f2fs_dummy_context(struct inode *inode) +static const union fscrypt_context * +f2fs_get_dummy_context(struct super_block *sb) { - return DUMMY_ENCRYPTION_ENABLED(F2FS_I_SB(inode)); + return F2FS_OPTION(F2FS_SB(sb)).dummy_enc_ctx.ctx; } static bool f2fs_has_stable_inodes(struct super_block *sb) @@ -2427,15 +2500,36 @@ static void f2fs_get_ino_and_lblk_bits(struct super_block *sb, *lblk_bits_ret = 8 * sizeof(block_t); } +static int f2fs_get_num_devices(struct super_block *sb) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + if (f2fs_is_multi_device(sbi)) + return sbi->s_ndevs; + return 1; +} + +static void f2fs_get_devices(struct super_block *sb, + struct request_queue **devs) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + int i; + + for (i = 0; i < sbi->s_ndevs; i++) + devs[i] = bdev_get_queue(FDEV(i).bdev); +} + static const struct fscrypt_operations f2fs_cryptops = { .key_prefix = "f2fs:", .get_context = f2fs_get_context, .set_context = f2fs_set_context, - .dummy_context = f2fs_dummy_context, + .get_dummy_context = f2fs_get_dummy_context, .empty_dir = f2fs_empty_dir, .max_namelen = F2FS_NAME_LEN, .has_stable_inodes = f2fs_has_stable_inodes, .get_ino_and_lblk_bits = f2fs_get_ino_and_lblk_bits, + .get_num_devices = f2fs_get_num_devices, + .get_devices = f2fs_get_devices, }; #endif @@ -3026,7 +3120,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) if (nr_sectors & (bdev_zone_sectors(bdev) - 1)) FDEV(devi).nr_blkz++; - FDEV(devi).blkz_seq = f2fs_kzalloc(sbi, + FDEV(devi).blkz_seq = f2fs_kvzalloc(sbi, BITS_TO_LONGS(FDEV(devi).nr_blkz) * sizeof(unsigned long), GFP_KERNEL); @@ -3094,7 +3188,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, /* No valid superblock */ if (!*raw_super) - kvfree(super); + kfree(super); else err = 0; @@ -3366,7 +3460,7 @@ try_onemore: goto free_sb_buf; } - err = parse_options(sb, options); + err = parse_options(sb, options, false); if (err) goto free_options; @@ -3413,7 +3507,6 @@ try_onemore: init_rwsem(&sbi->gc_lock); mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); - mutex_init(&sbi->resize_mutex); init_rwsem(&sbi->node_write); init_rwsem(&sbi->node_change); @@ -3424,6 +3517,7 @@ try_onemore: /* init iostat info */ spin_lock_init(&sbi->iostat_lock); sbi->iostat_enable = false; + sbi->iostat_period_ms = DEFAULT_IOSTAT_PERIOD_MS; for (i = 0; i < NR_PAGE_TYPE; i++) { int n = (i == META) ? 1: NR_TEMP_TYPE; @@ -3521,6 +3615,7 @@ try_onemore: sbi->reserved_blocks = 0; sbi->current_reserved_blocks = 0; limit_reserve_root(sbi); + adjust_unusable_cap_perc(sbi); for (i = 0; i < NR_INODE_TYPE; i++) { INIT_LIST_HEAD(&sbi->inode_list[i]); @@ -3767,15 +3862,16 @@ free_bio_info: free_options: #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) - kvfree(F2FS_OPTION(sbi).s_qf_names[i]); + kfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif + fscrypt_free_dummy_context(&F2FS_OPTION(sbi).dummy_enc_ctx); kvfree(options); free_sb_buf: - kvfree(raw_super); + kfree(raw_super); free_sbi: if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); - kvfree(sbi); + kfree(sbi); /* give only one another chance */ if (retry_cnt > 0 && skip_recovery) { @@ -3890,7 +3986,12 @@ static int __init init_f2fs_fs(void) err = f2fs_init_bioset(); if (err) goto free_bio_enrty_cache; + err = f2fs_init_compress_mempool(); + if (err) + goto free_bioset; return 0; +free_bioset: + f2fs_destroy_bioset(); free_bio_enrty_cache: f2fs_destroy_bio_entry_cache(); free_post_read: @@ -3918,6 +4019,7 @@ fail: static void __exit exit_f2fs_fs(void) { + f2fs_destroy_compress_mempool(); f2fs_destroy_bioset(); f2fs_destroy_bio_entry_cache(); f2fs_destroy_post_read_processing(); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index e3bbbef9b4f0..88ed9969cc86 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -15,6 +15,7 @@ #include "f2fs.h" #include "segment.h" #include "gc.h" +#include <trace/events/f2fs.h> static struct proc_dir_entry *f2fs_proc_root; @@ -26,7 +27,7 @@ enum { NM_INFO, /* struct f2fs_nm_info */ F2FS_SBI, /* struct f2fs_sb_info */ #ifdef CONFIG_F2FS_STAT_FS - STAT_INFO, /* struct f2fs_stat_info */ + STAT_INFO, /* struct f2fs_stat_info */ #endif #ifdef CONFIG_F2FS_FAULT_INJECTION FAULT_INFO_RATE, /* struct f2fs_fault_info */ @@ -222,6 +223,13 @@ static ssize_t avg_vblocks_show(struct f2fs_attr *a, } #endif +static ssize_t main_blkaddr_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)MAIN_BLKADDR(sbi)); +} + static ssize_t f2fs_sbi_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -349,16 +357,20 @@ out: return -EINVAL; if (!strcmp(a->attr.name, "gc_urgent")) { - if (t >= 1) { - sbi->gc_mode = GC_URGENT; + if (t == 0) { + sbi->gc_mode = GC_NORMAL; + } else if (t == 1) { + sbi->gc_mode = GC_URGENT_HIGH; if (sbi->gc_thread) { sbi->gc_thread->gc_wake = 1; wake_up_interruptible_all( &sbi->gc_thread->gc_wait_queue_head); wake_up_discard_thread(sbi, true); } + } else if (t == 2) { + sbi->gc_mode = GC_URGENT_LOW; } else { - sbi->gc_mode = GC_NORMAL; + return -EINVAL; } return count; } @@ -372,7 +384,6 @@ out: return count; } - if (!strcmp(a->attr.name, "iostat_enable")) { sbi->iostat_enable = !!t; if (!sbi->iostat_enable) @@ -380,6 +391,15 @@ out: return count; } + if (!strcmp(a->attr.name, "iostat_period_ms")) { + if (t < MIN_IOSTAT_PERIOD_MS || t > MAX_IOSTAT_PERIOD_MS) + return -EINVAL; + spin_lock(&sbi->iostat_lock); + sbi->iostat_period_ms = (unsigned int)t; + spin_unlock(&sbi->iostat_lock); + return count; + } + *ui = (unsigned int)t; return count; @@ -446,6 +466,7 @@ enum feat_id { FEAT_SB_CHECKSUM, FEAT_CASEFOLD, FEAT_COMPRESSION, + FEAT_TEST_DUMMY_ENCRYPTION_V2, }; static ssize_t f2fs_feature_show(struct f2fs_attr *a, @@ -466,6 +487,7 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a, case FEAT_SB_CHECKSUM: case FEAT_CASEFOLD: case FEAT_COMPRESSION: + case FEAT_TEST_DUMMY_ENCRYPTION_V2: return sprintf(buf, "supported\n"); } return 0; @@ -511,7 +533,6 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle, gc_mode); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent, gc_mode); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, main_blkaddr, main_blkaddr); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity); F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks); @@ -536,6 +557,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_period_ms, iostat_period_ms); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold); F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list); @@ -543,6 +565,8 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list); F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); #endif +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, data_io_flag, data_io_flag); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, node_io_flag, node_io_flag); F2FS_GENERAL_RO_ATTR(dirty_segments); F2FS_GENERAL_RO_ATTR(free_segments); F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); @@ -551,6 +575,7 @@ F2FS_GENERAL_RO_ATTR(current_reserved_blocks); F2FS_GENERAL_RO_ATTR(unusable); F2FS_GENERAL_RO_ATTR(encoding); F2FS_GENERAL_RO_ATTR(mounted_time_sec); +F2FS_GENERAL_RO_ATTR(main_blkaddr); #ifdef CONFIG_F2FS_STAT_FS F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_foreground_calls, cp_count); F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_background_calls, bg_cp_count); @@ -563,6 +588,7 @@ F2FS_GENERAL_RO_ATTR(avg_vblocks); #ifdef CONFIG_FS_ENCRYPTION F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO); +F2FS_FEATURE_RO_ATTR(test_dummy_encryption_v2, FEAT_TEST_DUMMY_ENCRYPTION_V2); #endif #ifdef CONFIG_BLK_DEV_ZONED F2FS_FEATURE_RO_ATTR(block_zoned, FEAT_BLKZONED); @@ -615,6 +641,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_idle_interval), ATTR_LIST(umount_discard_timeout), ATTR_LIST(iostat_enable), + ATTR_LIST(iostat_period_ms), ATTR_LIST(readdir_ra), ATTR_LIST(gc_pin_file_thresh), ATTR_LIST(extension_list), @@ -622,6 +649,8 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(inject_rate), ATTR_LIST(inject_type), #endif + ATTR_LIST(data_io_flag), + ATTR_LIST(node_io_flag), ATTR_LIST(dirty_segments), ATTR_LIST(free_segments), ATTR_LIST(unusable), @@ -647,6 +676,7 @@ ATTRIBUTE_GROUPS(f2fs); static struct attribute *f2fs_feat_attrs[] = { #ifdef CONFIG_FS_ENCRYPTION ATTR_LIST(encryption), + ATTR_LIST(test_dummy_encryption_v2), #endif #ifdef CONFIG_BLK_DEV_ZONED ATTR_LIST(block_zoned), @@ -687,7 +717,7 @@ static struct kobj_type f2fs_ktype = { }; static struct kset f2fs_kset = { - .kobj = {.ktype = &f2fs_ktype}, + .kobj = {.ktype = &f2fs_ktype}, }; static struct kobj_type f2fs_feat_ktype = { @@ -750,6 +780,33 @@ static int __maybe_unused segment_bits_seq_show(struct seq_file *seq, return 0; } +void f2fs_record_iostat(struct f2fs_sb_info *sbi) +{ + unsigned long long iostat_diff[NR_IO_TYPE]; + int i; + + if (time_is_after_jiffies(sbi->iostat_next_period)) + return; + + /* Need double check under the lock */ + spin_lock(&sbi->iostat_lock); + if (time_is_after_jiffies(sbi->iostat_next_period)) { + spin_unlock(&sbi->iostat_lock); + return; + } + sbi->iostat_next_period = jiffies + + msecs_to_jiffies(sbi->iostat_period_ms); + + for (i = 0; i < NR_IO_TYPE; i++) { + iostat_diff[i] = sbi->rw_iostat[i] - + sbi->prev_rw_iostat[i]; + sbi->prev_rw_iostat[i] = sbi->rw_iostat[i]; + } + spin_unlock(&sbi->iostat_lock); + + trace_f2fs_iostat(sbi, iostat_diff); +} + static int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset) { @@ -762,33 +819,58 @@ static int __maybe_unused iostat_info_seq_show(struct seq_file *seq, seq_printf(seq, "time: %-16llu\n", now); - /* print app IOs */ + /* print app write IOs */ + seq_puts(seq, "[WRITE]\n"); seq_printf(seq, "app buffered: %-16llu\n", - sbi->write_iostat[APP_BUFFERED_IO]); + sbi->rw_iostat[APP_BUFFERED_IO]); seq_printf(seq, "app direct: %-16llu\n", - sbi->write_iostat[APP_DIRECT_IO]); + sbi->rw_iostat[APP_DIRECT_IO]); seq_printf(seq, "app mapped: %-16llu\n", - sbi->write_iostat[APP_MAPPED_IO]); + sbi->rw_iostat[APP_MAPPED_IO]); - /* print fs IOs */ + /* print fs write IOs */ seq_printf(seq, "fs data: %-16llu\n", - sbi->write_iostat[FS_DATA_IO]); + sbi->rw_iostat[FS_DATA_IO]); seq_printf(seq, "fs node: %-16llu\n", - sbi->write_iostat[FS_NODE_IO]); + sbi->rw_iostat[FS_NODE_IO]); seq_printf(seq, "fs meta: %-16llu\n", - sbi->write_iostat[FS_META_IO]); + sbi->rw_iostat[FS_META_IO]); seq_printf(seq, "fs gc data: %-16llu\n", - sbi->write_iostat[FS_GC_DATA_IO]); + sbi->rw_iostat[FS_GC_DATA_IO]); seq_printf(seq, "fs gc node: %-16llu\n", - sbi->write_iostat[FS_GC_NODE_IO]); + sbi->rw_iostat[FS_GC_NODE_IO]); seq_printf(seq, "fs cp data: %-16llu\n", - sbi->write_iostat[FS_CP_DATA_IO]); + sbi->rw_iostat[FS_CP_DATA_IO]); seq_printf(seq, "fs cp node: %-16llu\n", - sbi->write_iostat[FS_CP_NODE_IO]); + sbi->rw_iostat[FS_CP_NODE_IO]); seq_printf(seq, "fs cp meta: %-16llu\n", - sbi->write_iostat[FS_CP_META_IO]); + sbi->rw_iostat[FS_CP_META_IO]); + + /* print app read IOs */ + seq_puts(seq, "[READ]\n"); + seq_printf(seq, "app buffered: %-16llu\n", + sbi->rw_iostat[APP_BUFFERED_READ_IO]); + seq_printf(seq, "app direct: %-16llu\n", + sbi->rw_iostat[APP_DIRECT_READ_IO]); + seq_printf(seq, "app mapped: %-16llu\n", + sbi->rw_iostat[APP_MAPPED_READ_IO]); + + /* print fs read IOs */ + seq_printf(seq, "fs data: %-16llu\n", + sbi->rw_iostat[FS_DATA_READ_IO]); + seq_printf(seq, "fs gc data: %-16llu\n", + sbi->rw_iostat[FS_GDATA_READ_IO]); + seq_printf(seq, "fs compr_data: %-16llu\n", + sbi->rw_iostat[FS_CDATA_READ_IO]); + seq_printf(seq, "fs node: %-16llu\n", + sbi->rw_iostat[FS_NODE_READ_IO]); + seq_printf(seq, "fs meta: %-16llu\n", + sbi->rw_iostat[FS_META_READ_IO]); + + /* print other IOs */ + seq_puts(seq, "[OTHER]\n"); seq_printf(seq, "fs discard: %-16llu\n", - sbi->write_iostat[FS_DISCARD]); + sbi->rw_iostat[FS_DISCARD]); return 0; } diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h index e8075fc5b228..789f6aa727fc 100644 --- a/fs/f2fs/trace.h +++ b/fs/f2fs/trace.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * f2fs IO tracer * diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index d7d430a6f130..9eb0dba851e8 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -29,6 +29,8 @@ #include "f2fs.h" #include "xattr.h" +#define F2FS_VERIFY_VER (1) + static inline loff_t f2fs_verity_metadata_pos(const struct inode *inode) { return round_up(inode->i_size, 65536); @@ -152,7 +154,7 @@ static int f2fs_end_enable_verity(struct file *filp, const void *desc, struct inode *inode = file_inode(filp); u64 desc_pos = f2fs_verity_metadata_pos(inode) + merkle_tree_size; struct fsverity_descriptor_location dloc = { - .version = cpu_to_le32(1), + .version = cpu_to_le32(F2FS_VERIFY_VER), .size = cpu_to_le32(desc_size), .pos = cpu_to_le64(desc_pos), }; @@ -199,7 +201,7 @@ static int f2fs_get_verity_descriptor(struct inode *inode, void *buf, F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc), NULL); if (res < 0 && res != -ERANGE) return res; - if (res != sizeof(dloc) || dloc.version != cpu_to_le32(1)) { + if (res != sizeof(dloc) || dloc.version != cpu_to_le32(F2FS_VERIFY_VER)) { f2fs_warn(F2FS_I_SB(inode), "unknown verity xattr format"); return -EINVAL; } @@ -222,37 +224,6 @@ static int f2fs_get_verity_descriptor(struct inode *inode, void *buf, return size; } -/* - * Prefetch some pages from the file's Merkle tree. - * - * This is basically a stripped-down version of __do_page_cache_readahead() - * which works on pages past i_size. - */ -static void f2fs_merkle_tree_readahead(struct address_space *mapping, - pgoff_t start_index, unsigned long count) -{ - LIST_HEAD(pages); - unsigned int nr_pages = 0; - struct page *page; - pgoff_t index; - struct blk_plug plug; - - for (index = start_index; index < start_index + count; index++) { - page = xa_load(&mapping->i_pages, index); - if (!page || xa_is_value(page)) { - page = __page_cache_alloc(readahead_gfp_mask(mapping)); - if (!page) - break; - page->index = index; - list_add(&page->lru, &pages); - nr_pages++; - } - } - blk_start_plug(&plug); - f2fs_mpage_readpages(mapping, &pages, NULL, nr_pages, true); - blk_finish_plug(&plug); -} - static struct page *f2fs_read_merkle_tree_page(struct inode *inode, pgoff_t index, unsigned long num_ra_pages) @@ -266,8 +237,8 @@ static struct page *f2fs_read_merkle_tree_page(struct inode *inode, if (page) put_page(page); else if (num_ra_pages > 1) - f2fs_merkle_tree_readahead(inode->i_mapping, index, - num_ra_pages); + page_cache_readahead_unbounded(inode->i_mapping, NULL, + index, num_ra_pages, 0); page = read_mapping_page(inode->i_mapping, index, NULL); } return page; diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 4f6582ef7ee3..1b0736ce0918 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -175,8 +175,8 @@ const struct xattr_handler f2fs_xattr_trusted_handler = { const struct xattr_handler f2fs_xattr_advise_handler = { .name = F2FS_SYSTEM_ADVISE_NAME, .flags = F2FS_XATTR_INDEX_ADVISE, - .get = f2fs_xattr_advise_get, - .set = f2fs_xattr_advise_set, + .get = f2fs_xattr_advise_get, + .set = f2fs_xattr_advise_set, }; const struct xattr_handler f2fs_xattr_security_handler = { diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 938fcd20565d..416d652774a3 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * fs/f2fs/xattr.h * @@ -136,6 +136,7 @@ extern void f2fs_destroy_xattr_caches(struct f2fs_sb_info *); #else #define f2fs_xattr_handlers NULL +#define f2fs_listxattr NULL static inline int f2fs_setxattr(struct inode *inode, int index, const char *name, const void *value, size_t size, struct page *page, int flags) @@ -148,11 +149,6 @@ static inline int f2fs_getxattr(struct inode *inode, int index, { return -EOPNOTSUPP; } -static inline ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, - size_t buffer_size) -{ - return -EOPNOTSUPP; -} static inline int f2fs_init_xattr_caches(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_xattr_caches(struct f2fs_sb_info *sbi) { } #endif |