From bf22c3cc8ce71454dddd772284773306a68031d8 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Tue, 18 Feb 2020 09:19:07 +0530 Subject: f2fs: fix the panic in do_checkpoint() There could be a scenario where f2fs_sync_meta_pages() will not ensure that all F2FS_DIRTY_META pages are submitted for IO. Thus, resulting in the below panic in do_checkpoint() - f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) && !f2fs_cp_error(sbi)); This can happen in a low-memory condition, where shrinker could also be doing the writepage operation (stack shown below) at the same time when checkpoint is running on another core. schedule down_write f2fs_submit_page_write -> by this time, this page in page cache is tagged as PAGECACHE_TAG_WRITEBACK and PAGECACHE_TAG_DIRTY is cleared, due to which f2fs_sync_meta_pages() cannot sync this page in do_checkpoint() path. f2fs_do_write_meta_page __f2fs_write_meta_page f2fs_write_meta_page shrink_page_list shrink_inactive_list shrink_node_memcg shrink_node kswapd Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 65a7a432dfee..686f5402660e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1172,7 +1172,7 @@ static void f2fs_put_super(struct super_block *sb) /* our cp_error case, we can wait for any writeback page */ f2fs_flush_merged_writes(sbi); - f2fs_wait_on_all_pages_writeback(sbi); + f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA); f2fs_bug_on(sbi, sbi->fsync_node_num); -- cgit v1.2.3 From c10c98203222f4baef78a641bbb525a00eeebd2f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 27 Feb 2020 19:30:03 +0800 Subject: f2fs: cover last_disk_size update with spinlock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change solves below hangtask issue: INFO: task kworker/u16:1:58 blocked for more than 122 seconds. Not tainted 5.6.0-rc2-00590-g9983bdae4974e #11 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. kworker/u16:1 D 0 58 2 0x00000000 Workqueue: writeback wb_workfn (flush-179:0) Backtrace: (__schedule) from [] (schedule+0x78/0xf4) (schedule) from [] (rwsem_down_write_slowpath+0x24c/0x4c0) (rwsem_down_write_slowpath) from [] (down_write+0x6c/0x70) (down_write) from [] (f2fs_write_single_data_page+0x608/0x7ac) (f2fs_write_single_data_page) from [] (f2fs_write_cache_pages+0x2b4/0x7c4) (f2fs_write_cache_pages) from [] (f2fs_write_data_pages+0x344/0x35c) (f2fs_write_data_pages) from [] (do_writepages+0x3c/0xd4) (do_writepages) from [] (__writeback_single_inode+0x44/0x454) (__writeback_single_inode) from [] (writeback_sb_inodes+0x204/0x4b0) (writeback_sb_inodes) from [] (__writeback_inodes_wb+0x50/0xe4) (__writeback_inodes_wb) from [] (wb_writeback+0x294/0x338) (wb_writeback) from [] (wb_workfn+0x35c/0x54c) (wb_workfn) from [] (process_one_work+0x214/0x544) (process_one_work) from [] (worker_thread+0x4c/0x574) (worker_thread) from [] (kthread+0x144/0x170) (kthread) from [] (ret_from_fork+0x14/0x2c) Reported-and-tested-by: Ondřej Jirman Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 4 ++-- fs/f2fs/data.c | 4 ++-- fs/f2fs/f2fs.h | 5 +++-- fs/f2fs/file.c | 4 ++-- fs/f2fs/super.c | 1 + 5 files changed, 10 insertions(+), 8 deletions(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 6da33ba2b2db..25bc6154c31c 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -898,10 +898,10 @@ unlock_continue: f2fs_put_dnode(&dn); f2fs_unlock_op(sbi); - down_write(&fi->i_sem); + spin_lock(&fi->i_size_lock); if (fi->last_disk_size < psize) fi->last_disk_size = psize; - up_write(&fi->i_sem); + spin_unlock(&fi->i_size_lock); f2fs_put_rpages(cc); f2fs_destroy_compress_ctx(cc); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b27b72107911..e5dc1ebb5305 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2647,10 +2647,10 @@ write: if (err) { file_set_keep_isize(inode); } else { - down_write(&F2FS_I(inode)->i_sem); + spin_lock(&F2FS_I(inode)->i_size_lock); if (F2FS_I(inode)->last_disk_size < psize) F2FS_I(inode)->last_disk_size = psize; - up_write(&F2FS_I(inode)->i_sem); + spin_unlock(&F2FS_I(inode)->i_size_lock); } done: diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a5656dd00bc2..43900c1e34fc 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -696,6 +696,7 @@ struct f2fs_inode_info { struct task_struct *cp_task; /* separate cp/wb IO stats*/ nid_t i_xattr_nid; /* node id that contains xattrs */ loff_t last_disk_size; /* lastly written file size */ + spinlock_t i_size_lock; /* protect last_disk_size */ #ifdef CONFIG_QUOTA struct dquot *i_dquot[MAXQUOTAS]; @@ -2855,9 +2856,9 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) if (!f2fs_is_time_consistent(inode)) return false; - down_read(&F2FS_I(inode)->i_sem); + spin_lock(&F2FS_I(inode)->i_size_lock); ret = F2FS_I(inode)->last_disk_size == i_size_read(inode); - up_read(&F2FS_I(inode)->i_sem); + spin_unlock(&F2FS_I(inode)->i_size_lock); return ret; } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 0d4da644df3b..867bd6e444e7 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -929,10 +929,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) if (err) return err; - down_write(&F2FS_I(inode)->i_sem); + spin_lock(&F2FS_I(inode)->i_size_lock); inode->i_mtime = inode->i_ctime = current_time(inode); F2FS_I(inode)->last_disk_size = i_size_read(inode); - up_write(&F2FS_I(inode)->i_sem); + spin_unlock(&F2FS_I(inode)->i_size_lock); } __setattr_copy(inode, attr); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 686f5402660e..ca34c214cc88 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -960,6 +960,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) /* Initialize f2fs-specific inode info */ atomic_set(&fi->dirty_pages, 0); init_rwsem(&fi->i_sem); + spin_lock_init(&fi->i_size_lock); INIT_LIST_HEAD(&fi->dirty_list); INIT_LIST_HEAD(&fi->gdirty_list); INIT_LIST_HEAD(&fi->inmem_ilist); -- cgit v1.2.3 From 7a88ddb56077d07257a5d0393a4be13e424ca755 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 27 Feb 2020 19:30:05 +0800 Subject: f2fs: fix inconsistent comments Lack of maintenance on comments may mislead developers, fix them. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 18 ++++-------------- fs/f2fs/data.c | 19 ++++++------------- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 1 - fs/f2fs/gc.c | 5 ++++- fs/f2fs/inode.c | 2 +- fs/f2fs/namei.c | 2 +- fs/f2fs/node.c | 6 +----- fs/f2fs/shrinker.c | 2 +- fs/f2fs/super.c | 4 ++-- 10 files changed, 21 insertions(+), 40 deletions(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 9c88fb3d255a..0dea31f1ddad 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -50,9 +50,6 @@ repeat: return page; } -/* - * We guarantee no failure on the returned page. - */ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, bool is_meta) { @@ -206,7 +203,7 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, } /* - * Readahead CP/NAT/SIT/SSA pages + * Readahead CP/NAT/SIT/SSA/POR pages */ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type, bool sync) @@ -898,7 +895,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) return -ENOMEM; /* * Finding out valid cp block involves read both - * sets( cp pack1 and cp pack 2) + * sets( cp pack 1 and cp pack 2) */ cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr); cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version); @@ -1385,10 +1382,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* Flush all the NAT/SIT pages */ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); - /* - * modify checkpoint - * version number is already updated - */ + /* start to update checkpoint, cp ver is already updated previously */ ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi, true)); ckpt->free_segment_count = cpu_to_le32(free_segments(sbi)); for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { @@ -1541,9 +1535,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) return unlikely(f2fs_cp_error(sbi)) ? -EIO : 0; } -/* - * We guarantee that this checkpoint procedure will not fail. - */ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); @@ -1611,7 +1602,6 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_flush_sit_entries(sbi, cpc); - /* unlock all the fs_lock[] in do_checkpoint() */ err = do_checkpoint(sbi, cpc); if (err) f2fs_release_discard_addrs(sbi); @@ -1624,7 +1614,7 @@ stop: if (cpc->reason & CP_RECOVERY) f2fs_notice(sbi, "checkpoint: version = %llx", ckpt_ver); - /* do checkpoint periodically */ + /* update CP_TIME to trigger checkpoint periodically */ f2fs_update_time(sbi, CP_TIME); trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); out: diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e5dc1ebb5305..cd388e72390b 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -364,9 +364,6 @@ static void f2fs_write_end_io(struct bio *bio) bio_put(bio); } -/* - * Return true, if pre_bio's bdev is same as its target device. - */ struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, block_t blk_addr, struct bio *bio) { @@ -403,6 +400,9 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr) return 0; } +/* + * Return true, if pre_bio's bdev is same as its target device. + */ static bool __same_bdev(struct f2fs_sb_info *sbi, block_t blk_addr, struct bio *bio) { @@ -410,9 +410,6 @@ static bool __same_bdev(struct f2fs_sb_info *sbi, return bio->bi_disk == b->bd_disk && bio->bi_partno == b->bd_partno; } -/* - * Low-level block read/write IO operations. - */ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages) { struct f2fs_sb_info *sbi = fio->sbi; @@ -1388,13 +1385,9 @@ void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) } /* - * f2fs_map_blocks() now supported readahead/bmap/rw direct_IO with - * f2fs_map_blocks structure. - * If original data blocks are allocated, then give them to blockdev. - * Otherwise, - * a. preallocate requested block addresses - * b. do not use extent cache for better performance - * c. give the block addresses to blockdev + * f2fs_map_blocks() tries to find or build mapping relationship which + * maps continuous logical blocks to physical blocks, and return such + * info via f2fs_map_blocks structure. */ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int create, int flag) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 43900c1e34fc..760a05f98ac7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2388,9 +2388,9 @@ static inline block_t datablock_addr(struct inode *inode, raw_node = F2FS_NODE(node_page); - /* from GC path only */ if (is_inode) { if (!inode) + /* from GC path only */ base = offset_in_addr(&raw_node->i); else if (f2fs_has_extra_attr(inode)) base = get_extra_isize(inode); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 867bd6e444e7..8c725111840e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -112,7 +112,6 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) } } - /* fill the page */ f2fs_wait_on_page_writeback(page, DATA, false, true); /* wait for GCed page writeback via META_MAPPING */ diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index db8725d473b5..608d04950f05 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -192,7 +192,10 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, p->ofs_unit = sbi->segs_per_sec; } - /* we need to check every dirty segments in the FG_GC case */ + /* + * adjust candidates range, should select all dirty segments for + * foreground GC and urgent GC cases. + */ if (gc_type != FG_GC && (sbi->gc_mode != GC_URGENT) && p->max_search > sbi->max_victim_search) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 9b49832296cf..45aff90af5c5 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -776,7 +776,7 @@ no_delete: else f2fs_inode_synced(inode); - /* ino == 0, if f2fs_new_inode() was failed t*/ + /* for the case f2fs_new_inode() was failed, .i_ino is zero, skip it */ if (inode->i_ino) invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 2aa035422c0f..b75c70813f9e 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -177,7 +177,7 @@ static inline int is_extension_exist(const unsigned char *s, const char *sub) } /* - * Set multimedia files as cold files for hot/cold data separation + * Set file's temperature for hot/cold data separation */ static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *inode, const unsigned char *name) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index d29774df0d64..6006802f9220 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -510,9 +510,6 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) return nr - nr_shrink; } -/* - * This function always returns success - */ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) { @@ -716,8 +713,7 @@ got: /* * Caller should call f2fs_put_dnode(dn). * Also, it should grab and release a rwsem by calling f2fs_lock_op() and - * f2fs_unlock_op() only if ro is not set RDONLY_NODE. - * In the case of RDONLY_NODE, we don't need to care about mutex. + * f2fs_unlock_op() only if mode is set with ALLOC_NODE. */ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) { diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index a467aca29cfe..d66de5999a26 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -58,7 +58,7 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink, /* count extent cache entries */ count += __count_extent_cache(sbi); - /* shrink clean nat cache entries */ + /* count clean nat cache entries */ count += __count_nat_entries(sbi); /* count free nids cache entries */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index ca34c214cc88..dcf9042d246c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1658,7 +1658,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) out_unlock: up_write(&sbi->gc_lock); restore_flag: - sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ + sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */ return err; } @@ -3590,7 +3590,7 @@ try_onemore: f2fs_err(sbi, "Cannot turn on quotas: error %d", err); } #endif - /* if there are nt orphan nodes free them */ + /* if there are any orphan inodes, free them */ err = f2fs_recover_orphan_inodes(sbi); if (err) goto free_meta; -- cgit v1.2.3 From a9117eca1de6b738e713d2142126db2cfbf6fb36 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 14 Feb 2020 17:45:11 +0800 Subject: f2fs: fix to show norecovery mount option Previously, 'norecovery' mount option will be shown as 'disable_roll_forward', fix to show original option name correctly. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 59d6ade50e22..221d035729f6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -100,6 +100,7 @@ extern const char *f2fs_fault_name[FAULT_MAX]; #define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000 #define F2FS_MOUNT_RESERVE_ROOT 0x01000000 #define F2FS_MOUNT_DISABLE_CHECKPOINT 0x02000000 +#define F2FS_MOUNT_NORECOVERY 0x04000000 #define F2FS_OPTION(sbi) ((sbi)->mount_opt) #define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index dcf9042d246c..a05139b56737 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -446,7 +446,7 @@ static int parse_options(struct super_block *sb, char *options) break; case Opt_norecovery: /* this option mounts f2fs with ro */ - set_opt(sbi, DISABLE_ROLL_FORWARD); + set_opt(sbi, NORECOVERY); if (!f2fs_readonly(sb)) return -EINVAL; break; @@ -1447,6 +1447,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) } if (test_opt(sbi, DISABLE_ROLL_FORWARD)) seq_puts(seq, ",disable_roll_forward"); + if (test_opt(sbi, NORECOVERY)) + seq_puts(seq, ",norecovery"); if (test_opt(sbi, DISCARD)) seq_puts(seq, ",discard"); else @@ -3599,7 +3601,8 @@ try_onemore: goto reset_checkpoint; /* recover fsynced data */ - if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { + if (!test_opt(sbi, DISABLE_ROLL_FORWARD) && + !test_opt(sbi, NORECOVERY)) { /* * mount should be failed, when device has readonly mode, and * previous checkpoint was not done by clean system shutdown. -- cgit v1.2.3 From b0332a0f957ca818642cfafdb9515d4fd3b24663 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 14 Feb 2020 17:44:12 +0800 Subject: f2fs: clean up lfs/adaptive mount option This patch removes F2FS_MOUNT_ADAPTIVE and F2FS_MOUNT_LFS mount options, and add F2FS_OPTION.fs_mode with below two status to indicate filesystem mode. enum { FS_MODE_ADAPTIVE, /* use both lfs/ssr allocation */ FS_MODE_LFS, /* use lfs allocation only */ }; It can enhance code readability and fs mode's scalability. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 8 ++++---- fs/f2fs/f2fs.h | 27 ++++++++++----------------- fs/f2fs/file.c | 2 +- fs/f2fs/gc.c | 2 +- fs/f2fs/segment.c | 12 ++++++------ fs/f2fs/super.c | 16 ++++++++-------- 6 files changed, 30 insertions(+), 37 deletions(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e589508ee155..9fba196bd808 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -442,7 +442,7 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi, if (type != DATA && type != NODE) goto submit_io; - if (test_opt(sbi, LFS) && current->plug) + if (f2fs_lfs_mode(sbi) && current->plug) blk_finish_plug(current->plug); if (F2FS_IO_ALIGNED(sbi)) @@ -1413,7 +1413,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, end = pgofs + maxblocks; if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) { - if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO && + if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO && map->m_may_create) goto next_dnode; @@ -1468,7 +1468,7 @@ next_block: if (__is_valid_data_blkaddr(blkaddr)) { /* use out-place-update for driect IO under LFS mode */ - if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO && + if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO && map->m_may_create) { err = __allocate_data_block(&dn, map->m_seg_type); if (err) @@ -2388,7 +2388,7 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - if (test_opt(sbi, LFS)) + if (f2fs_lfs_mode(sbi)) return true; if (S_ISDIR(inode->i_mode)) return true; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 221d035729f6..e27d30d6a387 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -91,8 +91,6 @@ extern const char *f2fs_fault_name[FAULT_MAX]; #define F2FS_MOUNT_FORCE_FG_GC 0x00004000 #define F2FS_MOUNT_DATA_FLUSH 0x00008000 #define F2FS_MOUNT_FAULT_INJECTION 0x00010000 -#define F2FS_MOUNT_ADAPTIVE 0x00020000 -#define F2FS_MOUNT_LFS 0x00040000 #define F2FS_MOUNT_USRQUOTA 0x00080000 #define F2FS_MOUNT_GRPQUOTA 0x00100000 #define F2FS_MOUNT_PRJQUOTA 0x00200000 @@ -139,6 +137,7 @@ struct f2fs_mount_info { int whint_mode; int alloc_mode; /* segment allocation policy */ int fsync_mode; /* fsync policy */ + int fs_mode; /* fs mode: LFS or ADAPTIVE */ bool test_dummy_encryption; /* test dummy encryption */ block_t unusable_cap; /* Amount of space allowed to be * unusable when disabling checkpoint @@ -1173,6 +1172,11 @@ enum { GC_URGENT, }; +enum { + FS_MODE_ADAPTIVE, /* use both lfs/ssr allocation */ + FS_MODE_LFS, /* use lfs allocation only */ +}; + enum { WHINT_MODE_OFF, /* not pass down write hints */ WHINT_MODE_USER, /* try to pass down hints given by users */ @@ -3910,20 +3914,9 @@ static inline bool f2fs_hw_is_readonly(struct f2fs_sb_info *sbi) return false; } - -static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt) +static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi) { - clear_opt(sbi, ADAPTIVE); - clear_opt(sbi, LFS); - - switch (mt) { - case F2FS_MOUNT_ADAPTIVE: - set_opt(sbi, ADAPTIVE); - break; - case F2FS_MOUNT_LFS: - set_opt(sbi, LFS); - break; - } + return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS; } static inline bool f2fs_may_encrypt(struct inode *inode) @@ -3978,7 +3971,7 @@ static inline int allow_outplace_dio(struct inode *inode, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int rw = iov_iter_rw(iter); - return (test_opt(sbi, LFS) && (rw == WRITE) && + return (f2fs_lfs_mode(sbi) && (rw == WRITE) && !block_unaligned_IO(inode, iocb, iter)); } @@ -4000,7 +3993,7 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, */ if (f2fs_sb_has_blkzoned(sbi)) return true; - if (test_opt(sbi, LFS) && (rw == WRITE)) { + if (f2fs_lfs_mode(sbi) && (rw == WRITE)) { if (block_unaligned_IO(inode, iocb, iter)) return true; if (F2FS_IO_ALIGNED(sbi)) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index e87cd93963fa..b318e4d9f825 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1126,7 +1126,7 @@ next_dnode: if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) { - if (test_opt(sbi, LFS)) { + if (f2fs_lfs_mode(sbi)) { f2fs_put_dnode(&dn); return -EOPNOTSUPP; } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 3fdc3d6a874c..d883943f7f46 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -765,7 +765,7 @@ static int move_data_block(struct inode *inode, block_t bidx, struct page *page, *mpage; block_t newaddr; int err = 0; - bool lfs_mode = test_opt(fio.sbi, LFS); + bool lfs_mode = f2fs_lfs_mode(fio.sbi); /* do not read out */ page = f2fs_grab_cache_page(inode->i_mapping, bidx, false); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c00cf5c27e95..04156df83e7a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -172,7 +172,7 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi) int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA); - if (test_opt(sbi, LFS)) + if (f2fs_lfs_mode(sbi)) return false; if (sbi->gc_mode == GC_URGENT) return true; @@ -1940,7 +1940,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, unsigned int start = 0, end = -1; unsigned int secno, start_segno; bool force = (cpc->reason & CP_DISCARD); - bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi); + bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi); mutex_lock(&dirty_i->seglist_lock); @@ -1972,7 +1972,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, (end - 1) <= cpc->trim_end) continue; - if (!test_opt(sbi, LFS) || !__is_large_section(sbi)) { + if (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi)) { f2fs_issue_discard(sbi, START_BLOCK(sbi, start), (end - start) << sbi->log_blocks_per_seg); continue; @@ -2830,7 +2830,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) struct discard_policy dpolicy; unsigned long long trimmed = 0; int err = 0; - bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi); + bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi); if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize) return -EINVAL; @@ -3193,7 +3193,7 @@ static void update_device_state(struct f2fs_io_info *fio) static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { int type = __get_segment_type(fio); - bool keep_order = (test_opt(fio->sbi, LFS) && type == CURSEG_COLD_DATA); + bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA); if (keep_order) down_read(&fio->sbi->io_order_lock); @@ -4678,7 +4678,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi) if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS) sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS; - if (!test_opt(sbi, LFS)) + if (!f2fs_lfs_mode(sbi)) sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a05139b56737..3b0483f5e67f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -600,10 +600,10 @@ static int parse_options(struct super_block *sb, char *options) kvfree(name); return -EINVAL; } - set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE); + F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE; } else if (strlen(name) == 3 && !strncmp(name, "lfs", 3)) { - set_opt_mode(sbi, F2FS_MOUNT_LFS); + F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS; } else { kvfree(name); return -EINVAL; @@ -904,7 +904,7 @@ static int parse_options(struct super_block *sb, char *options) } #endif - if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) { + if (F2FS_IO_SIZE_BITS(sbi) && !f2fs_lfs_mode(sbi)) { f2fs_err(sbi, "Should set mode=lfs with %uKB-sized IO", F2FS_IO_SIZE_KB(sbi)); return -EINVAL; @@ -934,7 +934,7 @@ static int parse_options(struct super_block *sb, char *options) } } - if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) { + if (test_opt(sbi, DISABLE_CHECKPOINT) && f2fs_lfs_mode(sbi)) { f2fs_err(sbi, "LFS not compatible with checkpoint=disable\n"); return -EINVAL; } @@ -1500,9 +1500,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",data_flush"); seq_puts(seq, ",mode="); - if (test_opt(sbi, ADAPTIVE)) + if (F2FS_OPTION(sbi).fs_mode == FS_MODE_ADAPTIVE) seq_puts(seq, "adaptive"); - else if (test_opt(sbi, LFS)) + else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS) seq_puts(seq, "lfs"); seq_printf(seq, ",active_logs=%u", F2FS_OPTION(sbi).active_logs); if (test_opt(sbi, RESERVE_ROOT)) @@ -1589,9 +1589,9 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, FLUSH_MERGE); set_opt(sbi, DISCARD); if (f2fs_sb_has_blkzoned(sbi)) - set_opt_mode(sbi, F2FS_MOUNT_LFS); + F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS; else - set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE); + F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE; #ifdef CONFIG_F2FS_FS_XATTR set_opt(sbi, XATTR_USER); -- cgit v1.2.3 From bbbc34fd66625473d20f89def2703f4f140d2ce8 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 14 Feb 2020 17:44:13 +0800 Subject: f2fs: clean up bggc mount option There are three status for background gc: on, off and sync, it's a little bit confused to use test_opt(BG_GC) and test_opt(FORCE_FG_GC) combinations to indicate status of background gc. So let's remove F2FS_MOUNT_BG_GC and F2FS_MOUNT_FORCE_FG_GC mount options, and add F2FS_OPTION().bggc_mode with below three status to clean up codes and enhance bggc mode's scalability. enum { BGGC_MODE_ON, /* background gc is on */ BGGC_MODE_OFF, /* background gc is off */ BGGC_MODE_SYNC, /* * background gc is on, migrating blocks * like foreground gc */ }; Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 12 ++++++++++-- fs/f2fs/gc.c | 6 +++++- fs/f2fs/super.c | 29 +++++++++++++---------------- 3 files changed, 28 insertions(+), 19 deletions(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e27d30d6a387..ef3ec6548629 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -74,7 +74,6 @@ extern const char *f2fs_fault_name[FAULT_MAX]; /* * For mount options */ -#define F2FS_MOUNT_BG_GC 0x00000001 #define F2FS_MOUNT_DISABLE_ROLL_FORWARD 0x00000002 #define F2FS_MOUNT_DISCARD 0x00000004 #define F2FS_MOUNT_NOHEAP 0x00000008 @@ -88,7 +87,6 @@ extern const char *f2fs_fault_name[FAULT_MAX]; #define F2FS_MOUNT_NOBARRIER 0x00000800 #define F2FS_MOUNT_FASTBOOT 0x00001000 #define F2FS_MOUNT_EXTENT_CACHE 0x00002000 -#define F2FS_MOUNT_FORCE_FG_GC 0x00004000 #define F2FS_MOUNT_DATA_FLUSH 0x00008000 #define F2FS_MOUNT_FAULT_INJECTION 0x00010000 #define F2FS_MOUNT_USRQUOTA 0x00080000 @@ -138,6 +136,7 @@ struct f2fs_mount_info { int alloc_mode; /* segment allocation policy */ int fsync_mode; /* fsync policy */ int fs_mode; /* fs mode: LFS or ADAPTIVE */ + int bggc_mode; /* bggc mode: off, on or sync */ bool test_dummy_encryption; /* test dummy encryption */ block_t unusable_cap; /* Amount of space allowed to be * unusable when disabling checkpoint @@ -1172,6 +1171,15 @@ enum { GC_URGENT, }; +enum { + BGGC_MODE_ON, /* background gc is on */ + BGGC_MODE_OFF, /* background gc is off */ + BGGC_MODE_SYNC, /* + * background gc is on, migrating blocks + * like foreground gc + */ +}; + enum { FS_MODE_ADAPTIVE, /* use both lfs/ssr allocation */ FS_MODE_LFS, /* use lfs allocation only */ diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index d883943f7f46..a19966b0e018 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -31,6 +31,8 @@ static int gc_thread_func(void *data) set_freezable(); do { + bool sync_mode; + wait_event_interruptible_timeout(*wq, kthread_should_stop() || freezing(current) || gc_th->gc_wake, @@ -101,8 +103,10 @@ static int gc_thread_func(void *data) do_gc: stat_inc_bggc_count(sbi->stat_info); + sync_mode = F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC; + /* if return value is not zero, no victim was selected */ - if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true, NULL_SEGNO)) + if (f2fs_gc(sbi, sync_mode, true, NULL_SEGNO)) wait_ms = gc_th->no_gc_sleep_time; trace_f2fs_background_gc(sbi->sb, wait_ms, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 3b0483f5e67f..cdcb80f9b3d0 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -427,14 +427,11 @@ static int parse_options(struct super_block *sb, char *options) if (!name) return -ENOMEM; if (strlen(name) == 2 && !strncmp(name, "on", 2)) { - set_opt(sbi, BG_GC); - clear_opt(sbi, FORCE_FG_GC); + F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; } else if (strlen(name) == 3 && !strncmp(name, "off", 3)) { - clear_opt(sbi, BG_GC); - clear_opt(sbi, FORCE_FG_GC); + F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_OFF; } else if (strlen(name) == 4 && !strncmp(name, "sync", 4)) { - set_opt(sbi, BG_GC); - set_opt(sbi, FORCE_FG_GC); + F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_SYNC; } else { kvfree(name); return -EINVAL; @@ -1437,14 +1434,13 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) { struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb); - if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC)) { - if (test_opt(sbi, FORCE_FG_GC)) - seq_printf(seq, ",background_gc=%s", "sync"); - else - seq_printf(seq, ",background_gc=%s", "on"); - } else { + if (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC) + seq_printf(seq, ",background_gc=%s", "sync"); + else if (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_ON) + seq_printf(seq, ",background_gc=%s", "on"); + else if (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF) seq_printf(seq, ",background_gc=%s", "off"); - } + if (test_opt(sbi, DISABLE_ROLL_FORWARD)) seq_puts(seq, ",disable_roll_forward"); if (test_opt(sbi, NORECOVERY)) @@ -1576,8 +1572,8 @@ static void default_options(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZO; F2FS_OPTION(sbi).compress_log_size = MIN_COMPRESS_LOG_SIZE; F2FS_OPTION(sbi).compress_ext_cnt = 0; + F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; - set_opt(sbi, BG_GC); set_opt(sbi, INLINE_XATTR); set_opt(sbi, INLINE_DATA); set_opt(sbi, INLINE_DENTRY); @@ -1783,7 +1779,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * or if background_gc = off is passed in mount * option. Also sync the filesystem. */ - if ((*flags & SB_RDONLY) || !test_opt(sbi, BG_GC)) { + if ((*flags & SB_RDONLY) || + F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF) { if (sbi->gc_thread) { f2fs_stop_gc_thread(sbi); need_restart_gc = true; @@ -3668,7 +3665,7 @@ reset_checkpoint: * If filesystem is not mounted as read-only then * do start the gc_thread. */ - if (test_opt(sbi, BG_GC) && !f2fs_readonly(sb)) { + if (F2FS_OPTION(sbi).bggc_mode != BGGC_MODE_OFF && !f2fs_readonly(sb)) { /* After POR, we can run background GC thread.*/ err = f2fs_start_gc_thread(sbi); if (err) -- cgit v1.2.3 From 5df7731f60c2a933695a68d732f8b39fca788de6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 17 Feb 2020 17:45:44 +0800 Subject: f2fs: introduce DEFAULT_IO_TIMEOUT As Geert Uytterhoeven reported: for parameter HZ/50 in congestion_wait(BLK_RW_ASYNC, HZ/50); On some platforms, HZ can be less than 50, then unexpected 0 timeout jiffies will be set in congestion_wait(). This patch introduces a macro DEFAULT_IO_TIMEOUT to wrap a determinate value with msecs_to_jiffies(20) to instead HZ/50 to avoid such issue. Quoted from Geert Uytterhoeven: "A timeout of HZ means 1 second. HZ/50 means 20 ms, but has the risk of being zero, if HZ < 50. If you want to use a timeout of 20 ms, you best use msecs_to_jiffies(20), as that takes care of the special cases, and never returns 0." Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/compress.c | 3 ++- fs/f2fs/data.c | 4 ++-- fs/f2fs/f2fs.h | 3 +++ fs/f2fs/gc.c | 3 ++- fs/f2fs/inode.c | 2 +- fs/f2fs/node.c | 2 +- fs/f2fs/recovery.c | 5 +++-- fs/f2fs/segment.c | 10 ++++++---- fs/f2fs/super.c | 6 ++++-- 10 files changed, 25 insertions(+), 15 deletions(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 0dea31f1ddad..46fc9c1542fe 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1260,7 +1260,7 @@ void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type) if (unlikely(f2fs_cp_error(sbi))) break; - io_schedule_timeout(HZ/50); + io_schedule_timeout(DEFAULT_IO_TIMEOUT); } finish_wait(&sbi->cp_wait, &wait); } diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 5ef7d7cfc0e7..748933813cd1 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -988,7 +988,8 @@ retry_write: } else if (ret == -EAGAIN) { ret = 0; cond_resched(); - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); lock_page(cc->rpages[i]); clear_page_dirty_for_io(cc->rpages[i]); goto retry_write; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9fba196bd808..43d705727aa5 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2317,7 +2317,7 @@ retry_encrypt: /* flush pending IOs and wait for a while in the ENOMEM case */ if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { f2fs_flush_merged_writes(fio->sbi); - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); gfp_flags |= __GFP_NOFAIL; goto retry_encrypt; } @@ -2908,7 +2908,7 @@ result: if (wbc->sync_mode == WB_SYNC_ALL) { cond_resched(); congestion_wait(BLK_RW_ASYNC, - HZ/50); + DEFAULT_IO_TIMEOUT); goto retry_write; } goto next; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ef3ec6548629..241a87d0d71c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -558,6 +558,9 @@ enum { #define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO count */ +/* congestion wait timeout value, default: 20ms */ +#define DEFAULT_IO_TIMEOUT (msecs_to_jiffies(20)) + /* maximum retry quota flush count */ #define DEFAULT_RETRY_QUOTA_FLUSH_COUNT 8 diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 0dfdec4926e4..f6958ae8c157 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -977,7 +977,8 @@ retry: if (err) { clear_cold_data(page); if (err == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); goto retry; } if (is_dirty) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 45aff90af5c5..44e08bf2e2b4 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -535,7 +535,7 @@ retry: inode = f2fs_iget(sb, ino); if (IS_ERR(inode)) { if (PTR_ERR(inode) == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); goto retry; } } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index cc2646794130..aea9915a7897 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2599,7 +2599,7 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) retry: ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false); if (!ipage) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); goto retry; } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 348e8d463b3e..dd804c07eeb0 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -534,7 +534,7 @@ retry_dn: err = f2fs_get_dnode_of_data(&dn, start, ALLOC_NODE); if (err) { if (err == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); goto retry_dn; } goto out; @@ -617,7 +617,8 @@ retry_prev: err = check_index_in_prev_nodes(sbi, dest, &dn); if (err) { if (err == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); goto retry_prev; } goto err; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 04156df83e7a..a2beb0e81440 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -245,7 +245,8 @@ retry: LOOKUP_NODE); if (err) { if (err == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); cond_resched(); goto retry; } @@ -312,7 +313,7 @@ next: skip: iput(inode); } - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); cond_resched(); if (gc_failure) { if (++looped >= count) @@ -415,7 +416,8 @@ retry: err = f2fs_do_write_data_page(&fio); if (err) { if (err == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); cond_resched(); goto retry; } @@ -2801,7 +2803,7 @@ next: blk_finish_plug(&plug); mutex_unlock(&dcc->cmd_lock); trimmed += __wait_all_discard_cmd(sbi, NULL); - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); goto next; } skip: diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index cdcb80f9b3d0..36539afae1e5 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1885,7 +1885,8 @@ repeat: page = read_cache_page_gfp(mapping, blkidx, GFP_NOFS); if (IS_ERR(page)) { if (PTR_ERR(page) == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); goto repeat; } set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); @@ -1939,7 +1940,8 @@ retry: &page, NULL); if (unlikely(err)) { if (err == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); goto retry; } set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); -- cgit v1.2.3 From a999150f4fe3abbb7efd05411fd5b460be699943 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Feb 2020 18:17:10 +0800 Subject: f2fs: use kmem_cache pool during inline xattr lookups It's been observed that kzalloc() on lookup_all_xattrs() are called millions of times on Android, quickly becoming the top abuser of slub memory allocator. Use a dedicated kmem cache pool for xattr lookups to mitigate this. Signed-off-by: Park Ju Hyung Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 +++ fs/f2fs/super.c | 10 +++++++++- fs/f2fs/xattr.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++----- fs/f2fs/xattr.h | 4 ++++ 4 files changed, 65 insertions(+), 6 deletions(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index baaf7f0fd02a..3e727a6af08e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1487,6 +1487,9 @@ struct f2fs_sb_info { __u32 s_chksum_seed; struct workqueue_struct *post_read_wq; /* post read workqueue */ + + struct kmem_cache *inline_xattr_slab; /* inline xattr entry */ + unsigned int inline_xattr_slab_size; /* default inline xattr slab size */ }; struct f2fs_private_dio { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 36539afae1e5..9f435191f1e5 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1202,6 +1202,7 @@ static void f2fs_put_super(struct super_block *sb) kvfree(sbi->raw_super); destroy_device_list(sbi); + f2fs_destroy_xattr_caches(sbi); mempool_destroy(sbi->write_io_dummy); #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) @@ -3458,12 +3459,17 @@ try_onemore: } } + /* init per sbi slab cache */ + err = f2fs_init_xattr_caches(sbi); + if (err) + goto free_io_dummy; + /* get an inode for meta space */ sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); if (IS_ERR(sbi->meta_inode)) { f2fs_err(sbi, "Failed to read F2FS meta data inode"); err = PTR_ERR(sbi->meta_inode); - goto free_io_dummy; + goto free_xattr_cache; } err = f2fs_get_valid_checkpoint(sbi); @@ -3736,6 +3742,8 @@ free_meta_inode: make_bad_inode(sbi->meta_inode); iput(sbi->meta_inode); sbi->meta_inode = NULL; +free_xattr_cache: + f2fs_destroy_xattr_caches(sbi); free_io_dummy: mempool_destroy(sbi->write_io_dummy); free_percpu: diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index d4800df6302d..4f6582ef7ee3 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -23,6 +23,25 @@ #include "xattr.h" #include "segment.h" +static void *xattr_alloc(struct f2fs_sb_info *sbi, int size, bool *is_inline) +{ + if (likely(size == sbi->inline_xattr_slab_size)) { + *is_inline = true; + return kmem_cache_zalloc(sbi->inline_xattr_slab, GFP_NOFS); + } + *is_inline = false; + return f2fs_kzalloc(sbi, size, GFP_NOFS); +} + +static void xattr_free(struct f2fs_sb_info *sbi, void *xattr_addr, + bool is_inline) +{ + if (is_inline) + kmem_cache_free(sbi->inline_xattr_slab, xattr_addr); + else + kvfree(xattr_addr); +} + static int f2fs_xattr_generic_get(const struct xattr_handler *handler, struct dentry *unused, struct inode *inode, const char *name, void *buffer, size_t size) @@ -301,7 +320,8 @@ static int read_xattr_block(struct inode *inode, void *txattr_addr) static int lookup_all_xattrs(struct inode *inode, struct page *ipage, unsigned int index, unsigned int len, const char *name, struct f2fs_xattr_entry **xe, - void **base_addr, int *base_size) + void **base_addr, int *base_size, + bool *is_inline) { void *cur_addr, *txattr_addr, *last_txattr_addr; void *last_addr = NULL; @@ -313,7 +333,7 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, return -ENODATA; *base_size = XATTR_SIZE(inode) + XATTR_PADDING_SIZE; - txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode), *base_size, GFP_NOFS); + txattr_addr = xattr_alloc(F2FS_I_SB(inode), *base_size, is_inline); if (!txattr_addr) return -ENOMEM; @@ -362,7 +382,7 @@ check: *base_addr = txattr_addr; return 0; out: - kvfree(txattr_addr); + xattr_free(F2FS_I_SB(inode), txattr_addr, *is_inline); return err; } @@ -499,6 +519,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, unsigned int size, len; void *base_addr = NULL; int base_size; + bool is_inline; if (name == NULL) return -EINVAL; @@ -509,7 +530,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, down_read(&F2FS_I(inode)->i_xattr_sem); error = lookup_all_xattrs(inode, ipage, index, len, name, - &entry, &base_addr, &base_size); + &entry, &base_addr, &base_size, &is_inline); up_read(&F2FS_I(inode)->i_xattr_sem); if (error) return error; @@ -532,7 +553,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, } error = size; out: - kvfree(base_addr); + xattr_free(F2FS_I_SB(inode), base_addr, is_inline); return error; } @@ -764,3 +785,26 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, f2fs_update_time(sbi, REQ_TIME); return err; } + +int f2fs_init_xattr_caches(struct f2fs_sb_info *sbi) +{ + dev_t dev = sbi->sb->s_bdev->bd_dev; + char slab_name[32]; + + sprintf(slab_name, "f2fs_xattr_entry-%u:%u", MAJOR(dev), MINOR(dev)); + + sbi->inline_xattr_slab_size = F2FS_OPTION(sbi).inline_xattr_size * + sizeof(__le32) + XATTR_PADDING_SIZE; + + sbi->inline_xattr_slab = f2fs_kmem_cache_create(slab_name, + sbi->inline_xattr_slab_size); + if (!sbi->inline_xattr_slab) + return -ENOMEM; + + return 0; +} + +void f2fs_destroy_xattr_caches(struct f2fs_sb_info *sbi) +{ + kmem_cache_destroy(sbi->inline_xattr_slab); +} diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 574beea46494..0153b4c9ef21 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -131,6 +131,8 @@ extern int f2fs_setxattr(struct inode *, int, const char *, extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t, struct page *); extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t); +extern int f2fs_init_xattr_caches(struct f2fs_sb_info *); +extern void f2fs_destroy_xattr_caches(struct f2fs_sb_info *); #else #define f2fs_xattr_handlers NULL @@ -151,6 +153,8 @@ static inline ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, { return -EOPNOTSUPP; } +static int f2fs_init_xattr_caches(struct f2fs_sb_info *sbi) { return 0; } +static void f2fs_destroy_xattr_caches(struct f2fs_sb_info *sbi) { } #endif #ifdef CONFIG_F2FS_FS_SECURITY -- cgit v1.2.3 From 62f63eea291b50a5677ae7503ac128803174698a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 19 Mar 2020 19:58:00 +0800 Subject: f2fs: fix NULL pointer dereference in f2fs_write_begin() BUG: kernel NULL pointer dereference, address: 0000000000000000 RIP: 0010:f2fs_write_begin+0x823/0xb90 [f2fs] Call Trace: f2fs_quota_write+0x139/0x1d0 [f2fs] write_blk+0x36/0x80 [quota_tree] get_free_dqblk+0x42/0xa0 [quota_tree] do_insert_tree+0x235/0x4a0 [quota_tree] do_insert_tree+0x26e/0x4a0 [quota_tree] do_insert_tree+0x26e/0x4a0 [quota_tree] do_insert_tree+0x26e/0x4a0 [quota_tree] qtree_write_dquot+0x70/0x190 [quota_tree] v2_write_dquot+0x43/0x90 [quota_v2] dquot_acquire+0x77/0x100 f2fs_dquot_acquire+0x2f/0x60 [f2fs] dqget+0x310/0x450 dquot_transfer+0x7e/0x120 f2fs_setattr+0x11a/0x4a0 [f2fs] notify_change+0x349/0x480 chown_common+0x168/0x1c0 do_fchownat+0xbc/0xf0 __x64_sys_fchownat+0x20/0x30 do_syscall_64+0x5f/0x220 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Passing fsdata parameter to .write_{begin,end} in f2fs_quota_write(), so that if quota file is compressed one, we can avoid above NULL pointer dereference when updating quota content. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9f435191f1e5..252d4ea926bf 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1929,6 +1929,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type, int offset = off & (sb->s_blocksize - 1); size_t towrite = len; struct page *page; + void *fsdata = NULL; char *kaddr; int err = 0; int tocopy; @@ -1938,7 +1939,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type, towrite); retry: err = a_ops->write_begin(NULL, mapping, off, tocopy, 0, - &page, NULL); + &page, &fsdata); if (unlikely(err)) { if (err == -ENOMEM) { congestion_wait(BLK_RW_ASYNC, @@ -1955,7 +1956,7 @@ retry: flush_dcache_page(page); a_ops->write_end(NULL, mapping, off, tocopy, tocopy, - page, NULL); + page, fsdata); offset = 0; towrite -= tocopy; off += tocopy; -- cgit v1.2.3 From 91faa5344fd22acf383a01701f184c450b7a6a76 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 10 Mar 2020 20:50:05 +0800 Subject: f2fs: change default compression algorithm Use LZ4 as default compression algorithm, as compared to LZO, it shows almost the same compression ratio and much better decompression speed. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 252d4ea926bf..0c901a4d3d44 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1570,7 +1570,7 @@ static void default_options(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).test_dummy_encryption = false; F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID); F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID); - F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZO; + F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZ4; F2FS_OPTION(sbi).compress_log_size = MIN_COMPRESS_LOG_SIZE; F2FS_OPTION(sbi).compress_ext_cnt = 0; F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; -- cgit v1.2.3 From 50cfa66f0de02eff30fb81bdc878bb986cf3aff3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 3 Mar 2020 17:46:02 +0800 Subject: f2fs: compress: support zstd compress algorithm Add zstd compress algorithm support, use "compress_algorithm=zstd" mountoption to enable it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 4 +- fs/f2fs/Kconfig | 9 ++ fs/f2fs/compress.c | 165 +++++++++++++++++++++++++++++++++++++ fs/f2fs/f2fs.h | 5 ++ fs/f2fs/super.c | 7 ++ include/trace/events/f2fs.h | 3 +- 6 files changed, 190 insertions(+), 3 deletions(-) (limited to 'fs/f2fs/super.c') diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 4eb3e2ddd00e..b1a66cf0e967 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -235,8 +235,8 @@ checkpoint=%s[:%u[%]] Set to "disable" to turn off checkpointing. Set to "en hide up to all remaining free space. The actual space that would be unusable can be viewed at /sys/fs/f2fs//unusable This space is reclaimed once checkpoint=enable. -compress_algorithm=%s Control compress algorithm, currently f2fs supports "lzo" - and "lz4" algorithm. +compress_algorithm=%s Control compress algorithm, currently f2fs supports "lzo", + "lz4" and "zstd" algorithm. compress_log_size=%u Support configuring compress cluster size, the size will be 4KB * (1 << %u), 16KB is minimum size, also it's default size. diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index f0faada30f30..bb68d21e1f8c 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -118,3 +118,12 @@ config F2FS_FS_LZ4 default y help Support LZ4 compress algorithm, if unsure, say Y. + +config F2FS_FS_ZSTD + bool "ZSTD compression support" + depends on F2FS_FS_COMPRESSION + select ZSTD_COMPRESS + select ZSTD_DECOMPRESS + default y + help + Support ZSTD compress algorithm, if unsure, say Y. diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 13666735f567..c7284dd7d52f 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -291,6 +292,165 @@ static const struct f2fs_compress_ops f2fs_lz4_ops = { }; #endif +#ifdef CONFIG_F2FS_FS_ZSTD +#define F2FS_ZSTD_DEFAULT_CLEVEL 1 + +static int zstd_init_compress_ctx(struct compress_ctx *cc) +{ + ZSTD_parameters params; + ZSTD_CStream *stream; + void *workspace; + unsigned int workspace_size; + + params = ZSTD_getParams(F2FS_ZSTD_DEFAULT_CLEVEL, cc->rlen, 0); + workspace_size = ZSTD_CStreamWorkspaceBound(params.cParams); + + workspace = f2fs_kvmalloc(F2FS_I_SB(cc->inode), + workspace_size, GFP_NOFS); + if (!workspace) + return -ENOMEM; + + stream = ZSTD_initCStream(params, 0, workspace, workspace_size); + if (!stream) { + printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_initCStream failed\n", + KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, + __func__); + kvfree(workspace); + return -EIO; + } + + cc->private = workspace; + cc->private2 = stream; + + cc->clen = cc->rlen - PAGE_SIZE - COMPRESS_HEADER_SIZE; + return 0; +} + +static void zstd_destroy_compress_ctx(struct compress_ctx *cc) +{ + kvfree(cc->private); + cc->private = NULL; + cc->private2 = NULL; +} + +static int zstd_compress_pages(struct compress_ctx *cc) +{ + ZSTD_CStream *stream = cc->private2; + ZSTD_inBuffer inbuf; + ZSTD_outBuffer outbuf; + int src_size = cc->rlen; + int dst_size = src_size - PAGE_SIZE - COMPRESS_HEADER_SIZE; + int ret; + + inbuf.pos = 0; + inbuf.src = cc->rbuf; + inbuf.size = src_size; + + outbuf.pos = 0; + outbuf.dst = cc->cbuf->cdata; + outbuf.size = dst_size; + + ret = ZSTD_compressStream(stream, &outbuf, &inbuf); + if (ZSTD_isError(ret)) { + printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_compressStream failed, ret: %d\n", + KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, + __func__, ZSTD_getErrorCode(ret)); + return -EIO; + } + + ret = ZSTD_endStream(stream, &outbuf); + if (ZSTD_isError(ret)) { + printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_endStream returned %d\n", + KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, + __func__, ZSTD_getErrorCode(ret)); + return -EIO; + } + + cc->clen = outbuf.pos; + return 0; +} + +static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic) +{ + ZSTD_DStream *stream; + void *workspace; + unsigned int workspace_size; + + workspace_size = ZSTD_DStreamWorkspaceBound(MAX_COMPRESS_WINDOW_SIZE); + + workspace = f2fs_kvmalloc(F2FS_I_SB(dic->inode), + workspace_size, GFP_NOFS); + if (!workspace) + return -ENOMEM; + + stream = ZSTD_initDStream(MAX_COMPRESS_WINDOW_SIZE, + workspace, workspace_size); + if (!stream) { + printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_initDStream failed\n", + KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, + __func__); + kvfree(workspace); + return -EIO; + } + + dic->private = workspace; + dic->private2 = stream; + + return 0; +} + +static void zstd_destroy_decompress_ctx(struct decompress_io_ctx *dic) +{ + kvfree(dic->private); + dic->private = NULL; + dic->private2 = NULL; +} + +static int zstd_decompress_pages(struct decompress_io_ctx *dic) +{ + ZSTD_DStream *stream = dic->private2; + ZSTD_inBuffer inbuf; + ZSTD_outBuffer outbuf; + int ret; + + inbuf.pos = 0; + inbuf.src = dic->cbuf->cdata; + inbuf.size = dic->clen; + + outbuf.pos = 0; + outbuf.dst = dic->rbuf; + outbuf.size = dic->rlen; + + ret = ZSTD_decompressStream(stream, &outbuf, &inbuf); + if (ZSTD_isError(ret)) { + printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_compressStream failed, ret: %d\n", + KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, + __func__, ZSTD_getErrorCode(ret)); + return -EIO; + } + + if (dic->rlen != outbuf.pos) { + printk_ratelimited("%sF2FS-fs (%s): %s ZSTD invalid rlen:%zu, " + "expected:%lu\n", KERN_ERR, + F2FS_I_SB(dic->inode)->sb->s_id, + __func__, dic->rlen, + PAGE_SIZE << dic->log_cluster_size); + return -EIO; + } + + return 0; +} + +static const struct f2fs_compress_ops f2fs_zstd_ops = { + .init_compress_ctx = zstd_init_compress_ctx, + .destroy_compress_ctx = zstd_destroy_compress_ctx, + .compress_pages = zstd_compress_pages, + .init_decompress_ctx = zstd_init_decompress_ctx, + .destroy_decompress_ctx = zstd_destroy_decompress_ctx, + .decompress_pages = zstd_decompress_pages, +}; +#endif + static const struct f2fs_compress_ops *f2fs_cops[COMPRESS_MAX] = { #ifdef CONFIG_F2FS_FS_LZO &f2fs_lzo_ops, @@ -302,6 +462,11 @@ static const struct f2fs_compress_ops *f2fs_cops[COMPRESS_MAX] = { #else NULL, #endif +#ifdef CONFIG_F2FS_FS_ZSTD + &f2fs_zstd_ops, +#else + NULL, +#endif }; bool f2fs_is_compress_backend_ready(struct inode *inode) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index dea45bb57862..8d206dcc33ab 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1267,6 +1267,7 @@ enum fsync_mode { enum compress_algorithm_type { COMPRESS_LZO, COMPRESS_LZ4, + COMPRESS_ZSTD, COMPRESS_MAX, }; @@ -1296,6 +1297,7 @@ struct compress_ctx { size_t rlen; /* valid data length in rbuf */ size_t clen; /* valid data length in cbuf */ void *private; /* payload buffer for specified compression algorithm */ + void *private2; /* extra payload buffer */ }; /* compress context for write IO path */ @@ -1325,11 +1327,14 @@ struct decompress_io_ctx { size_t clen; /* valid data length in cbuf */ refcount_t ref; /* referrence count of compressed page */ bool failed; /* indicate IO error during decompression */ + void *private; /* payload buffer for specified decompression algorithm */ + void *private2; /* extra payload buffer */ }; #define NULL_CLUSTER ((unsigned int)(~0)) #define MIN_COMPRESS_LOG_SIZE 2 #define MAX_COMPRESS_LOG_SIZE 8 +#define MAX_COMPRESS_WINDOW_SIZE ((PAGE_SIZE) << MAX_COMPRESS_LOG_SIZE) struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 0c901a4d3d44..b83b17b54a0a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -829,6 +829,10 @@ static int parse_options(struct super_block *sb, char *options) !strcmp(name, "lz4")) { F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZ4; + } else if (strlen(name) == 4 && + !strcmp(name, "zstd")) { + F2FS_OPTION(sbi).compress_algorithm = + COMPRESS_ZSTD; } else { kfree(name); return -EINVAL; @@ -1419,6 +1423,9 @@ static inline void f2fs_show_compress_options(struct seq_file *seq, case COMPRESS_LZ4: algtype = "lz4"; break; + case COMPRESS_ZSTD: + algtype = "zstd"; + break; } seq_printf(seq, ",compress_algorithm=%s", algtype); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 67a97838c2a0..d97adfc327f0 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -153,7 +153,8 @@ TRACE_DEFINE_ENUM(CP_PAUSE); #define show_compress_algorithm(type) \ __print_symbolic(type, \ { COMPRESS_LZO, "LZO" }, \ - { COMPRESS_LZ4, "LZ4" }) + { COMPRESS_LZ4, "LZ4" }, \ + { COMPRESS_ZSTD, "ZSTD" }) struct f2fs_sb_info; struct f2fs_io_info; -- cgit v1.2.3