summaryrefslogtreecommitdiffstats
path: root/fs/f2fs/f2fs.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-05-31 16:52:59 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-05-31 16:52:59 -0700
commit1501f707d2b24316b41d45bdc95a73bc8cc8dd49 (patch)
tree432a2c255fd269f4910d6c36cfe9eb58cdc92f5d /fs/f2fs/f2fs.h
parent2a5699b0de4ee623d77f183c8e8e62691bd60a70 (diff)
parent2d1fe8a86bf5e0663866fd0da83c2af1e1b0e362 (diff)
downloadlinux-1501f707d2b24316b41d45bdc95a73bc8cc8dd49.tar.bz2
Merge tag 'f2fs-for-5.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim: "In this round, we've refactored the existing atomic write support implemented by in-memory operations to have storing data in disk temporarily, which can give us a benefit to accept more atomic writes. At the same time, we removed the existing volatile write support. We've also revisited the file pinning and GC flows and found some corner cases which contributeed abnormal system behaviours. As usual, there're several minor code refactoring for readability, sanity check, and clean ups. Enhancements: - allow compression for mmap files in compress_mode=user - kill volatile write support - change the current atomic write way - give priority to select unpinned section for foreground GC - introduce data read/write showing path info - remove unnecessary f2fs_lock_op in f2fs_new_inode Bug fixes: - fix the file pinning flow during checkpoint=disable and GCs - fix foreground and background GCs to select the right victims and get free sections on time - fix GC flags on defragmenting pages - avoid an infinite loop to flush node pages - fix fallocate to use file_modified to update permissions consistently" * tag 'f2fs-for-5.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (40 commits) f2fs: fix to tag gcing flag on page during file defragment f2fs: replace F2FS_I(inode) and sbi by the local variable f2fs: add f2fs_init_write_merge_io function f2fs: avoid unneeded error handling for revoke_entry_slab allocation f2fs: allow compression for mmap files in compress_mode=user f2fs: fix typo in comment f2fs: make f2fs_read_inline_data() more readable f2fs: fix to do sanity check for inline inode f2fs: fix fallocate to use file_modified to update permissions consistently f2fs: don't use casefolded comparison for "." and ".." f2fs: do not stop GC when requiring a free section f2fs: keep wait_ms if EAGAIN happens f2fs: introduce f2fs_gc_control to consolidate f2fs_gc parameters f2fs: reject test_dummy_encryption when !CONFIG_FS_ENCRYPTION f2fs: kill volatile write support f2fs: change the current atomic write way f2fs: don't need inode lock for system hidden quota f2fs: stop allocating pinned sections if EAGAIN happens f2fs: skip GC if possible when checkpoint disabling f2fs: give priority to select unpinned section for foreground GC ...
Diffstat (limited to 'fs/f2fs/f2fs.h')
-rw-r--r--fs/f2fs/f2fs.h133
1 files changed, 63 insertions, 70 deletions
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 10d1f138d14f..d9bbecd008d2 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -509,11 +509,11 @@ struct f2fs_filename {
#if IS_ENABLED(CONFIG_UNICODE)
/*
* For casefolded directories: the casefolded name, but it's left NULL
- * if the original name is not valid Unicode, if the directory is both
- * casefolded and encrypted and its encryption key is unavailable, or if
- * the filesystem is doing an internal operation where usr_fname is also
- * NULL. In all these cases we fall back to treating the name as an
- * opaque byte sequence.
+ * if the original name is not valid Unicode, if the original name is
+ * "." or "..", if the directory is both casefolded and encrypted and
+ * its encryption key is unavailable, or if the filesystem is doing an
+ * internal operation where usr_fname is also NULL. In all these cases
+ * we fall back to treating the name as an opaque byte sequence.
*/
struct fscrypt_str cf_name;
#endif
@@ -579,8 +579,8 @@ enum {
/* maximum retry quota flush count */
#define DEFAULT_RETRY_QUOTA_FLUSH_COUNT 8
-/* maximum retry of EIO'ed meta page */
-#define MAX_RETRY_META_PAGE_EIO 100
+/* maximum retry of EIO'ed page */
+#define MAX_RETRY_PAGE_EIO 100
#define F2FS_LINK_MAX 0xffffffff /* maximum link count per file */
@@ -717,7 +717,6 @@ enum {
enum {
GC_FAILURE_PIN,
- GC_FAILURE_ATOMIC,
MAX_GC_FAILURE
};
@@ -739,8 +738,6 @@ enum {
FI_UPDATE_WRITE, /* inode has in-place-update data */
FI_NEED_IPU, /* used for ipu per file */
FI_ATOMIC_FILE, /* indicate atomic file */
- FI_ATOMIC_COMMIT, /* indicate the state of atomical committing */
- FI_VOLATILE_FILE, /* indicate volatile file */
FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */
FI_DROP_CACHE, /* drop dirty page cache */
FI_DATA_EXIST, /* indicate data exists */
@@ -753,7 +750,6 @@ enum {
FI_EXTRA_ATTR, /* indicate file has extra attribute */
FI_PROJ_INHERIT, /* indicate file inherits projectid */
FI_PIN_FILE, /* indicate file should not be gced */
- FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */
FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */
FI_COMPRESSED_FILE, /* indicate file's data can be compressed */
FI_COMPRESS_CORRUPT, /* indicate compressed cluster is corrupted */
@@ -795,11 +791,9 @@ struct f2fs_inode_info {
#endif
struct list_head dirty_list; /* dirty list for dirs and files */
struct list_head gdirty_list; /* linked in global dirty list */
- struct list_head inmem_ilist; /* list for inmem inodes */
- struct list_head inmem_pages; /* inmemory pages managed by f2fs */
- struct task_struct *inmem_task; /* store inmemory task */
- struct mutex inmem_lock; /* lock for inmemory pages */
+ struct task_struct *atomic_write_task; /* store atomic write task */
struct extent_tree *extent_tree; /* cached extent_tree entry */
+ struct inode *cow_inode; /* copy-on-write inode for atomic write */
/* avoid racing between foreground op and gc */
struct f2fs_rwsem i_gc_rwsem[2];
@@ -1093,7 +1087,6 @@ enum count_type {
F2FS_DIRTY_QDATA,
F2FS_DIRTY_NODES,
F2FS_DIRTY_META,
- F2FS_INMEM_PAGES,
F2FS_DIRTY_IMETA,
F2FS_WB_CP_DATA,
F2FS_WB_DATA,
@@ -1118,16 +1111,12 @@ enum count_type {
*/
#define PAGE_TYPE_OF_BIO(type) ((type) > META ? META : (type))
enum page_type {
- DATA,
- NODE,
+ DATA = 0,
+ NODE = 1, /* should not change this */
META,
NR_PAGE_TYPE,
META_FLUSH,
- INMEM, /* the below types are used by tracepoints only. */
- INMEM_DROP,
- INMEM_INVALIDATE,
- INMEM_REVOKE,
- IPU,
+ IPU, /* the below types are used by tracepoints only. */
OPU,
};
@@ -1277,6 +1266,15 @@ struct atgc_management {
unsigned long long age_threshold; /* age threshold */
};
+struct f2fs_gc_control {
+ unsigned int victim_segno; /* target victim segment number */
+ int init_gc_type; /* FG_GC or BG_GC */
+ bool no_bg_gc; /* check the space and stop bg_gc */
+ bool should_migrate_blocks; /* should migrate blocks */
+ bool err_gc_skipped; /* return EAGAIN if GC skipped */
+ unsigned int nr_free_secs; /* # of free sections to do GC */
+};
+
/* For s_flag in struct f2fs_sb_info */
enum {
SBI_IS_DIRTY, /* dirty flag for checkpoint */
@@ -1615,8 +1613,8 @@ struct f2fs_sb_info {
/* keep migration IO order for LFS mode */
struct f2fs_rwsem io_order_lock;
mempool_t *write_io_dummy; /* Dummy pages */
- pgoff_t metapage_eio_ofs; /* EIO page offset */
- int metapage_eio_cnt; /* EIO count */
+ pgoff_t page_eio_ofs[NR_PAGE_TYPE]; /* EIO page offset */
+ int page_eio_cnt[NR_PAGE_TYPE]; /* EIO count */
/* for checkpoint */
struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
@@ -1719,7 +1717,6 @@ struct f2fs_sb_info {
/* for skip statistic */
unsigned int atomic_files; /* # of opened atomic file */
- unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */
unsigned long long skipped_gc_rwsem; /* FG_GC only */
/* threshold for gc trials on pinned files */
@@ -1750,9 +1747,7 @@ struct f2fs_sb_info {
atomic_t inline_dir; /* # of inline_dentry inodes */
atomic_t compr_inode; /* # of compressed inodes */
atomic64_t compr_blocks; /* # of compressed blocks */
- atomic_t vw_cnt; /* # of volatile writes */
atomic_t max_aw_cnt; /* max # of atomic writes */
- atomic_t max_vw_cnt; /* max # of volatile writes */
unsigned int io_skip_bggc; /* skip background gc for in-flight IO */
unsigned int other_skip_bggc; /* skip background gc for other reasons */
unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */
@@ -1763,7 +1758,7 @@ struct f2fs_sb_info {
unsigned int data_io_flag;
unsigned int node_io_flag;
- /* For sysfs suppport */
+ /* For sysfs support */
struct kobject s_kobj; /* /sys/fs/f2fs/<devname> */
struct completion s_kobj_unregister;
@@ -2606,11 +2601,17 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
{
spin_lock(&sbi->stat_lock);
- f2fs_bug_on(sbi, !sbi->total_valid_block_count);
- f2fs_bug_on(sbi, !sbi->total_valid_node_count);
+ if (unlikely(!sbi->total_valid_block_count ||
+ !sbi->total_valid_node_count)) {
+ f2fs_warn(sbi, "dec_valid_node_count: inconsistent block counts, total_valid_block:%u, total_valid_node:%u",
+ sbi->total_valid_block_count,
+ sbi->total_valid_node_count);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ } else {
+ sbi->total_valid_block_count--;
+ sbi->total_valid_node_count--;
+ }
- sbi->total_valid_node_count--;
- sbi->total_valid_block_count--;
if (sbi->reserved_blocks &&
sbi->current_reserved_blocks < sbi->reserved_blocks)
sbi->current_reserved_blocks++;
@@ -3173,6 +3174,10 @@ static inline int inline_xattr_size(struct inode *inode)
return 0;
}
+/*
+ * Notice: check inline_data flag without inode page lock is unsafe.
+ * It could change at any time by f2fs_convert_inline_page().
+ */
static inline int f2fs_has_inline_data(struct inode *inode)
{
return is_inode_flag_set(inode, FI_INLINE_DATA);
@@ -3203,16 +3208,6 @@ static inline bool f2fs_is_atomic_file(struct inode *inode)
return is_inode_flag_set(inode, FI_ATOMIC_FILE);
}
-static inline bool f2fs_is_commit_atomic_write(struct inode *inode)
-{
- return is_inode_flag_set(inode, FI_ATOMIC_COMMIT);
-}
-
-static inline bool f2fs_is_volatile_file(struct inode *inode)
-{
- return is_inode_flag_set(inode, FI_VOLATILE_FILE);
-}
-
static inline bool f2fs_is_first_block_written(struct inode *inode)
{
return is_inode_flag_set(inode, FI_FIRST_BLOCK_WRITTEN);
@@ -3445,6 +3440,8 @@ void f2fs_handle_failed_inode(struct inode *inode);
int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
bool hot, bool set);
struct dentry *f2fs_get_parent(struct dentry *child);
+int f2fs_get_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+ struct inode **new_inode);
/*
* dir.c
@@ -3580,11 +3577,8 @@ void f2fs_destroy_node_manager_caches(void);
* segment.c
*/
bool f2fs_need_SSR(struct f2fs_sb_info *sbi);
-void f2fs_register_inmem_page(struct inode *inode, struct page *page);
-void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure);
-void f2fs_drop_inmem_pages(struct inode *inode);
-void f2fs_drop_inmem_page(struct inode *inode, struct page *page);
-int f2fs_commit_inmem_pages(struct inode *inode);
+int f2fs_commit_atomic_write(struct inode *inode);
+void f2fs_abort_atomic_write(struct inode *inode, bool clean);
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need);
void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg);
int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino);
@@ -3726,6 +3720,7 @@ int f2fs_init_bio_entry_cache(void);
void f2fs_destroy_bio_entry_cache(void);
void f2fs_submit_bio(struct f2fs_sb_info *sbi,
struct bio *bio, enum page_type type);
+int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi);
void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
struct inode *inode, struct page *page,
@@ -3787,8 +3782,7 @@ extern const struct iomap_ops f2fs_iomap_ops;
int f2fs_start_gc_thread(struct f2fs_sb_info *sbi);
void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi);
block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
-int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background, bool force,
- unsigned int segno);
+int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control);
void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count);
int __init f2fs_create_garbage_collection_cache(void);
@@ -3816,7 +3810,6 @@ struct f2fs_stat_info {
int ext_tree, zombie_tree, ext_node;
int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta;
int ndirty_data, ndirty_qdata;
- int inmem_pages;
unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all;
int nats, dirty_nats, sits, dirty_sits;
int free_nids, avail_nids, alloc_nids;
@@ -3834,7 +3827,7 @@ struct f2fs_stat_info {
int inline_xattr, inline_inode, inline_dir, append, update, orphans;
int compr_inode;
unsigned long long compr_blocks;
- int aw_cnt, max_aw_cnt, vw_cnt, max_vw_cnt;
+ int aw_cnt, max_aw_cnt;
unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks;
unsigned int bimodal, avg_vblocks;
int util_free, util_valid, util_invalid;
@@ -3846,7 +3839,6 @@ struct f2fs_stat_info {
int bg_node_segs, bg_data_segs;
int tot_blks, data_blks, node_blks;
int bg_data_blks, bg_node_blks;
- unsigned long long skipped_atomic_files[2];
int curseg[NR_CURSEG_TYPE];
int cursec[NR_CURSEG_TYPE];
int curzone[NR_CURSEG_TYPE];
@@ -3946,17 +3938,6 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
if (cur > max) \
atomic_set(&F2FS_I_SB(inode)->max_aw_cnt, cur); \
} while (0)
-#define stat_inc_volatile_write(inode) \
- (atomic_inc(&F2FS_I_SB(inode)->vw_cnt))
-#define stat_dec_volatile_write(inode) \
- (atomic_dec(&F2FS_I_SB(inode)->vw_cnt))
-#define stat_update_max_volatile_write(inode) \
- do { \
- int cur = atomic_read(&F2FS_I_SB(inode)->vw_cnt); \
- int max = atomic_read(&F2FS_I_SB(inode)->max_vw_cnt); \
- if (cur > max) \
- atomic_set(&F2FS_I_SB(inode)->max_vw_cnt, cur); \
- } while (0)
#define stat_inc_seg_count(sbi, type, gc_type) \
do { \
struct f2fs_stat_info *si = F2FS_STAT(sbi); \
@@ -4018,9 +3999,6 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi);
#define stat_add_compr_blocks(inode, blocks) do { } while (0)
#define stat_sub_compr_blocks(inode, blocks) do { } while (0)
#define stat_update_max_atomic_write(inode) do { } while (0)
-#define stat_inc_volatile_write(inode) do { } while (0)
-#define stat_dec_volatile_write(inode) do { } while (0)
-#define stat_update_max_volatile_write(inode) do { } while (0)
#define stat_inc_meta_count(sbi, blkaddr) do { } while (0)
#define stat_inc_seg_type(sbi, curseg) do { } while (0)
#define stat_inc_block_count(sbi, curseg) do { } while (0)
@@ -4053,6 +4031,7 @@ extern struct kmem_cache *f2fs_inode_entry_slab;
* inline.c
*/
bool f2fs_may_inline_data(struct inode *inode);
+bool f2fs_sanity_check_inline_data(struct inode *inode);
bool f2fs_may_inline_dentry(struct inode *inode);
void f2fs_do_read_inline_data(struct page *page, struct page *ipage);
void f2fs_truncate_inline_inode(struct inode *inode,
@@ -4422,8 +4401,7 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi)
static inline bool f2fs_may_compress(struct inode *inode)
{
if (IS_SWAPFILE(inode) || f2fs_is_pinned_file(inode) ||
- f2fs_is_atomic_file(inode) ||
- f2fs_is_volatile_file(inode))
+ f2fs_is_atomic_file(inode))
return false;
return S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode);
}
@@ -4431,8 +4409,8 @@ static inline bool f2fs_may_compress(struct inode *inode)
static inline void f2fs_i_compr_blocks_update(struct inode *inode,
u64 blocks, bool add)
{
- int diff = F2FS_I(inode)->i_cluster_size - blocks;
struct f2fs_inode_info *fi = F2FS_I(inode);
+ int diff = fi->i_cluster_size - blocks;
/* don't update i_compr_blocks if saved blocks were released */
if (!add && !atomic_read(&fi->i_compr_blocks))
@@ -4540,6 +4518,21 @@ static inline void f2fs_io_schedule_timeout(long timeout)
io_schedule_timeout(timeout);
}
+static inline void f2fs_handle_page_eio(struct f2fs_sb_info *sbi, pgoff_t ofs,
+ enum page_type type)
+{
+ if (unlikely(f2fs_cp_error(sbi)))
+ return;
+
+ if (ofs == sbi->page_eio_ofs[type]) {
+ if (sbi->page_eio_cnt[type]++ == MAX_RETRY_PAGE_EIO)
+ set_ckpt_flags(sbi, CP_ERROR_FLAG);
+ } else {
+ sbi->page_eio_ofs[type] = ofs;
+ sbi->page_eio_cnt[type] = 0;
+ }
+}
+
#define EFSBADCRC EBADMSG /* Bad CRC detected */
#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */