From 1753396a0a1c574969dc0c4b369ac4ac3d299245 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 16 Feb 2015 16:15:44 +0800 Subject: f2fs: remove unused inline_dentry_addr inline_dentry_addr is introduced with inline dentry feature without being used, now we do not need to keep it for any reason, so remove it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7fa3313ab0e2..a978b655bacd 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1290,12 +1290,6 @@ static inline int f2fs_has_inline_dentry(struct inode *inode) return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DENTRY); } -static inline void *inline_dentry_addr(struct page *page) -{ - struct f2fs_inode *ri = F2FS_INODE(page); - return (void *)&(ri->i_addr[1]); -} - static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page) { if (!f2fs_has_inline_dentry(dir)) -- cgit v1.2.3 From 3b4d732a568432039af71809f9cad69565f00bed Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 16 Feb 2015 16:17:20 +0800 Subject: f2fs: introduce f2fs_update_dentry to clean up duplicated codes This patch introduces f2fs_update_dentry to remove redundant code in f2fs_add_inline_entry and __f2fs_add_link. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 33 +++++++++++++++++++++++---------- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/inline.c | 18 ++++++------------ 3 files changed, 31 insertions(+), 22 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index b74097a7f6d9..583896cb5346 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -474,6 +474,24 @@ next: goto next; } +void f2fs_update_dentry(struct inode *inode, struct f2fs_dentry_ptr *d, + const struct qstr *name, f2fs_hash_t name_hash, + unsigned int bit_pos) +{ + struct f2fs_dir_entry *de; + int slots = GET_DENTRY_SLOTS(name->len); + int i; + + de = &d->dentry[bit_pos]; + de->hash_code = name_hash; + de->name_len = cpu_to_le16(name->len); + memcpy(d->filename[bit_pos], name->name, name->len); + de->ino = cpu_to_le32(inode->i_ino); + set_de_type(de, inode); + for (i = 0; i < slots; i++) + test_and_set_bit_le(bit_pos + i, (void *)d->bitmap); +} + /* * Caller should grab and release a rwsem by calling f2fs_lock_op() and * f2fs_unlock_op(). 
@@ -486,15 +504,14 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, unsigned int current_depth; unsigned long bidx, block; f2fs_hash_t dentry_hash; - struct f2fs_dir_entry *de; unsigned int nbucket, nblock; size_t namelen = name->len; struct page *dentry_page = NULL; struct f2fs_dentry_block *dentry_blk = NULL; + struct f2fs_dentry_ptr d; int slots = GET_DENTRY_SLOTS(namelen); struct page *page; int err = 0; - int i; if (f2fs_has_inline_dentry(dir)) { err = f2fs_add_inline_entry(dir, name, inode); @@ -553,14 +570,10 @@ add_dentry: err = PTR_ERR(page); goto fail; } - de = &dentry_blk->dentry[bit_pos]; - de->hash_code = dentry_hash; - de->name_len = cpu_to_le16(namelen); - memcpy(dentry_blk->filename[bit_pos], name->name, name->len); - de->ino = cpu_to_le32(inode->i_ino); - set_de_type(de, inode); - for (i = 0; i < slots; i++) - test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); + + make_dentry_ptr(&d, (void *)dentry_blk, 1); + f2fs_update_dentry(inode, &d, name, dentry_hash, bit_pos); + set_page_dirty(dentry_page); /* we don't need to mark_inode_dirty now */ diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a978b655bacd..f8da399eb711 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1376,6 +1376,8 @@ ino_t f2fs_inode_by_name(struct inode *, struct qstr *); void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, struct page *, struct inode *); int update_dent_inode(struct inode *, const struct qstr *); +void f2fs_update_dentry(struct inode *, struct f2fs_dentry_ptr *, + const struct qstr *, f2fs_hash_t , unsigned int); int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, struct inode *); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 1484c00133cd..82b74415b623 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -386,15 +386,12 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, struct page *ipage; 
unsigned int bit_pos; f2fs_hash_t name_hash; - struct f2fs_dir_entry *de; size_t namelen = name->len; struct f2fs_inline_dentry *dentry_blk = NULL; + struct f2fs_dentry_ptr d; int slots = GET_DENTRY_SLOTS(namelen); struct page *page; int err = 0; - int i; - - name_hash = f2fs_dentry_hash(name); ipage = get_node_page(sbi, dir->i_ino); if (IS_ERR(ipage)) @@ -418,14 +415,11 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, } f2fs_wait_on_page_writeback(ipage, NODE); - de = &dentry_blk->dentry[bit_pos]; - de->hash_code = name_hash; - de->name_len = cpu_to_le16(namelen); - memcpy(dentry_blk->filename[bit_pos], name->name, name->len); - de->ino = cpu_to_le32(inode->i_ino); - set_de_type(de, inode); - for (i = 0; i < slots; i++) - test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); + + name_hash = f2fs_dentry_hash(name); + make_dentry_ptr(&d, (void *)dentry_blk, 2); + f2fs_update_dentry(inode, &d, name, name_hash, bit_pos); + set_page_dirty(ipage); /* we don't need to mark_inode_dirty now */ -- cgit v1.2.3 From 97dc3fd2cbbf6dac239333083a8a005bf50c96e8 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 16 Feb 2015 16:19:22 +0800 Subject: f2fs: use ->writepage in sync_meta_pages This patch uses ->writepage of meta mapping in sync_meta_pages instead of f2fs_write_meta_page, by this way, in its caller we can ignore any changes (e.g. changing name) of this registered function. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 7f794b72b3b7..6faffce01869 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -276,7 +276,7 @@ continue_unlock: if (!clear_page_dirty_for_io(page)) goto continue_unlock; - if (f2fs_write_meta_page(page, &wbc)) { + if (mapping->a_ops->writepage(page, &wbc)) { unlock_page(page); break; } -- cgit v1.2.3 From 3c0d84d6f1dde0a85f20957954fffb209edc55ac Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 16 Feb 2015 16:20:27 +0800 Subject: f2fs: fix incorrectly stat number of inline data inode We should stat inline data information for temp file in f2fs_tmpfile if we enable inline_data feature. Otherwise, inline data stat number will be wrong after this temp file is evicted. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index e79639a9787a..1e2ae21bd6b6 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -693,6 +693,8 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) f2fs_unlock_op(sbi); alloc_nid_done(sbi, inode->i_ino); + + stat_inc_inline_inode(inode); d_tmpfile(dentry, inode); unlock_new_inode(inode); return 0; -- cgit v1.2.3 From 0c872e2dedfc09f41a5604d1c5010f800c0bd8f1 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Feb 2015 17:46:29 +0800 Subject: f2fs: move ext_lock out of struct extent_info Move ext_lock out of struct extent_info, then in the following patches we can use variables with struct extent_info type as a parameter to pass pure data. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 12 ++++++------ fs/f2fs/f2fs.h | 6 +----- fs/f2fs/inode.c | 7 +++++++ fs/f2fs/super.c | 2 +- 4 files changed, 15 insertions(+), 12 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 985ed023a750..0811d6509fce 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -258,9 +258,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, if (is_inode_flag_set(fi, FI_NO_EXTENT)) return 0; - read_lock(&fi->ext.ext_lock); + read_lock(&fi->ext_lock); if (fi->ext.len == 0) { - read_unlock(&fi->ext.ext_lock); + read_unlock(&fi->ext_lock); return 0; } @@ -284,10 +284,10 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, bh_result->b_size = UINT_MAX; stat_inc_read_hit(inode->i_sb); - read_unlock(&fi->ext.ext_lock); + read_unlock(&fi->ext_lock); return 1; } - read_unlock(&fi->ext.ext_lock); + read_unlock(&fi->ext_lock); return 0; } @@ -309,7 +309,7 @@ void update_extent_cache(struct dnode_of_data *dn) fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + dn->ofs_in_node; - write_lock(&fi->ext.ext_lock); + write_lock(&fi->ext_lock); start_fofs = fi->ext.fofs; end_fofs = fi->ext.fofs + fi->ext.len - 1; @@ -366,7 +366,7 @@ void update_extent_cache(struct dnode_of_data *dn) need_update = true; } end_update: - write_unlock(&fi->ext.ext_lock); + write_unlock(&fi->ext_lock); if (need_update) sync_inode_page(dn); return; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f8da399eb711..c3caa3cb5beb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -277,7 +277,6 @@ enum { #define F2FS_MIN_EXTENT_LEN 16 /* minimum extent length */ struct extent_info { - rwlock_t ext_lock; /* rwlock for consistency */ unsigned int fofs; /* start offset in a file */ u32 blk_addr; /* start block address of the extent */ unsigned int len; /* length of the extent */ @@ -309,6 +308,7 @@ struct f2fs_inode_info { nid_t i_xattr_nid; /* node id that contains xattrs */ unsigned 
long long xattr_ver; /* cp version of xattr modification */ struct extent_info ext; /* in-memory extent cache entry */ + rwlock_t ext_lock; /* rwlock for single extent cache */ struct inode_entry *dirty_dir; /* the pointer of dirty dir */ struct radix_tree_root inmem_root; /* radix tree for inmem pages */ @@ -319,21 +319,17 @@ struct f2fs_inode_info { static inline void get_extent_info(struct extent_info *ext, struct f2fs_extent i_ext) { - write_lock(&ext->ext_lock); ext->fofs = le32_to_cpu(i_ext.fofs); ext->blk_addr = le32_to_cpu(i_ext.blk_addr); ext->len = le32_to_cpu(i_ext.len); - write_unlock(&ext->ext_lock); } static inline void set_raw_extent(struct extent_info *ext, struct f2fs_extent *i_ext) { - read_lock(&ext->ext_lock); i_ext->fofs = cpu_to_le32(ext->fofs); i_ext->blk_addr = cpu_to_le32(ext->blk_addr); i_ext->len = cpu_to_le32(ext->len); - read_unlock(&ext->ext_lock); } struct f2fs_nm_info { diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 2d002e3738a7..28dd26a50276 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -130,7 +130,10 @@ static int do_read_inode(struct inode *inode) fi->i_pino = le32_to_cpu(ri->i_pino); fi->i_dir_level = ri->i_dir_level; + write_lock(&fi->ext_lock); get_extent_info(&fi->ext, ri->i_ext); + write_unlock(&fi->ext_lock); + get_inline_info(fi, ri); /* check data exist */ @@ -220,7 +223,11 @@ void update_inode(struct inode *inode, struct page *node_page) ri->i_links = cpu_to_le32(inode->i_nlink); ri->i_size = cpu_to_le64(i_size_read(inode)); ri->i_blocks = cpu_to_le64(inode->i_blocks); + + read_lock(&F2FS_I(inode)->ext_lock); set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext); + read_unlock(&F2FS_I(inode)->ext_lock); + set_raw_inline(F2FS_I(inode), ri); ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f2fe666a6ea9..4e9aad451593 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -392,7 +392,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) 
atomic_set(&fi->dirty_pages, 0); fi->i_current_depth = 1; fi->i_advise = 0; - rwlock_init(&fi->ext.ext_lock); + rwlock_init(&fi->ext_lock); init_rwsem(&fi->i_sem); INIT_RADIX_TREE(&fi->inmem_root, GFP_NOFS); INIT_LIST_HEAD(&fi->inmem_pages); -- cgit v1.2.3 From 4d0b0bd4385f0ce8d3b430f9667c5e2ca1de10af Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Feb 2015 17:47:25 +0800 Subject: f2fs: simplfy a field name in struct f2fs_extent,extent_info Rename a field from 'blk_addr' to 'blk' in struct {f2fs_extent,extent_info}, since the comment on this field already describes its meaning well. This way, we can avoid long statements in the code of the following patches. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 13 ++++++------- fs/f2fs/f2fs.h | 6 +++--- include/linux/f2fs_fs.h | 2 +- 3 files changed, 10 insertions(+), 11 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0811d6509fce..64f9049a9f25 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -268,7 +268,7 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, start_fofs = fi->ext.fofs; end_fofs = fi->ext.fofs + fi->ext.len - 1; - start_blkaddr = fi->ext.blk_addr; + start_blkaddr = fi->ext.blk; if (pgofs >= start_fofs && pgofs <= end_fofs) { unsigned int blkbits = inode->i_sb->s_blocksize_bits; @@ -313,8 +313,8 @@ void update_extent_cache(struct dnode_of_data *dn) start_fofs = fi->ext.fofs; end_fofs = fi->ext.fofs + fi->ext.len - 1; - start_blkaddr = fi->ext.blk_addr; - end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1; + start_blkaddr = fi->ext.blk; + end_blkaddr = fi->ext.blk + fi->ext.len - 1; /* Drop and initialize the matched extent */ if (fi->ext.len == 1 && fofs == start_fofs) @@ -324,7 +324,7 @@ void update_extent_cache(struct dnode_of_data *dn) if (fi->ext.len == 0) { if (dn->data_blkaddr != NULL_ADDR) { fi->ext.fofs = fofs; - fi->ext.blk_addr = dn->data_blkaddr; + fi->ext.blk = dn->data_blkaddr; fi->ext.len = 1; } goto end_update; @@
-333,7 +333,7 @@ void update_extent_cache(struct dnode_of_data *dn) /* Front merge */ if (fofs == start_fofs - 1 && dn->data_blkaddr == start_blkaddr - 1) { fi->ext.fofs--; - fi->ext.blk_addr--; + fi->ext.blk--; fi->ext.len++; goto end_update; } @@ -351,8 +351,7 @@ void update_extent_cache(struct dnode_of_data *dn) fi->ext.len = fofs - start_fofs; } else { fi->ext.fofs = fofs + 1; - fi->ext.blk_addr = start_blkaddr + - fofs - start_fofs + 1; + fi->ext.blk = start_blkaddr + fofs - start_fofs + 1; fi->ext.len -= fofs - start_fofs + 1; } } else { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c3caa3cb5beb..5d2e52e31fac 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -278,7 +278,7 @@ enum { struct extent_info { unsigned int fofs; /* start offset in a file */ - u32 blk_addr; /* start block address of the extent */ + u32 blk; /* start block address of the extent */ unsigned int len; /* length of the extent */ }; @@ -320,7 +320,7 @@ static inline void get_extent_info(struct extent_info *ext, struct f2fs_extent i_ext) { ext->fofs = le32_to_cpu(i_ext.fofs); - ext->blk_addr = le32_to_cpu(i_ext.blk_addr); + ext->blk = le32_to_cpu(i_ext.blk); ext->len = le32_to_cpu(i_ext.len); } @@ -328,7 +328,7 @@ static inline void set_raw_extent(struct extent_info *ext, struct f2fs_extent *i_ext) { i_ext->fofs = cpu_to_le32(ext->fofs); - i_ext->blk_addr = cpu_to_le32(ext->blk_addr); + i_ext->blk = cpu_to_le32(ext->blk); i_ext->len = cpu_to_le32(ext->len); } diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index a23556c32703..502f28cfb78e 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -153,7 +153,7 @@ struct f2fs_orphan_block { */ struct f2fs_extent { __le32 fofs; /* start file offset of the extent */ - __le32 blk_addr; /* start block address of the extent */ + __le32 blk; /* start block address of the extent */ __le32 len; /* lengh of the extent */ } __packed; -- cgit v1.2.3 From a2e7d1bfebe0bc349a3eb9d01caac026627f095e Mon Sep 17 00:00:00 2001 
From: Chao Yu Date: Thu, 5 Feb 2015 17:50:30 +0800 Subject: f2fs: introduce f2fs_map_bh to clean codes of check_extent_cache This patch introduces f2fs_map_bh to clean codes of check_extent_cache. v2: o cleanup f2fs_map_bh pointed out by Jaegeuk Kim. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 64f9049a9f25..bce0372a90d4 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -248,8 +248,23 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) return err; } +static void f2fs_map_bh(struct super_block *sb, pgoff_t pgofs, + struct extent_info *ei, struct buffer_head *bh_result) +{ + unsigned int blkbits = sb->s_blocksize_bits; + size_t count; + + set_buffer_new(bh_result); + map_bh(bh_result, sb, ei->blk + pgofs - ei->fofs); + count = ei->fofs + ei->len - pgofs; + if (count < (UINT_MAX >> blkbits)) + bh_result->b_size = (count << blkbits); + else + bh_result->b_size = UINT_MAX; +} + static int check_extent_cache(struct inode *inode, pgoff_t pgofs, - struct buffer_head *bh_result) + struct extent_info *ei) { struct f2fs_inode_info *fi = F2FS_I(inode); pgoff_t start_fofs, end_fofs; @@ -271,18 +286,7 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, start_blkaddr = fi->ext.blk; if (pgofs >= start_fofs && pgofs <= end_fofs) { - unsigned int blkbits = inode->i_sb->s_blocksize_bits; - size_t count; - - set_buffer_new(bh_result); - map_bh(bh_result, inode->i_sb, - start_blkaddr + pgofs - start_fofs); - count = end_fofs - pgofs + 1; - if (count < (UINT_MAX >> blkbits)) - bh_result->b_size = (count << blkbits); - else - bh_result->b_size = UINT_MAX; - + *ei = fi->ext; stat_inc_read_hit(inode->i_sb); read_unlock(&fi->ext_lock); return 1; @@ -658,13 +662,16 @@ static int __get_data_block(struct inode *inode, sector_t iblock, int mode = create ? 
ALLOC_NODE : LOOKUP_NODE_RA; pgoff_t pgofs, end_offset; int err = 0, ofs = 1; + struct extent_info ei; bool allocated = false; /* Get the page offset from the block offset(iblock) */ pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); - if (check_extent_cache(inode, pgofs, bh_result)) + if (check_extent_cache(inode, pgofs, &ei)) { + f2fs_map_bh(inode->i_sb, pgofs, &ei, bh_result); goto out; + } if (create) f2fs_lock_op(F2FS_I_SB(inode)); -- cgit v1.2.3 From 7e4dde79df7cdf8b40282857e030c7572ff04886 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Feb 2015 17:51:34 +0800 Subject: f2fs: introduce universal lookup/update interface for extent cache In this patch, we do these jobs: 1. rename {check,update}_extent_cache to {lookup,update}_extent_info; 2. introduce universal lookup/update interface of extent cache: f2fs_{lookup,update}_extent_cache including above two real functions, then export them to function callers. So after above cleanup, we can add new rb-tree based extent cache into exported interfaces. v2: o remove "f2fs_" for inner function {lookup,update}_extent_info suggested by Jaegeuk Kim. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 66 +++++++++++++++++++++++++++++++++--------------------- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 2 +- fs/f2fs/inline.c | 2 +- fs/f2fs/recovery.c | 2 +- 5 files changed, 44 insertions(+), 30 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index bce0372a90d4..3dbaa475fce4 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -263,20 +263,20 @@ static void f2fs_map_bh(struct super_block *sb, pgoff_t pgofs, bh_result->b_size = UINT_MAX; } -static int check_extent_cache(struct inode *inode, pgoff_t pgofs, - struct extent_info *ei) +static bool lookup_extent_info(struct inode *inode, pgoff_t pgofs, + struct extent_info *ei) { struct f2fs_inode_info *fi = F2FS_I(inode); pgoff_t start_fofs, end_fofs; block_t start_blkaddr; if (is_inode_flag_set(fi, FI_NO_EXTENT)) - return 0; + return false; read_lock(&fi->ext_lock); if (fi->ext.len == 0) { read_unlock(&fi->ext_lock); - return 0; + return false; } stat_inc_total_hit(inode->i_sb); @@ -289,29 +289,22 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, *ei = fi->ext; stat_inc_read_hit(inode->i_sb); read_unlock(&fi->ext_lock); - return 1; + return true; } read_unlock(&fi->ext_lock); - return 0; + return false; } -void update_extent_cache(struct dnode_of_data *dn) +static bool update_extent_info(struct inode *inode, pgoff_t fofs, + block_t blkaddr) { - struct f2fs_inode_info *fi = F2FS_I(dn->inode); - pgoff_t fofs, start_fofs, end_fofs; + struct f2fs_inode_info *fi = F2FS_I(inode); + pgoff_t start_fofs, end_fofs; block_t start_blkaddr, end_blkaddr; int need_update = true; - f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR); - - /* Update the page address in the parent node */ - __set_data_blkaddr(dn); - if (is_inode_flag_set(fi, FI_NO_EXTENT)) - return; - - fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + - dn->ofs_in_node; + return false; write_lock(&fi->ext_lock); @@ -326,16 +319,16 @@ 
void update_extent_cache(struct dnode_of_data *dn) /* Initial extent */ if (fi->ext.len == 0) { - if (dn->data_blkaddr != NULL_ADDR) { + if (blkaddr != NULL_ADDR) { fi->ext.fofs = fofs; - fi->ext.blk = dn->data_blkaddr; + fi->ext.blk = blkaddr; fi->ext.len = 1; } goto end_update; } /* Front merge */ - if (fofs == start_fofs - 1 && dn->data_blkaddr == start_blkaddr - 1) { + if (fofs == start_fofs - 1 && blkaddr == start_blkaddr - 1) { fi->ext.fofs--; fi->ext.blk--; fi->ext.len++; @@ -343,7 +336,7 @@ void update_extent_cache(struct dnode_of_data *dn) } /* Back merge */ - if (fofs == end_fofs + 1 && dn->data_blkaddr == end_blkaddr + 1) { + if (fofs == end_fofs + 1 && blkaddr == end_blkaddr + 1) { fi->ext.len++; goto end_update; } @@ -370,9 +363,30 @@ void update_extent_cache(struct dnode_of_data *dn) } end_update: write_unlock(&fi->ext_lock); - if (need_update) + return need_update; +} + +static bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, + struct extent_info *ei) +{ + return lookup_extent_info(inode, pgofs, ei); +} + +void f2fs_update_extent_cache(struct dnode_of_data *dn) +{ + struct f2fs_inode_info *fi = F2FS_I(dn->inode); + pgoff_t fofs; + + f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR); + + /* Update the page address in the parent node */ + __set_data_blkaddr(dn); + + fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + + dn->ofs_in_node; + + if (update_extent_info(dn->inode, fofs, dn->data_blkaddr)) sync_inode_page(dn); - return; } struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) @@ -668,7 +682,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock, /* Get the page offset from the block offset(iblock) */ pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); - if (check_extent_cache(inode, pgofs, &ei)) { + if (f2fs_lookup_extent_cache(inode, pgofs, &ei)) { f2fs_map_bh(inode->i_sb, pgofs, &ei, bh_result); goto out; } @@ -835,7 +849,7 @@ int do_write_data_page(struct page 
*page, struct f2fs_io_info *fio) set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); } else { write_data_page(page, &dn, fio); - update_extent_cache(&dn); + f2fs_update_extent_cache(&dn); set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); } out_writepage: diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5d2e52e31fac..f3fbfa91de8e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1513,7 +1513,7 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, struct f2fs_io_info *); int reserve_new_block(struct dnode_of_data *); int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); -void update_extent_cache(struct dnode_of_data *); +void f2fs_update_extent_cache(struct dnode_of_data *); struct page *find_data_page(struct inode *, pgoff_t, bool); struct page *get_lock_data_page(struct inode *, pgoff_t); struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 98dac27bc3f7..baadaf2afc6c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -433,7 +433,7 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) continue; dn->data_blkaddr = NULL_ADDR; - update_extent_cache(dn); + f2fs_update_extent_cache(dn); invalidate_blocks(sbi, blkaddr); nr_free++; } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 82b74415b623..4ba97320d194 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -122,7 +122,7 @@ no_update: set_page_writeback(page); fio.blk_addr = dn->data_blkaddr; write_data_page(page, dn, &fio); - update_extent_cache(dn); + f2fs_update_extent_cache(dn); f2fs_wait_on_page_writeback(page, DATA); if (dirty) inode_dec_dirty_pages(dn->inode); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 41afb9534bbd..188a03449c5c 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -401,7 +401,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, /* write dummy data page */ recover_data_page(sbi, NULL, &sum, src, dest); dn.data_blkaddr = dest; - 
update_extent_cache(&dn); + f2fs_update_extent_cache(&dn); recovered++; } dn.ofs_in_node++; -- cgit v1.2.3 From 13054c548a1c9e78f8f8ba5f134909cb56152285 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Feb 2015 17:52:58 +0800 Subject: f2fs: introduce infra macro and data structure of rb-tree extent cache Introduce infra macro and data structure for rb-tree based extent cache: Macros: * EXT_TREE_VEC_SIZE: indicate vector size for gang lookup in extent tree. * F2FS_MIN_EXTENT_LEN: indicate minimum length of extent managed in cache. * EXTENT_CACHE_SHRINK_NUMBER: indicate number of extent in cache will be shrunk. Basic data structures for extent cache: * struct extent_tree: extent tree entry per inode. * struct extent_node: extent info node linked in extent tree. Besides, adding new extent cache related fields in f2fs_sb_info. Signed-off-by: Chao Yu Signed-off-by: Changman Lee Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 36 ++++++++++++++++++++++++++++++++---- fs/f2fs/node.h | 1 + 2 files changed, 33 insertions(+), 4 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f3fbfa91de8e..a214cce9fe70 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -273,13 +273,33 @@ enum { #define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */ +/* vector size for gang look-up from extent cache that consists of radix tree */ +#define EXT_TREE_VEC_SIZE 64 + /* for in-memory extent cache entry */ -#define F2FS_MIN_EXTENT_LEN 16 /* minimum extent length */ +#define F2FS_MIN_EXTENT_LEN 64 /* minimum extent length */ + +/* number of extent info in extent cache we try to shrink */ +#define EXTENT_CACHE_SHRINK_NUMBER 128 struct extent_info { - unsigned int fofs; /* start offset in a file */ - u32 blk; /* start block address of the extent */ - unsigned int len; /* length of the extent */ + unsigned int fofs; /* start offset in a file */ + u32 blk; /* start block address of the extent */ + unsigned int len; /* length of the extent */ +}; + +struct 
extent_node { + struct rb_node rb_node; /* rb node located in rb-tree */ + struct list_head list; /* node in global extent list of sbi */ + struct extent_info ei; /* extent info */ +}; + +struct extent_tree { + nid_t ino; /* inode number */ + struct rb_root root; /* root of extent info rb-tree */ + rwlock_t lock; /* protect extent info rb-tree */ + atomic_t refcount; /* reference count of rb-tree */ + unsigned int count; /* # of extent node in rb-tree*/ }; /* @@ -567,6 +587,14 @@ struct f2fs_sb_info { struct list_head dir_inode_list; /* dir inode list */ spinlock_t dir_inode_lock; /* for dir inode list lock */ + /* for extent tree cache */ + struct radix_tree_root extent_tree_root;/* cache extent cache entries */ + struct rw_semaphore extent_tree_lock; /* locking extent radix tree */ + struct list_head extent_list; /* lru list for shrinker */ + spinlock_t extent_lock; /* locking extent lru list */ + int total_ext_tree; /* extent tree count */ + atomic_t total_ext_node; /* extent info count */ + /* basic filesystem units */ unsigned int log_sectors_per_block; /* log2 sectors per block */ unsigned int log_blocksize; /* log2 block size */ diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index f405bbf2435a..c56026f1725c 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -120,6 +120,7 @@ enum mem_type { NAT_ENTRIES, /* indicates the cached nat entry */ DIRTY_DENTS, /* indicates dirty dentry pages */ INO_ENTRIES, /* indicates inode entries */ + EXTENT_CACHE, /* indicates extent cache */ BASE_CHECK, /* check kernel status */ }; -- cgit v1.2.3 From 429511cdf8b3a9b894b914f282a9293df405a449 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Feb 2015 17:54:31 +0800 Subject: f2fs: add core functions for rb-tree extent cache This patch adds core functions including slab cache init function and init/lookup/update/shrink/destroy function for rb-tree based extent cache. 
Thanks to Jaegeuk Kim and Changman Lee, who gave many suggestions about the detailed design and implementation of the extent cache. Todo: * register rb-based extent cache shrink with mm shrink interface. v2: o move set_extent_info and __is_{extent,back,front}_mergeable into f2fs.h. o introduce __{attach,detach}_extent_node for code readability. o add cond_resched() when failing to invoke kmem_cache_alloc/radix_tree_insert. o fix some coding style and typo issues. v3: o fix oops due to using an unassigned pointer. o use list_del to remove extent node in shrink list. Signed-off-by: Chao Yu Signed-off-by: Changman Lee [Jaegeuk Kim: add static for some functions and declare in f2fs.h] Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 411 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/f2fs/f2fs.h | 32 +++++ fs/f2fs/node.c | 9 +- 3 files changed, 451 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3dbaa475fce4..f52745346205 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -25,6 +25,9 @@ #include "trace.h" #include +static struct kmem_cache *extent_tree_slab; +static struct kmem_cache *extent_node_slab; + static void f2fs_read_end_io(struct bio *bio, int err) { struct bio_vec *bvec; @@ -366,6 +369,383 @@ end_update: return need_update; } +static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi, + struct extent_tree *et, struct extent_info *ei, + struct rb_node *parent, struct rb_node **p) +{ + struct extent_node *en; + + en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC); + if (!en) + return NULL; + + en->ei = *ei; + INIT_LIST_HEAD(&en->list); + + rb_link_node(&en->rb_node, parent, p); + rb_insert_color(&en->rb_node, &et->root); + et->count++; + atomic_inc(&sbi->total_ext_node); + return en; +} + +static void __detach_extent_node(struct f2fs_sb_info *sbi, + struct extent_tree *et, struct extent_node *en) +{ + rb_erase(&en->rb_node, &et->root); + et->count--; + atomic_dec(&sbi->total_ext_node); +} +
+static struct extent_node *__lookup_extent_tree(struct extent_tree *et, + unsigned int fofs) +{ + struct rb_node *node = et->root.rb_node; + struct extent_node *en; + + while (node) { + en = rb_entry(node, struct extent_node, rb_node); + + if (fofs < en->ei.fofs) + node = node->rb_left; + else if (fofs >= en->ei.fofs + en->ei.len) + node = node->rb_right; + else + return en; + } + return NULL; +} + +static struct extent_node *__try_back_merge(struct f2fs_sb_info *sbi, + struct extent_tree *et, struct extent_node *en) +{ + struct extent_node *prev; + struct rb_node *node; + + node = rb_prev(&en->rb_node); + if (!node) + return NULL; + + prev = rb_entry(node, struct extent_node, rb_node); + if (__is_back_mergeable(&en->ei, &prev->ei)) { + en->ei.fofs = prev->ei.fofs; + en->ei.blk = prev->ei.blk; + en->ei.len += prev->ei.len; + __detach_extent_node(sbi, et, prev); + return prev; + } + return NULL; +} + +static struct extent_node *__try_front_merge(struct f2fs_sb_info *sbi, + struct extent_tree *et, struct extent_node *en) +{ + struct extent_node *next; + struct rb_node *node; + + node = rb_next(&en->rb_node); + if (!node) + return NULL; + + next = rb_entry(node, struct extent_node, rb_node); + if (__is_front_mergeable(&en->ei, &next->ei)) { + en->ei.len += next->ei.len; + __detach_extent_node(sbi, et, next); + return next; + } + return NULL; +} + +static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi, + struct extent_tree *et, struct extent_info *ei, + struct extent_node **den) +{ + struct rb_node **p = &et->root.rb_node; + struct rb_node *parent = NULL; + struct extent_node *en; + + while (*p) { + parent = *p; + en = rb_entry(parent, struct extent_node, rb_node); + + if (ei->fofs < en->ei.fofs) { + if (__is_front_mergeable(ei, &en->ei)) { + f2fs_bug_on(sbi, !den); + en->ei.fofs = ei->fofs; + en->ei.blk = ei->blk; + en->ei.len += ei->len; + *den = __try_back_merge(sbi, et, en); + return en; + } + p = &(*p)->rb_left; + } else if (ei->fofs >= 
en->ei.fofs + en->ei.len) { + if (__is_back_mergeable(ei, &en->ei)) { + f2fs_bug_on(sbi, !den); + en->ei.len += ei->len; + *den = __try_front_merge(sbi, et, en); + return en; + } + p = &(*p)->rb_right; + } else { + f2fs_bug_on(sbi, 1); + } + } + + return __attach_extent_node(sbi, et, ei, parent, p); +} + +static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, + struct extent_tree *et, bool free_all) +{ + struct rb_node *node, *next; + struct extent_node *en; + unsigned int count = et->count; + + node = rb_first(&et->root); + while (node) { + next = rb_next(node); + en = rb_entry(node, struct extent_node, rb_node); + + if (free_all) { + spin_lock(&sbi->extent_lock); + if (!list_empty(&en->list)) + list_del_init(&en->list); + spin_unlock(&sbi->extent_lock); + } + + if (free_all || list_empty(&en->list)) { + __detach_extent_node(sbi, et, en); + kmem_cache_free(extent_node_slab, en); + } + node = next; + } + + return count - et->count; +} + +static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, + struct extent_info *ei) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct extent_tree *et; + struct extent_node *en; + + if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) + return false; + + down_read(&sbi->extent_tree_lock); + et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino); + if (!et) { + up_read(&sbi->extent_tree_lock); + return false; + } + atomic_inc(&et->refcount); + up_read(&sbi->extent_tree_lock); + + read_lock(&et->lock); + en = __lookup_extent_tree(et, pgofs); + if (en) { + *ei = en->ei; + spin_lock(&sbi->extent_lock); + if (!list_empty(&en->list)) + list_move_tail(&en->list, &sbi->extent_list); + spin_unlock(&sbi->extent_lock); + stat_inc_read_hit(sbi->sb); + } + stat_inc_total_hit(sbi->sb); + read_unlock(&et->lock); + + atomic_dec(&et->refcount); + return en ? 
true : false; +} + +static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs, + block_t blkaddr) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + nid_t ino = inode->i_ino; + struct extent_tree *et; + struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL; + struct extent_node *den = NULL; + struct extent_info ei, dei; + unsigned int endofs; + + if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) + return; + + down_write(&sbi->extent_tree_lock); + et = radix_tree_lookup(&sbi->extent_tree_root, ino); + if (!et) { + et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS); + f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et); + memset(et, 0, sizeof(struct extent_tree)); + et->ino = ino; + et->root = RB_ROOT; + rwlock_init(&et->lock); + atomic_set(&et->refcount, 0); + et->count = 0; + sbi->total_ext_tree++; + } + atomic_inc(&et->refcount); + up_write(&sbi->extent_tree_lock); + + write_lock(&et->lock); + + /* 1. lookup and remove existing extent info in cache */ + en = __lookup_extent_tree(et, fofs); + if (!en) + goto update_extent; + + dei = en->ei; + __detach_extent_node(sbi, et, en); + + /* 2. if extent can be split more, split and insert the left part */ + if (dei.len > 1) { + /* insert left part of split extent into cache */ + if (fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN) { + set_extent_info(&ei, dei.fofs, dei.blk, + fofs - dei.fofs); + en1 = __insert_extent_tree(sbi, et, &ei, NULL); + } + + /* insert right part of split extent into cache */ + endofs = dei.fofs + dei.len - 1; + if (endofs - fofs >= F2FS_MIN_EXTENT_LEN) { + set_extent_info(&ei, fofs + 1, + fofs - dei.fofs + dei.blk, endofs - fofs); + en2 = __insert_extent_tree(sbi, et, &ei, NULL); + } + } + +update_extent: + /* 3. update extent in extent cache */ + if (blkaddr) { + set_extent_info(&ei, fofs, blkaddr, 1); + en3 = __insert_extent_tree(sbi, et, &ei, &den); + } + + /* 4. 
update in global extent list */ + spin_lock(&sbi->extent_lock); + if (en && !list_empty(&en->list)) + list_del(&en->list); + /* + * en1 and en2 split from en, they will become more and more smaller + * fragments after splitting several times. So if the length is smaller + * than F2FS_MIN_EXTENT_LEN, we will not add them into extent tree. + */ + if (en1) + list_add_tail(&en1->list, &sbi->extent_list); + if (en2) + list_add_tail(&en2->list, &sbi->extent_list); + if (en3) { + if (list_empty(&en3->list)) + list_add_tail(&en3->list, &sbi->extent_list); + else + list_move_tail(&en3->list, &sbi->extent_list); + } + if (den && !list_empty(&den->list)) + list_del(&den->list); + spin_unlock(&sbi->extent_lock); + + /* 5. release extent node */ + if (en) + kmem_cache_free(extent_node_slab, en); + if (den) + kmem_cache_free(extent_node_slab, den); + + write_unlock(&et->lock); + atomic_dec(&et->refcount); +} + +void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) +{ + struct extent_tree *treevec[EXT_TREE_VEC_SIZE]; + struct extent_node *en, *tmp; + unsigned long ino = F2FS_ROOT_INO(sbi); + struct radix_tree_iter iter; + void **slot; + unsigned int found; + + if (available_free_memory(sbi, EXTENT_CACHE)) + return; + + spin_lock(&sbi->extent_lock); + list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) { + if (!nr_shrink--) + break; + list_del_init(&en->list); + } + spin_unlock(&sbi->extent_lock); + + down_read(&sbi->extent_tree_lock); + while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root, + (void **)treevec, ino, EXT_TREE_VEC_SIZE))) { + unsigned i; + + ino = treevec[found - 1]->ino + 1; + for (i = 0; i < found; i++) { + struct extent_tree *et = treevec[i]; + + atomic_inc(&et->refcount); + write_lock(&et->lock); + __free_extent_tree(sbi, et, false); + write_unlock(&et->lock); + atomic_dec(&et->refcount); + } + } + up_read(&sbi->extent_tree_lock); + + down_write(&sbi->extent_tree_lock); + radix_tree_for_each_slot(slot, &sbi->extent_tree_root, 
&iter, + F2FS_ROOT_INO(sbi)) { + struct extent_tree *et = (struct extent_tree *)*slot; + + if (!atomic_read(&et->refcount) && !et->count) { + radix_tree_delete(&sbi->extent_tree_root, et->ino); + kmem_cache_free(extent_tree_slab, et); + sbi->total_ext_tree--; + } + } + up_write(&sbi->extent_tree_lock); +} + +void f2fs_destroy_extent_tree(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct extent_tree *et; + + down_read(&sbi->extent_tree_lock); + et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino); + if (!et) { + up_read(&sbi->extent_tree_lock); + goto out; + } + atomic_inc(&et->refcount); + up_read(&sbi->extent_tree_lock); + + /* free all extent info belong to this extent tree */ + write_lock(&et->lock); + __free_extent_tree(sbi, et, true); + write_unlock(&et->lock); + + atomic_dec(&et->refcount); + + /* try to find and delete extent tree entry in radix tree */ + down_write(&sbi->extent_tree_lock); + et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino); + if (!et) { + up_write(&sbi->extent_tree_lock); + goto out; + } + f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count); + radix_tree_delete(&sbi->extent_tree_root, inode->i_ino); + kmem_cache_free(extent_tree_slab, et); + sbi->total_ext_tree--; + up_write(&sbi->extent_tree_lock); +out: + return; +} + static bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, struct extent_info *ei) { @@ -1256,6 +1636,37 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) return generic_block_bmap(mapping, block, get_data_block); } +void init_extent_cache_info(struct f2fs_sb_info *sbi) +{ + INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO); + init_rwsem(&sbi->extent_tree_lock); + INIT_LIST_HEAD(&sbi->extent_list); + spin_lock_init(&sbi->extent_lock); + sbi->total_ext_tree = 0; + atomic_set(&sbi->total_ext_node, 0); +} + +int __init create_extent_cache(void) +{ + extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree", + sizeof(struct 
extent_tree)); + if (!extent_tree_slab) + return -ENOMEM; + extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node", + sizeof(struct extent_node)); + if (!extent_node_slab) { + kmem_cache_destroy(extent_tree_slab); + return -ENOMEM; + } + return 0; +} + +void destroy_extent_cache(void) +{ + kmem_cache_destroy(extent_node_slab); + kmem_cache_destroy(extent_tree_slab); +} + const struct address_space_operations f2fs_dblock_aops = { .readpage = f2fs_read_data_page, .readpages = f2fs_read_data_pages, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a214cce9fe70..a0af10dad871 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -352,6 +352,33 @@ static inline void set_raw_extent(struct extent_info *ext, i_ext->len = cpu_to_le32(ext->len); } +static inline void set_extent_info(struct extent_info *ei, unsigned int fofs, + u32 blk, unsigned int len) +{ + ei->fofs = fofs; + ei->blk = blk; + ei->len = len; +} + +static inline bool __is_extent_mergeable(struct extent_info *back, + struct extent_info *front) +{ + return (back->fofs + back->len == front->fofs && + back->blk + back->len == front->blk); +} + +static inline bool __is_back_mergeable(struct extent_info *cur, + struct extent_info *back) +{ + return __is_extent_mergeable(back, cur); +} + +static inline bool __is_front_mergeable(struct extent_info *cur, + struct extent_info *front) +{ + return __is_extent_mergeable(cur, front); +} + struct f2fs_nm_info { block_t nat_blkaddr; /* base disk address of NAT */ nid_t max_nid; /* maximum possible node ids */ @@ -1541,12 +1568,17 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, struct f2fs_io_info *); int reserve_new_block(struct dnode_of_data *); int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); +void f2fs_shrink_extent_tree(struct f2fs_sb_info *, int); +void f2fs_destroy_extent_tree(struct inode *); void f2fs_update_extent_cache(struct dnode_of_data *); struct page *find_data_page(struct inode *, pgoff_t, bool); struct page 
*get_lock_data_page(struct inode *, pgoff_t); struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); int do_write_data_page(struct page *, struct f2fs_io_info *); int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); +void init_extent_cache_info(struct f2fs_sb_info *); +int __init create_extent_cache(void); +void destroy_extent_cache(void); void f2fs_invalidate_page(struct page *, unsigned int, unsigned int); int f2fs_release_page(struct page *, gfp_t); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 97bd9d3db882..28423697e97a 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -41,7 +41,9 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) /* only uses low memory */ avail_ram = val.totalram - val.totalhigh; - /* give 25%, 25%, 50%, 50% memory for each components respectively */ + /* + * give 25%, 25%, 50%, 50%, 50% memory for each components respectively + */ if (type == FREE_NIDS) { mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> PAGE_CACHE_SHIFT; @@ -62,6 +64,11 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) mem_size += (sbi->im[i].ino_num * sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); + } else if (type == EXTENT_CACHE) { + mem_size = (sbi->total_ext_tree * sizeof(struct extent_tree) + + atomic_read(&sbi->total_ext_node) * + sizeof(struct extent_node)) >> PAGE_CACHE_SHIFT; + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); } else { if (sbi->sb->s_bdi->dirty_exceeded) return false; -- cgit v1.2.3 From 8967215954a50947fbd9c2996232548bf6dd9062 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Feb 2015 17:55:51 +0800 Subject: f2fs: add a mount option for rb-tree extent cache This patch adds a mount option 'extent_cache' in f2fs. It is try to use a rb-tree based extent cache to cache more mapping information with less memory if this option is set, otherwise we will use the original one extent info cache. 
Suggested-by: Changman Lee Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 4 ++++ fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 7 +++++++ 3 files changed, 12 insertions(+) (limited to 'fs/f2fs') diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index dac11d7fef27..48e2123c2582 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -140,6 +140,10 @@ nobarrier This option can be used if underlying storage guarantees fastboot This option is used when a system wants to reduce mount time as much as possible, even though normal performance can be sacrificed. +extent_cache Enable an extent cache based on rb-tree, it can cache + as many as extent which map between contiguous logical + address and physical address per inode, resulting in + increasing the cache hit ratio. ================================================================================ DEBUGFS ENTRIES diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a0af10dad871..759fbaaa6ca1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -50,6 +50,7 @@ #define F2FS_MOUNT_FLUSH_MERGE 0x00000400 #define F2FS_MOUNT_NOBARRIER 0x00000800 #define F2FS_MOUNT_FASTBOOT 0x00001000 +#define F2FS_MOUNT_EXTENT_CACHE 0x00002000 #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4e9aad451593..3ce4d7c9a29d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -57,6 +57,7 @@ enum { Opt_flush_merge, Opt_nobarrier, Opt_fastboot, + Opt_extent_cache, Opt_err, }; @@ -78,6 +79,7 @@ static match_table_t f2fs_tokens = { {Opt_flush_merge, "flush_merge"}, {Opt_nobarrier, "nobarrier"}, {Opt_fastboot, "fastboot"}, + {Opt_extent_cache, "extent_cache"}, {Opt_err, NULL}, }; @@ -367,6 +369,9 @@ static int parse_options(struct super_block *sb, char *options) case Opt_fastboot: set_opt(sbi, 
FASTBOOT); break; + case Opt_extent_cache: + set_opt(sbi, EXTENT_CACHE); + break; default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -599,6 +604,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",nobarrier"); if (test_opt(sbi, FASTBOOT)) seq_puts(seq, ",fastboot"); + if (test_opt(sbi, EXTENT_CACHE)) + seq_puts(seq, ",extent_cache"); seq_printf(seq, ",active_logs=%u", sbi->active_logs); return 0; -- cgit v1.2.3 From 1dcc336b02bff3d38f173feac55a2b6c25a5fb54 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Feb 2015 17:57:31 +0800 Subject: f2fs: enable rb-tree extent cache This patch enables the rb-tree based extent cache in f2fs. When we mount with "-o extent_cache", f2fs will try to add as many recently accessed page-block mappings as possible into the rb-tree based extent cache, instead of the original single extent info cache. In this way, f2fs can provide a more effective cache between the dnode page cache and the disk. It yields a high cache hit ratio with less memory when the dnode page cache is reclaimed in a low-memory environment. Storage: Sandisk sd card 64g 1.append write file (offset: 0, size: 128M); 2.override write file (offset: 2M, size: 1M); 3.override write file (offset: 4M, size: 1M); ... 4.override write file (offset: 48M, size: 1M); ... 5.override write file (offset: 112M, size: 1M); 6.sync 7.echo 3 > /proc/sys/vm/drop_caches 8.read file (size:128M, unit: 4k, count: 32768) (time dd if=/mnt/f2fs/128m bs=4k count=32768) Extent Hit Ratio (before -> patched): 121 / 1071 -> 1071 / 1071 Performance (before -> patched): real 0m37.051s -> 0m35.556s, user 0m0.040s -> 0m0.026s, sys 0m2.990s -> 0m2.251s Memory Cost (before -> patched): Tree Count: 0 -> 1 (size: 24 bytes), Node Count: 0 -> 45 (size: 1440 bytes) v3: o retest and give more details of the test results. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 13 +++++++++++++ fs/f2fs/inode.c | 1 + fs/f2fs/segment.c | 3 +++ fs/f2fs/super.c | 9 ++++++++- 4 files changed, 25 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f52745346205..acdc0767f77c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -666,6 +666,9 @@ void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) void **slot; unsigned int found; + if (!test_opt(sbi, EXTENT_CACHE)) + return; + if (available_free_memory(sbi, EXTENT_CACHE)) return; @@ -714,6 +717,9 @@ void f2fs_destroy_extent_tree(struct inode *inode) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree *et; + if (!test_opt(sbi, EXTENT_CACHE)) + return; + down_read(&sbi->extent_tree_lock); et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino); if (!et) { @@ -749,6 +755,9 @@ out: static bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, struct extent_info *ei) { + if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE)) + return f2fs_lookup_extent_tree(inode, pgofs, ei); + return lookup_extent_info(inode, pgofs, ei); } @@ -765,6 +774,10 @@ void f2fs_update_extent_cache(struct dnode_of_data *dn) fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + dn->ofs_in_node; + if (test_opt(F2FS_I_SB(dn->inode), EXTENT_CACHE)) + return f2fs_update_extent_tree(dn->inode, fofs, + dn->data_blkaddr); + if (update_extent_info(dn->inode, fofs, dn->data_blkaddr)) sync_inode_page(dn); } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 28dd26a50276..b5087443be41 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -335,6 +335,7 @@ void f2fs_evict_inode(struct inode *inode) no_delete: stat_dec_inline_dir(inode); stat_dec_inline_inode(inode); + f2fs_destroy_extent_tree(inode); invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); if (xnid) invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); diff --git a/fs/f2fs/segment.c 
b/fs/f2fs/segment.c index daee4ab913da..29c04edcb843 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -277,6 +277,9 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi) void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) { + /* try to shrink extent cache when there is no enough memory */ + f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER); + /* check the # of cached NAT entries and prefree segments */ if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) || excess_prefree_segs(sbi) || diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 3ce4d7c9a29d..e649f21121d2 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1079,6 +1079,8 @@ try_onemore: INIT_LIST_HEAD(&sbi->dir_inode_list); spin_lock_init(&sbi->dir_inode_lock); + init_extent_cache_info(sbi); + init_ino_entry_info(sbi); /* setup f2fs internal modules */ @@ -1285,10 +1287,13 @@ static int __init init_f2fs_fs(void) err = create_checkpoint_caches(); if (err) goto free_segment_manager_caches; + err = create_extent_cache(); + if (err) + goto free_checkpoint_caches; f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); if (!f2fs_kset) { err = -ENOMEM; - goto free_checkpoint_caches; + goto free_extent_cache; } err = register_filesystem(&f2fs_fs_type); if (err) @@ -1299,6 +1304,8 @@ static int __init init_f2fs_fs(void) free_kset: kset_unregister(f2fs_kset); +free_extent_cache: + destroy_extent_cache(); free_checkpoint_caches: destroy_checkpoint_caches(); free_segment_manager_caches: -- cgit v1.2.3 From 4bf6fd9fedf14a8bab75f3710cd63e7bc8928014 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Feb 2015 17:58:28 +0800 Subject: f2fs: show extent tree, node stat info in debugfs This patch add and show stat info of total memory footprint for extent tree,node in debugfs. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 7 +++++++ fs/f2fs/f2fs.h | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index e671373cc8ab..1006290ed6b8 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -35,6 +35,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) /* validation check of the segment numbers */ si->hit_ext = sbi->read_hit_ext; si->total_ext = sbi->total_hit_ext; + si->ext_tree = sbi->total_ext_tree; + si->ext_node = atomic_read(&sbi->total_ext_node); si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); si->ndirty_dirs = sbi->n_dirty_dirs; @@ -185,6 +187,9 @@ get_cache: si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry); for (i = 0; i <= UPDATE_INO; i++) si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); + si->cache_mem += sbi->total_ext_tree * sizeof(struct extent_tree); + si->cache_mem += atomic_read(&sbi->total_ext_node) * + sizeof(struct extent_node); si->page_mem = 0; npages = NODE_MAPPING(sbi)->nrpages; @@ -267,6 +272,8 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - node blocks : %d\n", si->node_blks); seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", si->hit_ext, si->total_ext); + seq_printf(s, "\nExtent Tree Count: %d\n", si->ext_tree); + seq_printf(s, "\nExtent Node Count: %d\n", si->ext_node); seq_puts(s, "\nBalancing F2FS Async:\n"); seq_printf(s, " - inmem: %4d, wb: %4d\n", si->inmem_pages, si->wb_pages); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 759fbaaa6ca1..85ce9b30b539 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1607,7 +1607,7 @@ struct f2fs_stat_info { struct f2fs_sb_info *sbi; int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; int main_area_segs, main_area_sections, main_area_zones; - int hit_ext, total_ext; + int hit_ext, total_ext, ext_tree, ext_node; int ndirty_node, ndirty_dent, 
ndirty_dirs, ndirty_meta; int nats, dirty_nats, sits, dirty_sits, fnids; int total_count, utilization; -- cgit v1.2.3 From 1ec4610c522cc51219cc022ef120a928828fa934 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Feb 2015 17:59:59 +0800 Subject: f2fs: add trace for rb-tree extent cache ops This patch adds trace for lookup/update/shrink/destroy ops in rb-tree extent cache. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 16 +++++- include/trace/events/f2fs.h | 134 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 148 insertions(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index acdc0767f77c..d7ff4ca5be18 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -533,6 +533,8 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) return false; + trace_f2fs_lookup_extent_tree_start(inode, pgofs); + down_read(&sbi->extent_tree_lock); et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino); if (!et) { @@ -555,6 +557,8 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, stat_inc_total_hit(sbi->sb); read_unlock(&et->lock); + trace_f2fs_lookup_extent_tree_end(inode, pgofs, en); + atomic_dec(&et->refcount); return en ? 
true : false; } @@ -573,6 +577,8 @@ static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs, if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) return; + trace_f2fs_update_extent_tree(inode, fofs, blkaddr); + down_write(&sbi->extent_tree_lock); et = radix_tree_lookup(&sbi->extent_tree_root, ino); if (!et) { @@ -665,6 +671,7 @@ void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) struct radix_tree_iter iter; void **slot; unsigned int found; + unsigned int node_cnt = 0, tree_cnt = 0; if (!test_opt(sbi, EXTENT_CACHE)) return; @@ -691,7 +698,7 @@ void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) atomic_inc(&et->refcount); write_lock(&et->lock); - __free_extent_tree(sbi, et, false); + node_cnt += __free_extent_tree(sbi, et, false); write_unlock(&et->lock); atomic_dec(&et->refcount); } @@ -707,15 +714,19 @@ void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) radix_tree_delete(&sbi->extent_tree_root, et->ino); kmem_cache_free(extent_tree_slab, et); sbi->total_ext_tree--; + tree_cnt++; } } up_write(&sbi->extent_tree_lock); + + trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt); } void f2fs_destroy_extent_tree(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree *et; + unsigned int node_cnt = 0; if (!test_opt(sbi, EXTENT_CACHE)) return; @@ -731,7 +742,7 @@ void f2fs_destroy_extent_tree(struct inode *inode) /* free all extent info belong to this extent tree */ write_lock(&et->lock); - __free_extent_tree(sbi, et, true); + node_cnt = __free_extent_tree(sbi, et, true); write_unlock(&et->lock); atomic_dec(&et->refcount); @@ -749,6 +760,7 @@ void f2fs_destroy_extent_tree(struct inode *inode) sbi->total_ext_tree--; up_write(&sbi->extent_tree_lock); out: + trace_f2fs_destroy_extent_tree(inode, node_cnt); return; } diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 5422dbfaf97d..09a56dc75f8d 100644 --- a/include/trace/events/f2fs.h +++ 
b/include/trace/events/f2fs.h @@ -1011,6 +1011,140 @@ TRACE_EVENT(f2fs_issue_flush, __entry->nobarrier ? "skip (nobarrier)" : "issue", __entry->flush_merge ? " with flush_merge" : "") ); + +TRACE_EVENT(f2fs_lookup_extent_tree_start, + + TP_PROTO(struct inode *inode, unsigned int pgofs), + + TP_ARGS(inode, pgofs), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(unsigned int, pgofs) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pgofs = pgofs; + ), + + TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u", + show_dev_ino(__entry), + __entry->pgofs) +); + +TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end, + + TP_PROTO(struct inode *inode, unsigned int pgofs, + struct extent_node *en), + + TP_ARGS(inode, pgofs, en), + + TP_CONDITION(en), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(unsigned int, pgofs) + __field(unsigned int, fofs) + __field(u32, blk) + __field(unsigned int, len) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pgofs = pgofs; + __entry->fofs = en->ei.fofs; + __entry->blk = en->ei.blk; + __entry->len = en->ei.len; + ), + + TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, " + "ext_info(fofs: %u, blk: %u, len: %u)", + show_dev_ino(__entry), + __entry->pgofs, + __entry->fofs, + __entry->blk, + __entry->len) +); + +TRACE_EVENT(f2fs_update_extent_tree, + + TP_PROTO(struct inode *inode, unsigned int pgofs, block_t blkaddr), + + TP_ARGS(inode, pgofs, blkaddr), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(unsigned int, pgofs) + __field(u32, blk) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pgofs = pgofs; + __entry->blk = blkaddr; + ), + + TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, blkaddr = %u", + show_dev_ino(__entry), + __entry->pgofs, + __entry->blk) +); + +TRACE_EVENT(f2fs_shrink_extent_tree, + + 
TP_PROTO(struct f2fs_sb_info *sbi, unsigned int node_cnt, + unsigned int tree_cnt), + + TP_ARGS(sbi, node_cnt, tree_cnt), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, node_cnt) + __field(unsigned int, tree_cnt) + ), + + TP_fast_assign( + __entry->dev = sbi->sb->s_dev; + __entry->node_cnt = node_cnt; + __entry->tree_cnt = tree_cnt; + ), + + TP_printk("dev = (%d,%d), shrunk: node_cnt = %u, tree_cnt = %u", + show_dev(__entry), + __entry->node_cnt, + __entry->tree_cnt) +); + +TRACE_EVENT(f2fs_destroy_extent_tree, + + TP_PROTO(struct inode *inode, unsigned int node_cnt), + + TP_ARGS(inode, node_cnt), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(unsigned int, node_cnt) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->node_cnt = node_cnt; + ), + + TP_printk("dev = (%d,%d), ino = %lu, destroyed: node_cnt = %u", + show_dev_ino(__entry), + __entry->node_cnt) +); + #endif /* _TRACE_F2FS_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 62c8af651b37490c18a42c02586fa6a4fb39320a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Feb 2015 18:01:39 +0800 Subject: f2fs: support fast lookup in extent cache This patch adds a fast lookup path for the rb-tree extent cache. It adds a pointer to the most recently accessed extent node, 'cached_en', to the extent tree. On the extent cache lookup path, we first check the last accessed extent node that cached_en points to; if the offset does not hit in this node, we fall back to looking up the extent node in the rb-tree. This way we can sometimes avoid an unnecessary slow lookup in the rb-tree. Note that a side effect of this patch is increased memory cost, because each struct extent_tree now stores an additional pointer. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 19 ++++++++++++++++--- fs/f2fs/f2fs.h | 1 + 2 files changed, 17 insertions(+), 3 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d7ff4ca5be18..08a71ae3ab8d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -395,6 +395,9 @@ static void __detach_extent_node(struct f2fs_sb_info *sbi, rb_erase(&en->rb_node, &et->root); et->count--; atomic_dec(&sbi->total_ext_node); + + if (et->cached_en == en) + et->cached_en = NULL; } static struct extent_node *__lookup_extent_tree(struct extent_tree *et, @@ -403,15 +406,24 @@ static struct extent_node *__lookup_extent_tree(struct extent_tree *et, struct rb_node *node = et->root.rb_node; struct extent_node *en; + if (et->cached_en) { + struct extent_info *cei = &et->cached_en->ei; + + if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) + return et->cached_en; + } + while (node) { en = rb_entry(node, struct extent_node, rb_node); - if (fofs < en->ei.fofs) + if (fofs < en->ei.fofs) { node = node->rb_left; - else if (fofs >= en->ei.fofs + en->ei.len) + } else if (fofs >= en->ei.fofs + en->ei.len) { node = node->rb_right; - else + } else { + et->cached_en = en; return en; + } } return NULL; } @@ -587,6 +599,7 @@ static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs, memset(et, 0, sizeof(struct extent_tree)); et->ino = ino; et->root = RB_ROOT; + et->cached_en = NULL; rwlock_init(&et->lock); atomic_set(&et->refcount, 0); et->count = 0; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 85ce9b30b539..08fc7e0d5e4a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -298,6 +298,7 @@ struct extent_node { struct extent_tree { nid_t ino; /* inode number */ struct rb_root root; /* root of extent info rb-tree */ + struct extent_node *cached_en; /* recently accessed extent node */ rwlock_t lock; /* protect extent info rb-tree */ atomic_t refcount; /* reference count of rb-tree */ unsigned int count; /* # of extent node in 
rb-tree*/ -- cgit v1.2.3 From 91c5d9bce7fef638add2cd36676e157354784d0f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Feb 2015 18:02:44 +0800 Subject: f2fs: switch to check FI_NO_EXTENT in f2fs_{lookup,update}_extent_cache This patch switches to checking FI_NO_EXTENT in f2fs_{lookup,update}_extent_cache instead of in f2fs_{lookup,update}_extent_tree or {lookup,update}_extent_info. No functional change in this patch. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 08a71ae3ab8d..ad99b7a295b4 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -273,9 +273,6 @@ static bool lookup_extent_info(struct inode *inode, pgoff_t pgofs, pgoff_t start_fofs, end_fofs; block_t start_blkaddr; - if (is_inode_flag_set(fi, FI_NO_EXTENT)) - return false; - read_lock(&fi->ext_lock); if (fi->ext.len == 0) { read_unlock(&fi->ext_lock); @@ -306,9 +303,6 @@ static bool update_extent_info(struct inode *inode, pgoff_t fofs, block_t start_blkaddr, end_blkaddr; int need_update = true; - if (is_inode_flag_set(fi, FI_NO_EXTENT)) - return false; - write_lock(&fi->ext_lock); start_fofs = fi->ext.fofs; @@ -542,9 +536,6 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, struct extent_tree *et; struct extent_node *en; - if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) - return false; - trace_f2fs_lookup_extent_tree_start(inode, pgofs); down_read(&sbi->extent_tree_lock); @@ -586,9 +577,6 @@ static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs, struct extent_info ei, dei; unsigned int endofs; - if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) - return; - trace_f2fs_update_extent_tree(inode, fofs, blkaddr); down_write(&sbi->extent_tree_lock); @@ -780,6 +768,9 @@ out: static bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, struct extent_info *ei) { + if 
(is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) + return false; + if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE)) return f2fs_lookup_extent_tree(inode, pgofs, ei); @@ -796,6 +787,9 @@ void f2fs_update_extent_cache(struct dnode_of_data *dn) /* Update the page address in the parent node */ __set_data_blkaddr(dn); + if (is_inode_flag_set(fi, FI_NO_EXTENT)) + return; + fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + dn->ofs_in_node; -- cgit v1.2.3 From cb3bc9ee06f708f3b615b6ce119b907d6a4c8a5d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Feb 2015 18:03:40 +0800 Subject: f2fs: use extent cache for dir We update extent cache for all user inode of f2fs including dir inode, so this patch gives another chance to try to get physical address of page from extent cache for dir inode. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ad99b7a295b4..ae5c41444e8f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -806,6 +806,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) struct address_space *mapping = inode->i_mapping; struct dnode_of_data dn; struct page *page; + struct extent_info ei; int err; struct f2fs_io_info fio = { .type = DATA, @@ -817,6 +818,11 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) return page; f2fs_put_page(page, 0); + if (f2fs_lookup_extent_cache(inode, index, &ei)) { + dn.data_blkaddr = ei.blk + index - ei.fofs; + goto got_it; + } + set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, index, LOOKUP_NODE); if (err) @@ -830,6 +836,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) if (unlikely(dn.data_blkaddr == NEW_ADDR)) return ERR_PTR(-EINVAL); +got_it: page = grab_cache_page(mapping, index); if (!page) return ERR_PTR(-ENOMEM); @@ -864,6 +871,7 @@ struct page *get_lock_data_page(struct 
inode *inode, pgoff_t index) struct address_space *mapping = inode->i_mapping; struct dnode_of_data dn; struct page *page; + struct extent_info ei; int err; struct f2fs_io_info fio = { .type = DATA, @@ -874,6 +882,11 @@ repeat: if (!page) return ERR_PTR(-ENOMEM); + if (f2fs_lookup_extent_cache(inode, index, &ei)) { + dn.data_blkaddr = ei.blk + index - ei.fofs; + goto got_it; + } + set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, index, LOOKUP_NODE); if (err) { @@ -887,6 +900,7 @@ repeat: return ERR_PTR(-ENOENT); } +got_it: if (PageUptodate(page)) return page; -- cgit v1.2.3 From 1614091dc1551d1ddeda7420df8ab5ad89b04987 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 23 Feb 2015 19:59:52 -0800 Subject: f2fs: remove obsolete code This patch removes obsolete code in which summary variable is not needed. Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 188a03449c5c..b2a92d47c5d7 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -407,8 +407,6 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, dn.ofs_in_node++; } - /* write node page in place */ - set_summary(&sum, dn.nid, 0, 0); if (IS_INODE(dn.node_page)) sync_inode_page(&dn); -- cgit v1.2.3 From 8fbc418f99caf65a53f3921ac2a32a0a0af7aba2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 24 Feb 2015 18:01:46 -0800 Subject: f2fs: avoid wrong error during recovery During the roll-forward recovery, -ENOENT for f2fs_iget can be skipped. So, this error value should not be propagated. 
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index b2a92d47c5d7..6e40874dfbb9 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -212,8 +212,10 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) if (IS_ERR(entry->inode)) { err = PTR_ERR(entry->inode); kmem_cache_free(fsync_entry_slab, entry); - if (err == -ENOENT) + if (err == -ENOENT) { + err = 0; goto next; + } break; } list_add_tail(&entry->list, head); -- cgit v1.2.3 From 1abff93d01eddaab7ccdcf15f8cda4d91cbbcd9a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 8 Jan 2015 19:15:53 -0800 Subject: f2fs: support fs shutdown This patch introduces a generic ioctl for fs shutdown, which was used by xfs. If this shutdown is triggered, filesystem stops any further IOs according to the following options. 1. FS_GOING_DOWN_FULLSYNC : this will flush all the data and dentry blocks, and do checkpoint before shutdown. 2. FS_GOING_DOWN_METASYNC : this will do checkpoint before shutdown. 3. FS_GOING_DOWN_NOSYNC : this will trigger shutdown as is. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 9 +++++++++ fs/f2fs/file.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 08fc7e0d5e4a..51d97f7b77f0 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -217,6 +217,15 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, #define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4) #define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) +/* + * should be same as XFS_IOC_GOINGDOWN. 
+ * Flags for going down operation used by FS_IOC_GOINGDOWN + */ +#define F2FS_IOC_SHUTDOWN _IOR('X', 125, __u32) /* Shutdown */ +#define F2FS_GOING_DOWN_FULLSYNC 0x0 /* going down with full sync */ +#define F2FS_GOING_DOWN_METASYNC 0x1 /* going down with metadata */ +#define F2FS_GOING_DOWN_NOSYNC 0x2 /* going down */ + #if defined(__KERNEL__) && defined(CONFIG_COMPAT) /* * ioctl commands in 32 bit emulation diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index baadaf2afc6c..99cec04fa96e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1029,6 +1029,41 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp) return ret; } +static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct super_block *sb = sbi->sb; + __u32 in; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (get_user(in, (__u32 __user *)arg)) + return -EFAULT; + + switch (in) { + case F2FS_GOING_DOWN_FULLSYNC: + sb = freeze_bdev(sb->s_bdev); + if (sb && !IS_ERR(sb)) { + f2fs_stop_checkpoint(sbi); + thaw_bdev(sb->s_bdev, sb); + } + break; + case F2FS_GOING_DOWN_METASYNC: + /* do checkpoint only */ + f2fs_sync_fs(sb, 1); + f2fs_stop_checkpoint(sbi); + break; + case F2FS_GOING_DOWN_NOSYNC: + f2fs_stop_checkpoint(sbi); + break; + default: + return -EINVAL; + } + return 0; +} + static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -1078,6 +1113,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_release_volatile_write(filp); case F2FS_IOC_ABORT_VOLATILE_WRITE: return f2fs_ioc_abort_volatile_write(filp); + case F2FS_IOC_SHUTDOWN: + return f2fs_ioc_shutdown(filp, arg); case FITRIM: return f2fs_ioc_fitrim(filp, arg); default: -- cgit v1.2.3 From 551414861fbd494d58d50f4750d1d1b7f42b6df1 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 26 Feb 2015 07:57:20 +0800 Subject: f2fs: introduce macro 
__cp_payload This patch introduce macro __cp_payload. Signed-off-by: Wanpeng Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 7 +++---- fs/f2fs/f2fs.h | 7 ++++++- 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 6faffce01869..c7cafd8d522c 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -471,8 +471,7 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi) set_sbi_flag(sbi, SBI_POR_DOING); - start_blk = __start_cp_addr(sbi) + 1 + - le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); + start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi); orphan_blkaddr = __start_sum_addr(sbi) - 1; ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP); @@ -615,7 +614,7 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) unsigned long blk_size = sbi->blocksize; unsigned long long cp1_version = 0, cp2_version = 0; unsigned long long cp_start_blk_no; - unsigned int cp_blks = 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); + unsigned int cp_blks = 1 + __cp_payload(sbi); block_t cp_blk_no; int i; @@ -884,7 +883,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) __u32 crc32 = 0; void *kaddr; int i; - int cp_payload_blks = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); + int cp_payload_blks = __cp_payload(sbi); /* * This avoids to conduct wrong roll-forward operations and uses diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 51d97f7b77f0..7ced71b69f33 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -982,12 +982,17 @@ static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag) return 0; } +static inline block_t __cp_payload(struct f2fs_sb_info *sbi) +{ + return le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); +} + static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); int offset; - if (le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload) > 0) { + if 
(__cp_payload(sbi) > 0) { if (flag == NAT_BITMAP) return &ckpt->sit_nat_version_bitmap; else -- cgit v1.2.3 From 3c64298579a1343cbdf3d2f17adf774a58546aae Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 26 Feb 2015 07:57:21 +0800 Subject: f2fs: fix the number of orphan inode blocks cp_pack_start_sum is calculated in do_checkpoint and is equal to cpu_to_le32(1 + cp_payload_blks + orphan_blocks). The number of orphan inode blocks is take advantage of by recover_orphan_inodes to readahead meta pages and recovery inodes. However, current codes forget to reduce the number of cp payload blocks when calculate the number of orphan inode blocks. This patch fix it. Signed-off-by: Wanpeng Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index c7cafd8d522c..4d5e697d82f6 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -464,7 +464,7 @@ static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) void recover_orphan_inodes(struct f2fs_sb_info *sbi) { - block_t start_blk, orphan_blkaddr, i, j; + block_t start_blk, orphan_blocks, i, j; if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) return; @@ -472,11 +472,11 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi) set_sbi_flag(sbi, SBI_POR_DOING); start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi); - orphan_blkaddr = __start_sum_addr(sbi) - 1; + orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi); - ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP); + ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP); - for (i = 0; i < orphan_blkaddr; i++) { + for (i = 0; i < orphan_blocks; i++) { struct page *page = get_meta_page(sbi, start_blk + i); struct f2fs_orphan_block *orphan_blk; -- cgit v1.2.3 From 2bca1e2388a8a9e8a3db0daf54fcc124516a3e83 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 25 Feb 2015 
19:25:01 -0800 Subject: f2fs: clear page's up-to-date if block was deallocated If page's on-disk block was deallocated, let's remove up-to-date flag to avoid further access with wrong contents. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 6 +++++- fs/f2fs/dir.c | 1 + fs/f2fs/node.c | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ae5c41444e8f..62e31b2aa131 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1259,8 +1259,10 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio) fio->blk_addr = dn.data_blkaddr; /* This page is already truncated */ - if (fio->blk_addr == NULL_ADDR) + if (fio->blk_addr == NULL_ADDR) { + ClearPageUptodate(page); goto out_writepage; + } set_page_writeback(page); @@ -1355,6 +1357,8 @@ done: clear_cold_data(page); out: inode_dec_dirty_pages(inode); + if (err) + ClearPageUptodate(page); unlock_page(page); if (need_balance_fs) f2fs_balance_fs(sbi); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 583896cb5346..590aeef6f85e 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -682,6 +682,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, if (bit_pos == NR_DENTRY_IN_BLOCK) { truncate_hole(dir, page->index, page->index + 1); clear_page_dirty_for_io(page); + ClearPagePrivate(page); ClearPageUptodate(page); inode_dec_dirty_pages(dir); } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 28423697e97a..35a911722d59 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1002,6 +1002,7 @@ static int read_node_page(struct page *page, int rw) get_node_info(sbi, page->index, &ni); if (unlikely(ni.blk_addr == NULL_ADDR)) { + ClearPageUptodate(page); f2fs_put_page(page, 1); return -ENOENT; } @@ -1313,6 +1314,7 @@ static int f2fs_write_node_page(struct page *page, /* This page is already truncated */ if (unlikely(ni.blk_addr == NULL_ADDR)) { + ClearPageUptodate(page); dec_page_count(sbi, F2FS_DIRTY_NODES); unlock_page(page); return 0; 
-- cgit v1.2.3 From b7f204cca4b7155f47e64555614b1f534a53737e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 25 Feb 2015 19:54:48 -0800 Subject: f2fs: check its block allocation to avoid producing wrong dirty pages If a page is cached but its block was deallocated, we don't need to make the page dirty again by gc and truncate_partial_data_page. In that case, it needs to check its block allocation all the time instead of giving up-to-date page. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 62e31b2aa131..389fda772e69 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -813,11 +813,19 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) .rw = sync ? READ_SYNC : READA, }; + /* + * If sync is false, it needs to check its block allocation. + * This is need and triggered by two flows: + * gc and truncate_partial_data_page. + */ + if (!sync) + goto search; + page = find_get_page(mapping, index); if (page && PageUptodate(page)) return page; f2fs_put_page(page, 0); - +search: if (f2fs_lookup_extent_cache(inode, index, &ei)) { dn.data_blkaddr = ei.blk + index - ei.fofs; goto got_it; -- cgit v1.2.3 From 2bda542d59f970b61095bd8205c6c76062f286e3 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 27 Feb 2015 15:56:16 +0800 Subject: f2fs: fix block_ops trace point block operations is used to flush all dirty node and dentry blocks in the page cache and suspend ordinary writing activities, however, there are some facts such like cp error or mount read-only etc which lead to block operations can't be invoked. Current trace point print block_ops start premature even if block_ops doesn't have opportunity to execute. This patch fix it by move block_ops trace point just before block_ops. 
Signed-off-by: Wanpeng Li Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 4d5e697d82f6..e9f30abbb6fe 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1047,8 +1047,6 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); unsigned long long ckpt_ver; - trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops"); - mutex_lock(&sbi->cp_mutex); if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && @@ -1058,6 +1056,9 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) goto out; if (f2fs_readonly(sbi->sb)) goto out; + + trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops"); + if (block_operations(sbi)) goto out; -- cgit v1.2.3 From 2b11a74b21b040270d19fc3b61265c75413c0dc1 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 27 Feb 2015 16:52:50 +0800 Subject: f2fs: don't need to collect dirty sit entries and flush journal when there's no dirty sit entries Don't need to collect dirty sit entries and flush sit journal to sit entries when there's no dirty sit entries. This patch check dirty_sentries earlier just like flush_nat_entries. 
Signed-off-by: Wanpeng Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 29c04edcb843..1e909340842a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1733,6 +1733,9 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) mutex_lock(&curseg->curseg_mutex); mutex_lock(&sit_i->sentry_lock); + if (!sit_i->dirty_sentries) + goto out; + /* * add and account sit entries of dirty bitmap in sit entry * set temporarily @@ -1747,9 +1750,6 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL)) remove_sits_in_journal(sbi); - if (!sit_i->dirty_sentries) - goto out; - /* * there are two steps to flush sit entries: * #1, flush sit entries to journal in current cold data summary block. -- cgit v1.2.3 From 14b42817763903070fa8cbf087369461400cc021 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 27 Feb 2015 17:38:13 +0800 Subject: f2fs: fix max orphan inodes calculation cp_payload is introduced for sit bitmap to support large volume, and it is just after the block of f2fs_checkpoint + nat bitmap, so the first segment should include F2FS_CP_PACKS + NR_CURSEG_TYPE + cp_payload + orphan blocks. However, current max orphan inodes calculation don't consider cp_payload, this patch fix it by reducing the number of cp_payload from total blocks of the first segment when calculate max orphan inodes. 
Signed-off-by: Wanpeng Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index e9f30abbb6fe..81d6d2f0e8ed 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1104,13 +1104,15 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi) } /* - * considering 512 blocks in a segment 8 blocks are needed for cp - * and log segment summaries. Remaining blocks are used to keep - * orphan entries with the limitation one reserved segment - * for cp pack we can have max 1020*504 orphan entries + * considering 512 blocks in a segment 8+cp_payload blocks are + * needed for cp and log segment summaries. Remaining blocks are + * used to keep orphan entries with the limitation one reserved + * segment for cp pack we can have max 1020*(504-cp_payload) + * orphan entries */ sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS - - NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK; + NR_CURSEG_TYPE - __cp_payload(sbi)) * + F2FS_ORPHANS_PER_BLOCK; } int __init create_checkpoint_caches(void) -- cgit v1.2.3 From 7ecebe5e07958a0b7e54a560dbc24144287c6b41 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 27 Feb 2015 13:13:14 +0100 Subject: f2fs: add cond_resched() to sync_dirty_dir_inodes() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In a preempt-off environment with a lot of FS activity (write/delete) I run into a CPU stall: | NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s!
[kworker/u2:2:59] | Modules linked in: | CPU: 0 PID: 59 Comm: kworker/u2:2 Tainted: G W 3.19.0-00010-g10c11c51ffed #153 | Workqueue: writeback bdi_writeback_workfn (flush-179:0) | task: df230000 ti: df23e000 task.ti: df23e000 | PC is at __submit_merged_bio+0x6c/0x110 | LR is at f2fs_submit_merged_bio+0x74/0x80 … | [] (gic_handle_irq) from [] (__irq_svc+0x44/0x5c) | Exception stack(0xdf23fb48 to 0xdf23fb90) | fb40: deef3484 ffff0001 ffff0001 00000027 deef3484 00000000 | fb60: deef3440 00000000 de426000 deef34ec deefc440 df23fbb4 df23fbb8 df23fb90 | fb80: c02191f0 c0218fa0 60000013 ffffffff | [] (__irq_svc) from [] (__submit_merged_bio+0x6c/0x110) | [] (__submit_merged_bio) from [] (f2fs_submit_merged_bio+0x74/0x80) | [] (f2fs_submit_merged_bio) from [] (sync_dirty_dir_inodes+0x70/0x78) | [] (sync_dirty_dir_inodes) from [] (write_checkpoint+0x104/0xc10) | [] (write_checkpoint) from [] (f2fs_sync_fs+0x80/0xbc) | [] (f2fs_sync_fs) from [] (f2fs_balance_fs_bg+0x4c/0x68) | [] (f2fs_balance_fs_bg) from [] (f2fs_write_node_pages+0x40/0x110) | [] (f2fs_write_node_pages) from [] (do_writepages+0x34/0x48) | [] (do_writepages) from [] (__writeback_single_inode+0x50/0x228) | [] (__writeback_single_inode) from [] (writeback_sb_inodes+0x1a8/0x378) | [] (writeback_sb_inodes) from [] (__writeback_inodes_wb+0x90/0xc8) | [] (__writeback_inodes_wb) from [] (wb_writeback+0x1dc/0x28c) | [] (wb_writeback) from [] (bdi_writeback_workfn+0x2ac/0x460) | [] (bdi_writeback_workfn) from [] (process_one_work+0x11c/0x3a4) | [] (process_one_work) from [] (worker_thread+0x17c/0x490) | [] (worker_thread) from [] (kthread+0xec/0x100) | [] (kthread) from [] (ret_from_fork+0x14/0x24) As it turns out, the code loops in sync_dirty_dir_inodes() and waits for others to make progress but since it never leaves the CPU there is no progress made. 
At the time of this stall, there is also a rm process blocked: | rm R running 0 1989 1774 0x00000000 | [] (__schedule) from [] (__cond_resched+0x30/0x4c) | [] (__cond_resched) from [] (_cond_resched+0x4c/0x54) | [] (_cond_resched) from [] (truncate_inode_pages_range+0x1f0/0x5e8) | [] (truncate_inode_pages_range) from [] (truncate_inode_pages+0x28/0x30) | [] (truncate_inode_pages) from [] (truncate_inode_pages_final+0x60/0x64) | [] (truncate_inode_pages_final) from [] (f2fs_evict_inode+0x4c/0x268) | [] (f2fs_evict_inode) from [] (evict+0x94/0x140) | [] (evict) from [] (iput+0xc8/0x134) | [] (iput) from [] (d_delete+0x154/0x180) | [] (d_delete) from [] (vfs_rmdir+0x114/0x12c) | [] (vfs_rmdir) from [] (do_rmdir+0x158/0x168) | [] (do_rmdir) from [] (SyS_unlinkat+0x30/0x3c) | [] (SyS_unlinkat) from [] (ret_fast_syscall+0x0/0x4c) As explained by Jaegeuk Kim: |This inode is the directory (c.f., do_rmdir) causing a infinite loop on |sync_dirty_dir_inodes. |The sync_dirty_dir_inodes tries to flush dirty dentry pages, but if the |inode is under eviction, it submits bios and do it again until eviction |is finished. This patch adds a cond_resched() (as suggested by Jaegeuk) after a BIO is submitted so other thread can make progress. Signed-off-by: Sebastian Andrzej Siewior [Jaegeuk Kim: change fs/f2fs to f2fs in subject as naming convention] Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 81d6d2f0e8ed..53bc32804841 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -795,6 +795,7 @@ retry: * wribacking dentry pages in the freeing inode. 
*/ f2fs_submit_merged_bio(sbi, DATA, WRITE); + cond_resched(); } goto retry; } -- cgit v1.2.3 From b28c3f94933dae663eeca24637bf9ff962bf02dc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 28 Feb 2015 17:23:30 +0800 Subject: f2fs: fix to issue small discard in real-time mode discard Now in f2fs, we share functions and structures for batch mode and real-time mode discard. For real-time mode discard, in shared function add_discard_addrs, we will use uninitialized trim_minlen in struct cp_control to compare with length of contiguous free blocks to decide whether skipping discard fragmented freespace or not, this makes us ignore small discard sometimes. Fix it. Signed-off-by: Chao Yu Reviewed-by : Changman Lee Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1e909340842a..3e80bd69d4a0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -552,7 +552,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); - if (end - start < cpc->trim_minlen) + if (force && end - start < cpc->trim_minlen) continue; __add_discard_entry(sbi, cpc, start, end); -- cgit v1.2.3 From e1235983e385afafb33bab3578bfc83a7d871ce1 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Tue, 23 Dec 2014 08:37:39 +0900 Subject: f2fs: add stat info for moved blocks by background gc This patch is for looking into gc performance of f2fs in detail. 
Signed-off-by: Changman Lee [Jaegeuk Kim: fix build errors] Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 15 ++++++++++----- fs/f2fs/f2fs.h | 23 +++++++++++++++-------- fs/f2fs/gc.c | 6 +++--- 3 files changed, 28 insertions(+), 16 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 1006290ed6b8..f5388f37217e 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -265,11 +265,16 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, "CP calls: %d\n", si->cp_count); seq_printf(s, "GC calls: %d (BG: %d)\n", si->call_count, si->bg_gc); - seq_printf(s, " - data segments : %d\n", si->data_segs); - seq_printf(s, " - node segments : %d\n", si->node_segs); - seq_printf(s, "Try to move %d blocks\n", si->tot_blks); - seq_printf(s, " - data blocks : %d\n", si->data_blks); - seq_printf(s, " - node blocks : %d\n", si->node_blks); + seq_printf(s, " - data segments : %d (%d)\n", + si->data_segs, si->bg_data_segs); + seq_printf(s, " - node segments : %d (%d)\n", + si->node_segs, si->bg_node_segs); + seq_printf(s, "Try to move %d blocks (BG: %d)\n", si->tot_blks, + si->bg_data_blks + si->bg_node_blks); + seq_printf(s, " - data blocks : %d (%d)\n", si->data_blks, + si->bg_data_blks); + seq_printf(s, " - node blocks : %d (%d)\n", si->node_blks, + si->bg_node_blks); seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", si->hit_ext, si->total_ext); seq_printf(s, "\nExtent Tree Count: %d\n", si->ext_tree); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7ced71b69f33..ca7da0464379 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1634,7 +1634,9 @@ struct f2fs_stat_info { int dirty_count, node_pages, meta_pages; int prefree_count, call_count, cp_count; int tot_segs, node_segs, data_segs, free_segs, free_secs; + int bg_node_segs, bg_data_segs; int tot_blks, data_blks, node_blks; + int bg_data_blks, bg_node_blks; int curseg[NR_CURSEG_TYPE]; int cursec[NR_CURSEG_TYPE]; int curzone[NR_CURSEG_TYPE]; @@ -1683,31 +1685,36 @@ static inline struct 
f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) ((sbi)->block_count[(curseg)->alloc_type]++) #define stat_inc_inplace_blocks(sbi) \ (atomic_inc(&(sbi)->inplace_count)) -#define stat_inc_seg_count(sbi, type) \ +#define stat_inc_seg_count(sbi, type, gc_type) \ do { \ struct f2fs_stat_info *si = F2FS_STAT(sbi); \ (si)->tot_segs++; \ - if (type == SUM_TYPE_DATA) \ + if (type == SUM_TYPE_DATA) { \ si->data_segs++; \ - else \ + si->bg_data_segs += (gc_type == BG_GC) ? 1 : 0; \ + } else { \ si->node_segs++; \ + si->bg_node_segs += (gc_type == BG_GC) ? 1 : 0; \ + } \ } while (0) #define stat_inc_tot_blk_count(si, blks) \ (si->tot_blks += (blks)) -#define stat_inc_data_blk_count(sbi, blks) \ +#define stat_inc_data_blk_count(sbi, blks, gc_type) \ do { \ struct f2fs_stat_info *si = F2FS_STAT(sbi); \ stat_inc_tot_blk_count(si, blks); \ si->data_blks += (blks); \ + si->bg_data_blks += (gc_type == BG_GC) ? (blks) : 0; \ } while (0) -#define stat_inc_node_blk_count(sbi, blks) \ +#define stat_inc_node_blk_count(sbi, blks, gc_type) \ do { \ struct f2fs_stat_info *si = F2FS_STAT(sbi); \ stat_inc_tot_blk_count(si, blks); \ si->node_blks += (blks); \ + si->bg_node_blks += (gc_type == BG_GC) ? 
(blks) : 0; \ } while (0) int f2fs_build_stats(struct f2fs_sb_info *); @@ -1729,10 +1736,10 @@ void f2fs_destroy_root_stats(void); #define stat_inc_seg_type(sbi, curseg) #define stat_inc_block_count(sbi, curseg) #define stat_inc_inplace_blocks(sbi) -#define stat_inc_seg_count(si, type) +#define stat_inc_seg_count(sbi, type, gc_type) #define stat_inc_tot_blk_count(si, blks) -#define stat_inc_data_blk_count(si, blks) -#define stat_inc_node_blk_count(sbi, blks) +#define stat_inc_data_blk_count(sbi, blks, gc_type) +#define stat_inc_node_blk_count(sbi, blks, gc_type) static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 76adbc3641f1..ed58211fe79b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -435,7 +435,7 @@ next_step: set_page_dirty(node_page); } f2fs_put_page(node_page, 1); - stat_inc_node_blk_count(sbi, 1); + stat_inc_node_blk_count(sbi, 1, gc_type); } if (initial) { @@ -622,7 +622,7 @@ next_step: if (IS_ERR(data_page)) continue; move_data_page(inode, data_page, gc_type); - stat_inc_data_blk_count(sbi, 1); + stat_inc_data_blk_count(sbi, 1, gc_type); } } @@ -680,7 +680,7 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, } blk_finish_plug(&plug); - stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer))); + stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)), gc_type); stat_inc_call_count(sbi->stat_info); f2fs_put_page(sum_page, 1); -- cgit v1.2.3 From d5669f7b9b0344a7f1e874ac86b9403434381ba8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 27 Feb 2015 13:37:39 -0800 Subject: f2fs: avoid to trigger writepage during POR This patch doesn't make any effect on previous behavior, since f2fs_write_data_page bypasses writing the page during POR. But, the difference is that this patch avoids holding writepages mutex. 
This is to avoid the following false warning, since this can happen only when mount and shutdown are triggered at the same time. ====================================================== [ INFO: possible circular locking dependency detected ] 4.0.0-rc1+ #3 Tainted: G O ------------------------------------------------------- kworker/u8:0/2270 is trying to acquire lock: (&sbi->gc_mutex){+.+.+.}, at: [] f2fs_balance_fs+0x73/0x90 [f2fs] but task is already holding lock: (&sbi->writepages){+.+...}, at: [] f2fs_write_data_pages+0xcb/0x3a0 [f2fs] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&sbi->writepages){+.+...}: [] lock_acquire+0xe1/0x2f0 [] mutex_lock_nested+0x63/0x530 [] f2fs_write_data_pages+0xcb/0x3a0 [f2fs] [] do_writepages+0x21/0x50 [] __writeback_single_inode+0x76/0xbf0 [] writeback_single_inode+0xea/0x1c0 [] write_inode_now+0x95/0xa0 [] iput+0x20b/0x3f0 [] recover_data.constprop.14+0x26b/0xa80 [f2fs] [] recover_fsync_data+0x2b6/0x5e0 [f2fs] [] f2fs_fill_super+0xb24/0xb90 [f2fs] [] mount_bdev+0x1a4/0x1e0 [] f2fs_mount+0x15/0x20 [f2fs] [] mount_fs+0x39/0x180 [] vfs_kern_mount+0x6b/0x160 [] do_mount+0x204/0xbe0 [] SyS_mount+0x8b/0xe0 [] system_call_fastpath+0x16/0x1b -> #1 (&sbi->cp_mutex){+.+...}: [] lock_acquire+0xe1/0x2f0 [] mutex_lock_nested+0x63/0x530 [] write_checkpoint+0x42/0x1230 [f2fs] [] f2fs_sync_fs+0x9d/0x2a0 [f2fs] [] sync_filesystem+0x82/0xb0 [] generic_shutdown_super+0x34/0x100 [] kill_block_super+0x27/0x70 [] kill_f2fs_super+0x20/0x30 [f2fs] [] deactivate_locked_super+0x49/0x80 [] deactivate_super+0x4e/0x70 [] cleanup_mnt+0x43/0x90 [] __cleanup_mnt+0x12/0x20 [] task_work_run+0xc4/0xf0 [] do_notify_resume+0x8d/0xa0 [] int_signal+0x12/0x17 -> #0 (&sbi->gc_mutex){+.+.+.}: [] __lock_acquire+0x1ac6/0x1c90 [] lock_acquire+0xe1/0x2f0 [] mutex_lock_nested+0x63/0x530 [] f2fs_balance_fs+0x73/0x90 [f2fs] [] f2fs_write_data_page+0x348/0x5b0 [f2fs] [] __f2fs_writepage+0x1a/0x50 [f2fs] [] 
write_cache_pages+0x274/0x6f0 [] f2fs_write_data_pages+0xe0/0x3a0 [f2fs] [] do_writepages+0x21/0x50 [] __writeback_single_inode+0x76/0xbf0 [] writeback_sb_inodes+0x32a/0x710 [] __writeback_inodes_wb+0x9f/0xd0 [] wb_writeback+0x3db/0x850 [] bdi_writeback_workfn+0x148/0x980 [] process_one_work+0x1e2/0x840 [] worker_thread+0x121/0x460 [] kthread+0xf8/0x110 [] ret_from_fork+0x7c/0xb0 Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 389fda772e69..0057d4baad33 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1408,6 +1408,10 @@ static int f2fs_write_data_pages(struct address_space *mapping, available_free_memory(sbi, DIRTY_DENTS)) goto skip_write; + /* during POR, we don't need to trigger writepage at all. */ + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) + goto skip_write; + diff = nr_pages_to_write(sbi, DATA, wbc); if (!S_ISDIR(inode->i_mode)) { -- cgit v1.2.3 From cff28521bbbfcd97ba56c515d4be5554c0d26014 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 2 Mar 2015 10:48:56 -0800 Subject: f2fs: clear append/update flags once fsync is done When fsync is done through checkpoint, previous f2fs missed to clear append and update flag. This patch fixes to clear them. This was originally catched by Changman Lee before. Signed-off-by: Changman Lee Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 99cec04fa96e..5e8850edc5cc 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -241,6 +241,8 @@ go_write: * will be used only for fsynced inodes after checkpoint. 
*/ try_to_fix_pino(inode); + clear_inode_flag(fi, FI_APPEND_WRITE); + clear_inode_flag(fi, FI_UPDATE_WRITE); goto out; } sync_nodes: -- cgit v1.2.3 From 76629165913a6d5542bbe735191b021f31dd3798 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 2 Mar 2015 16:28:16 -0800 Subject: f2fs: report -ENOENT for unreached data indices If inode has inline_data, it should report -ENOENT when accessing out-of-bound region. This is used by f2fs_fiemap which treats -ENOENT with no error. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 35a911722d59..4687eae6c116 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -501,7 +501,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) /* if inline_data is set, should not report any block indices */ if (f2fs_has_inline_data(dn->inode) && index) { - err = -EINVAL; + err = -ENOENT; f2fs_put_page(npage[0], 1); goto release_out; } -- cgit v1.2.3 From d7196c5a32caf6ebbe60771ff04ee7d797039d74 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 3 Mar 2015 17:06:55 -0800 Subject: f2fs: relocate Kconfig from misc filesystems The f2fs has been shipped on many smartphone devices during a couple of years. So, it is worth to relocate Kconfig into main page from misc filesystems for developers to choose it more easily. 
Signed-off-by: Jaegeuk Kim --- fs/Kconfig | 2 +- fs/f2fs/Kconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/Kconfig b/fs/Kconfig index ec35851e5b71..011f43365d7b 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -32,6 +32,7 @@ source "fs/gfs2/Kconfig" source "fs/ocfs2/Kconfig" source "fs/btrfs/Kconfig" source "fs/nilfs2/Kconfig" +source "fs/f2fs/Kconfig" config FS_DAX bool "Direct Access (DAX) support" @@ -217,7 +218,6 @@ source "fs/pstore/Kconfig" source "fs/sysv/Kconfig" source "fs/ufs/Kconfig" source "fs/exofs/Kconfig" -source "fs/f2fs/Kconfig" endif # MISC_FILESYSTEMS diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 94e2d2ffabe1..05f0f663f14c 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -1,5 +1,5 @@ config F2FS_FS - tristate "F2FS filesystem support (EXPERIMENTAL)" + tristate "F2FS filesystem support" depends on BLOCK help F2FS is based on Log-structured File System (LFS), which supports -- cgit v1.2.3 From fdf6c8be337af72b066548f325abcbcdf626dea8 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 6 Mar 2015 15:00:54 +0800 Subject: f2fs: fix extent cache memory leak The extent tree/node slab cache is created during f2fs insmod; however, it isn't destroyed during f2fs rmmod. This patch fixes it by destroying the extent tree/node slab cache on f2fs rmmod.
Signed-off-by: Wanpeng Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e649f21121d2..0b8a2d8efa72 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1323,6 +1323,7 @@ static void __exit exit_f2fs_fs(void) remove_proc_entry("fs/f2fs", NULL); f2fs_destroy_root_stats(); unregister_filesystem(&f2fs_fs_type); + destroy_extent_cache(); destroy_checkpoint_caches(); destroy_segment_manager_caches(); destroy_node_manager_caches(); -- cgit v1.2.3 From 7fd97019b8db0668a4eb8b08d707bdce9f51d58e Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 6 Mar 2015 15:00:55 +0800 Subject: f2fs: reduce searching region of segmap when set free section In __set_free we will check whether all segment are free in one section when free one segment, in order to set section to free status. But the searching region of segmap is from start segno to last segno of main area, it's not necessary. So let's just only check all segment bitmap of target section. 
Signed-off-by: Wanpeng Li Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 7fd35111cf62..85d7fa7514b2 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -336,7 +336,8 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) clear_bit(segno, free_i->free_segmap); free_i->free_segments++; - next = find_next_bit(free_i->free_segmap, MAIN_SEGS(sbi), start_segno); + next = find_next_bit(free_i->free_segmap, + start_segno + sbi->segs_per_sec, start_segno); if (next >= start_segno + sbi->segs_per_sec) { clear_bit(secno, free_i->free_secmap); free_i->free_sections++; -- cgit v1.2.3 From b1f73b79d26ffd8966404c268e558d6fec448814 Mon Sep 17 00:00:00 2001 From: Yuan Zhong Date: Sat, 7 Mar 2015 10:07:42 +0000 Subject: f2fs: set the correct place of initializing *res_page The function 'find_in_inline_dir()' takes 'res_page' as an argument. So, we should initialize 'res_page' before calling this function.
Signed-off-by: Yuan Zhong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 590aeef6f85e..a28909d9aea8 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -219,14 +219,14 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, unsigned int max_depth; unsigned int level; + *res_page = NULL; + if (f2fs_has_inline_dentry(dir)) return find_in_inline_dir(dir, child, res_page); if (npages == 0) return NULL; - *res_page = NULL; - name_hash = f2fs_dentry_hash(child); max_depth = F2FS_I(dir)->i_current_depth; -- cgit v1.2.3 From d9f46bb1a84ee15f58c9c58ab91962b7dc6eb9a4 Mon Sep 17 00:00:00 2001 From: Yuan Zhong Date: Mon, 9 Mar 2015 02:43:50 +0000 Subject: f2fs: remove unnecessary condition judgment Remove the unnecessary condition judgment, because 'max_slots' has been initialized to '0' at the beginning of the function, as follows: if (max_slots) *max_slots = 0; Signed-off-by: Yuan Zhong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index a28909d9aea8..473763f4ca5b 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -139,7 +139,7 @@ struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots, !memcmp(d->filename[bit_pos], name->name, name->len)) goto found; - if (max_slots && *max_slots >= 0 && max_len > *max_slots) { + if (max_slots && max_len > *max_slots) { *max_slots = max_len; max_len = 0; } -- cgit v1.2.3 From e0150392ddfaaf5ccf338893f6db177a2c64a7ee Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Mon, 9 Mar 2015 08:07:04 +0900 Subject: f2fs: cleanup statement about max orphan inodes calc Through each macro, we can read the meaning easily.
Signed-off-by: Changman Lee Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 53bc32804841..384bfc4c36c3 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1104,13 +1104,6 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi) im->ino_num = 0; } - /* - * considering 512 blocks in a segment 8+cp_payload blocks are - * needed for cp and log segment summaries. Remaining blocks are - * used to keep orphan entries with the limitation one reserved - * segment for cp pack we can have max 1020*(504-cp_payload) - * orphan entries - */ sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS - NR_CURSEG_TYPE - __cp_payload(sbi)) * F2FS_ORPHANS_PER_BLOCK; -- cgit v1.2.3 From 57ed1e95babbd2df33f70eb820c7a9984199557b Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 9 Mar 2015 11:00:55 +0800 Subject: f2fs: fix unlocked nat set cache operation nm_i->nat_tree_lock is used to sync both the operations of nat entry cache tree and nat set cache tree, however, it isn't held when flush nat entries during checkpoint which lead to potential race, this patch fix it by holding the lock when gang lookup nat set cache and delete item from nat set cache. 
Signed-off-by: Wanpeng Li Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 4687eae6c116..8ab0cf1930bd 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1830,6 +1830,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, struct f2fs_nat_block *nat_blk; struct nat_entry *ne, *cur; struct page *page = NULL; + struct f2fs_nm_info *nm_i = NM_I(sbi); /* * there are two steps to flush nat entries: @@ -1883,7 +1884,9 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, set->entry_cnt); + down_write(&nm_i->nat_tree_lock); radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); + up_write(&nm_i->nat_tree_lock); kmem_cache_free(nat_entry_set_slab, set); } @@ -1911,6 +1914,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) remove_nats_in_journal(sbi); + down_write(&nm_i->nat_tree_lock); while ((found = __gang_lookup_nat_set(nm_i, set_idx, SETVEC_SIZE, setvec))) { unsigned idx; @@ -1919,6 +1923,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) __adjust_nat_entry_set(setvec[idx], &sets, MAX_NAT_JENTRIES(sum)); } + up_write(&nm_i->nat_tree_lock); /* flush dirty nats in nat entry set */ list_for_each_entry_safe(set, tmp, &sets, set_list) -- cgit v1.2.3 From bda190760b07a7350de7ca8d9afe0fd8c667ad2d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 9 Mar 2015 17:33:16 +0800 Subject: f2fs: fix to calculate max length of contiguous free slots correctly When lookuping for creating, we will try to record the level of current dentry hash table if current dentry has enough contiguous slots for storing name of new file which will be created later, this can save our lookup time when add a link into parent dir. But currently in find_target_dentry, our current length of contiguous free slots is not calculated correctly. 
This occasionally makes us leave holes in the dentry block, which wastes dentry block space. Let's refactor the lookup flow for max slots as follows to fix this issue: a) increase max_len if current slot is free; b) update max_slots with max_len if max_len is larger than max_slots; c) reset max_len to zero if current slot is not free. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 473763f4ca5b..4e59c8219dde 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -127,22 +127,19 @@ struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots, *max_slots = 0; while (bit_pos < d->max) { if (!test_bit_le(bit_pos, d->bitmap)) { - if (bit_pos == 0) - max_len = 1; - else if (!test_bit_le(bit_pos - 1, d->bitmap)) - max_len++; bit_pos++; + max_len++; continue; } + de = &d->dentry[bit_pos]; if (early_match_name(name->len, namehash, de) && !memcmp(d->filename[bit_pos], name->name, name->len)) goto found; - if (max_slots && max_len > *max_slots) { + if (max_slots && max_len > *max_slots) *max_slots = max_len; - max_len = 0; - } + max_len = 0; /* remain bug on condition */ if (unlikely(!de->name_len)) -- cgit v1.2.3 From 83dfe53c185e3554c102708c70dc1e5ff4bcac2c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 9 Mar 2015 18:18:19 +0800 Subject: f2fs: fix reference leaks in f2fs_acl_create Our f2fs_acl_create is copied and modified from posix_acl_create to avoid deadlock bug when inline_dentry feature is enabled. Now, we got reference leaks in posix_acl_create, and this has been fixed in commit fed0b588be2f ("posix_acl: fix reference leaks in posix_acl_create") by Omar Sandoval. https://lkml.org/lkml/2015/2/9/5 Let's fix this issue in f2fs_acl_create too.
Signed-off-by: Chao Yu Reviewed-by: Changman Lee Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 742202779bd5..4320ffab3495 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -351,13 +351,11 @@ static int f2fs_acl_create(struct inode *dir, umode_t *mode, *acl = f2fs_acl_clone(p, GFP_NOFS); if (!*acl) - return -ENOMEM; + goto no_mem; ret = f2fs_acl_create_masq(*acl, mode); - if (ret < 0) { - posix_acl_release(*acl); - return -ENOMEM; - } + if (ret < 0) + goto no_mem_clone; if (ret == 0) { posix_acl_release(*acl); @@ -378,6 +376,12 @@ no_acl: *default_acl = NULL; *acl = NULL; return 0; + +no_mem_clone: + posix_acl_release(*acl); +no_mem: + posix_acl_release(p); + return -ENOMEM; } int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, -- cgit v1.2.3 From 0bfcfcca3d4351f129b8c8a73c114c7ffa99228e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 10 Mar 2015 13:16:25 +0800 Subject: f2fs: fix to truncate inline data past EOF Previously if inode is with inline data, we will try to invalid partial inline data in page #0 when we truncate size of inode in truncate_partial_data_page(). And then we set page #0 to dirty, after this we can synchronize inode page with page #0 at ->writepage(). But sometimes we will fail to operate page #0 in truncate_partial_data_page() due to below reason: a) if offset is zero, we will skip setting page #0 to dirty. b) if page #0 is not uptodate, we will fail to update it as it has no mapping data. So with following operations, we will meet recent data which should be truncated. 1.write inline data to file 2.sync first data page to inode page 3.truncate file size to 0 4.truncate file size to max_inline_size 5.echo 1 > /proc/sys/vm/drop_caches 6.read file --> meet original inline data which is remained in inode page. 
This patch renames truncate_inline_data() to truncate_inline_inode() for code readability, then use truncate_inline_inode() to truncate inline data in inode page in truncate_blocks() and truncate page #0 in truncate_partial_data_page() for fixing. v2: o truncate partially #0 page in truncate_partial_data_page to avoid keeping old data in #0 page. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 16 +++++++++++----- fs/f2fs/inline.c | 26 +++++++++++++++++++++----- 3 files changed, 33 insertions(+), 10 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ca7da0464379..147ef316f0ff 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1763,6 +1763,7 @@ extern struct kmem_cache *inode_entry_slab; */ bool f2fs_may_inline(struct inode *); void read_inline_data(struct page *, struct page *); +bool truncate_inline_inode(struct page *, u64); int f2fs_read_inline_data(struct inode *, struct page *); int f2fs_convert_inline_page(struct dnode_of_data *, struct page *); int f2fs_convert_inline_inode(struct inode *); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 5e8850edc5cc..36dc7581a28b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -456,15 +456,16 @@ void truncate_data_blocks(struct dnode_of_data *dn) truncate_data_blocks_range(dn, ADDRS_PER_BLOCK); } -static int truncate_partial_data_page(struct inode *inode, u64 from) +static int truncate_partial_data_page(struct inode *inode, u64 from, + bool force) { unsigned offset = from & (PAGE_CACHE_SIZE - 1); struct page *page; - if (!offset) + if (!offset && !force) return 0; - page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, false); + page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, force); if (IS_ERR(page)) return 0; @@ -475,7 +476,8 @@ static int truncate_partial_data_page(struct inode *inode, u64 from) f2fs_wait_on_page_writeback(page, DATA); zero_user(page, offset, PAGE_CACHE_SIZE - offset); - set_page_dirty(page); + if (!force) + 
set_page_dirty(page); out: f2fs_put_page(page, 1); return 0; @@ -489,6 +491,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) pgoff_t free_from; int count = 0, err = 0; struct page *ipage; + bool truncate_page = false; trace_f2fs_truncate_blocks_enter(inode, from); @@ -504,7 +507,10 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) } if (f2fs_has_inline_data(inode)) { + if (truncate_inline_inode(ipage, from)) + set_page_dirty(ipage); f2fs_put_page(ipage, 1); + truncate_page = true; goto out; } @@ -535,7 +541,7 @@ out: /* lastly zero out the first data page */ if (!err) - err = truncate_partial_data_page(inode, from); + err = truncate_partial_data_page(inode, from, truncate_page); trace_f2fs_truncate_blocks_exit(inode, err); return err; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 4ba97320d194..153c5e7a0bef 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -50,10 +50,26 @@ void read_inline_data(struct page *page, struct page *ipage) SetPageUptodate(page); } -static void truncate_inline_data(struct page *ipage) +bool truncate_inline_inode(struct page *ipage, u64 from) { + void *addr; + + /* + * we should never truncate inline data past max inline data size, + * because we always convert inline inode to normal one before + * truncating real data if new size is past max inline data size. 
+ */ + f2fs_bug_on(F2FS_P_SB(ipage), from > MAX_INLINE_DATA); + + if (from >= MAX_INLINE_DATA) + return false; + + addr = inline_data_addr(ipage); + f2fs_wait_on_page_writeback(ipage, NODE); - memset(inline_data_addr(ipage), 0, MAX_INLINE_DATA); + memset(addr + from, 0, MAX_INLINE_DATA - from); + + return true; } int f2fs_read_inline_data(struct inode *inode, struct page *page) @@ -131,7 +147,7 @@ no_update: set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE); /* clear inline data and flag after data writeback */ - truncate_inline_data(dn->inode_page); + truncate_inline_inode(dn->inode_page, 0); clear_out: stat_dec_inline_inode(dn->inode); f2fs_clear_inline_inode(dn->inode); @@ -245,7 +261,7 @@ process_inline: if (f2fs_has_inline_data(inode)) { ipage = get_node_page(sbi, inode->i_ino); f2fs_bug_on(sbi, IS_ERR(ipage)); - truncate_inline_data(ipage); + truncate_inline_inode(ipage, 0); f2fs_clear_inline_inode(inode); update_inode(inode, ipage); f2fs_put_page(ipage, 1); @@ -363,7 +379,7 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, set_page_dirty(page); /* clear inline dir and flag after data writeback */ - truncate_inline_data(ipage); + truncate_inline_inode(ipage, 0); stat_dec_inline_dir(dir); clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); -- cgit v1.2.3 From d6d4f1cb912d9bcf988a529d0934568d4550f7b5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 12 Mar 2015 17:04:24 +0800 Subject: f2fs: fix to check current blkaddr in __allocate_data_blocks In __allocate_data_blocks, we should check current blkaddr which is located at ofs_in_node of dnode page instead of checking first blkaddr all the time. Otherwise we can only allocate one blkaddr in each dnode page. Fix it. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0057d4baad33..ba70a78a396f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1066,7 +1066,10 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset, end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); while (dn.ofs_in_node < end_offset && len) { - if (dn.data_blkaddr == NULL_ADDR) { + block_t blkaddr; + + blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); + if (blkaddr == NULL_ADDR) { if (__allocate_data_block(&dn)) goto sync_out; allocated = true; -- cgit v1.2.3 From 21cb1d99bcc77252e6426010bcc6433f75b581bb Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 11 Mar 2015 13:42:48 -0400 Subject: f2fs: fix to cover sentry_lock for block allocation In the following call stack, f2fs changes the bitmap for dirty segments and # of dirty sentries without grabbing sit_i->sentry_lock. This can result in mismatch on bitmap and # of dirty sentries, since if there are some direct_io operations. 
In allocate_data_block, - __allocate_new_segments - mutex_lock(&curseg->curseg_mutex); - s_ops->allocate_segment - new_curseg/change_curseg - reset_curseg - __set_sit_entry_type - __mark_sit_entry_dirty - set_bit(dirty_sentries_bitmap) - dirty_sentries++; Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3e80bd69d4a0..eafaf728236b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1167,6 +1167,7 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, curseg = CURSEG_I(sbi, type); mutex_lock(&curseg->curseg_mutex); + mutex_lock(&sit_i->sentry_lock); /* direct_io'ed data is aligned to the segment for better performance */ if (direct_io && curseg->next_blkoff) @@ -1181,7 +1182,6 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, */ __add_sum_entry(sbi, type, sum); - mutex_lock(&sit_i->sentry_lock); __refresh_next_blkoff(sbi, curseg); stat_inc_block_count(sbi, curseg); -- cgit v1.2.3 From 2adc3505cfb819c91855a64d997a7cdd9731ee6e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 16 Mar 2015 21:08:44 +0800 Subject: f2fs: set SBI_NEED_FSCK when encountering exception in recovery This patch tries to set SBI_NEED_FSCK flag into sbi only when we fail to recover in fill_super, so we could skip fscking image when we fail to fill super for other reason. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 0b8a2d8efa72..f9dc543805d9 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -966,7 +966,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) struct buffer_head *raw_super_buf; struct inode *root; long err = -EINVAL; - bool retry = true; + bool retry = true, need_fsck = false; char *options = NULL; int i; @@ -1155,9 +1155,6 @@ try_onemore: if (err) goto free_proc; - if (!retry) - set_sbi_flag(sbi, SBI_NEED_FSCK); - /* recover fsynced data */ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { /* @@ -1169,8 +1166,13 @@ try_onemore: err = -EROFS; goto free_kobj; } + + if (need_fsck) + set_sbi_flag(sbi, SBI_NEED_FSCK); + err = recover_fsync_data(sbi); if (err) { + need_fsck = true; f2fs_msg(sb, KERN_ERR, "Cannot recover all fsync data errno=%ld", err); goto free_kobj; -- cgit v1.2.3 From 3402e87cfb5e762f9c95071bf4a2ad65fd9392a2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 11 Mar 2015 23:27:25 -0400 Subject: f2fs: set buffer_new when new blocks are allocated This patch modifies to call set_buffer_new, if new blocks are allocated. 
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ba70a78a396f..4a416e74bb2a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -257,7 +257,7 @@ static void f2fs_map_bh(struct super_block *sb, pgoff_t pgofs, unsigned int blkbits = sb->s_blocksize_bits; size_t count; - set_buffer_new(bh_result); + clear_buffer_new(bh_result); map_bh(bh_result, sb, ei->blk + pgofs - ei->fofs); count = ei->fofs + ei->len - pgofs; if (count < (UINT_MAX >> blkbits)) @@ -1139,7 +1139,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock, goto put_out; if (dn.data_blkaddr != NULL_ADDR) { - set_buffer_new(bh_result); + clear_buffer_new(bh_result); map_bh(bh_result, inode->i_sb, dn.data_blkaddr); } else if (create) { err = __allocate_data_block(&dn); @@ -1184,6 +1184,7 @@ get_next: if (err) goto sync_out; allocated = true; + set_buffer_new(bh_result); blkaddr = dn.data_blkaddr; } /* Give more consecutive addresses for the readahead */ -- cgit v1.2.3 From 78373b7319abdf15050af5b1632c4c8b8b398f33 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 13 Mar 2015 21:44:36 -0700 Subject: f2fs: enhance multi-threads performance Previously, f2fs_write_data_pages has a mutex, sbi->writepages, to serialize data writes to maximize write bandwidth, while sacrificing multi-threads performance. Practically, however, multi-threads environment is much more important for users. So this patch tries to remove the mutex. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 7 ------- fs/f2fs/f2fs.h | 1 - fs/f2fs/super.c | 1 - 3 files changed, 9 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 4a416e74bb2a..f0a18a005bda 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1397,7 +1397,6 @@ static int f2fs_write_data_pages(struct address_space *mapping, { struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - bool locked = false; int ret; long diff; @@ -1418,13 +1417,7 @@ static int f2fs_write_data_pages(struct address_space *mapping, diff = nr_pages_to_write(sbi, DATA, wbc); - if (!S_ISDIR(inode->i_mode)) { - mutex_lock(&sbi->writepages); - locked = true; - } ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); - if (locked) - mutex_unlock(&sbi->writepages); f2fs_submit_merged_bio(sbi, DATA, WRITE); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 147ef316f0ff..dbe114463a18 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -613,7 +613,6 @@ struct f2fs_sb_info { struct mutex cp_mutex; /* checkpoint procedure lock */ struct rw_semaphore cp_rwsem; /* blocking FS operations */ struct rw_semaphore node_write; /* locking node writes */ - struct mutex writepages; /* mutex for writepages() */ wait_queue_head_t cp_wait; struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f9dc543805d9..fc6857f378cc 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1027,7 +1027,6 @@ try_onemore: sbi->raw_super = raw_super; sbi->raw_super_buf = raw_super_buf; mutex_init(&sbi->gc_mutex); - mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); init_rwsem(&sbi->node_write); clear_sbi_flag(sbi, SBI_POR_DOING); -- cgit v1.2.3 From 83e21db6939dd6ff50a81d54aaef4b44847aefff Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 16 Mar 2015 16:54:52 -0700 Subject: f2fs: avoid wrong f2fs_bug_on when truncating inline_data This patch removes wrong f2fs_bug_on in 
truncate_inline_inode. When there is no space, a corner case can occur where i_size is over MAX_INLINE_DATA while its inode is still inline_data. The scenario is 1. write small data into file #A. 2. fill the whole partition to 100%. 3. truncate 4096 on file #A. 4. write data at 8192 offset. --> f2fs_write_begin -> -ENOSPC = f2fs_convert_inline_page -> f2fs_write_failed -> truncate_blocks -> truncate_inline_inode BUG_ON, since i_size is 4096. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 153c5e7a0bef..d3e0599ffab7 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -54,13 +54,6 @@ bool truncate_inline_inode(struct page *ipage, u64 from) { void *addr; - /* - * we should never truncate inline data past max inline data size, - * because we always convert inline inode to normal one before - * truncating real data if new size is past max inline data size. - */ - f2fs_bug_on(F2FS_P_SB(ipage), from > MAX_INLINE_DATA); - if (from >= MAX_INLINE_DATA) return false; -- cgit v1.2.3 From 3c6c2bebef79999b1827041696dc1881e637e3af Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 17 Mar 2015 17:16:35 -0700 Subject: f2fs: avoid punch_hole overhead when releasing volatile data This patch is to avoid some punch_hole overhead when releasing volatile data. If volatile data was not written yet, we can just zero out the first page.
Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 ++ fs/f2fs/f2fs.h | 6 ++++++ fs/f2fs/file.c | 6 ++++++ fs/f2fs/inode.c | 10 ++++++++++ 4 files changed, 24 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f0a18a005bda..486113db97b1 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1291,6 +1291,8 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio) write_data_page(page, &dn, fio); f2fs_update_extent_cache(&dn); set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); + if (page->index == 0) + set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); } out_writepage: f2fs_put_dnode(&dn); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index dbe114463a18..9e378179ff93 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1232,6 +1232,7 @@ enum { FI_NEED_IPU, /* used for ipu per file */ FI_ATOMIC_FILE, /* indicate atomic file */ FI_VOLATILE_FILE, /* indicate volatile file */ + FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */ FI_DROP_CACHE, /* drop dirty page cache */ FI_DATA_EXIST, /* indicate data exists */ }; @@ -1340,6 +1341,11 @@ static inline bool f2fs_is_volatile_file(struct inode *inode) return is_inode_flag_set(F2FS_I(inode), FI_VOLATILE_FILE); } +static inline bool f2fs_is_first_block_written(struct inode *inode) +{ + return is_inode_flag_set(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); +} + static inline bool f2fs_is_drop_cache(struct inode *inode) { return is_inode_flag_set(F2FS_I(inode), FI_DROP_CACHE); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 36dc7581a28b..39a07a59b134 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -437,6 +437,9 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) dn->data_blkaddr = NULL_ADDR; f2fs_update_extent_cache(dn); invalidate_blocks(sbi, blkaddr); + if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) + clear_inode_flag(F2FS_I(dn->inode), + FI_FIRST_BLOCK_WRITTEN); nr_free++; } if (nr_free) { @@ -1005,6 +1008,9 @@ static int 
f2fs_ioc_release_volatile_write(struct file *filp) if (!f2fs_is_volatile_file(inode)) return 0; + if (!f2fs_is_first_block_written(inode)) + return truncate_partial_data_page(inode, 0, true); + punch_hole(inode, 0, F2FS_BLKSIZE); return 0; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index b5087443be41..bb5800969bcc 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -51,6 +51,13 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) } } +static bool __written_first_block(struct f2fs_inode *ri) +{ + if (ri->i_addr[0] != NEW_ADDR && ri->i_addr[0] != NULL_ADDR) + return true; + return false; +} + static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) { if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { @@ -143,6 +150,9 @@ static int do_read_inode(struct inode *inode) /* get rdev by using inline_info */ __get_inode_rdev(inode, ri); + if (__written_first_block(ri)) + set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); + f2fs_put_page(node_page, 1); stat_inc_inline_inode(inode); -- cgit v1.2.3 From 8ce67cb07dbf6ba35aea1e07e8ad1ea004ced27b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 17 Mar 2015 17:58:08 -0700 Subject: f2fs: add some tracepoints to debug volatile and atomic writes Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 ++ fs/f2fs/f2fs.h | 4 ++++ fs/f2fs/segment.c | 4 ++++ include/trace/events/f2fs.h | 27 ++++++++++++++++++++++++++- 4 files changed, 36 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 486113db97b1..241b98741df9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1287,9 +1287,11 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio) need_inplace_update(inode))) { rewrite_data_page(page, fio); set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); + trace_f2fs_do_write_data_page(page, IPU); } else { write_data_page(page, &dn, fio); f2fs_update_extent_cache(&dn); + trace_f2fs_do_write_data_page(page, OPU); 
set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); if (page->index == 0) set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9e378179ff93..c9818193e15e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -556,6 +556,10 @@ enum page_type { META, NR_PAGE_TYPE, META_FLUSH, + INMEM, /* the below types are used by tracepoints only. */ + INMEM_DROP, + IPU, + OPU, }; struct f2fs_io_info { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index eafaf728236b..f939660941bb 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -205,6 +205,8 @@ retry: list_add_tail(&new->list, &fi->inmem_pages); inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); mutex_unlock(&fi->inmem_lock); + + trace_f2fs_register_inmem_page(page, INMEM); } void commit_inmem_pages(struct inode *inode, bool abort) @@ -238,11 +240,13 @@ void commit_inmem_pages(struct inode *inode, bool abort) f2fs_wait_on_page_writeback(cur->page, DATA); if (clear_page_dirty_for_io(cur->page)) inode_dec_dirty_pages(inode); + trace_f2fs_commit_inmem_page(cur->page, INMEM); do_write_data_page(cur->page, &fio); submit_bio = true; } f2fs_put_page(cur->page, 1); } else { + trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP); put_page(cur->page); } radix_tree_delete(&fi->inmem_root, cur->page->index); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 09a56dc75f8d..75724bd28356 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -14,7 +14,11 @@ { NODE, "NODE" }, \ { DATA, "DATA" }, \ { META, "META" }, \ - { META_FLUSH, "META_FLUSH" }) + { META_FLUSH, "META_FLUSH" }, \ + { INMEM, "INMEM" }, \ + { INMEM_DROP, "INMEM_DROP" }, \ + { IPU, "IN-PLACE" }, \ + { OPU, "OUT-OF-PLACE" }) #define F2FS_BIO_MASK(t) (t & (READA | WRITE_FLUSH_FUA)) #define F2FS_BIO_EXTRA_MASK(t) (t & (REQ_META | REQ_PRIO)) @@ -854,6 +858,13 @@ DEFINE_EVENT(f2fs__page, f2fs_writepage, TP_ARGS(page, type) ); +DEFINE_EVENT(f2fs__page, f2fs_do_write_data_page, + 
+ TP_PROTO(struct page *page, int type), + + TP_ARGS(page, type) +); + DEFINE_EVENT(f2fs__page, f2fs_readpage, TP_PROTO(struct page *page, int type), @@ -875,6 +886,20 @@ DEFINE_EVENT(f2fs__page, f2fs_vm_page_mkwrite, TP_ARGS(page, type) ); +DEFINE_EVENT(f2fs__page, f2fs_register_inmem_page, + + TP_PROTO(struct page *page, int type), + + TP_ARGS(page, type) +); + +DEFINE_EVENT(f2fs__page, f2fs_commit_inmem_page, + + TP_PROTO(struct page *page, int type), + + TP_ARGS(page, type) +); + TRACE_EVENT(f2fs_writepages, TP_PROTO(struct inode *inode, struct writeback_control *wbc, int type), -- cgit v1.2.3 From 368a0e40b5aedb036de3d78333571c8689869490 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 19 Mar 2015 13:23:48 +0800 Subject: f2fs: enable fast symlink by utilizing inline data Fast symlink can utilize inline data flow to avoid using any i_addr region, since we need to handle many cases such as truncation, roll-forward recovery, and fsck/dump tools. Signed-off-by: Wanpeng Li Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index d3e0599ffab7..375d2c797f4c 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -21,7 +21,7 @@ bool f2fs_may_inline(struct inode *inode) if (f2fs_is_atomic_file(inode)) return false; - if (!S_ISREG(inode->i_mode)) + if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode)) return false; if (i_size_read(inode) > MAX_INLINE_DATA) -- cgit v1.2.3 From 216a620a7c3d35ae604ba519c99c5cd1ce4dad6e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 19 Mar 2015 19:23:32 +0800 Subject: f2fs: split set_data_blkaddr from f2fs_update_extent_cache Split __set_data_blkaddr from f2fs_update_extent_cache for readability. Additionally rename __set_data_blkaddr to set_data_blkaddr for exporting. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 10 ++++------ fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 1 + fs/f2fs/inline.c | 1 + fs/f2fs/recovery.c | 1 + 5 files changed, 8 insertions(+), 6 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 241b98741df9..6492f88a2cc4 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -200,7 +200,7 @@ alloc_new: * ->node_page * update block addresses in the node page */ -static void __set_data_blkaddr(struct dnode_of_data *dn) +void set_data_blkaddr(struct dnode_of_data *dn) { struct f2fs_node *rn; __le32 *addr_array; @@ -229,7 +229,7 @@ int reserve_new_block(struct dnode_of_data *dn) trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); dn->data_blkaddr = NEW_ADDR; - __set_data_blkaddr(dn); + set_data_blkaddr(dn); mark_inode_dirty(dn->inode); sync_inode_page(dn); return 0; @@ -784,9 +784,6 @@ void f2fs_update_extent_cache(struct dnode_of_data *dn) f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR); - /* Update the page address in the parent node */ - __set_data_blkaddr(dn); - if (is_inode_flag_set(fi, FI_NO_EXTENT)) return; @@ -1032,7 +1029,7 @@ static int __allocate_data_block(struct dnode_of_data *dn) allocate_data_block(sbi, NULL, NULL_ADDR, &dn->data_blkaddr, &sum, seg); /* direct IO doesn't use extent cache to maximize the performance */ - __set_data_blkaddr(dn); + set_data_blkaddr(dn); /* update i_size */ fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + @@ -1290,6 +1287,7 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio) trace_f2fs_do_write_data_page(page, IPU); } else { write_data_page(page, &dn, fio); + set_data_blkaddr(&dn); f2fs_update_extent_cache(&dn); trace_f2fs_do_write_data_page(page, OPU); set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c9818193e15e..83223d8b32bb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1591,6 +1591,7 @@ int 
f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *, struct f2fs_io_info *); void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, struct f2fs_io_info *); +void set_data_blkaddr(struct dnode_of_data *); int reserve_new_block(struct dnode_of_data *); int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); void f2fs_shrink_extent_tree(struct f2fs_sb_info *, int); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 39a07a59b134..da1392965424 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -435,6 +435,7 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) continue; dn->data_blkaddr = NULL_ADDR; + set_data_blkaddr(dn); f2fs_update_extent_cache(dn); invalidate_blocks(sbi, blkaddr); if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 375d2c797f4c..8241a87046c3 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -131,6 +131,7 @@ no_update: set_page_writeback(page); fio.blk_addr = dn->data_blkaddr; write_data_page(page, dn, &fio); + set_data_blkaddr(dn); f2fs_update_extent_cache(dn); f2fs_wait_on_page_writeback(page, DATA); if (dirty) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 6e40874dfbb9..e60ffaa380f1 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -403,6 +403,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, /* write dummy data page */ recover_data_page(sbi, NULL, &sum, src, dest); dn.data_blkaddr = dest; + set_data_blkaddr(&dn); f2fs_update_extent_cache(&dn); recovered++; } -- cgit v1.2.3 From 93dfc52656e9ae1009ccba67e67ff69285b7743f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 19 Mar 2015 19:24:59 +0800 Subject: f2fs: introduce __{find,grab}_extent_tree This patch introduces __{find,grab}_extent_tree for reusing by following patches. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 79 +++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 48 insertions(+), 31 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6492f88a2cc4..67b3a99e03d5 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -394,6 +394,49 @@ static void __detach_extent_node(struct f2fs_sb_info *sbi, et->cached_en = NULL; } +static struct extent_tree *__find_extent_tree(struct f2fs_sb_info *sbi, + nid_t ino) +{ + struct extent_tree *et; + + down_read(&sbi->extent_tree_lock); + et = radix_tree_lookup(&sbi->extent_tree_root, ino); + if (!et) { + up_read(&sbi->extent_tree_lock); + return NULL; + } + atomic_inc(&et->refcount); + up_read(&sbi->extent_tree_lock); + + return et; +} + +static struct extent_tree *__grab_extent_tree(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct extent_tree *et; + nid_t ino = inode->i_ino; + + down_write(&sbi->extent_tree_lock); + et = radix_tree_lookup(&sbi->extent_tree_root, ino); + if (!et) { + et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS); + f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et); + memset(et, 0, sizeof(struct extent_tree)); + et->ino = ino; + et->root = RB_ROOT; + et->cached_en = NULL; + rwlock_init(&et->lock); + atomic_set(&et->refcount, 0); + et->count = 0; + sbi->total_ext_tree++; + } + atomic_inc(&et->refcount); + up_write(&sbi->extent_tree_lock); + + return et; +} + static struct extent_node *__lookup_extent_tree(struct extent_tree *et, unsigned int fofs) { @@ -538,14 +581,9 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, trace_f2fs_lookup_extent_tree_start(inode, pgofs); - down_read(&sbi->extent_tree_lock); - et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino); - if (!et) { - up_read(&sbi->extent_tree_lock); + et = __find_extent_tree(sbi, inode->i_ino); + if (!et) return false; - } - atomic_inc(&et->refcount); - 
up_read(&sbi->extent_tree_lock); read_lock(&et->lock); en = __lookup_extent_tree(et, pgofs); @@ -570,7 +608,6 @@ static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs, block_t blkaddr) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - nid_t ino = inode->i_ino; struct extent_tree *et; struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL; struct extent_node *den = NULL; @@ -579,22 +616,7 @@ static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs, trace_f2fs_update_extent_tree(inode, fofs, blkaddr); - down_write(&sbi->extent_tree_lock); - et = radix_tree_lookup(&sbi->extent_tree_root, ino); - if (!et) { - et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS); - f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et); - memset(et, 0, sizeof(struct extent_tree)); - et->ino = ino; - et->root = RB_ROOT; - et->cached_en = NULL; - rwlock_init(&et->lock); - atomic_set(&et->refcount, 0); - et->count = 0; - sbi->total_ext_tree++; - } - atomic_inc(&et->refcount); - up_write(&sbi->extent_tree_lock); + et = __grab_extent_tree(inode); write_lock(&et->lock); @@ -732,14 +754,9 @@ void f2fs_destroy_extent_tree(struct inode *inode) if (!test_opt(sbi, EXTENT_CACHE)) return; - down_read(&sbi->extent_tree_lock); - et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino); - if (!et) { - up_read(&sbi->extent_tree_lock); + et = __find_extent_tree(sbi, inode->i_ino); + if (!et) goto out; - } - atomic_inc(&et->refcount); - up_read(&sbi->extent_tree_lock); /* free all extent info belong to this extent tree */ write_lock(&et->lock); -- cgit v1.2.3 From 028a41e89383e1208dff1afe3e260b8cb6d3431c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 19 Mar 2015 19:26:02 +0800 Subject: f2fs: initialize extent tree with on-disk extent info of inode With normal extent info cache, we records largest extent mapping between logical block and physical block into extent info, and we persist extent info in on-disk inode. 
When we enable extent tree cache, if extent info of on-disk inode is exist, and the extent is not a small fragmented mapping extent. We'd better to load the extent info into extent tree cache when inode is loaded. By this way we can have more chance to hit extent tree cache rather than taking more time to read dnode page for block address. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 43 +++++++++++++++++++++++++++++++++++++++++++ fs/f2fs/f2fs.h | 1 + fs/f2fs/inode.c | 4 +--- 3 files changed, 45 insertions(+), 3 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 67b3a99e03d5..e3be4be3a6d8 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -572,6 +572,39 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, return count - et->count; } +static void f2fs_init_extent_tree(struct inode *inode, + struct f2fs_extent *i_ext) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct extent_tree *et; + struct extent_node *en; + struct extent_info ei; + + if (le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN) + return; + + et = __grab_extent_tree(inode); + + write_lock(&et->lock); + if (et->count) + goto out; + + set_extent_info(&ei, le32_to_cpu(i_ext->fofs), + le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len)); + + en = __insert_extent_tree(sbi, et, &ei, NULL); + if (en) { + et->cached_en = en; + + spin_lock(&sbi->extent_lock); + list_add_tail(&en->list, &sbi->extent_list); + spin_unlock(&sbi->extent_lock); + } +out: + write_unlock(&et->lock); + atomic_dec(&et->refcount); +} + static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, struct extent_info *ei) { @@ -782,6 +815,16 @@ out: return; } +void f2fs_init_extent_cache(struct inode *inode, struct f2fs_extent *i_ext) +{ + if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE)) + f2fs_init_extent_tree(inode, i_ext); + + write_lock(&F2FS_I(inode)->ext_lock); + get_extent_info(&F2FS_I(inode)->ext, *i_ext); + write_unlock(&F2FS_I(inode)->ext_lock); +} 
+ static bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, struct extent_info *ei) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 83223d8b32bb..431b4af693e5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1596,6 +1596,7 @@ int reserve_new_block(struct dnode_of_data *); int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); void f2fs_shrink_extent_tree(struct f2fs_sb_info *, int); void f2fs_destroy_extent_tree(struct inode *); +void f2fs_init_extent_cache(struct inode *, struct f2fs_extent *); void f2fs_update_extent_cache(struct dnode_of_data *); struct page *find_data_page(struct inode *, pgoff_t, bool); struct page *get_lock_data_page(struct inode *, pgoff_t); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index bb5800969bcc..9e64d3efb2d2 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -137,9 +137,7 @@ static int do_read_inode(struct inode *inode) fi->i_pino = le32_to_cpu(ri->i_pino); fi->i_dir_level = ri->i_dir_level; - write_lock(&fi->ext_lock); - get_extent_info(&fi->ext, ri->i_ext); - write_unlock(&fi->ext_lock); + f2fs_init_extent_cache(inode, &ri->i_ext); get_inline_info(fi, ri); -- cgit v1.2.3 From 0bdee482509fe8c3cf0e66231ed37b8e70954093 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 19 Mar 2015 19:27:51 +0800 Subject: f2fs: preserve extent info for extent cache This patch tries to preserve last extent info in extent tree cache into on-disk inode, so this can help us to reuse the last extent info next time for performance. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/f2fs/f2fs.h | 8 ++++++++ fs/f2fs/inode.c | 5 +++++ 3 files changed, 62 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e3be4be3a6d8..8a940e518be8 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -719,6 +719,55 @@ update_extent: atomic_dec(&et->refcount); } +void f2fs_preserve_extent_tree(struct inode *inode) +{ + struct extent_tree *et; + struct extent_info *ext = &F2FS_I(inode)->ext; + bool sync = false; + + if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE)) + return; + + et = __find_extent_tree(F2FS_I_SB(inode), inode->i_ino); + if (!et) { + if (ext->len) { + ext->len = 0; + update_inode_page(inode); + } + return; + } + + read_lock(&et->lock); + if (et->count) { + struct extent_node *en; + + if (et->cached_en) { + en = et->cached_en; + } else { + struct rb_node *node = rb_first(&et->root); + + if (!node) + node = rb_last(&et->root); + en = rb_entry(node, struct extent_node, rb_node); + } + + if (__is_extent_same(ext, &en->ei)) + goto out; + + *ext = en->ei; + sync = true; + } else if (ext->len) { + ext->len = 0; + sync = true; + } +out: + read_unlock(&et->lock); + atomic_dec(&et->refcount); + + if (sync) + update_inode_page(inode); +} + void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) { struct extent_tree *treevec[EXT_TREE_VEC_SIZE]; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 431b4af693e5..0be198339c3b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -371,6 +371,13 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs, ei->len = len; } +static inline bool __is_extent_same(struct extent_info *ei1, + struct extent_info *ei2) +{ + return (ei1->fofs == ei2->fofs && ei1->blk == ei2->blk && + ei1->len == ei2->len); +} + static inline bool __is_extent_mergeable(struct extent_info *back, struct extent_info *front) { @@ -1598,6 +1605,7 @@ void 
f2fs_shrink_extent_tree(struct f2fs_sb_info *, int); void f2fs_destroy_extent_tree(struct inode *); void f2fs_init_extent_cache(struct inode *, struct f2fs_extent *); void f2fs_update_extent_cache(struct dnode_of_data *); +void f2fs_preserve_extent_tree(struct inode *); struct page *find_data_page(struct inode *, pgoff_t, bool); struct page *get_lock_data_page(struct inode *, pgoff_t); struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 9e64d3efb2d2..07237ac2935e 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -343,7 +343,12 @@ void f2fs_evict_inode(struct inode *inode) no_delete: stat_dec_inline_dir(inode); stat_dec_inline_inode(inode); + + /* update extent info in inode */ + if (inode->i_nlink) + f2fs_preserve_extent_tree(inode); f2fs_destroy_extent_tree(inode); + invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); if (xnid) invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); -- cgit v1.2.3 From 75342797988a0f9ebec400a2dde8d4de581c4079 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 24 Mar 2015 10:20:27 +0800 Subject: f2fs: enable inline data by default Enable inline_data feature by default since it brings us better performance and space utilization and is now stable. Add another option noinline_data to disable it during mount.
Suggested-by: Jaegeuk Kim Suggested-by: Chao Yu Signed-off-by: Wanpeng Li Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 2 ++ fs/f2fs/super.c | 8 ++++++++ 2 files changed, 10 insertions(+) (limited to 'fs/f2fs') diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 48e2123c2582..e9e750e59efc 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -144,6 +144,8 @@ extent_cache Enable an extent cache based on rb-tree, it can cache as many as extent which map between contiguous logical address and physical address per inode, resulting in increasing the cache hit ratio. +noinline_data Disable the inline data feature, inline data feature is + enabled by default. ================================================================================ DEBUGFS ENTRIES diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index fc6857f378cc..45aa843fa597 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -58,6 +58,7 @@ enum { Opt_nobarrier, Opt_fastboot, Opt_extent_cache, + Opt_noinline_data, Opt_err, }; @@ -80,6 +81,7 @@ static match_table_t f2fs_tokens = { {Opt_nobarrier, "nobarrier"}, {Opt_fastboot, "fastboot"}, {Opt_extent_cache, "extent_cache"}, + {Opt_noinline_data, "noinline_data"}, {Opt_err, NULL}, }; @@ -372,6 +374,9 @@ static int parse_options(struct super_block *sb, char *options) case Opt_extent_cache: set_opt(sbi, EXTENT_CACHE); break; + case Opt_noinline_data: + clear_opt(sbi, INLINE_DATA); + break; default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -596,6 +601,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",disable_ext_identify"); if (test_opt(sbi, INLINE_DATA)) seq_puts(seq, ",inline_data"); + else + seq_puts(seq, ",noinline_data"); if (test_opt(sbi, INLINE_DENTRY)) seq_puts(seq, ",inline_dentry"); if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE)) @@ -991,6 +998,7 @@ try_onemore: 
sbi->active_logs = NR_CURSEG_TYPE; set_opt(sbi, BG_GC); + set_opt(sbi, INLINE_DATA); #ifdef CONFIG_F2FS_FS_XATTR set_opt(sbi, XATTR_USER); -- cgit v1.2.3 From df6136ef5533421e68ea7ff9c33d5b2ac9005ff9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 23 Mar 2015 10:33:37 +0800 Subject: f2fs: preallocate fallocated blocks for direct IO Normally, because the DIO_SKIP_HOLES flag is set by default, blockdev_direct_IO in f2fs_direct_IO tries to skip DIO in holes when writing inside i_size, which makes us fall back to buffered IO which shows lower performance. So in commit 59b802e5a453 ("f2fs: allocate data blocks in advance for f2fs_direct_IO"), we improve performance by allocating data blocks in advance if we meet holes no matter in i_size or not, since with it we can avoid falling back to buffered IO. But we forgot to consider unwritten fallocated blocks in this commit. This patch tries to fix it for fallocate case, this helps to improve performance. Test result: Storage info: sandisk ultra 64G micro sd card.
touch /mnt/f2fs/file truncate -s 67108864 /mnt/f2fs/file fallocate -o 0 -l 67108864 /mnt/f2fs/file time dd if=/dev/zero of=/mnt/f2fs/file bs=1M count=64 conv=notrunc oflag=direct Time before applying the patch: 67108864 bytes (67 MB) copied, 36.16 s, 1.9 MB/s real 0m36.162s user 0m0.000s sys 0m0.180s Time after applying the patch: 67108864 bytes (67 MB) copied, 27.7776 s, 2.4 MB/s real 0m27.780s user 0m0.000s sys 0m0.036s Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8a940e518be8..614e444e5297 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1126,16 +1126,23 @@ static int __allocate_data_block(struct dnode_of_data *dn) if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) return -EPERM; + + dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node); + if (dn->data_blkaddr == NEW_ADDR) + goto alloc; + if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) return -ENOSPC; +alloc: get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page) seg = CURSEG_DIRECT_IO; - allocate_data_block(sbi, NULL, NULL_ADDR, &dn->data_blkaddr, &sum, seg); + allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr, + &sum, seg); /* direct IO doesn't use extent cache to maximize the performance */ set_data_blkaddr(dn); @@ -1175,7 +1182,7 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset, block_t blkaddr; blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); - if (blkaddr == NULL_ADDR) { + if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) { if (__allocate_data_block(&dn)) goto sync_out; allocated = true; -- cgit v1.2.3 From 84e97c2767f9ed711718b47b8722ddd137f1dfe9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 23 Mar 2015 10:36:15 +0800 Subject: f2fs: avoid NULL pointer 
dereference in f2fs_xattr_advise_get We will encounter oops by executing below command. getfattr -n system.advise /mnt/f2fs/file Killed message log: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] f2fs_xattr_advise_get+0x29/0x40 [f2fs] *pdpt = 00000000319b7001 *pde = 0000000000000000 Oops: 0002 [#1] SMP Modules linked in: f2fs(O) snd_intel8x0 snd_ac97_codec ac97_bus snd_pcm snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq joydev snd_seq_device snd_timer bnep snd rfcomm microcode bluetooth soundcore i2c_piix4 mac_hid serio_raw parport_pc ppdev lp parport binfmt_misc hid_generic psmouse usbhid hid e1000 [last unloaded: f2fs] CPU: 3 PID: 3134 Comm: getfattr Tainted: G O 4.0.0-rc1 #6 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 task: f3a71b60 ti: f19a6000 task.ti: f19a6000 EIP: 0060:[] EFLAGS: 00010246 CPU: 3 EIP is at f2fs_xattr_advise_get+0x29/0x40 [f2fs] EAX: 00000000 EBX: f19a7e71 ECX: 00000000 EDX: f8b5b467 ESI: 00000000 EDI: f2008570 EBP: f19a7e14 ESP: f19a7e08 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 CR0: 80050033 CR2: 00000000 CR3: 319b8000 CR4: 000007f0 Stack: f8b5a634 c0cbb580 00000000 f19a7e34 c1193850 00000000 00000007 f19a7e71 f19a7e64 c0cbb580 c1193810 f19a7e50 c1193c00 00000000 00000000 00000000 c0cbb580 00000000 f19a7f70 c1194097 00000000 00000000 00000000 74737973 Call Trace: [] generic_getxattr+0x40/0x50 [] ? xattr_resolve_name+0x80/0x80 [] vfs_getxattr+0x70/0xa0 [] getxattr+0x87/0x190 [] ? path_lookupat+0x57/0x5f0 [] ? putname+0x32/0x50 [] ? kmem_cache_alloc+0x2a/0x130 [] ? putname+0x32/0x50 [] ? putname+0x32/0x50 [] ? putname+0x32/0x50 [] ? user_path_at_empty+0x49/0x70 [] ? 
user_path_at+0x1f/0x30 [] path_getxattr+0x47/0x80 [] SyS_getxattr+0x27/0x30 [] sysenter_do_call+0x12/0x12 Code: 66 90 55 89 e5 57 56 53 66 66 66 66 90 8b 78 20 89 d3 ba 67 b4 b5 f8 89 d8 89 ce e8 42 7c 7b c8 85 c0 75 16 0f b6 87 44 01 00 00 <88> 06 b8 01 00 00 00 5b 5e 5f 5d c3 8d 76 00 b8 ea ff ff ff eb EIP: [] f2fs_xattr_advise_get+0x29/0x40 [f2fs] SS:ESP 0068:f19a7e08 CR2: 0000000000000000 ---[ end trace 860260654f1f416a ]--- The reason is that in getfattr there are two steps which is indicated by strace info: 1) try to lookup and get size of specified xattr. 2) get value of the extented attribute. strace info: getxattr("/mnt/f2fs/file", "system.advise", 0x0, 0) = 1 getxattr("/mnt/f2fs/file", "system.advise", "\x00", 256) = 1 For the first step, getfattr may pass a NULL pointer in @value and zero in @size as parameters for ->getxattr, but we access this @value pointer directly without checking whether the pointer is valid or not in f2fs_xattr_advise_get, so the oops occurs. This patch fixes this issue by verifying @value pointer before using. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 5072bf9ae0ef..55649bec0b4d 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -135,7 +135,8 @@ static int f2fs_xattr_advise_get(struct dentry *dentry, const char *name, if (strcmp(name, "") != 0) return -EINVAL; - *((char *)buffer) = F2FS_I(inode)->i_advise; + if (buffer) + *((char *)buffer) = F2FS_I(inode)->i_advise; return sizeof(char); } -- cgit v1.2.3 From 30c62fdb25cc0c55fa442245f7aaab074d2e701a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 23 Mar 2015 10:37:39 +0800 Subject: f2fs: persist system.advise into on-disk inode This patch fixes to dirty inode for persisting i_advise of f2fs inode info into on-disk inode if user sets system.advise through setxattr. Otherwise the new value will be lost. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 55649bec0b4d..b0fd2f2d0716 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -153,6 +153,7 @@ static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name, return -EINVAL; F2FS_I(inode)->i_advise |= *(char *)value; + mark_inode_dirty(inode); return 0; } -- cgit v1.2.3 From 1b3e27a92ab60452b8fbb35e3ba691ac34f2c0fb Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 24 Mar 2015 13:08:05 +0800 Subject: f2fs: limit b_size of mapped bh in f2fs_map_bh Map bh over max size which caller defined is not needed, limit it in f2fs_map_bh. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 614e444e5297..93aae548cf80 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -255,15 +255,13 @@ static void f2fs_map_bh(struct super_block *sb, pgoff_t pgofs, struct extent_info *ei, struct buffer_head *bh_result) { unsigned int blkbits = sb->s_blocksize_bits; - size_t count; + size_t max_size = bh_result->b_size; + size_t mapped_size; clear_buffer_new(bh_result); map_bh(bh_result, sb, ei->blk + pgofs - ei->fofs); - count = ei->fofs + ei->len - pgofs; - if (count < (UINT_MAX >> blkbits)) - bh_result->b_size = (count << blkbits); - else - bh_result->b_size = UINT_MAX; + mapped_size = (ei->fofs + ei->len - pgofs) << blkbits; + bh_result->b_size = min(max_size, mapped_size); } static bool lookup_extent_info(struct inode *inode, pgoff_t pgofs, -- cgit v1.2.3 From adad81ed42bbc537f37192dcdd9a83e34bb61987 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 24 Mar 2015 12:04:20 -0700 Subject: f2fs: fix sparse warnings This patch fixes the below warning. 
sparse warnings: (new ones prefixed by >>) >> fs/f2fs/inode.c:56:23: sparse: restricted __le32 degrades to integer >> fs/f2fs/inode.c:56:52: sparse: restricted __le32 degrades to integer Reported-by: kbuild test robot Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 07237ac2935e..e622ec95409e 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -53,7 +53,9 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) static bool __written_first_block(struct f2fs_inode *ri) { - if (ri->i_addr[0] != NEW_ADDR && ri->i_addr[0] != NULL_ADDR) + block_t addr = le32_to_cpu(ri->i_addr[0]); + + if (addr != NEW_ADDR && addr != NULL_ADDR) return true; return false; } -- cgit v1.2.3 From c9ef481097d17fb8ff8ea7930ce715b5a676f10f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 26 Mar 2015 18:46:38 -0700 Subject: f2fs: fix mismatching lock and unlock pages for roll-forward recovery Previously, inode page is not correctly locked and unlocked in pair during the roll-forward recovery. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 48 +++++++++++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 17 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index e60ffaa380f1..c69de88a6453 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -258,6 +258,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, struct f2fs_summary_block *sum_node; struct f2fs_summary sum; struct page *sum_page, *node_page; + struct dnode_of_data tdn = *dn; nid_t ino, nid; struct inode *inode; unsigned int offset; @@ -285,17 +286,15 @@ got_it: /* Use the locked dnode page and inode */ nid = le32_to_cpu(sum.nid); if (dn->inode->i_ino == nid) { - struct dnode_of_data tdn = *dn; tdn.nid = nid; + if (!dn->inode_page_locked) + lock_page(dn->inode_page); tdn.node_page = dn->inode_page; tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); - truncate_data_blocks_range(&tdn, 1); - return 0; + goto truncate_out; } else if (dn->nid == nid) { - struct dnode_of_data tdn = *dn; tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); - truncate_data_blocks_range(&tdn, 1); - return 0; + goto truncate_out; } /* Get the node page */ @@ -319,18 +318,33 @@ got_it: bidx = start_bidx_of_node(offset, F2FS_I(inode)) + le16_to_cpu(sum.ofs_in_node); - if (ino != dn->inode->i_ino) { - truncate_hole(inode, bidx, bidx + 1); + /* + * if inode page is locked, unlock temporarily, but its reference + * count keeps alive. 
+ */ + if (ino == dn->inode->i_ino && dn->inode_page_locked) + unlock_page(dn->inode_page); + + set_new_dnode(&tdn, inode, NULL, NULL, 0); + if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE)) + goto out; + + if (tdn.data_blkaddr == blkaddr) + truncate_data_blocks_range(&tdn, 1); + + f2fs_put_dnode(&tdn); +out: + if (ino != dn->inode->i_ino) iput(inode); - } else { - struct dnode_of_data tdn; - set_new_dnode(&tdn, inode, dn->inode_page, NULL, 0); - if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE)) - return 0; - if (tdn.data_blkaddr != NULL_ADDR) - truncate_data_blocks_range(&tdn, 1); - f2fs_put_page(tdn.node_page, 1); - } + else if (dn->inode_page_locked) + lock_page(dn->inode_page); + return 0; + +truncate_out: + if (datablock_addr(tdn.node_page, tdn.ofs_in_node) == blkaddr) + truncate_data_blocks_range(&tdn, 1); + if (dn->inode->i_ino == nid && !dn->inode_page_locked) + unlock_page(dn->inode_page); return 0; } -- cgit v1.2.3 From 510022a85839a8409d1e6a519bb86ce71a84f30a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 30 Mar 2015 15:07:16 -0700 Subject: f2fs: add F2FS_INLINE_DOTS to recover missing dot dentries If f2fs was corrupted with missing dot dentries, it needs to recover them after fsck.f2fs detection. The underlying procedure is: 1. fsck.f2fs leaves the F2FS_INLINE_DOTS flag in the directory inode, if it detects missing dot dentries. 2. When f2fs looks up the corrupted directory, it triggers f2fs_add_link with proper inode numbers and their dot and dotdot names. 3. Once f2fs recovers the directory without errors, it removes F2FS_INLINE_DOTS finally.
Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 52 ++++++++++++++++++++++++++----------------------- fs/f2fs/f2fs.h | 22 ++++++++++++++++----- fs/f2fs/inline.c | 29 +++++++++++++++------------ fs/f2fs/namei.c | 48 +++++++++++++++++++++++++++++++++++++++++++++ fs/f2fs/recovery.c | 2 +- include/linux/f2fs_fs.h | 1 + 6 files changed, 112 insertions(+), 42 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 4e59c8219dde..906b5876dc27 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -59,9 +59,8 @@ static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK, }; -void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) +void set_de_type(struct f2fs_dir_entry *de, umode_t mode) { - umode_t mode = inode->i_mode; de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; } @@ -282,7 +281,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, lock_page(page); f2fs_wait_on_page_writeback(page, type); de->ino = cpu_to_le32(inode->i_ino); - set_de_type(de, inode); + set_de_type(de, inode->i_mode); f2fs_dentry_kunmap(dir, page); set_page_dirty(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME; @@ -328,14 +327,14 @@ void do_make_empty_dir(struct inode *inode, struct inode *parent, de->hash_code = 0; de->ino = cpu_to_le32(inode->i_ino); memcpy(d->filename[0], ".", 1); - set_de_type(de, inode); + set_de_type(de, inode->i_mode); de = &d->dentry[1]; de->hash_code = 0; de->name_len = cpu_to_le16(2); de->ino = cpu_to_le32(parent->i_ino); memcpy(d->filename[1], "..", 2); - set_de_type(de, inode); + set_de_type(de, inode->i_mode); test_and_set_bit_le(0, (void *)d->bitmap); test_and_set_bit_le(1, (void *)d->bitmap); @@ -432,7 +431,7 @@ error: void update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth) { - if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { + if (inode && is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { if (S_ISDIR(inode->i_mode)) { 
inc_nlink(dir); set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); @@ -447,7 +446,7 @@ void update_parent_metadata(struct inode *dir, struct inode *inode, set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); } - if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) + if (inode && is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) clear_inode_flag(F2FS_I(inode), FI_INC_LINK); } @@ -471,7 +470,7 @@ next: goto next; } -void f2fs_update_dentry(struct inode *inode, struct f2fs_dentry_ptr *d, +void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, const struct qstr *name, f2fs_hash_t name_hash, unsigned int bit_pos) { @@ -483,8 +482,8 @@ void f2fs_update_dentry(struct inode *inode, struct f2fs_dentry_ptr *d, de->hash_code = name_hash; de->name_len = cpu_to_le16(name->len); memcpy(d->filename[bit_pos], name->name, name->len); - de->ino = cpu_to_le32(inode->i_ino); - set_de_type(de, inode); + de->ino = cpu_to_le32(ino); + set_de_type(de, mode); for (i = 0; i < slots; i++) test_and_set_bit_le(bit_pos + i, (void *)d->bitmap); } @@ -494,7 +493,7 @@ void f2fs_update_dentry(struct inode *inode, struct f2fs_dentry_ptr *d, * f2fs_unlock_op(). 
*/ int __f2fs_add_link(struct inode *dir, const struct qstr *name, - struct inode *inode) + struct inode *inode, nid_t ino, umode_t mode) { unsigned int bit_pos; unsigned int level; @@ -507,11 +506,11 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct f2fs_dentry_block *dentry_blk = NULL; struct f2fs_dentry_ptr d; int slots = GET_DENTRY_SLOTS(namelen); - struct page *page; + struct page *page = NULL; int err = 0; if (f2fs_has_inline_dentry(dir)) { - err = f2fs_add_inline_entry(dir, name, inode); + err = f2fs_add_inline_entry(dir, name, inode, ino, mode); if (!err || err != -EAGAIN) return err; else @@ -561,26 +560,31 @@ start: add_dentry: f2fs_wait_on_page_writeback(dentry_page, DATA); - down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, name, NULL); - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto fail; + if (inode) { + down_write(&F2FS_I(inode)->i_sem); + page = init_inode_metadata(inode, dir, name, NULL); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto fail; + } } make_dentry_ptr(&d, (void *)dentry_blk, 1); - f2fs_update_dentry(inode, &d, name, dentry_hash, bit_pos); + f2fs_update_dentry(ino, mode, &d, name, dentry_hash, bit_pos); set_page_dirty(dentry_page); - /* we don't need to mark_inode_dirty now */ - F2FS_I(inode)->i_pino = dir->i_ino; - update_inode(inode, page); - f2fs_put_page(page, 1); + if (inode) { + /* we don't need to mark_inode_dirty now */ + F2FS_I(inode)->i_pino = dir->i_ino; + update_inode(inode, page); + f2fs_put_page(page, 1); + } update_parent_metadata(dir, inode, current_depth); fail: - up_write(&F2FS_I(inode)->i_sem); + if (inode) + up_write(&F2FS_I(inode)->i_sem); if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { update_inode_page(dir); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0be198339c3b..053361ace0ec 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1246,6 +1246,7 @@ enum { FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */ FI_DROP_CACHE, /* drop dirty 
page cache */ FI_DATA_EXIST, /* indicate data exists */ + FI_INLINE_DOTS, /* indicate inline dot dentries */ }; static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) @@ -1282,6 +1283,8 @@ static inline void get_inline_info(struct f2fs_inode_info *fi, set_inode_flag(fi, FI_INLINE_DENTRY); if (ri->i_inline & F2FS_DATA_EXIST) set_inode_flag(fi, FI_DATA_EXIST); + if (ri->i_inline & F2FS_INLINE_DOTS) + set_inode_flag(fi, FI_INLINE_DOTS); } static inline void set_raw_inline(struct f2fs_inode_info *fi, @@ -1297,6 +1300,8 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi, ri->i_inline |= F2FS_INLINE_DENTRY; if (is_inode_flag_set(fi, FI_DATA_EXIST)) ri->i_inline |= F2FS_DATA_EXIST; + if (is_inode_flag_set(fi, FI_INLINE_DOTS)) + ri->i_inline |= F2FS_INLINE_DOTS; } static inline int f2fs_has_inline_xattr(struct inode *inode) @@ -1342,6 +1347,11 @@ static inline int f2fs_exist_data(struct inode *inode) return is_inode_flag_set(F2FS_I(inode), FI_DATA_EXIST); } +static inline int f2fs_has_inline_dots(struct inode *inode) +{ + return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DOTS); +} + static inline bool f2fs_is_atomic_file(struct inode *inode) { return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE); @@ -1440,7 +1450,7 @@ struct dentry *f2fs_get_parent(struct dentry *child); * dir.c */ extern unsigned char f2fs_filetype_table[F2FS_FT_MAX]; -void set_de_type(struct f2fs_dir_entry *, struct inode *); +void set_de_type(struct f2fs_dir_entry *, umode_t); struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, struct f2fs_dentry_ptr *); bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *, @@ -1459,9 +1469,10 @@ ino_t f2fs_inode_by_name(struct inode *, struct qstr *); void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, struct page *, struct inode *); int update_dent_inode(struct inode *, const struct qstr *); -void f2fs_update_dentry(struct inode *, struct f2fs_dentry_ptr *, +void f2fs_update_dentry(nid_t ino, 
umode_t mode, struct f2fs_dentry_ptr *, const struct qstr *, f2fs_hash_t , unsigned int); -int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); +int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t, + umode_t); void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, struct inode *); int f2fs_do_tmpfile(struct inode *, struct inode *); @@ -1471,7 +1482,7 @@ bool f2fs_empty_dir(struct inode *); static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) { return __f2fs_add_link(dentry->d_parent->d_inode, &dentry->d_name, - inode); + inode, inode->i_ino, inode->i_mode); } /* @@ -1792,7 +1803,8 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *, struct page **); struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **); int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *); -int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *); +int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *, + nid_t, umode_t); void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *, struct inode *, struct inode *); bool f2fs_empty_inline_dir(struct inode *); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 8241a87046c3..8140e4f0e538 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -390,7 +390,7 @@ out: } int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, - struct inode *inode) + struct inode *inode, nid_t ino, umode_t mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct page *ipage; @@ -400,7 +400,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, struct f2fs_inline_dentry *dentry_blk = NULL; struct f2fs_dentry_ptr d; int slots = GET_DENTRY_SLOTS(namelen); - struct page *page; + struct page *page = NULL; int err = 0; ipage = get_node_page(sbi, dir->i_ino); @@ -417,29 +417,34 @@ int f2fs_add_inline_entry(struct inode *dir, 
const struct qstr *name, goto out; } - down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, name, ipage); - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto fail; + if (inode) { + down_write(&F2FS_I(inode)->i_sem); + page = init_inode_metadata(inode, dir, name, ipage); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto fail; + } } f2fs_wait_on_page_writeback(ipage, NODE); name_hash = f2fs_dentry_hash(name); make_dentry_ptr(&d, (void *)dentry_blk, 2); - f2fs_update_dentry(inode, &d, name, name_hash, bit_pos); + f2fs_update_dentry(ino, mode, &d, name, name_hash, bit_pos); set_page_dirty(ipage); /* we don't need to mark_inode_dirty now */ - F2FS_I(inode)->i_pino = dir->i_ino; - update_inode(inode, page); - f2fs_put_page(page, 1); + if (inode) { + F2FS_I(inode)->i_pino = dir->i_ino; + update_inode(inode, page); + f2fs_put_page(page, 1); + } update_parent_metadata(dir, inode, 0); fail: - up_write(&F2FS_I(inode)->i_sem); + if (inode) + up_write(&F2FS_I(inode)->i_sem); if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { update_inode(dir, ipage); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 1e2ae21bd6b6..8055e30eb14d 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -187,6 +187,44 @@ struct dentry *f2fs_get_parent(struct dentry *child) return d_obtain_alias(f2fs_iget(child->d_inode->i_sb, ino)); } +static int __recover_dot_dentries(struct inode *dir, nid_t pino) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct qstr dot = QSTR_INIT(".", 1); + struct qstr dotdot = QSTR_INIT("..", 2); + struct f2fs_dir_entry *de; + struct page *page; + int err = 0; + + f2fs_lock_op(sbi); + + de = f2fs_find_entry(dir, &dot, &page); + if (de) { + f2fs_dentry_kunmap(dir, page); + f2fs_put_page(page, 0); + } else { + err = __f2fs_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR); + if (err) + goto out; + } + + de = f2fs_find_entry(dir, &dotdot, &page); + if (de) { + f2fs_dentry_kunmap(dir, page); + f2fs_put_page(page, 0); + } else { + err = 
__f2fs_add_link(dir, &dotdot, NULL, pino, S_IFDIR); + } +out: + if (!err) { + clear_inode_flag(F2FS_I(dir), FI_INLINE_DOTS); + mark_inode_dirty(dir); + } + + f2fs_unlock_op(sbi); + return err; +} + static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { @@ -206,6 +244,16 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, inode = f2fs_iget(dir->i_sb, ino); if (IS_ERR(inode)) return ERR_CAST(inode); + + if (f2fs_has_inline_dots(inode)) { + int err; + + err = __recover_dot_dentries(inode, dir->i_ino); + if (err) { + iget_failed(inode); + return ERR_PTR(err); + } + } } return d_splice_alias(inode, dentry); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index c69de88a6453..679c465e9def 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -115,7 +115,7 @@ retry: iput(einode); goto retry; } - err = __f2fs_add_link(dir, &name, inode); + err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode); if (err) goto out_err; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 502f28cfb78e..591f8c3ef410 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -178,6 +178,7 @@ struct f2fs_extent { #define F2FS_INLINE_DATA 0x02 /* file inline data flag */ #define F2FS_INLINE_DENTRY 0x04 /* file inline dentry flag */ #define F2FS_DATA_EXIST 0x08 /* file inline data exist flag */ +#define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries */ #define MAX_INLINE_DATA (sizeof(__le32) * (DEF_ADDRS_PER_INODE - \ F2FS_INLINE_XATTR_ADDRS - 1)) -- cgit v1.2.3 From cb58463bc94470c428f3b9f4cda748e7755218ca Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 30 Mar 2015 15:23:45 -0700 Subject: f2fs: assign parent's i_mode for empty dir When assigning i_mode for dotdot, it needs to assign parent's i_mode. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 906b5876dc27..3a3302ab7871 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -334,7 +334,7 @@ void do_make_empty_dir(struct inode *inode, struct inode *parent, de->name_len = cpu_to_le16(2); de->ino = cpu_to_le32(parent->i_ino); memcpy(d->filename[1], "..", 2); - set_de_type(de, inode->i_mode); + set_de_type(de, parent->i_mode); test_and_set_bit_le(0, (void *)d->bitmap); test_and_set_bit_le(1, (void *)d->bitmap); -- cgit v1.2.3 From 418f6c277011d89c394309e72df9ad058e0a3f7d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 31 Mar 2015 18:03:29 -0700 Subject: f2fs: do not increase link count during recovery If there are multiple fsynced dnodes having a dent flag, roll-forward routine sets FI_INC_LINK for their inode, and recovery_dentry increases its link count accordingly. That results in normal file having a link count as 2, so we can't unlink those files. This was added to handle several inode blocks having same inode number with different directory paths. But, current f2fs doesn't replay all of path changes and only recover its dentry for the last fsynced inode block. So, there is no reason to do this. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 679c465e9def..63b720bd7e75 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -93,10 +93,9 @@ static int recover_dentry(struct inode *inode, struct page *ipage) } retry: de = f2fs_find_entry(dir, &name, &page); - if (de && inode->i_ino == le32_to_cpu(de->ino)) { - clear_inode_flag(F2FS_I(inode), FI_INC_LINK); + if (de && inode->i_ino == le32_to_cpu(de->ino)) goto out_unmap_put; - } + if (de) { einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); if (IS_ERR(einode)) { @@ -187,11 +186,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) goto next; entry = get_fsync_inode(head, ino_of_node(page)); - if (entry) { - if (IS_INODE(page) && is_dent_dnode(page)) - set_inode_flag(F2FS_I(entry->inode), - FI_INC_LINK); - } else { + if (!entry) { if (IS_INODE(page) && is_dent_dnode(page)) { err = recover_inode_page(sbi, page); if (err) -- cgit v1.2.3 From e03b07d9084d03e896b7f1a598a7f6aa18f6eeda Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 1 Apr 2015 19:38:20 -0700 Subject: f2fs: do not recover wrong data index During the roll-forward recovery, if we found a new data index written fsync lastly, we need to recover new block address. But, if that address was corrupted, we should not recover that. Otherwise, f2fs gets kernel panic from: In check_index_in_prev_nodes(), sentry = get_seg_entry(sbi, segno); --------------------------> out-of-range segno. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 63b720bd7e75..4b742c96c223 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -395,7 +395,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, src = datablock_addr(dn.node_page, dn.ofs_in_node); dest = datablock_addr(page, dn.ofs_in_node); - if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) { + if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR && + dest >= MAIN_BLKADDR(sbi) && dest < MAX_BLKADDR(sbi)) { + if (src == NULL_ADDR) { err = reserve_new_block(&dn); /* We should not get -ENOSPC */ -- cgit v1.2.3 From 9df47ba759e40ea2facd0601d4888abb37ed9658 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 13 Apr 2015 21:48:06 +0900 Subject: f2fs: change 0 to false for bool type In the f2fs_fill_super function, the variable "retry" is of bool type, so I think it should be set to false. Signed-off-by: Taehee Yoo Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 45aa843fa597..160b88346b24 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1230,7 +1230,7 @@ free_sbi: /* give only one another chance */ if (retry) { - retry = 0; + retry = false; shrink_dcache_sb(sb); goto try_onemore; } -- cgit v1.2.3 From d0cae97cb600d84a7e00df6f83ab3b2f60d8d7f7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 15 Apr 2015 13:37:53 -0700 Subject: f2fs: flush symlink path to avoid broken symlink after POR This patch tries to avoid broken symlink case after POR in best effort. This results in performance regression. But, if f2fs has inline_data and the target path is shorter than 3KB, the page would be stored in its inode_block, so that there would be no performance regression.
Note that, if user wants to keep this file atomically, it needs to trigger dir->fsync. And, there is still a hole to produce broken symlink. Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 8055e30eb14d..9f0eec4677fe 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -324,6 +324,17 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, d_instantiate(dentry, inode); unlock_new_inode(inode); + /* + * Let's flush symlink data in order to avoid broken symlink as much as + * possible. Nevertheless, fsyncing is the best way, but there is no + * way to get a file descriptor in order to flush that. + * + * Note that, it needs to do dir->fsync to make this recoverable. + * If the symlink path is stored into inline_data, there is no + * performance regression. + */ + filemap_write_and_wait_range(inode->i_mapping, 0, symlen - 1); + if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); return err; -- cgit v1.2.3 From feb7cbb079e63ebb7c0bd7022d2ba9c1dd15c69b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 15 Apr 2015 13:49:55 -0700 Subject: f2fs: avoid abnormal behavior on broken symlink When f2fs_symlink was triggered and checkpoint was done before syncing its link path, f2fs can get broken symlink like "xxx -> \0\0\0". This incurs abnormal path_walk by VFS. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 9f0eec4677fe..407dde3d7a92 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -295,6 +296,23 @@ fail: return err; } +static void *f2fs_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct page *page; + + page = page_follow_link_light(dentry, nd); + if (IS_ERR(page)) + return page; + + /* this is broken symlink case */ + if (*nd_get_link(nd) == 0) { + kunmap(page); + page_cache_release(page); + return ERR_PTR(-ENOENT); + } + return page; +} + static int f2fs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { @@ -790,7 +808,7 @@ const struct inode_operations f2fs_dir_inode_operations = { const struct inode_operations f2fs_symlink_inode_operations = { .readlink = generic_readlink, - .follow_link = page_follow_link_light, + .follow_link = f2fs_follow_link, .put_link = page_put_link, .getattr = f2fs_getattr, .setattr = f2fs_setattr, -- cgit v1.2.3 From 10027551ccf5459cc771c31ac8bc8e5cc8db45f8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 9 Apr 2015 17:03:53 -0700 Subject: f2fs: pass checkpoint reason on roll-forward recovery This patch adds CP_RECOVERY to remain recovery information for checkpoint. And, it makes sure writing checkpoint in this case. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 6 +++++- fs/f2fs/f2fs.h | 1 + fs/f2fs/recovery.c | 2 +- include/trace/events/f2fs.h | 1 + 4 files changed, 8 insertions(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 384bfc4c36c3..a5e17a2a0781 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1051,7 +1051,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) mutex_lock(&sbi->cp_mutex); if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && - cpc->reason != CP_DISCARD && cpc->reason != CP_UMOUNT) + (cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC)) goto out; if (unlikely(f2fs_cp_error(sbi))) goto out; @@ -1086,6 +1086,10 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) unblock_operations(sbi); stat_inc_cp_count(sbi->stat_info); + + if (cpc->reason == CP_RECOVERY) + f2fs_msg(sbi->sb, KERN_NOTICE, + "checkpoint: version = %llx", ckpt_ver); out: mutex_unlock(&sbi->cp_mutex); trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 053361ace0ec..c06a25e5cec3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -103,6 +103,7 @@ enum { CP_UMOUNT, CP_FASTBOOT, CP_SYNC, + CP_RECOVERY, CP_DISCARD, }; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 4b742c96c223..8d8ea99f2156 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -564,7 +564,7 @@ out: mutex_unlock(&sbi->cp_mutex); } else if (need_writecp) { struct cp_control cpc = { - .reason = CP_SYNC, + .reason = CP_RECOVERY, }; mutex_unlock(&sbi->cp_mutex); write_checkpoint(sbi, &cpc); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 75724bd28356..8804f22a08d1 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -78,6 +78,7 @@ { CP_UMOUNT, "Umount" }, \ { CP_FASTBOOT, "Fastboot" }, \ { CP_SYNC, "Sync" }, \ + { CP_RECOVERY, "Recovery" }, \ { CP_DISCARD, "Discard" }) struct 
victim_sel_policy; -- cgit v1.2.3