From 675f10bde6cc3874632a8f684df2a8a2a8ace76e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 22 Feb 2016 18:29:18 +0800 Subject: f2fs: fix to convert inline directory correctly With below serials, we will lose parts of dirents: 1) mount f2fs with inline_dentry option 2) echo 1 > /sys/fs/f2fs/sdX/dir_level 3) mkdir dir 4) touch 180 files named [1-180] in dir 5) touch 181 in dir 6) echo 3 > /proc/sys/vm/drop_caches 7) ll dir ls: cannot access 2: No such file or directory ls: cannot access 4: No such file or directory ls: cannot access 5: No such file or directory ls: cannot access 6: No such file or directory ls: cannot access 8: No such file or directory ls: cannot access 9: No such file or directory ... total 360 drwxr-xr-x 2 root root 4096 Feb 19 15:12 ./ drwxr-xr-x 3 root root 4096 Feb 19 15:11 ../ -rw-r--r-- 1 root root 0 Feb 19 15:12 1 -rw-r--r-- 1 root root 0 Feb 19 15:12 10 -rw-r--r-- 1 root root 0 Feb 19 15:12 100 -????????? ? ? ? ? ? 101 -????????? ? ? ? ? ? 102 -????????? ? ? ? ? ? 103 ... The reason is: when doing the inline dir conversion, we didn't consider that directory has hierarchical hash structure which can be configured through sysfs interface 'dir_level'. By default, dir_level of directory inode is 0, it means we have one bucket in hash table located in first level, all dirents will be hashed in this bucket, so it has no problem for us to do the duplication simply between inline dentry page and converted normal dentry page. However, if we configured dir_level with the value N (greater than 0), it will expand the bucket number of first level hash table by 2^N - 1, it hashs dirents into different buckets according their hash value, if we still move all dirents to first bucket, it makes incorrent locating for inline dirents, the result is, although we can iterate all dirents through ->readdir, we can't stat some of them in ->lookup which based on hash table searching. This patch fixes this issue by rehashing dirents into correct position when converting inline directory. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index b90e9bdbd1dd..4c02c6521fef 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -508,4 +508,6 @@ enum { F2FS_FT_MAX }; +#define S_SHIFT 12 + #endif /* _LINUX_F2FS_FS_H */ -- cgit v1.2.3 From b5a7aef1ef436ec005fef0efe31a676ec5f4ab31 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 4 May 2016 22:05:01 -0700 Subject: fscrypto/f2fs: allow fs-specific key prefix for fs encryption This patch allows fscrypto to handle a second key prefix given by filesystem. The main reason is to provide backward compatibility, since previously f2fs used "f2fs:" as a crypto prefix instead of "fscrypt:". Later, ext4 should also provide key_prefix() to give "ext4:". One concern decribed by Ted would be kinda double check overhead of prefixes. In x86, for example, validate_user_key consumes 8 ms after boot-up, which turns out derive_key_aes() consumed most of the time to load specific crypto module. After such the cold miss, it shows almost zero latencies, which treats as a negligible overhead. Note that request_key() detects wrong prefix in prior to derive_key_aes() even. Cc: Ted Tso Cc: stable@vger.kernel.org # v4.6 Signed-off-by: Jaegeuk Kim --- fs/crypto/keyinfo.c | 120 ++++++++++++++++++++++++++++++----------------- fs/f2fs/f2fs.h | 8 ++++ fs/f2fs/super.c | 13 +++++ include/linux/fscrypto.h | 1 + 4 files changed, 98 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 06f5aa478bf2..1ac263eddc4e 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -78,6 +78,67 @@ out: return res; } +static int validate_user_key(struct fscrypt_info *crypt_info, + struct fscrypt_context *ctx, u8 *raw_key, + u8 *prefix, int prefix_size) +{ + u8 *full_key_descriptor; + struct key *keyring_key; + struct fscrypt_key *master_key; + const struct user_key_payload *ukp; + int full_key_len = prefix_size + (FS_KEY_DESCRIPTOR_SIZE * 2) + 1; + int res; + + full_key_descriptor = kmalloc(full_key_len, GFP_NOFS); + if (!full_key_descriptor) + return -ENOMEM; + + memcpy(full_key_descriptor, prefix, prefix_size); + sprintf(full_key_descriptor + prefix_size, + "%*phN", FS_KEY_DESCRIPTOR_SIZE, + ctx->master_key_descriptor); + full_key_descriptor[full_key_len - 1] = '\0'; + keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL); + kfree(full_key_descriptor); + if (IS_ERR(keyring_key)) + return PTR_ERR(keyring_key); + + if (keyring_key->type != &key_type_logon) { + printk_once(KERN_WARNING + "%s: key type must be logon\n", __func__); + res = -ENOKEY; + goto out; + } + down_read(&keyring_key->sem); + ukp = user_key_payload(keyring_key); + if (ukp->datalen != sizeof(struct fscrypt_key)) { + res = -EINVAL; + up_read(&keyring_key->sem); + goto out; + } + master_key = (struct fscrypt_key *)ukp->data; + BUILD_BUG_ON(FS_AES_128_ECB_KEY_SIZE != FS_KEY_DERIVATION_NONCE_SIZE); + + if (master_key->size != FS_AES_256_XTS_KEY_SIZE) { + printk_once(KERN_WARNING + "%s: key size incorrect: %d\n", + __func__, master_key->size); + res = -ENOKEY; + up_read(&keyring_key->sem); + goto out; + } + res = derive_key_aes(ctx->nonce, master_key->raw, raw_key); + up_read(&keyring_key->sem); + if (res) + goto out; + + crypt_info->ci_keyring_key = keyring_key; + return 0; +out: + key_put(keyring_key); + return res; +} + static void put_crypt_info(struct fscrypt_info *ci) { if (!ci) @@ -91,12 +152,7 @@ static void put_crypt_info(struct fscrypt_info *ci) int get_crypt_info(struct inode *inode) { struct fscrypt_info *crypt_info; - u8 full_key_descriptor[FS_KEY_DESC_PREFIX_SIZE + - (FS_KEY_DESCRIPTOR_SIZE * 2) + 1]; - struct key *keyring_key = NULL; - struct fscrypt_key *master_key; struct fscrypt_context ctx; - const struct user_key_payload *ukp; struct crypto_skcipher *ctfm; const char *cipher_str; u8 raw_key[FS_MAX_KEY_SIZE]; @@ -167,48 +223,24 @@ retry: memset(raw_key, 0x42, FS_AES_256_XTS_KEY_SIZE); goto got_key; } - memcpy(full_key_descriptor, FS_KEY_DESC_PREFIX, - FS_KEY_DESC_PREFIX_SIZE); - sprintf(full_key_descriptor + FS_KEY_DESC_PREFIX_SIZE, - "%*phN", FS_KEY_DESCRIPTOR_SIZE, - ctx.master_key_descriptor); - full_key_descriptor[FS_KEY_DESC_PREFIX_SIZE + - (2 * FS_KEY_DESCRIPTOR_SIZE)] = '\0'; - keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL); - if (IS_ERR(keyring_key)) { - res = PTR_ERR(keyring_key); - keyring_key = NULL; - goto out; - } - crypt_info->ci_keyring_key = keyring_key; - if (keyring_key->type != &key_type_logon) { - printk_once(KERN_WARNING - "%s: key type must be logon\n", __func__); - res = -ENOKEY; - goto out; - } - down_read(&keyring_key->sem); - ukp = user_key_payload(keyring_key); - if (ukp->datalen != sizeof(struct fscrypt_key)) { - res = -EINVAL; - up_read(&keyring_key->sem); - goto out; - } - master_key = (struct fscrypt_key *)ukp->data; - BUILD_BUG_ON(FS_AES_128_ECB_KEY_SIZE != FS_KEY_DERIVATION_NONCE_SIZE); - if (master_key->size != FS_AES_256_XTS_KEY_SIZE) { - printk_once(KERN_WARNING - "%s: key size incorrect: %d\n", - __func__, master_key->size); - res = -ENOKEY; - up_read(&keyring_key->sem); + res = validate_user_key(crypt_info, &ctx, raw_key, + FS_KEY_DESC_PREFIX, FS_KEY_DESC_PREFIX_SIZE); + if (res && inode->i_sb->s_cop->key_prefix) { + u8 *prefix = NULL; + int prefix_size, res2; + + prefix_size = inode->i_sb->s_cop->key_prefix(inode, &prefix); + res2 = validate_user_key(crypt_info, &ctx, raw_key, + prefix, prefix_size); + if (res2) { + if (res2 == -ENOKEY) + res = -ENOKEY; + goto out; + } + } else if (res) { goto out; } - res = derive_key_aes(ctx.nonce, master_key->raw, raw_key); - up_read(&keyring_key->sem); - if (res) - goto out; got_key: ctfm = crypto_alloc_skcipher(cipher_str, 0, 0); if (!ctfm || IS_ERR(ctfm)) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ccf8bf4debfc..dbd277eb9da7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -711,6 +711,10 @@ enum { MAX_TIME, }; +#ifdef CONFIG_F2FS_FS_ENCRYPTION +#define F2FS_KEY_DESC_PREFIX "f2fs:" +#define F2FS_KEY_DESC_PREFIX_SIZE 5 +#endif struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ @@ -718,6 +722,10 @@ struct f2fs_sb_info { int valid_super_block; /* valid super block no */ int s_flag; /* flags for sbi */ +#ifdef CONFIG_F2FS_FS_ENCRYPTION + u8 key_prefix[F2FS_KEY_DESC_PREFIX_SIZE]; + u8 key_prefix_size; +#endif /* for node-related operations */ struct f2fs_nm_info *nm_info; /* node manager */ struct inode *node_inode; /* cache node blocks */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8a28f799a24f..28c8992da6f6 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -964,6 +964,12 @@ static int f2fs_get_context(struct inode *inode, void *ctx, size_t len) ctx, len, NULL); } +static int f2fs_key_prefix(struct inode *inode, u8 **key) +{ + *key = F2FS_I_SB(inode)->key_prefix; + return F2FS_I_SB(inode)->key_prefix_size; +} + static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len, void *fs_data) { @@ -980,6 +986,7 @@ static unsigned f2fs_max_namelen(struct inode *inode) static struct fscrypt_operations f2fs_cryptops = { .get_context = f2fs_get_context, + .key_prefix = f2fs_key_prefix, .set_context = f2fs_set_context, .is_encrypted = f2fs_encrypted_inode, .empty_dir = f2fs_empty_dir, @@ -1305,6 +1312,12 @@ static void init_sb_info(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&sbi->s_list); mutex_init(&sbi->umount_mutex); + +#ifdef CONFIG_F2FS_FS_ENCRYPTION + memcpy(sbi->key_prefix, F2FS_KEY_DESC_PREFIX, + F2FS_KEY_DESC_PREFIX_SIZE); + sbi->key_prefix_size = F2FS_KEY_DESC_PREFIX_SIZE; +#endif } /* diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h index 6027f6bbb061..cfa6cde25f8e 100644 --- a/include/linux/fscrypto.h +++ b/include/linux/fscrypto.h @@ -175,6 +175,7 @@ struct fscrypt_name { */ struct fscrypt_operations { int (*get_context)(struct inode *, void *, size_t); + int (*key_prefix)(struct inode *, u8 **); int (*prepare_context)(struct inode *); int (*set_context)(struct inode *, const void *, size_t, void *); int (*dummy_context)(struct inode *); -- cgit v1.2.3 From 46008c6d42328710f9beaf5c2b47dc92b1cc1a75 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 9 May 2016 19:56:30 +0800 Subject: f2fs: support in batch multi blocks preallocation This patch introduces reserve_new_blocks to make preallocation of multi blocks as in batch operation, so it can avoid lots of redundant operation, result in better performance. In virtual machine, with rotational device: time fallocate -l 32G /mnt/f2fs/file Before: real 0m4.584s user 0m0.000s sys 0m4.580s After: real 0m0.292s user 0m0.000s sys 0m0.272s In x86, with SSD: time fallocate -l 500G $MNT/testfile Before : 24.758 s After : 1.604 s Signed-off-by: Chao Yu [Jaegeuk Kim: fix bugs and add performance numbers measured in x86.] Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 132 +++++++++++++++++++++++++++++++------------- fs/f2fs/f2fs.h | 20 ++++--- include/trace/events/f2fs.h | 14 +++-- 3 files changed, 117 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 369d953bd770..b61e2d40cdfb 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -278,6 +278,16 @@ alloc_new: trace_f2fs_submit_page_mbio(fio->page, fio); } +static void __set_data_blkaddr(struct dnode_of_data *dn) +{ + struct f2fs_node *rn = F2FS_NODE(dn->node_page); + __le32 *addr_array; + + /* Get physical address of data block */ + addr_array = blkaddr_in_node(rn); + addr_array[dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr); +} + /* * Lock ordering for the change of data block address: * ->data_page @@ -286,19 +296,9 @@ alloc_new: */ void set_data_blkaddr(struct dnode_of_data *dn) { - struct f2fs_node *rn; - __le32 *addr_array; - struct page *node_page = dn->node_page; - unsigned int ofs_in_node = dn->ofs_in_node; - - f2fs_wait_on_page_writeback(node_page, NODE, true); - - rn = F2FS_NODE(node_page); - - /* Get physical address of data block */ - addr_array = blkaddr_in_node(rn); - addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr); - if (set_page_dirty(node_page)) + f2fs_wait_on_page_writeback(dn->node_page, NODE, true); + __set_data_blkaddr(dn); + if (set_page_dirty(dn->node_page)) dn->node_changed = true; } @@ -309,24 +309,53 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) f2fs_update_extent_cache(dn); } -int reserve_new_block(struct dnode_of_data *dn) +/* dn->ofs_in_node will be returned with up-to-date last block pointer */ +int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); + if (!count) + return 0; + if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) return -EPERM; - if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) + if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count))) return -ENOSPC; - trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); + trace_f2fs_reserve_new_blocks(dn->inode, dn->nid, + dn->ofs_in_node, count); + + f2fs_wait_on_page_writeback(dn->node_page, NODE, true); + + for (; count > 0; dn->ofs_in_node++) { + block_t blkaddr = + datablock_addr(dn->node_page, dn->ofs_in_node); + if (blkaddr == NULL_ADDR) { + dn->data_blkaddr = NEW_ADDR; + __set_data_blkaddr(dn); + count--; + } + } + + if (set_page_dirty(dn->node_page)) + dn->node_changed = true; - dn->data_blkaddr = NEW_ADDR; - set_data_blkaddr(dn); mark_inode_dirty(dn->inode); sync_inode_page(dn); return 0; } +/* Should keep dn->ofs_in_node unchanged */ +int reserve_new_block(struct dnode_of_data *dn) +{ + unsigned int ofs_in_node = dn->ofs_in_node; + int ret; + + ret = reserve_new_blocks(dn, 1); + dn->ofs_in_node = ofs_in_node; + return ret; +} + int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) { bool need_put = dn->inode_page ? false : true; @@ -545,6 +574,7 @@ static int __allocate_data_block(struct dnode_of_data *dn) struct node_info ni; int seg = CURSEG_WARM_DATA; pgoff_t fofs; + blkcnt_t count = 1; if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) return -EPERM; @@ -553,7 +583,7 @@ static int __allocate_data_block(struct dnode_of_data *dn) if (dn->data_blkaddr == NEW_ADDR) goto alloc; - if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) + if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count))) return -ENOSPC; alloc: @@ -621,8 +651,10 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, struct dnode_of_data dn; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA; - pgoff_t pgofs, end_offset; + pgoff_t pgofs, end_offset, end; int err = 0, ofs = 1; + unsigned int ofs_in_node, last_ofs_in_node; + blkcnt_t prealloc; struct extent_info ei; bool allocated = false; block_t blkaddr; @@ -632,6 +664,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, /* it only supports block size == page size */ pgofs = (pgoff_t)map->m_lblk; + end = pgofs + maxblocks; if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) { map->m_pblk = ei.blk + pgofs - ei.fofs; @@ -659,6 +692,8 @@ next_dnode: goto unlock_out; } + prealloc = 0; + ofs_in_node = dn.ofs_in_node; end_offset = ADDRS_PER_PAGE(dn.node_page, inode); next_block: @@ -671,17 +706,20 @@ next_block: goto sync_out; } if (flag == F2FS_GET_BLOCK_PRE_AIO) { - if (blkaddr == NULL_ADDR) - err = reserve_new_block(&dn); + if (blkaddr == NULL_ADDR) { + prealloc++; + last_ofs_in_node = dn.ofs_in_node; + } } else { err = __allocate_data_block(&dn); - if (!err) + if (!err) { set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); + allocated = true; + } } if (err) goto sync_out; - allocated = true; map->m_flags = F2FS_MAP_NEW; blkaddr = dn.data_blkaddr; } else { @@ -700,6 +738,9 @@ next_block: } } + if (flag == F2FS_GET_BLOCK_PRE_AIO) + goto skip; + if (map->m_len == 0) { /* preallocated unwritten block should be mapped for fiemap. */ if (blkaddr == NEW_ADDR) @@ -711,32 +752,49 @@ next_block: } else if ((map->m_pblk != NEW_ADDR && blkaddr == (map->m_pblk + ofs)) || (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) || - flag == F2FS_GET_BLOCK_PRE_DIO || - flag == F2FS_GET_BLOCK_PRE_AIO) { + flag == F2FS_GET_BLOCK_PRE_DIO) { ofs++; map->m_len++; } else { goto sync_out; } +skip: dn.ofs_in_node++; pgofs++; - if (map->m_len < maxblocks) { - if (dn.ofs_in_node < end_offset) - goto next_block; + /* preallocate blocks in batch for one dnode page */ + if (flag == F2FS_GET_BLOCK_PRE_AIO && + (pgofs == end || dn.ofs_in_node == end_offset)) { - if (allocated) - sync_inode_page(&dn); - f2fs_put_dnode(&dn); + dn.ofs_in_node = ofs_in_node; + err = reserve_new_blocks(&dn, prealloc); + if (err) + goto sync_out; - if (create) { - f2fs_unlock_op(sbi); - f2fs_balance_fs(sbi, allocated); + map->m_len += dn.ofs_in_node - ofs_in_node; + if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) { + err = -ENOSPC; + goto sync_out; } - allocated = false; - goto next_dnode; + dn.ofs_in_node = end_offset; + } + + if (pgofs >= end) + goto sync_out; + else if (dn.ofs_in_node < end_offset) + goto next_block; + + if (allocated) + sync_inode_page(&dn); + f2fs_put_dnode(&dn); + + if (create) { + f2fs_unlock_op(sbi); + f2fs_balance_fs(sbi, allocated); } + allocated = false; + goto next_dnode; sync_out: if (allocated) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 052f5a8c96f1..1401c96724c6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1094,7 +1094,7 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs) } static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, - struct inode *inode, blkcnt_t count) + struct inode *inode, blkcnt_t *count) { block_t valid_block_count; @@ -1106,14 +1106,19 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, } #endif valid_block_count = - sbi->total_valid_block_count + (block_t)count; + sbi->total_valid_block_count + (block_t)(*count); if (unlikely(valid_block_count > sbi->user_block_count)) { - spin_unlock(&sbi->stat_lock); - return false; + *count = sbi->user_block_count - sbi->total_valid_block_count; + if (!*count) { + spin_unlock(&sbi->stat_lock); + return false; + } } - inode->i_blocks += count; - sbi->total_valid_block_count = valid_block_count; - sbi->alloc_valid_block_count += (block_t)count; + /* *count can be recalculated */ + inode->i_blocks += *count; + sbi->total_valid_block_count = + sbi->total_valid_block_count + (block_t)(*count); + sbi->alloc_valid_block_count += (block_t)(*count); spin_unlock(&sbi->stat_lock); return true; } @@ -1945,6 +1950,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *); void f2fs_submit_page_mbio(struct f2fs_io_info *); void set_data_blkaddr(struct dnode_of_data *); void f2fs_update_data_blkaddr(struct dnode_of_data *, block_t); +int reserve_new_blocks(struct dnode_of_data *, blkcnt_t); int reserve_new_block(struct dnode_of_data *); int f2fs_get_block(struct dnode_of_data *, pgoff_t); ssize_t f2fs_preallocate_blocks(struct kiocb *, struct iov_iter *); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 0f565845707b..497e6e80fb63 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -694,28 +694,32 @@ TRACE_EVENT(f2fs_direct_IO_exit, __entry->ret) ); -TRACE_EVENT(f2fs_reserve_new_block, +TRACE_EVENT(f2fs_reserve_new_blocks, - TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs_in_node), + TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs_in_node, + blkcnt_t count), - TP_ARGS(inode, nid, ofs_in_node), + TP_ARGS(inode, nid, ofs_in_node, count), TP_STRUCT__entry( __field(dev_t, dev) __field(nid_t, nid) __field(unsigned int, ofs_in_node) + __field(blkcnt_t, count) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->nid = nid; __entry->ofs_in_node = ofs_in_node; + __entry->count = count; ), - TP_printk("dev = (%d,%d), nid = %u, ofs_in_node = %u", + TP_printk("dev = (%d,%d), nid = %u, ofs_in_node = %u, count = %llu", show_dev(__entry), (unsigned int)__entry->nid, - __entry->ofs_in_node) + __entry->ofs_in_node, + (unsigned long long)__entry->count) ); DECLARE_EVENT_CLASS(f2fs__submit_page_bio, -- cgit v1.2.3 From 523be8a6b3418eb7e0f0f042fe0490345eb5d516 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 13 May 2016 12:36:58 -0700 Subject: f2fs: use percpu_counter for page counters This patch substitutes percpu_counter for atomic_counter when counting various types of pages. Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 11 ++++++----- fs/f2fs/f2fs.h | 24 ++++++++++++++---------- fs/f2fs/super.c | 31 +++++++++++++++++++++++++++---- include/trace/events/f2fs.h | 10 +++++----- 4 files changed, 52 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index a18897319347..d89a425055d0 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -144,6 +144,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem = sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize; si->base_mem += 2 * sizeof(struct f2fs_inode_info); si->base_mem += sizeof(*sbi->ckpt); + si->base_mem += sizeof(struct percpu_counter) * NR_COUNT_TYPE; /* build sm */ si->base_mem += sizeof(struct f2fs_sm_info); @@ -299,15 +300,15 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", si->ext_tree, si->zombie_tree, si->ext_node); seq_puts(s, "\nBalancing F2FS Async:\n"); - seq_printf(s, " - inmem: %4d, wb_bios: %4d\n", + seq_printf(s, " - inmem: %4lld, wb_bios: %4d\n", si->inmem_pages, si->wb_bios); - seq_printf(s, " - nodes: %4d in %4d\n", + seq_printf(s, " - nodes: %4lld in %4d\n", si->ndirty_node, si->node_pages); - seq_printf(s, " - dents: %4d in dirs:%4d\n", + seq_printf(s, " - dents: %4lld in dirs:%4d\n", si->ndirty_dent, si->ndirty_dirs); - seq_printf(s, " - datas: %4d in files:%4d\n", + seq_printf(s, " - datas: %4lld in files:%4d\n", si->ndirty_data, si->ndirty_files); - seq_printf(s, " - meta: %4d in %4d\n", + seq_printf(s, " - meta: %4lld in %4d\n", si->ndirty_meta, si->meta_pages); seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n", si->dirty_nats, si->nats, si->dirty_sits, si->sits); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index bc45a2c61b61..c2d0b24ba6b8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -813,7 +813,9 @@ struct f2fs_sb_info { block_t last_valid_block_count; /* for recovery */ u32 s_next_generation; /* for NFS support */ atomic_t nr_wb_bios; /* # of writeback bios */ - atomic_t nr_pages[NR_COUNT_TYPE]; /* # of pages, see count_type */ + + /* # of pages, see count_type */ + struct percpu_counter nr_pages[NR_COUNT_TYPE]; struct f2fs_mount_info mount_opt; /* mount options */ @@ -1158,7 +1160,7 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) { - atomic_inc(&sbi->nr_pages[count_type]); + percpu_counter_inc(&sbi->nr_pages[count_type]); set_sbi_flag(sbi, SBI_IS_DIRTY); } @@ -1171,7 +1173,7 @@ static inline void inode_inc_dirty_pages(struct inode *inode) static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) { - atomic_dec(&sbi->nr_pages[count_type]); + percpu_counter_dec(&sbi->nr_pages[count_type]); } static inline void inode_dec_dirty_pages(struct inode *inode) @@ -1185,9 +1187,9 @@ static inline void inode_dec_dirty_pages(struct inode *inode) F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA); } -static inline int get_pages(struct f2fs_sb_info *sbi, int count_type) +static inline s64 get_pages(struct f2fs_sb_info *sbi, int count_type) { - return atomic_read(&sbi->nr_pages[count_type]); + return percpu_counter_sum_positive(&sbi->nr_pages[count_type]); } static inline int get_dirty_pages(struct inode *inode) @@ -1198,8 +1200,10 @@ static inline int get_dirty_pages(struct inode *inode) static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) { unsigned int pages_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg; - return ((get_pages(sbi, block_type) + pages_per_sec - 1) - >> sbi->log_blocks_per_seg) / sbi->segs_per_sec; + unsigned int segs = (get_pages(sbi, block_type) + pages_per_sec - 1) >> + sbi->log_blocks_per_seg; + + return segs / sbi->segs_per_sec; } static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi) @@ -2013,11 +2017,11 @@ struct f2fs_stat_info { unsigned long long hit_largest, hit_cached, hit_rbtree; unsigned long long hit_total, total_ext; int ext_tree, zombie_tree, ext_node; - int ndirty_node, ndirty_meta; - int ndirty_dent, ndirty_dirs, ndirty_data, ndirty_files; + s64 ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, inmem_pages; + unsigned int ndirty_dirs, ndirty_files; int nats, dirty_nats, sits, dirty_sits, fnids; int total_count, utilization; - int bg_gc, inmem_pages, wb_bios; + int bg_gc, wb_bios; int inline_xattr, inline_inode, inline_dir, orphans; unsigned int valid_count, valid_node_count, valid_inode_count; unsigned int bimodal, avg_vblocks; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 0e175609cf72..009a9f44205f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -606,6 +606,14 @@ static void f2fs_destroy_inode(struct inode *inode) call_rcu(&inode->i_rcu, f2fs_i_callback); } +static void destroy_percpu_info(struct f2fs_sb_info *sbi) +{ + int i; + + for (i = 0; i < NR_COUNT_TYPE; i++) + percpu_counter_destroy(&sbi->nr_pages[i]); +} + static void f2fs_put_super(struct super_block *sb) { struct f2fs_sb_info *sbi = F2FS_SB(sb); @@ -666,6 +674,8 @@ static void f2fs_put_super(struct super_block *sb) if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); kfree(sbi->raw_super); + + destroy_percpu_info(sbi); kfree(sbi); } @@ -1324,7 +1334,6 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) static void init_sb_info(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = sbi->raw_super; - int i; sbi->log_sectors_per_block = le32_to_cpu(raw_super->log_sectors_per_block); @@ -1344,9 +1353,6 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->cur_victim_sec = NULL_SECNO; sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH; - for (i = 0; i < NR_COUNT_TYPE; i++) - atomic_set(&sbi->nr_pages[i], 0); - sbi->dir_level = DEF_DIR_LEVEL; sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL; sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL; @@ -1362,6 +1368,18 @@ static void init_sb_info(struct f2fs_sb_info *sbi) #endif } +static int init_percpu_info(struct f2fs_sb_info *sbi) +{ + int i, err; + + for (i = 0; i < NR_COUNT_TYPE; i++) { + err = percpu_counter_init(&sbi->nr_pages[i], 0, GFP_KERNEL); + if (err) + return err; + } + return 0; +} + /* * Read f2fs raw super block. * Because we have two copies of super block, so read both of them @@ -1552,6 +1570,10 @@ try_onemore: init_waitqueue_head(&sbi->cp_wait); init_sb_info(sbi); + err = init_percpu_info(sbi); + if (err) + goto free_options; + /* get an inode for meta space */ sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); if (IS_ERR(sbi->meta_inode)) { @@ -1754,6 +1776,7 @@ free_meta_inode: make_bad_inode(sbi->meta_inode); iput(sbi->meta_inode); free_options: + destroy_percpu_info(sbi); kfree(options); free_sb_buf: kfree(raw_super); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 497e6e80fb63..3a09bb4dc3b2 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1275,14 +1275,14 @@ TRACE_EVENT(f2fs_destroy_extent_tree, DECLARE_EVENT_CLASS(f2fs_sync_dirty_inodes, - TP_PROTO(struct super_block *sb, int type, int count), + TP_PROTO(struct super_block *sb, int type, s64 count), TP_ARGS(sb, type, count), TP_STRUCT__entry( __field(dev_t, dev) __field(int, type) - __field(int, count) + __field(s64, count) ), TP_fast_assign( @@ -1291,7 +1291,7 @@ DECLARE_EVENT_CLASS(f2fs_sync_dirty_inodes, __entry->count = count; ), - TP_printk("dev = (%d,%d), %s, dirty count = %d", + TP_printk("dev = (%d,%d), %s, dirty count = %lld", show_dev(__entry), show_file_type(__entry->type), __entry->count) @@ -1299,14 +1299,14 @@ DECLARE_EVENT_CLASS(f2fs_sync_dirty_inodes, DEFINE_EVENT(f2fs_sync_dirty_inodes, f2fs_sync_dirty_inodes_enter, - TP_PROTO(struct super_block *sb, int type, int count), + TP_PROTO(struct super_block *sb, int type, s64 count), TP_ARGS(sb, type, count) ); DEFINE_EVENT(f2fs_sync_dirty_inodes, f2fs_sync_dirty_inodes_exit, - TP_PROTO(struct super_block *sb, int type, int count), + TP_PROTO(struct super_block *sb, int type, s64 count), TP_ARGS(sb, type, count) ); -- cgit v1.2.3