From ab12313a9f56b939529abc80ac26bedefb3d5b62 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 27 Jan 2021 10:34:56 +0000 Subject: btrfs: avoid logging new ancestor inodes when logging new inode When we fsync a new file, created in the current transaction, we check all its ancestor inodes and always log them if they were created in the current transaction - even if we have already logged them before, which is a waste of time. So avoid logging new ancestor inodes if they were already logged before and have no xattrs added/updated/removed since they were last logged. This patch is part of a patchset comprised of the following patches: btrfs: remove unnecessary directory inode item update when deleting dir entry btrfs: stop setting nbytes when filling inode item for logging btrfs: avoid logging new ancestor inodes when logging new inode btrfs: skip logging directories already logged when logging all parents btrfs: skip logging inodes already logged when logging new entries btrfs: remove unnecessary check_parent_dirs_for_sync() btrfs: make concurrent fsyncs wait less when waiting for a transaction commit Performance results, after applying all patches, are mentioned in the change log of the last patch. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index be62759f0aac..105cf316ee27 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5272,6 +5272,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, if (S_ISDIR(inode->vfs_inode.i_mode)) { int max_key_type = BTRFS_DIR_LOG_INDEX_KEY; + clear_bit(BTRFS_INODE_COPY_EVERYTHING, &inode->runtime_flags); if (inode_only == LOG_INODE_EXISTS) max_key_type = BTRFS_XATTR_ITEM_KEY; ret = drop_objectid_items(trans, log, path, ino, max_key_type); @@ -5520,6 +5521,34 @@ out: return ret; } +/* + * Check if we need to log an inode. This is used in contexts where while + * logging an inode we need to log another inode (either that it exists or in + * full mode). This is used instead of btrfs_inode_in_log() because the later + * requires the inode to be in the log and have the log transaction committed, + * while here we do not care if the log transaction was already committed - our + * caller will commit the log later - and we want to avoid logging an inode + * multiple times when multiple tasks have joined the same log transaction. + */ +static bool need_log_inode(struct btrfs_trans_handle *trans, + struct btrfs_inode *inode) +{ + /* + * If this inode does not have new/updated/deleted xattrs since the last + * time it was logged and is flagged as logged in the current transaction, + * we can skip logging it. As for new/deleted names, those are updated in + * the log by link/unlink/rename operations. + * In case the inode was logged and then evicted and reloaded, its + * logged_trans will be 0, in which case we have to fully log it since + * logged_trans is a transient field, not persisted. + */ + if (inode->logged_trans == trans->transid && + !test_bit(BTRFS_INODE_COPY_EVERYTHING, &inode->runtime_flags)) + return false; + + return true; +} + struct btrfs_dir_list { u64 ino; struct list_head list; @@ -5848,7 +5877,8 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans, if (IS_ERR(inode)) return PTR_ERR(inode); - if (BTRFS_I(inode)->generation >= trans->transid) + if (BTRFS_I(inode)->generation >= trans->transid && + need_log_inode(trans, BTRFS_I(inode))) ret = btrfs_log_inode(trans, root, BTRFS_I(inode), LOG_INODE_EXISTS, ctx); btrfs_add_delayed_iput(inode); @@ -5902,7 +5932,8 @@ static int log_new_ancestors_fast(struct btrfs_trans_handle *trans, if (root != inode->root) break; - if (inode->generation >= trans->transid) { + if (inode->generation >= trans->transid && + need_log_inode(trans, inode)) { ret = btrfs_log_inode(trans, root, inode, LOG_INODE_EXISTS, ctx); if (ret) -- cgit v1.2.3