From 96f1e097457506f215adfe3c47aacc15a88f6dd7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 3 Dec 2018 23:16:07 -0500 Subject: jbd2: avoid long hold times of j_state_lock while committing a transaction We can hold j_state_lock for writing at the beginning of jbd2_journal_commit_transaction() for a rather long time (reportedly for 30 ms) due cleaning revoke bits of all revoked buffers under it. The handling of revoke tables as well as cleaning of t_reserved_list, and checkpoint lists does not need j_state_lock for anything. It is only needed to prevent new handles from joining the transaction. Generally T_LOCKED transaction state prevents new handles from joining the transaction - except for reserved handles which have to allowed to join while we wait for other handles to complete. To prevent reserved handles from joining the transaction while cleaning up lists, add new transaction state T_SWITCH and watch for it when starting reserved handles. With this we can just drop the lock for operations that don't need it. Reported-and-tested-by: Adrian Hunter Suggested-by: "Theodore Y. Ts'o" Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index b708e5169d1d..118d00a64184 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -575,6 +575,7 @@ struct transaction_s enum { T_RUNNING, T_LOCKED, + T_SWITCH, T_FLUSH, T_COMMIT, T_COMMIT_DFLUSH, -- cgit v1.2.3 From 32ea275008d8c76fa3f40d10d0ffc694a214dfef Mon Sep 17 00:00:00 2001 From: Alexander Lochmann Date: Tue, 4 Dec 2018 00:30:22 -0500 Subject: jbd2: update locking documentation for transaction_t The following members of struct transaction_s aka transaction_t were turned into lock-free variables in the past: - t_updates - t_outstanding_credits - t_handle_count However, the documentation has not been updated yet. This commit replaced the annotated lock by [none]. Found by LockDoc (Alexander Lochmann, Horst Schirmeier and Olaf Spinczyk) Signed-off-by: Alexander Lochmann Signed-off-by: Horst Schirmeier Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 118d00a64184..0f919d5fe84f 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -663,13 +663,13 @@ struct transaction_s /* * Number of outstanding updates running on this transaction - * [t_handle_lock] + * [none] */ atomic_t t_updates; /* * Number of buffers reserved for use by all handles in this transaction - * handle but not yet modified. [t_handle_lock] + * handle but not yet modified. [none] */ atomic_t t_outstanding_credits; @@ -691,7 +691,7 @@ struct transaction_s ktime_t t_start_time; /* - * How many handles used this transaction? [t_handle_lock] + * How many handles used this transaction? [none] */ atomic_t t_handle_count; -- cgit v1.2.3 From fde872682e175743e0c3ef939c89e3c6008a1529 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 19 Dec 2018 14:07:58 -0500 Subject: ext4: force inode writes when nfsd calls commit_metadata() Some time back, nfsd switched from calling vfs_fsync() to using a new commit_metadata() hook in export_operations(). If the file system did not provide a commit_metadata() hook, it fell back to using sync_inode_metadata(). Unfortunately doesn't work on all file systems. In particular, it doesn't work on ext4 due to how the inode gets journalled --- the VFS writeback code will not always call ext4_write_inode(). So we need to provide our own ext4_nfs_commit_metdata() method which calls ext4_write_inode() directly. Google-Bug-Id: 121195940 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/super.c | 11 +++++++++++ include/trace/events/ext4.h | 20 ++++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'include') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index fc9071081600..d6c142d73d99 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1202,6 +1202,16 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, ext4_nfs_get_inode); } +static int ext4_nfs_commit_metadata(struct inode *inode) +{ + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL + }; + + trace_ext4_nfs_commit_metadata(inode); + return ext4_write_inode(inode, &wbc); +} + /* * Try to release metadata pages (indirect blocks, directories) which are * mapped via the block device. Since these pages could have journal heads @@ -1406,6 +1416,7 @@ static const struct export_operations ext4_export_ops = { .fh_to_dentry = ext4_fh_to_dentry, .fh_to_parent = ext4_fh_to_parent, .get_parent = ext4_get_parent, + .commit_metadata = ext4_nfs_commit_metadata, }; enum { diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 698e0d8a5ca4..d68e9e536814 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -226,6 +226,26 @@ TRACE_EVENT(ext4_drop_inode, (unsigned long) __entry->ino, __entry->drop) ); +TRACE_EVENT(ext4_nfs_commit_metadata, + TP_PROTO(struct inode *inode), + + TP_ARGS(inode), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + ), + + TP_printk("dev %d,%d ino %lu", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long) __entry->ino) +); + TRACE_EVENT(ext4_mark_inode_dirty, TP_PROTO(struct inode *inode, unsigned long IP), -- cgit v1.2.3