From c412a97cf6c5253fcf4ae5545be5775b2417d61b Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Mon, 22 Aug 2022 11:30:12 -0500
Subject: gfs2: Use TRY lock in gfs2_inode_lookup for UNLINKED inodes

Before this patch, delete_work_func() would check for the GLF_DEMOTE
flag on the iopen glock and if set, it would perform special processing.
However, there was a race whereby the GLF_DEMOTE flag could be set by
another process after the check. Then when it called
gfs2_lookup_by_inum() which calls gfs2_inode_lookup(), it tried to lock
the iopen glock in SH mode, but the GLF_DEMOTE flag prevented the
request from being granted. But the iopen glock could never be demoted
because that happens when the inode is evicted, and the evict was never
completed because of the failed lookup.

To fix that, change function gfs2_inode_lookup() so that when
GFS2_BLKST_UNLINKED inodes are searched, it uses the LM_FLAG_TRY flag
for the iopen glock.  If the locking request fails, fail
gfs2_inode_lookup() with -EAGAIN so that delete_work_func() can retry
the operation later.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glock.c |  8 +++++---
 fs/gfs2/inode.c | 10 ++++++++--
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 41b6c89e4bf7..57dcfd05b362 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1018,16 +1018,18 @@ static void delete_work_func(struct work_struct *work)
 			if (gfs2_queue_delete_work(gl, 5 * HZ))
 				return;
 		}
-		goto out;
 	}
 
 	inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino,
 				    GFS2_BLKST_UNLINKED);
-	if (!IS_ERR_OR_NULL(inode)) {
+	if (IS_ERR(inode)) {
+		if (PTR_ERR(inode) == -EAGAIN &&
+			(gfs2_queue_delete_work(gl, 5 * HZ)))
+				return;
+	} else {
 		d_prune_aliases(inode);
 		iput(inode);
 	}
-out:
 	gfs2_glock_put(gl);
 }
 
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index c8ec876f33ea..56ded979988c 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -130,6 +130,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
 	if (inode->i_state & I_NEW) {
 		struct gfs2_sbd *sdp = GFS2_SB(inode);
 		struct gfs2_glock *io_gl;
+		int extra_flags = 0;
 
 		error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE,
 				       &ip->i_gl);
@@ -141,9 +142,12 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
 		if (unlikely(error))
 			goto fail;
 
-		if (blktype != GFS2_BLKST_UNLINKED)
+		if (blktype == GFS2_BLKST_UNLINKED)
+			extra_flags |= LM_FLAG_TRY;
+		else
 			gfs2_cancel_delete_work(io_gl);
-		error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT,
+		error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED,
+					   GL_EXACT | extra_flags,
 					   &ip->i_iopen_gh);
 		gfs2_glock_put(io_gl);
 		if (unlikely(error))
@@ -210,6 +214,8 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
 	return inode;
 
 fail:
+	if (error == GLR_TRYFAILED)
+		error = -EAGAIN;
 	if (gfs2_holder_initialized(&ip->i_iopen_gh))
 		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
 	if (gfs2_holder_initialized(&i_gh))
-- 
cgit v1.2.3


From 04133b607a78f2fd3daadbe5519513942b0f3a05 Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Thu, 18 Aug 2022 13:32:36 -0500
Subject: gfs2: Prevent double iput for journal on error

When a gfs2 file system is withdrawn it does iput on its journal to
allow recovery from another cluster node. If it's unable to get a
replacement inode for whatever reason, the journal descriptor would
still be pointing at the evicted inode. So when unmount clears out the
list of journals, it would do a second iput referencing the pointer.
To avoid this, set the inode pointer to NULL.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/util.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 8241029a2a5d..95c79a3ec161 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -204,6 +204,7 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
 	 * exception code in glock_dq.
 	 */
 	iput(inode);
+	sdp->sd_jdesc->jd_inode = NULL;
 	/*
 	 * Wait until the journal inode's glock is freed. This allows try locks
 	 * on other nodes to be successful, otherwise we remain the owner of
-- 
cgit v1.2.3


From 053640a73838400dca23087d66a9c0db579adafb Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Thu, 18 Aug 2022 13:32:37 -0500
Subject: gfs2: Dequeue waiters when withdrawn

When a withdraw occurs, ordinary (not system) glocks may not be granted
anymore. Later, when the file system is unmounted, gfs2_gl_hash_clear()
tries to clear out all the glocks, but these un-grantable pending
waiters prevent some glocks from being freed. So the unmount hangs, at
least for its ten-minute timeout period.

This patch takes measures to remove any pending waiters from
the glocks that will never be granted. This allows the unmount to
proceed in a reasonable period of time.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glock.c | 14 ++++++++++++++
 fs/gfs2/glock.h |  1 +
 fs/gfs2/util.c  |  5 +++++
 3 files changed, 20 insertions(+)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 57dcfd05b362..858616afcae6 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -2196,6 +2196,20 @@ static void dump_glock_func(struct gfs2_glock *gl)
 	dump_glock(NULL, gl, true);
 }
 
+static void withdraw_dq(struct gfs2_glock *gl)
+{
+	spin_lock(&gl->gl_lockref.lock);
+	if (!__lockref_is_dead(&gl->gl_lockref) &&
+	    glock_blocked_by_withdraw(gl))
+		do_error(gl, LM_OUT_ERROR); /* remove pending waiters */
+	spin_unlock(&gl->gl_lockref.lock);
+}
+
+void gfs2_gl_dq_holders(struct gfs2_sbd *sdp)
+{
+	glock_hash_walk(withdraw_dq, sdp);
+}
+
 /**
  * gfs2_gl_hash_clear - Empty out the glock hash table
  * @sdp: the filesystem
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 5aed8b500cf5..0199a3dcb114 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -274,6 +274,7 @@ extern void gfs2_cancel_delete_work(struct gfs2_glock *gl);
 extern bool gfs2_delete_work_queued(const struct gfs2_glock *gl);
 extern void gfs2_flush_delete_work(struct gfs2_sbd *sdp);
 extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
+extern void gfs2_gl_dq_holders(struct gfs2_sbd *sdp);
 extern void gfs2_glock_thaw(struct gfs2_sbd *sdp);
 extern void gfs2_glock_add_to_lru(struct gfs2_glock *gl);
 extern void gfs2_glock_free(struct gfs2_glock *gl);
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 95c79a3ec161..88185a341504 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -164,6 +164,11 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
 		}
 		if (!ret)
 			gfs2_make_fs_ro(sdp);
+		/*
+		 * Dequeue any pending non-system glock holders that can no
+		 * longer be granted because the file system is withdrawn.
+		 */
+		gfs2_gl_dq_holders(sdp);
 		gfs2_freeze_unlock(&freeze_gh);
 	}
 
-- 
cgit v1.2.3


From 86934198eefa10a71f35162b06c44c36d85b98ba Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Thu, 18 Aug 2022 13:32:38 -0500
Subject: gfs2: Clear flags when withdraw prevents xmote

There are a couple places in function do_xmote where normal processing
is circumvented due to withdraws in progress. However, since we bypass
most of do_xmote() we bypass telling dlm to lock the dlm lock, which
means dlm will never respond with a completion callback. Since the
completion callback ordinarily clears GLF_LOCK, this patch changes
function do_xmote to handle those situations more gracefully so the
file system may be unmounted after withdraw.

A very similar situation happens with the GLF_DEMOTE_IN_PROGRESS flag,
which is cleared by function finish_xmote(). Since the withdraw causes
us to skip the majority of do_xmote, it therefore also skips the call
to finish_xmote() so the DEMOTE_IN_PROGRESS flag needs to be cleared
manually.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glock.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 858616afcae6..dca2cbf0338c 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -59,6 +59,8 @@ typedef void (*glock_examiner) (struct gfs2_glock * gl);
 
 static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
 static void __gfs2_glock_dq(struct gfs2_holder *gh);
+static void handle_callback(struct gfs2_glock *gl, unsigned int state,
+			    unsigned long delay, bool remote);
 
 static struct dentry *gfs2_root;
 static struct workqueue_struct *glock_workqueue;
@@ -730,7 +732,8 @@ static bool is_system_glock(struct gfs2_glock *gl)
  *
  */
 
-static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target)
+static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh,
+					 unsigned int target)
 __releases(&gl->gl_lockref.lock)
 __acquires(&gl->gl_lockref.lock)
 {
@@ -741,7 +744,8 @@ __acquires(&gl->gl_lockref.lock)
 
 	if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl) &&
 	    gh && !(gh->gh_flags & LM_FLAG_NOEXP))
-		return;
+		goto skip_inval;
+
 	lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
 		      LM_FLAG_PRIORITY);
 	GLOCK_BUG_ON(gl, gl->gl_state == target);
@@ -826,6 +830,20 @@ skip_inval:
 	    (target != LM_ST_UNLOCKED ||
 	     test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags))) {
 		if (!is_system_glock(gl)) {
+			handle_callback(gl, LM_ST_UNLOCKED, 0, false); /* sets demote */
+			/*
+			 * Ordinarily, we would call dlm and its callback would call
+			 * finish_xmote, which would call state_change() to the new state.
+			 * Since we withdrew, we won't call dlm, so call state_change
+			 * manually, but to the UNLOCKED state we desire.
+			 */
+			state_change(gl, LM_ST_UNLOCKED);
+			/*
+			 * We skip telling dlm to do the locking, so we won't get a
+			 * reply that would otherwise clear GLF_LOCK. So we clear it here.
+			 */
+			clear_bit(GLF_LOCK, &gl->gl_flags);
+			clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
 			gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD);
 			goto out;
 		} else {
-- 
cgit v1.2.3


From 204c0300c4e99707e9fb6e57840aa1127060e63f Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Fri, 26 Aug 2022 15:12:17 +0200
Subject: gfs2: Switch from strlcpy to strscpy

Switch from strlcpy to strscpy and make sure that @count is the size of
the smaller of the source and destination buffers.  This prevents
reading beyond the end of the source buffer when the source string isn't
null terminated.

Found by a modified version of syzkaller.

Suggested-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/ops_fstype.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 549879929c84..236b59ef93b6 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -381,8 +381,10 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
 	if (!table[0])
 		table = sdp->sd_vfs->s_id;
 
-	strlcpy(sdp->sd_proto_name, proto, GFS2_FSNAME_LEN);
-	strlcpy(sdp->sd_table_name, table, GFS2_FSNAME_LEN);
+	BUILD_BUG_ON(GFS2_LOCKNAME_LEN > GFS2_FSNAME_LEN);
+
+	strscpy(sdp->sd_proto_name, proto, GFS2_LOCKNAME_LEN);
+	strscpy(sdp->sd_table_name, table, GFS2_LOCKNAME_LEN);
 
 	table = sdp->sd_table_name;
 	while ((table = strchr(table, '/')))
@@ -1439,13 +1441,13 @@ static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
 
 	switch (o) {
 	case Opt_lockproto:
-		strlcpy(args->ar_lockproto, param->string, GFS2_LOCKNAME_LEN);
+		strscpy(args->ar_lockproto, param->string, GFS2_LOCKNAME_LEN);
 		break;
 	case Opt_locktable:
-		strlcpy(args->ar_locktable, param->string, GFS2_LOCKNAME_LEN);
+		strscpy(args->ar_locktable, param->string, GFS2_LOCKNAME_LEN);
 		break;
 	case Opt_hostdata:
-		strlcpy(args->ar_hostdata, param->string, GFS2_LOCKNAME_LEN);
+		strscpy(args->ar_hostdata, param->string, GFS2_LOCKNAME_LEN);
 		break;
 	case Opt_spectator:
 		args->ar_spectator = 1;
-- 
cgit v1.2.3


From 670f8ce56dd0632dc29a0322e188cc73ce3c6b92 Mon Sep 17 00:00:00 2001
From: Andrew Price <anprice@redhat.com>
Date: Wed, 17 Aug 2022 13:22:00 +0100
Subject: gfs2: Check sb_bsize_shift after reading superblock

Fuzzers like to scribble over sb_bsize_shift but in reality it's very
unlikely that this field would be corrupted on its own. Nevertheless it
should be checked to avoid the possibility of messy mount errors due to
bad calculations. It's always a fixed value based on the block size so
we can just check that it's the expected value.

Tested with:

    mkfs.gfs2 -O -p lock_nolock /dev/vdb
    for i in 0 -1 64 65 32 33; do
        gfs2_edit -p sb field sb_bsize_shift $i /dev/vdb
        mount /dev/vdb /mnt/test && umount /mnt/test
    done

Before this patch we get a withdraw after

[   76.413681] gfs2: fsid=loop0.0: fatal: invalid metadata block
[   76.413681]   bh = 19 (type: exp=5, found=4)
[   76.413681]   function = gfs2_meta_buffer, file = fs/gfs2/meta_io.c, line = 492

and with UBSAN configured we also get complaints like

[   76.373395] UBSAN: shift-out-of-bounds in fs/gfs2/ops_fstype.c:295:19
[   76.373815] shift exponent 4294967287 is too large for 64-bit type 'long unsigned int'

After the patch, these complaints don't appear, mount fails immediately
and we get an explanation in dmesg.

Reported-by: syzbot+dcf33a7aae997956fe06@syzkaller.appspotmail.com
Signed-off-by: Andrew Price <anprice@redhat.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/ops_fstype.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 236b59ef93b6..c7e2e6238366 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -178,7 +178,10 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent)
 		pr_warn("Invalid block size\n");
 		return -EINVAL;
 	}
-
+	if (sb->sb_bsize_shift != ffs(sb->sb_bsize) - 1) {
+		pr_warn("Invalid block size shift\n");
+		return -EINVAL;
+	}
 	return 0;
 }
 
-- 
cgit v1.2.3


From 74b1b10e29b1f25e1a081fa82733baea65429d53 Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Tue, 30 Aug 2022 13:52:13 -0500
Subject: gfs2: Register fs after creating workqueues

Before this patch, the gfs2 file system was registered prior to creating
the three workqueues. In some cases this allowed dlm to send recovery
work to a workqueue that did not yet exist because gfs2 was still
initializing.

This patch changes the order of gfs2's initialization routine so it only
registers the file system after the work queues are created.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/main.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 14ae9de76277..afcb32854f14 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -151,14 +151,6 @@ static int __init init_gfs2_fs(void)
 	if (error)
 		goto fail_shrinker;
 
-	error = register_filesystem(&gfs2_fs_type);
-	if (error)
-		goto fail_fs1;
-
-	error = register_filesystem(&gfs2meta_fs_type);
-	if (error)
-		goto fail_fs2;
-
 	error = -ENOMEM;
 	gfs_recovery_wq = alloc_workqueue("gfs_recovery",
 					  WQ_MEM_RECLAIM | WQ_FREEZABLE, 0);
@@ -180,11 +172,23 @@ static int __init init_gfs2_fs(void)
 		goto fail_mempool;
 
 	gfs2_register_debugfs();
+	error = register_filesystem(&gfs2_fs_type);
+	if (error)
+		goto fail_fs1;
+
+	error = register_filesystem(&gfs2meta_fs_type);
+	if (error)
+		goto fail_fs2;
+
 
 	pr_info("GFS2 installed\n");
 
 	return 0;
 
+fail_fs2:
+	unregister_filesystem(&gfs2_fs_type);
+fail_fs1:
+	mempool_destroy(gfs2_page_pool);
 fail_mempool:
 	destroy_workqueue(gfs2_freeze_wq);
 fail_wq3:
@@ -192,10 +196,6 @@ fail_wq3:
 fail_wq2:
 	destroy_workqueue(gfs_recovery_wq);
 fail_wq1:
-	unregister_filesystem(&gfs2meta_fs_type);
-fail_fs2:
-	unregister_filesystem(&gfs2_fs_type);
-fail_fs1:
 	unregister_shrinker(&gfs2_qd_shrinker);
 fail_shrinker:
 	kmem_cache_destroy(gfs2_trans_cachep);
-- 
cgit v1.2.3