From c412a97cf6c5253fcf4ae5545be5775b2417d61b Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Mon, 22 Aug 2022 11:30:12 -0500 Subject: gfs2: Use TRY lock in gfs2_inode_lookup for UNLINKED inodes Before this patch, delete_work_func() would check for the GLF_DEMOTE flag on the iopen glock and if set, it would perform special processing. However, there was a race whereby the GLF_DEMOTE flag could be set by another process after the check. Then when it called gfs2_lookup_by_inum() which calls gfs2_inode_lookup(), it tried to lock the iopen glock in SH mode, but the GLF_DEMOTE flag prevented the request from being granted. But the iopen glock could never be demoted because that happens when the inode is evicted, and the evict was never completed because of the failed lookup. To fix that, change function gfs2_inode_lookup() so that when GFS2_BLKST_UNLINKED inodes are searched, it uses the LM_FLAG_TRY flag for the iopen glock. If the locking request fails, fail gfs2_inode_lookup() with -EAGAIN so that delete_work_func() can retry the operation later. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 8 +++++--- fs/gfs2/inode.c | 10 ++++++++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 41b6c89e4bf7..57dcfd05b362 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1018,16 +1018,18 @@ static void delete_work_func(struct work_struct *work) if (gfs2_queue_delete_work(gl, 5 * HZ)) return; } - goto out; } inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino, GFS2_BLKST_UNLINKED); - if (!IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { + if (PTR_ERR(inode) == -EAGAIN && + (gfs2_queue_delete_work(gl, 5 * HZ))) + return; + } else { d_prune_aliases(inode); iput(inode); } -out: gfs2_glock_put(gl); } diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index c8ec876f33ea..56ded979988c 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -130,6 +130,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, if (inode->i_state & I_NEW) { struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_glock *io_gl; + int extra_flags = 0; error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); @@ -141,9 +142,12 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, if (unlikely(error)) goto fail; - if (blktype != GFS2_BLKST_UNLINKED) + if (blktype == GFS2_BLKST_UNLINKED) + extra_flags |= LM_FLAG_TRY; + else gfs2_cancel_delete_work(io_gl); - error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, + error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, + GL_EXACT | extra_flags, &ip->i_iopen_gh); gfs2_glock_put(io_gl); if (unlikely(error)) @@ -210,6 +214,8 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, return inode; fail: + if (error == GLR_TRYFAILED) + error = -EAGAIN; if (gfs2_holder_initialized(&ip->i_iopen_gh)) gfs2_glock_dq_uninit(&ip->i_iopen_gh); if (gfs2_holder_initialized(&i_gh)) -- cgit v1.2.3 From 04133b607a78f2fd3daadbe5519513942b0f3a05 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 18 Aug 2022 13:32:36 -0500 Subject: gfs2: Prevent double iput for journal on error When a gfs2 file system is withdrawn it does iput on its journal to allow recovery from another cluster node. If it's unable to get a replacement inode for whatever reason, the journal descriptor would still be pointing at the evicted inode. So when unmount clears out the list of journals, it would do a second iput referencing the pointer. To avoid this, set the inode pointer to NULL. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/util.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 8241029a2a5d..95c79a3ec161 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -204,6 +204,7 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) * exception code in glock_dq. */ iput(inode); + sdp->sd_jdesc->jd_inode = NULL; /* * Wait until the journal inode's glock is freed. This allows try locks * on other nodes to be successful, otherwise we remain the owner of -- cgit v1.2.3 From 053640a73838400dca23087d66a9c0db579adafb Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 18 Aug 2022 13:32:37 -0500 Subject: gfs2: Dequeue waiters when withdrawn When a withdraw occurs, ordinary (not system) glocks may not be granted anymore. Later, when the file system is unmounted, gfs2_gl_hash_clear() tries to clear out all the glocks, but these un-grantable pending waiters prevent some glocks from being freed. So the unmount hangs, at least for its ten-minute timeout period. This patch takes measures to remove any pending waiters from the glocks that will never be granted. This allows the unmount to proceed in a reasonable period of time. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 14 ++++++++++++++ fs/gfs2/glock.h | 1 + fs/gfs2/util.c | 5 +++++ 3 files changed, 20 insertions(+) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 57dcfd05b362..858616afcae6 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -2196,6 +2196,20 @@ static void dump_glock_func(struct gfs2_glock *gl) dump_glock(NULL, gl, true); } +static void withdraw_dq(struct gfs2_glock *gl) +{ + spin_lock(&gl->gl_lockref.lock); + if (!__lockref_is_dead(&gl->gl_lockref) && + glock_blocked_by_withdraw(gl)) + do_error(gl, LM_OUT_ERROR); /* remove pending waiters */ + spin_unlock(&gl->gl_lockref.lock); +} + +void gfs2_gl_dq_holders(struct gfs2_sbd *sdp) +{ + glock_hash_walk(withdraw_dq, sdp); +} + /** * gfs2_gl_hash_clear - Empty out the glock hash table * @sdp: the filesystem diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 5aed8b500cf5..0199a3dcb114 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -274,6 +274,7 @@ extern void gfs2_cancel_delete_work(struct gfs2_glock *gl); extern bool gfs2_delete_work_queued(const struct gfs2_glock *gl); extern void gfs2_flush_delete_work(struct gfs2_sbd *sdp); extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); +extern void gfs2_gl_dq_holders(struct gfs2_sbd *sdp); extern void gfs2_glock_thaw(struct gfs2_sbd *sdp); extern void gfs2_glock_add_to_lru(struct gfs2_glock *gl); extern void gfs2_glock_free(struct gfs2_glock *gl); diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 95c79a3ec161..88185a341504 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -164,6 +164,11 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) } if (!ret) gfs2_make_fs_ro(sdp); + /* + * Dequeue any pending non-system glock holders that can no + * longer be granted because the file system is withdrawn. + */ + gfs2_gl_dq_holders(sdp); gfs2_freeze_unlock(&freeze_gh); } -- cgit v1.2.3 From 86934198eefa10a71f35162b06c44c36d85b98ba Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 18 Aug 2022 13:32:38 -0500 Subject: gfs2: Clear flags when withdraw prevents xmote There are a couple places in function do_xmote where normal processing is circumvented due to withdraws in progress. However, since we bypass most of do_xmote() we bypass telling dlm to lock the dlm lock, which means dlm will never respond with a completion callback. Since the completion callback ordinarily clears GLF_LOCK, this patch changes function do_xmote to handle those situations more gracefully so the file system may be unmounted after withdraw. A very similar situation happens with the GLF_DEMOTE_IN_PROGRESS flag, which is cleared by function finish_xmote(). Since the withdraw causes us to skip the majority of do_xmote, it therefore also skips the call to finish_xmote() so the DEMOTE_IN_PROGRESS flag needs to be cleared manually. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 858616afcae6..dca2cbf0338c 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -59,6 +59,8 @@ typedef void (*glock_examiner) (struct gfs2_glock * gl); static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); static void __gfs2_glock_dq(struct gfs2_holder *gh); +static void handle_callback(struct gfs2_glock *gl, unsigned int state, + unsigned long delay, bool remote); static struct dentry *gfs2_root; static struct workqueue_struct *glock_workqueue; @@ -730,7 +732,8 @@ static bool is_system_glock(struct gfs2_glock *gl) * */ -static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target) +static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, + unsigned int target) __releases(&gl->gl_lockref.lock) __acquires(&gl->gl_lockref.lock) { @@ -741,7 +744,8 @@ __acquires(&gl->gl_lockref.lock) if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl) && gh && !(gh->gh_flags & LM_FLAG_NOEXP)) - return; + goto skip_inval; + lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | LM_FLAG_PRIORITY); GLOCK_BUG_ON(gl, gl->gl_state == target); @@ -826,6 +830,20 @@ skip_inval: (target != LM_ST_UNLOCKED || test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags))) { if (!is_system_glock(gl)) { + handle_callback(gl, LM_ST_UNLOCKED, 0, false); /* sets demote */ + /* + * Ordinarily, we would call dlm and its callback would call + * finish_xmote, which would call state_change() to the new state. + * Since we withdrew, we won't call dlm, so call state_change + * manually, but to the UNLOCKED state we desire. + */ + state_change(gl, LM_ST_UNLOCKED); + /* + * We skip telling dlm to do the locking, so we won't get a + * reply that would otherwise clear GLF_LOCK. So we clear it here. + */ + clear_bit(GLF_LOCK, &gl->gl_flags); + clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD); goto out; } else { -- cgit v1.2.3 From 204c0300c4e99707e9fb6e57840aa1127060e63f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 26 Aug 2022 15:12:17 +0200 Subject: gfs2: Switch from strlcpy to strscpy Switch from strlcpy to strscpy and make sure that @count is the size of the smaller of the source and destination buffers. This prevents reading beyond the end of the source buffer when the source string isn't null terminated. Found by a modified version of syzkaller. Suggested-by: Wolfram Sang Signed-off-by: Andreas Gruenbacher --- fs/gfs2/ops_fstype.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 549879929c84..236b59ef93b6 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -381,8 +381,10 @@ static int init_names(struct gfs2_sbd *sdp, int silent) if (!table[0]) table = sdp->sd_vfs->s_id; - strlcpy(sdp->sd_proto_name, proto, GFS2_FSNAME_LEN); - strlcpy(sdp->sd_table_name, table, GFS2_FSNAME_LEN); + BUILD_BUG_ON(GFS2_LOCKNAME_LEN > GFS2_FSNAME_LEN); + + strscpy(sdp->sd_proto_name, proto, GFS2_LOCKNAME_LEN); + strscpy(sdp->sd_table_name, table, GFS2_LOCKNAME_LEN); table = sdp->sd_table_name; while ((table = strchr(table, '/'))) @@ -1439,13 +1441,13 @@ static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param) switch (o) { case Opt_lockproto: - strlcpy(args->ar_lockproto, param->string, GFS2_LOCKNAME_LEN); + strscpy(args->ar_lockproto, param->string, GFS2_LOCKNAME_LEN); break; case Opt_locktable: - strlcpy(args->ar_locktable, param->string, GFS2_LOCKNAME_LEN); + strscpy(args->ar_locktable, param->string, GFS2_LOCKNAME_LEN); break; case Opt_hostdata: - strlcpy(args->ar_hostdata, param->string, GFS2_LOCKNAME_LEN); + strscpy(args->ar_hostdata, param->string, GFS2_LOCKNAME_LEN); break; case Opt_spectator: args->ar_spectator = 1; -- cgit v1.2.3 From 670f8ce56dd0632dc29a0322e188cc73ce3c6b92 Mon Sep 17 00:00:00 2001 From: Andrew Price Date: Wed, 17 Aug 2022 13:22:00 +0100 Subject: gfs2: Check sb_bsize_shift after reading superblock Fuzzers like to scribble over sb_bsize_shift but in reality it's very unlikely that this field would be corrupted on its own. Nevertheless it should be checked to avoid the possibility of messy mount errors due to bad calculations. It's always a fixed value based on the block size so we can just check that it's the expected value. Tested with: mkfs.gfs2 -O -p lock_nolock /dev/vdb for i in 0 -1 64 65 32 33; do gfs2_edit -p sb field sb_bsize_shift $i /dev/vdb mount /dev/vdb /mnt/test && umount /mnt/test done Before this patch we get a withdraw after [ 76.413681] gfs2: fsid=loop0.0: fatal: invalid metadata block [ 76.413681] bh = 19 (type: exp=5, found=4) [ 76.413681] function = gfs2_meta_buffer, file = fs/gfs2/meta_io.c, line = 492 and with UBSAN configured we also get complaints like [ 76.373395] UBSAN: shift-out-of-bounds in fs/gfs2/ops_fstype.c:295:19 [ 76.373815] shift exponent 4294967287 is too large for 64-bit type 'long unsigned int' After the patch, these complaints don't appear, mount fails immediately and we get an explanation in dmesg. Reported-by: syzbot+dcf33a7aae997956fe06@syzkaller.appspotmail.com Signed-off-by: Andrew Price Signed-off-by: Andreas Gruenbacher --- fs/gfs2/ops_fstype.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 236b59ef93b6..c7e2e6238366 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -178,7 +178,10 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent) pr_warn("Invalid block size\n"); return -EINVAL; } - + if (sb->sb_bsize_shift != ffs(sb->sb_bsize) - 1) { + pr_warn("Invalid block size shift\n"); + return -EINVAL; + } return 0; } -- cgit v1.2.3 From 74b1b10e29b1f25e1a081fa82733baea65429d53 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 30 Aug 2022 13:52:13 -0500 Subject: gfs2: Register fs after creating workqueues Before this patch, the gfs2 file system was registered prior to creating the three workqueues. In some cases this allowed dlm to send recovery work to a workqueue that did not yet exist because gfs2 was still initializing. This patch changes the order of gfs2's initialization routine so it only registers the file system after the work queues are created. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/main.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 14ae9de76277..afcb32854f14 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -151,14 +151,6 @@ static int __init init_gfs2_fs(void) if (error) goto fail_shrinker; - error = register_filesystem(&gfs2_fs_type); - if (error) - goto fail_fs1; - - error = register_filesystem(&gfs2meta_fs_type); - if (error) - goto fail_fs2; - error = -ENOMEM; gfs_recovery_wq = alloc_workqueue("gfs_recovery", WQ_MEM_RECLAIM | WQ_FREEZABLE, 0); @@ -180,11 +172,23 @@ static int __init init_gfs2_fs(void) goto fail_mempool; gfs2_register_debugfs(); + error = register_filesystem(&gfs2_fs_type); + if (error) + goto fail_fs1; + + error = register_filesystem(&gfs2meta_fs_type); + if (error) + goto fail_fs2; + pr_info("GFS2 installed\n"); return 0; +fail_fs2: + unregister_filesystem(&gfs2_fs_type); +fail_fs1: + mempool_destroy(gfs2_page_pool); fail_mempool: destroy_workqueue(gfs2_freeze_wq); fail_wq3: @@ -192,10 +196,6 @@ fail_wq3: fail_wq2: destroy_workqueue(gfs_recovery_wq); fail_wq1: - unregister_filesystem(&gfs2meta_fs_type); -fail_fs2: - unregister_filesystem(&gfs2_fs_type); -fail_fs1: unregister_shrinker(&gfs2_qd_shrinker); fail_shrinker: kmem_cache_destroy(gfs2_trans_cachep); -- cgit v1.2.3