diff options
Diffstat (limited to 'fs/gfs2')
-rw-r--r-- | fs/gfs2/acl.c | 7 | ||||
-rw-r--r-- | fs/gfs2/aops.c | 11 | ||||
-rw-r--r-- | fs/gfs2/bmap.c | 9 | ||||
-rw-r--r-- | fs/gfs2/dir.c | 3 | ||||
-rw-r--r-- | fs/gfs2/file.c | 43 | ||||
-rw-r--r-- | fs/gfs2/glock.c | 137 | ||||
-rw-r--r-- | fs/gfs2/glops.c | 157 | ||||
-rw-r--r-- | fs/gfs2/incore.h | 27 | ||||
-rw-r--r-- | fs/gfs2/inode.c | 53 | ||||
-rw-r--r-- | fs/gfs2/lock_dlm.c | 52 | ||||
-rw-r--r-- | fs/gfs2/log.c | 288 | ||||
-rw-r--r-- | fs/gfs2/log.h | 1 | ||||
-rw-r--r-- | fs/gfs2/lops.c | 14 | ||||
-rw-r--r-- | fs/gfs2/meta_io.c | 3 | ||||
-rw-r--r-- | fs/gfs2/ops_fstype.c | 59 | ||||
-rw-r--r-- | fs/gfs2/quota.c | 76 | ||||
-rw-r--r-- | fs/gfs2/quota.h | 4 | ||||
-rw-r--r-- | fs/gfs2/recovery.c | 12 | ||||
-rw-r--r-- | fs/gfs2/rgrp.c | 88 | ||||
-rw-r--r-- | fs/gfs2/rgrp.h | 4 | ||||
-rw-r--r-- | fs/gfs2/super.c | 112 | ||||
-rw-r--r-- | fs/gfs2/super.h | 1 | ||||
-rw-r--r-- | fs/gfs2/sys.c | 5 | ||||
-rw-r--r-- | fs/gfs2/trans.c | 4 | ||||
-rw-r--r-- | fs/gfs2/util.c | 419 | ||||
-rw-r--r-- | fs/gfs2/util.h | 76 | ||||
-rw-r--r-- | fs/gfs2/xattr.c | 12 |
27 files changed, 1168 insertions, 509 deletions
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 09e6be8aa036..2e939f5fe751 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -21,6 +21,7 @@ #include "glock.h" #include "inode.h" #include "meta_io.h" +#include "quota.h" #include "rgrp.h" #include "trans.h" #include "util.h" @@ -116,14 +117,14 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) if (acl && acl->a_count > GFS2_ACL_MAX_ENTRIES(GFS2_SB(inode))) return -E2BIG; - ret = gfs2_rsqa_alloc(ip); + ret = gfs2_qa_get(ip); if (ret) return ret; if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); if (ret) - return ret; + goto out; need_unlock = true; } @@ -143,5 +144,7 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) unlock: if (need_unlock) gfs2_glock_dq_uninit(&gh); +out: + gfs2_qa_put(ip); return ret; } diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index ba83b49ce18c..786c1ce8f030 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -805,11 +805,16 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask) bd = bh->b_private; if (bd) { gfs2_assert_warn(sdp, bd->bd_bh == bh); - if (!list_empty(&bd->bd_list)) - list_del_init(&bd->bd_list); bd->bd_bh = NULL; bh->b_private = NULL; - kmem_cache_free(gfs2_bufdata_cachep, bd); + /* + * The bd may still be queued as a revoke, in which + * case we must not dequeue nor free it. + */ + if (!bd->bd_blkno && !list_empty(&bd->bd_list)) + list_del_init(&bd->bd_list); + if (list_empty(&bd->bd_list)) + kmem_cache_free(gfs2_bufdata_cachep, bd); } bh = bh->b_this_page; diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 08f6fbb3655e..936a8ec6b48e 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -2183,7 +2183,7 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize) inode_dio_wait(inode); - ret = gfs2_rsqa_alloc(ip); + ret = gfs2_qa_get(ip); if (ret) goto out; @@ -2194,7 +2194,8 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize) ret = do_shrink(inode, newsize); out: - gfs2_rsqa_delete(ip, NULL); + gfs2_rs_delete(ip, NULL); + gfs2_qa_put(ip); return ret; } @@ -2223,7 +2224,7 @@ void gfs2_free_journal_extents(struct gfs2_jdesc *jd) struct gfs2_journal_extent *jext; while(!list_empty(&jd->extent_list)) { - jext = list_entry(jd->extent_list.next, struct gfs2_journal_extent, list); + jext = list_first_entry(&jd->extent_list, struct gfs2_journal_extent, list); list_del(&jext->list); kfree(jext); } @@ -2244,7 +2245,7 @@ static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 b struct gfs2_journal_extent *jext; if (!list_empty(&jd->extent_list)) { - jext = list_entry(jd->extent_list.prev, struct gfs2_journal_extent, list); + jext = list_last_entry(&jd->extent_list, struct gfs2_journal_extent, list); if ((jext->dblock + jext->blocks) == dblock) { jext->blocks += blocks; return 0; diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index c8b62577e2f2..c3f7732415be 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -2028,7 +2028,8 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, error = gfs2_trans_begin(sdp, rg_blocks + (DIV_ROUND_UP(size, sdp->sd_jbsize) + 1) + - RES_DINODE + RES_STATFS + RES_QUOTA, l_blocks); + RES_DINODE + RES_STATFS + RES_QUOTA, RES_DINODE + + l_blocks); if (error) goto out_rg_gunlock; diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index cb26be6f4351..fe305e4bfd37 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -458,10 +458,6 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf) sb_start_pagefault(inode->i_sb); - ret = gfs2_rsqa_alloc(ip); - if (ret) - goto out; - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); if (ret) @@ -558,7 +554,6 @@ out_uninit: set_page_dirty(page); wait_for_stable_page(page); } -out: sb_end_pagefault(inode->i_sb); return block_page_mkwrite_return(ret); } @@ -635,7 +630,17 @@ int gfs2_open_common(struct inode *inode, struct file *file) gfs2_assert_warn(GFS2_SB(inode), !file->private_data); file->private_data = fp; + if (file->f_mode & FMODE_WRITE) { + ret = gfs2_qa_get(GFS2_I(inode)); + if (ret) + goto fail; + } return 0; + +fail: + kfree(file->private_data); + file->private_data = NULL; + return ret; } /** @@ -690,10 +695,10 @@ static int gfs2_release(struct inode *inode, struct file *file) kfree(file->private_data); file->private_data = NULL; - if (!(file->f_mode & FMODE_WRITE)) - return 0; - - gfs2_rsqa_delete(ip, &inode->i_writecount); + if (file->f_mode & FMODE_WRITE) { + gfs2_rs_delete(ip, &inode->i_writecount); + gfs2_qa_put(ip); + } return 0; } @@ -849,10 +854,6 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct gfs2_inode *ip = GFS2_I(inode); ssize_t ret; - ret = gfs2_rsqa_alloc(ip); - if (ret) - return ret; - gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from)); if (iocb->ki_flags & IOCB_APPEND) { @@ -1149,17 +1150,11 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t le if (mode & FALLOC_FL_PUNCH_HOLE) { ret = __gfs2_punch_hole(file, offset, len); } else { - ret = gfs2_rsqa_alloc(ip); - if (ret) - goto out_putw; - ret = __gfs2_fallocate(file, mode, offset, len); - if (ret) gfs2_rs_deltree(&ip->i_res); } -out_putw: put_write_access(inode); out_unlock: gfs2_glock_dq(&gh); @@ -1173,16 +1168,12 @@ static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { - int error; - struct gfs2_inode *ip = GFS2_I(out->f_mapping->host); - - error = gfs2_rsqa_alloc(ip); - if (error) - return (ssize_t)error; + ssize_t ret; gfs2_size_hint(out, *ppos, len); - return iter_file_splice_write(pipe, out, ppos, len, flags); + ret = iter_file_splice_write(pipe, out, ppos, len, flags); + return ret; } #ifdef CONFIG_GFS2_FS_LOCKING_DLM diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index d0eceaff3cea..29f9b6684b74 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -133,6 +133,33 @@ static void gfs2_glock_dealloc(struct rcu_head *rcu) } } +/** + * glock_blocked_by_withdraw - determine if we can still use a glock + * @gl: the glock + * + * We need to allow some glocks to be enqueued, dequeued, promoted, and demoted + * when we're withdrawn. For example, to maintain metadata integrity, we should + * disallow the use of inode and rgrp glocks when withdrawn. Other glocks, like + * iopen or the transaction glocks may be safely used because none of their + * metadata goes through the journal. So in general, we should disallow all + * glocks that are journaled, and allow all the others. One exception is: + * we need to allow our active journal to be promoted and demoted so others + * may recover it and we can reacquire it when they're done. + */ +static bool glock_blocked_by_withdraw(struct gfs2_glock *gl) +{ + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + + if (likely(!gfs2_withdrawn(sdp))) + return false; + if (gl->gl_ops->go_flags & GLOF_NONDISK) + return false; + if (!sdp->sd_jdesc || + gl->gl_name.ln_number == sdp->sd_jdesc->jd_no_addr) + return false; + return true; +} + void gfs2_glock_free(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; @@ -244,7 +271,7 @@ static void __gfs2_glock_put(struct gfs2_glock *gl) gfs2_glock_remove_from_lru(gl); spin_unlock(&gl->gl_lockref.lock); GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); - GLOCK_BUG_ON(gl, mapping && mapping->nrpages); + GLOCK_BUG_ON(gl, mapping && mapping->nrpages && !gfs2_withdrawn(sdp)); trace_gfs2_glock_put(gl); sdp->sd_lockstruct.ls_ops->lm_put_lock(gl); } @@ -281,7 +308,7 @@ void gfs2_glock_put(struct gfs2_glock *gl) static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holder *gh) { - const struct gfs2_holder *gh_head = list_entry(gl->gl_holders.next, const struct gfs2_holder, gh_list); + const struct gfs2_holder *gh_head = list_first_entry(&gl->gl_holders, const struct gfs2_holder, gh_list); if ((gh->gh_state == LM_ST_EXCLUSIVE || gh_head->gh_state == LM_ST_EXCLUSIVE) && gh != gh_head) return 0; @@ -549,8 +576,8 @@ __acquires(&gl->gl_lockref.lock) unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0); int ret; - if (unlikely(gfs2_withdrawn(sdp)) && - target != LM_ST_UNLOCKED) + if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl) && + gh && !(gh->gh_flags & LM_FLAG_NOEXP)) return; lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | LM_FLAG_PRIORITY); @@ -575,13 +602,64 @@ __acquires(&gl->gl_lockref.lock) (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB))) clear_bit(GLF_BLOCKING, &gl->gl_flags); spin_unlock(&gl->gl_lockref.lock); - if (glops->go_sync) - glops->go_sync(gl); - if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) + if (glops->go_sync) { + ret = glops->go_sync(gl); + /* If we had a problem syncing (due to io errors or whatever, + * we should not invalidate the metadata or tell dlm to + * release the glock to other nodes. + */ + if (ret) { + if (cmpxchg(&sdp->sd_log_error, 0, ret)) { + fs_err(sdp, "Error %d syncing glock \n", ret); + gfs2_dump_glock(NULL, gl, true); + } + return; + } + } + if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) { + /* + * The call to go_sync should have cleared out the ail list. + * If there are still items, we have a problem. We ought to + * withdraw, but we can't because the withdraw code also uses + * glocks. Warn about the error, dump the glock, then fall + * through and wait for logd to do the withdraw for us. + */ + if ((atomic_read(&gl->gl_ail_count) != 0) && + (!cmpxchg(&sdp->sd_log_error, 0, -EIO))) { + gfs2_assert_warn(sdp, !atomic_read(&gl->gl_ail_count)); + gfs2_dump_glock(NULL, gl, true); + } glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA); - clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); + clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); + } gfs2_glock_hold(gl); + /* + * Check for an error encountered since we called go_sync and go_inval. + * If so, we can't withdraw from the glock code because the withdraw + * code itself uses glocks (see function signal_our_withdraw) to + * change the mount to read-only. Most importantly, we must not call + * dlm to unlock the glock until the journal is in a known good state + * (after journal replay) otherwise other nodes may use the object + * (rgrp or dinode) and then later, journal replay will corrupt the + * file system. The best we can do here is wait for the logd daemon + * to see sd_log_error and withdraw, and in the meantime, requeue the + * work for later. + * + * However, if we're just unlocking the lock (say, for unmount, when + * gfs2_gl_hash_clear calls clear_glock) and recovery is complete + * then it's okay to tell dlm to unlock it. + */ + if (unlikely(sdp->sd_log_error && !gfs2_withdrawn(sdp))) + gfs2_withdraw_delayed(sdp); + if (glock_blocked_by_withdraw(gl)) { + if (target != LM_ST_UNLOCKED || + test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags)) { + gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD); + goto out; + } + } + if (sdp->sd_lockstruct.ls_ops->lm_lock) { /* lock_dlm */ ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); @@ -590,8 +668,7 @@ __acquires(&gl->gl_lockref.lock) test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags)) { finish_xmote(gl, target); gfs2_glock_queue_work(gl, 0); - } - else if (ret) { + } else if (ret) { fs_err(sdp, "lm_lock ret %d\n", ret); GLOCK_BUG_ON(gl, !gfs2_withdrawn(sdp)); } @@ -599,7 +676,7 @@ __acquires(&gl->gl_lockref.lock) finish_xmote(gl, target); gfs2_glock_queue_work(gl, 0); } - +out: spin_lock(&gl->gl_lockref.lock); } @@ -613,7 +690,7 @@ static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl) struct gfs2_holder *gh; if (!list_empty(&gl->gl_holders)) { - gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); + gh = list_first_entry(&gl->gl_holders, struct gfs2_holder, gh_list); if (test_bit(HIF_HOLDER, &gh->gh_iflags)) return gh; } @@ -645,6 +722,9 @@ __acquires(&gl->gl_lockref.lock) goto out_unlock; if (nonblock) goto out_sched; + smp_mb(); + if (atomic_read(&gl->gl_revokes) != 0) + goto out_sched; set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE); gl->gl_target = gl->gl_demote_state; @@ -1160,7 +1240,7 @@ fail: } list_add_tail(&gh->gh_list, insert_pt); do_cancel: - gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); + gh = list_first_entry(&gl->gl_holders, struct gfs2_holder, gh_list); if (!(gh->gh_flags & LM_FLAG_PRIORITY)) { spin_unlock(&gl->gl_lockref.lock); if (sdp->sd_lockstruct.ls_ops->lm_cancel) @@ -1194,10 +1274,9 @@ trap_recursive: int gfs2_glock_nq(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; - struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; int error = 0; - if (unlikely(gfs2_withdrawn(sdp))) + if (glock_blocked_by_withdraw(gl) && !(gh->gh_flags & LM_FLAG_NOEXP)) return -EIO; if (test_bit(GLF_LRU, &gl->gl_flags)) @@ -1241,24 +1320,32 @@ int gfs2_glock_poll(struct gfs2_holder *gh) void gfs2_glock_dq(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; - const struct gfs2_glock_operations *glops = gl->gl_ops; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; unsigned delay = 0; int fast_path = 0; spin_lock(&gl->gl_lockref.lock); + /* + * If we're in the process of file system withdraw, we cannot just + * dequeue any glocks until our journal is recovered, lest we + * introduce file system corruption. We need two exceptions to this + * rule: We need to allow unlocking of nondisk glocks and the glock + * for our own journal that needs recovery. + */ + if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) && + glock_blocked_by_withdraw(gl) && + gh->gh_gl != sdp->sd_jinode_gl) { + sdp->sd_glock_dqs_held++; + might_sleep(); + wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY, + TASK_UNINTERRUPTIBLE); + } if (gh->gh_flags & GL_NOCACHE) handle_callback(gl, LM_ST_UNLOCKED, 0, false); list_del_init(&gh->gh_list); clear_bit(HIF_HOLDER, &gh->gh_iflags); if (find_first_holder(gl) == NULL) { - if (glops->go_unlock) { - GLOCK_BUG_ON(gl, test_and_set_bit(GLF_LOCK, &gl->gl_flags)); - spin_unlock(&gl->gl_lockref.lock); - glops->go_unlock(gh); - spin_lock(&gl->gl_lockref.lock); - clear_bit(GLF_LOCK, &gl->gl_flags); - } if (list_empty(&gl->gl_holders) && !test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && !test_bit(GLF_DEMOTE, &gl->gl_flags)) @@ -1555,7 +1642,7 @@ __acquires(&lru_lock) list_sort(NULL, list, glock_cmp); while(!list_empty(list)) { - gl = list_entry(list->next, struct gfs2_glock, gl_lru); + gl = list_first_entry(list, struct gfs2_glock, gl_lru); list_del_init(&gl->gl_lru); if (!spin_trylock(&gl->gl_lockref.lock)) { add_back_to_lru: @@ -1596,7 +1683,7 @@ static long gfs2_scan_glock_lru(int nr) spin_lock(&lru_lock); while ((nr-- >= 0) && !list_empty(&lru_list)) { - gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); + gl = list_first_entry(&lru_list, struct gfs2_glock, gl_lru); /* Test for being demotable */ if (!test_bit(GLF_LOCK, &gl->gl_flags)) { diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 061d22e1ceb6..9e9c7a4b8c66 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -29,6 +29,8 @@ struct workqueue_struct *gfs2_freeze_wq; +extern struct workqueue_struct *gfs2_control_wq; + static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh) { fs_err(gl->gl_name.ln_sbd, @@ -39,7 +41,8 @@ static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh) fs_err(gl->gl_name.ln_sbd, "AIL glock %u:%llu mapping %p\n", gl->gl_name.ln_type, gl->gl_name.ln_number, gfs2_glock2aspace(gl)); - gfs2_lm_withdraw(gl->gl_name.ln_sbd, "AIL error\n"); + gfs2_lm(gl->gl_name.ln_sbd, "AIL error\n"); + gfs2_withdraw(gl->gl_name.ln_sbd); } /** @@ -79,34 +82,62 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync, } -static void gfs2_ail_empty_gl(struct gfs2_glock *gl) +static int gfs2_ail_empty_gl(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_trans tr; + int ret; memset(&tr, 0, sizeof(tr)); INIT_LIST_HEAD(&tr.tr_buf); INIT_LIST_HEAD(&tr.tr_databuf); tr.tr_revokes = atomic_read(&gl->gl_ail_count); - if (!tr.tr_revokes) - return; + if (!tr.tr_revokes) { + bool have_revokes; + bool log_in_flight; + + /* + * We have nothing on the ail, but there could be revokes on + * the sdp revoke queue, in which case, we still want to flush + * the log and wait for it to finish. + * + * If the sdp revoke list is empty too, we might still have an + * io outstanding for writing revokes, so we should wait for + * it before returning. + * + * If none of these conditions are true, our revokes are all + * flushed and we can return. + */ + gfs2_log_lock(sdp); + have_revokes = !list_empty(&sdp->sd_log_revokes); + log_in_flight = atomic_read(&sdp->sd_log_in_flight); + gfs2_log_unlock(sdp); + if (have_revokes) + goto flush; + if (log_in_flight) + log_flush_wait(sdp); + return 0; + } /* A shortened, inline version of gfs2_trans_begin() * tr->alloced is not set since the transaction structure is * on the stack */ tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes); tr.tr_ip = _RET_IP_; - if (gfs2_log_reserve(sdp, tr.tr_reserved) < 0) - return; + ret = gfs2_log_reserve(sdp, tr.tr_reserved); + if (ret < 0) + return ret; WARN_ON_ONCE(current->journal_info); current->journal_info = &tr; __gfs2_ail_flush(gl, 0, tr.tr_revokes); gfs2_trans_end(sdp); +flush: gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_AIL_EMPTY_GL); + return 0; } void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) @@ -140,35 +171,32 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) * return to caller to demote/unlock the glock until I/O is complete. */ -static void rgrp_go_sync(struct gfs2_glock *gl) +static int rgrp_go_sync(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct address_space *mapping = &sdp->sd_aspace; - struct gfs2_rgrpd *rgd; + struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl); int error; - spin_lock(&gl->gl_lockref.lock); - rgd = gl->gl_object; - if (rgd) - gfs2_rgrp_brelse(rgd); - spin_unlock(&gl->gl_lockref.lock); - if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) - return; + return 0; GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); gfs2_log_flush(sdp, gl, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_RGRP_GO_SYNC); filemap_fdatawrite_range(mapping, gl->gl_vm.start, gl->gl_vm.end); error = filemap_fdatawait_range(mapping, gl->gl_vm.start, gl->gl_vm.end); + WARN_ON_ONCE(error); mapping_set_error(mapping, error); - gfs2_ail_empty_gl(gl); + if (!error) + error = gfs2_ail_empty_gl(gl); spin_lock(&gl->gl_lockref.lock); rgd = gl->gl_object; if (rgd) gfs2_free_clones(rgd); spin_unlock(&gl->gl_lockref.lock); + return error; } /** @@ -191,7 +219,6 @@ static void rgrp_go_inval(struct gfs2_glock *gl, int flags) gfs2_rgrp_brelse(rgd); WARN_ON_ONCE(!(flags & DIO_METADATA)); - gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); truncate_inode_pages_range(mapping, gl->gl_vm.start, gl->gl_vm.end); if (rgd) @@ -236,12 +263,12 @@ static void gfs2_clear_glop_pending(struct gfs2_inode *ip) * */ -static void inode_go_sync(struct gfs2_glock *gl) +static int inode_go_sync(struct gfs2_glock *gl) { struct gfs2_inode *ip = gfs2_glock2inode(gl); int isreg = ip && S_ISREG(ip->i_inode.i_mode); struct address_space *metamapping = gfs2_glock2aspace(gl); - int error; + int error = 0; if (isreg) { if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) @@ -274,6 +301,7 @@ static void inode_go_sync(struct gfs2_glock *gl) out: gfs2_clear_glop_pending(ip); + return error; } /** @@ -291,8 +319,6 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) { struct gfs2_inode *ip = gfs2_glock2inode(gl); - gfs2_assert_withdraw(gl->gl_name.ln_sbd, !atomic_read(&gl->gl_ail_count)); - if (flags & DIO_METADATA) { struct address_space *mapping = gfs2_glock2aspace(gl); truncate_inode_pages(mapping, 0); @@ -496,24 +522,29 @@ static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl, * */ -static void freeze_go_sync(struct gfs2_glock *gl) +static int freeze_go_sync(struct gfs2_glock *gl) { int error = 0; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - if (gl->gl_state == LM_ST_SHARED && + if (gl->gl_state == LM_ST_SHARED && !gfs2_withdrawn(sdp) && test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE); error = freeze_super(sdp->sd_vfs); if (error) { fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n", error); + if (gfs2_withdrawn(sdp)) { + atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); + return 0; + } gfs2_assert_withdraw(sdp, 0); } queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work); gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE | GFS2_LFC_FREEZE_GO_SYNC); } + return 0; } /** @@ -582,8 +613,76 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote) } } +/** + * inode_go_free - wake up anyone waiting for dlm's unlock ast to free it + * @gl: glock being freed + * + * For now, this is only used for the journal inode glock. In withdraw + * situations, we need to wait for the glock to be freed so that we know + * other nodes may proceed with recovery / journal replay. + */ +static void inode_go_free(struct gfs2_glock *gl) +{ + /* Note that we cannot reference gl_object because it's already set + * to NULL by this point in its lifecycle. */ + if (!test_bit(GLF_FREEING, &gl->gl_flags)) + return; + clear_bit_unlock(GLF_FREEING, &gl->gl_flags); + wake_up_bit(&gl->gl_flags, GLF_FREEING); +} + +/** + * nondisk_go_callback - used to signal when a node did a withdraw + * @gl: the nondisk glock + * @remote: true if this came from a different cluster node + * + */ +static void nondisk_go_callback(struct gfs2_glock *gl, bool remote) +{ + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + + /* Ignore the callback unless it's from another node, and it's the + live lock. */ + if (!remote || gl->gl_name.ln_number != GFS2_LIVE_LOCK) + return; + + /* First order of business is to cancel the demote request. We don't + * really want to demote a nondisk glock. At best it's just to inform + * us of another node's withdraw. We'll keep it in SH mode. */ + clear_bit(GLF_DEMOTE, &gl->gl_flags); + clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags); + + /* Ignore the unlock if we're withdrawn, unmounting, or in recovery. */ + if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) || + test_bit(SDF_WITHDRAWN, &sdp->sd_flags) || + test_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags)) + return; + + /* We only care when a node wants us to unlock, because that means + * they want a journal recovered. */ + if (gl->gl_demote_state != LM_ST_UNLOCKED) + return; + + if (sdp->sd_args.ar_spectator) { + fs_warn(sdp, "Spectator node cannot recover journals.\n"); + return; + } + + fs_warn(sdp, "Some node has withdrawn; checking for recovery.\n"); + set_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags); + /* + * We can't call remote_withdraw directly here or gfs2_recover_journal + * because this is called from the glock unlock function and the + * remote_withdraw needs to enqueue and dequeue the same "live" glock + * we were called from. So we queue it to the control work queue in + * lock_dlm. + */ + queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0); +} + const struct gfs2_glock_operations gfs2_meta_glops = { .go_type = LM_TYPE_META, + .go_flags = GLOF_NONDISK, }; const struct gfs2_glock_operations gfs2_inode_glops = { @@ -594,13 +693,13 @@ const struct gfs2_glock_operations gfs2_inode_glops = { .go_dump = inode_go_dump, .go_type = LM_TYPE_INODE, .go_flags = GLOF_ASPACE | GLOF_LRU, + .go_free = inode_go_free, }; const struct gfs2_glock_operations gfs2_rgrp_glops = { .go_sync = rgrp_go_sync, .go_inval = rgrp_go_inval, .go_lock = gfs2_rgrp_go_lock, - .go_unlock = gfs2_rgrp_go_unlock, .go_dump = gfs2_rgrp_dump, .go_type = LM_TYPE_RGRP, .go_flags = GLOF_LVB, @@ -611,30 +710,34 @@ const struct gfs2_glock_operations gfs2_freeze_glops = { .go_xmote_bh = freeze_go_xmote_bh, .go_demote_ok = freeze_go_demote_ok, .go_type = LM_TYPE_NONDISK, + .go_flags = GLOF_NONDISK, }; const struct gfs2_glock_operations gfs2_iopen_glops = { .go_type = LM_TYPE_IOPEN, .go_callback = iopen_go_callback, - .go_flags = GLOF_LRU, + .go_flags = GLOF_LRU | GLOF_NONDISK, }; const struct gfs2_glock_operations gfs2_flock_glops = { .go_type = LM_TYPE_FLOCK, - .go_flags = GLOF_LRU, + .go_flags = GLOF_LRU | GLOF_NONDISK, }; const struct gfs2_glock_operations gfs2_nondisk_glops = { .go_type = LM_TYPE_NONDISK, + .go_flags = GLOF_NONDISK, + .go_callback = nondisk_go_callback, }; const struct gfs2_glock_operations gfs2_quota_glops = { .go_type = LM_TYPE_QUOTA, - .go_flags = GLOF_LVB | GLOF_LRU, + .go_flags = GLOF_LVB | GLOF_LRU | GLOF_NONDISK, }; const struct gfs2_glock_operations gfs2_journal_glops = { .go_type = LM_TYPE_JOURNAL, + .go_flags = GLOF_NONDISK, }; const struct gfs2_glock_operations *gfs2_glops_list[] = { diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 9fd88ed18807..84a824293a78 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -234,20 +234,21 @@ struct lm_lockname { struct gfs2_glock_operations { - void (*go_sync) (struct gfs2_glock *gl); + int (*go_sync) (struct gfs2_glock *gl); int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh); void (*go_inval) (struct gfs2_glock *gl, int flags); int (*go_demote_ok) (const struct gfs2_glock *gl); int (*go_lock) (struct gfs2_holder *gh); - void (*go_unlock) (struct gfs2_holder *gh); void (*go_dump)(struct seq_file *seq, struct gfs2_glock *gl, const char *fs_id_buf); void (*go_callback)(struct gfs2_glock *gl, bool remote); + void (*go_free)(struct gfs2_glock *gl); const int go_type; const unsigned long go_flags; -#define GLOF_ASPACE 1 -#define GLOF_LVB 2 -#define GLOF_LRU 4 +#define GLOF_ASPACE 1 /* address space attached */ +#define GLOF_LVB 2 /* Lock Value Block attached */ +#define GLOF_LRU 4 /* LRU managed */ +#define GLOF_NONDISK 8 /* not I/O related */ }; enum { @@ -294,6 +295,7 @@ struct gfs2_qadata { /* quota allocation data */ struct gfs2_quota_data *qa_qd[2 * GFS2_MAXQUOTAS]; struct gfs2_holder qa_qd_ghs[2 * GFS2_MAXQUOTAS]; unsigned int qa_qd_num; + int qa_ref; }; /* Resource group multi-block reservation, in order of appearance: @@ -343,6 +345,7 @@ enum { GLF_OBJECT = 14, /* Used only for tracing */ GLF_BLOCKING = 15, GLF_INODE_CREATING = 16, /* Inode creation occurring */ + GLF_FREEING = 18, /* Wait for glock to be freed */ }; struct gfs2_glock { @@ -542,6 +545,7 @@ struct gfs2_jdesc { struct list_head jd_revoke_list; unsigned int jd_replay_tail; + u64 jd_no_addr; }; struct gfs2_statfs_change_host { @@ -616,8 +620,12 @@ enum { SDF_RORECOVERY = 7, /* read only recovery */ SDF_SKIP_DLM_UNLOCK = 8, SDF_FORCE_AIL_FLUSH = 9, - SDF_AIL1_IO_ERROR = 10, - SDF_FS_FROZEN = 11, + SDF_FS_FROZEN = 10, + SDF_WITHDRAWING = 11, /* Will withdraw eventually */ + SDF_WITHDRAW_IN_PROG = 12, /* Withdraw is in progress */ + SDF_REMOTE_WITHDRAW = 13, /* Performing remote recovery */ + SDF_WITHDRAW_RECOVERY = 14, /* Wait for journal recovery when we are + withdrawing */ }; enum gfs2_freeze_state { @@ -768,6 +776,7 @@ struct gfs2_sbd { struct gfs2_jdesc *sd_jdesc; struct gfs2_holder sd_journal_gh; struct gfs2_holder sd_jinode_gh; + struct gfs2_glock *sd_jinode_gl; struct gfs2_holder sd_sc_gh; struct gfs2_holder sd_qc_gh; @@ -828,7 +837,8 @@ struct gfs2_sbd { atomic_t sd_log_in_flight; struct bio *sd_log_bio; wait_queue_head_t sd_log_flush_wait; - int sd_log_error; + int sd_log_error; /* First log error */ + wait_queue_head_t sd_withdraw_wait; atomic_t sd_reserving_log; wait_queue_head_t sd_reserving_log_wait; @@ -852,6 +862,7 @@ struct gfs2_sbd { unsigned long sd_last_warning; struct dentry *debugfs_dir; /* debugfs directory */ + unsigned long sd_glock_dqs_held; }; static inline void gfs2_glstats_inc(struct gfs2_glock *gl, int which) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 8294851a9dd9..70b2d3a1e866 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -144,7 +144,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl); if (unlikely(error)) - goto fail_put; + goto fail; if (type == DT_UNKNOWN || blktype != GFS2_BLKST_FREE) { /* @@ -155,13 +155,13 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &i_gh); if (error) - goto fail_put; + goto fail; if (blktype != GFS2_BLKST_FREE) { error = gfs2_check_blk_type(sdp, no_addr, blktype); if (error) - goto fail_put; + goto fail; } } @@ -169,7 +169,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, set_bit(GIF_INVALID, &ip->i_flags); error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); if (unlikely(error)) - goto fail_put; + goto fail; glock_set_object(ip->i_iopen_gh.gh_gl, ip); gfs2_glock_put(io_gl); io_gl = NULL; @@ -182,7 +182,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, /* Inode glock must be locked already */ error = gfs2_inode_refresh(GFS2_I(inode)); if (error) - goto fail_refresh; + goto fail; } else { ip->i_no_formal_ino = no_formal_ino; inode->i_mode = DT2IF(type); @@ -197,17 +197,11 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, gfs2_glock_dq_uninit(&i_gh); return inode; -fail_refresh: - ip->i_iopen_gh.gh_flags |= GL_NOCACHE; - glock_clear_object(ip->i_iopen_gh.gh_gl, ip); - gfs2_glock_dq_uninit(&ip->i_iopen_gh); -fail_put: +fail: if (io_gl) gfs2_glock_put(io_gl); - glock_clear_object(ip->i_gl, ip); if (gfs2_holder_initialized(&i_gh)) gfs2_glock_dq_uninit(&i_gh); -fail: iget_failed(inode); return ERR_PTR(error); } @@ -594,13 +588,13 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (!name->len || name->len > GFS2_FNAMESIZE) return -ENAMETOOLONG; - error = gfs2_rsqa_alloc(dip); + error = gfs2_qa_get(dip); if (error) return error; error = gfs2_rindex_update(sdp); if (error) - return error; + goto fail; error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); if (error) @@ -647,7 +641,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, goto fail_gunlock; ip = GFS2_I(inode); - error = gfs2_rsqa_alloc(ip); + error = gfs2_qa_get(ip); if (error) goto fail_free_acls; @@ -782,11 +776,13 @@ fail_gunlock2: clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags); gfs2_glock_put(io_gl); fail_free_inode: + gfs2_qa_put(ip); if (ip->i_gl) { glock_clear_object(ip->i_gl, ip); gfs2_glock_put(ip->i_gl); } - gfs2_rsqa_delete(ip, NULL); + gfs2_rs_delete(ip, NULL); + gfs2_qa_put(ip); fail_free_acls: posix_acl_release(default_acl); posix_acl_release(acl); @@ -804,6 +800,7 @@ fail_gunlock: if (gfs2_holder_initialized(ghs + 1)) gfs2_glock_dq_uninit(ghs + 1); fail: + gfs2_qa_put(dip); return error; } @@ -905,7 +902,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, if (S_ISDIR(inode->i_mode)) return -EPERM; - error = gfs2_rsqa_alloc(dip); + error = gfs2_qa_get(dip); if (error) return error; @@ -1008,6 +1005,7 @@ out_gunlock: out_child: gfs2_glock_dq(ghs); out_parent: + gfs2_qa_put(ip); gfs2_holder_uninit(ghs); gfs2_holder_uninit(ghs + 1); return error; @@ -1368,7 +1366,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (error) return error; - error = gfs2_rsqa_alloc(ndip); + error = gfs2_qa_get(ndip); if (error) return error; @@ -1568,6 +1566,7 @@ out_gunlock_r: if (gfs2_holder_initialized(&r_gh)) gfs2_glock_dq_uninit(&r_gh); out: + gfs2_qa_put(ndip); return error; } @@ -1879,10 +1878,9 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) ouid = nuid = NO_UID_QUOTA_CHANGE; if (!(attr->ia_valid & ATTR_GID) || gid_eq(ogid, ngid)) ogid = ngid = NO_GID_QUOTA_CHANGE; - - error = gfs2_rsqa_alloc(ip); + error = gfs2_qa_get(ip); if (error) - goto out; + return error; error = gfs2_rindex_update(sdp); if (error) @@ -1920,6 +1918,7 @@ out_end_trans: out_gunlock_q: gfs2_quota_unlock(ip); out: + gfs2_qa_put(ip); return error; } @@ -1941,21 +1940,21 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr) struct gfs2_holder i_gh; int error; - error = gfs2_rsqa_alloc(ip); + error = gfs2_qa_get(ip); if (error) return error; error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); if (error) - return error; + goto out; error = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto out; + goto error; error = setattr_prepare(dentry, attr); if (error) - goto out; + goto error; if (attr->ia_valid & ATTR_SIZE) error = gfs2_setattr_size(inode, attr->ia_size); @@ -1967,10 +1966,12 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr) error = posix_acl_chmod(inode, inode->i_mode); } -out: +error: if (!error) mark_inode_dirty(inode); gfs2_glock_dq_uninit(&i_gh); +out: + gfs2_qa_put(ip); return error; } diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 7c7197343ee2..9f2b5609f225 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -16,6 +16,8 @@ #include "incore.h" #include "glock.h" +#include "glops.h" +#include "recovery.h" #include "util.h" #include "sys.h" #include "trace_gfs2.h" @@ -124,6 +126,8 @@ static void gdlm_ast(void *arg) switch (gl->gl_lksb.sb_status) { case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */ + if (gl->gl_ops->go_free) + gl->gl_ops->go_free(gl); gfs2_glock_free(gl); return; case -DLM_ECANCEL: /* Cancel while getting lock */ @@ -323,6 +327,7 @@ static void gdlm_cancel(struct gfs2_glock *gl) /* * dlm/gfs2 recovery coordination using dlm_recover callbacks * + * 0. gfs2 checks for another cluster node withdraw, needing journal replay * 1. dlm_controld sees lockspace members change * 2. dlm_controld blocks dlm-kernel locking activity * 3. dlm_controld within dlm-kernel notifies gfs2 (recover_prep) @@ -571,6 +576,28 @@ static int control_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags) &ls->ls_control_lksb, "control_lock"); } +/** + * remote_withdraw - react to a node withdrawing from the file system + * @sdp: The superblock + */ +static void remote_withdraw(struct gfs2_sbd *sdp) +{ + struct gfs2_jdesc *jd; + int ret = 0, count = 0; + + list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { + if (jd->jd_jid == sdp->sd_lockstruct.ls_jid) + continue; + ret = gfs2_recover_journal(jd, true); + if (ret) + break; + count++; + } + + /* Now drop the additional reference we acquired */ + fs_err(sdp, "Journals checked: %d, ret = %d.\n", count, ret); +} + static void gfs2_control_func(struct work_struct *work) { struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work); @@ -581,6 +608,13 @@ static void gfs2_control_func(struct work_struct *work) int recover_size; int i, error; + /* First check for other nodes that may have done a withdraw. */ + if (test_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags)) { + remote_withdraw(sdp); + clear_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags); + return; + } + spin_lock(&ls->ls_recover_spin); /* * No MOUNT_DONE means we're still mounting; control_mount() @@ -1079,6 +1113,10 @@ static void gdlm_recover_prep(void *arg) struct gfs2_sbd *sdp = arg; struct lm_lockstruct *ls = &sdp->sd_lockstruct; + if (gfs2_withdrawn(sdp)) { + fs_err(sdp, "recover_prep ignored due to withdraw.\n"); + return; + } spin_lock(&ls->ls_recover_spin); ls->ls_recover_block = ls->ls_recover_start; set_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags); @@ -1101,6 +1139,11 @@ static void gdlm_recover_slot(void *arg, struct dlm_slot *slot) struct lm_lockstruct *ls = &sdp->sd_lockstruct; int jid = slot->slot - 1; + if (gfs2_withdrawn(sdp)) { + fs_err(sdp, "recover_slot jid %d ignored due to withdraw.\n", + jid); + return; + } spin_lock(&ls->ls_recover_spin); if (ls->ls_recover_size < jid + 1) { fs_err(sdp, "recover_slot jid %d gen %u short size %d\n", @@ -1125,6 +1168,10 @@ static void gdlm_recover_done(void *arg, struct dlm_slot *slots, int num_slots, struct gfs2_sbd *sdp = arg; struct lm_lockstruct *ls = &sdp->sd_lockstruct; + if (gfs2_withdrawn(sdp)) { + fs_err(sdp, "recover_done ignored due to withdraw.\n"); + return; + } /* ensure the ls jid arrays are large enough */ set_recover_size(sdp, slots, num_slots); @@ -1152,6 +1199,11 @@ static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid, { struct lm_lockstruct *ls = &sdp->sd_lockstruct; + if (gfs2_withdrawn(sdp)) { + fs_err(sdp, "recovery_result jid %d ignored due to withdraw.\n", + jid); + return; + } if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags)) return; diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 00a2e721a374..3a75843ae580 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -88,8 +88,7 @@ static void gfs2_remove_from_ail(struct gfs2_bufdata *bd) static int gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct writeback_control *wbc, - struct gfs2_trans *tr, - bool *withdraw) + struct gfs2_trans *tr) __releases(&sdp->sd_ail_lock) __acquires(&sdp->sd_ail_lock) { @@ -97,6 +96,7 @@ __acquires(&sdp->sd_ail_lock) struct address_space *mapping; struct gfs2_bufdata *bd, *s; struct buffer_head *bh; + int ret = 0; list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list, bd_ail_st_list) { bh = bd->bd_bh; @@ -104,16 +104,21 @@ __acquires(&sdp->sd_ail_lock) gfs2_assert(sdp, bd->bd_tr == tr); if (!buffer_busy(bh)) { - if (!buffer_uptodate(bh) && - !test_and_set_bit(SDF_AIL1_IO_ERROR, - &sdp->sd_flags)) { + if (buffer_uptodate(bh)) { + list_move(&bd->bd_ail_st_list, + &tr->tr_ail2_list); + continue; + } + if (!cmpxchg(&sdp->sd_log_error, 0, -EIO)) { gfs2_io_error_bh(sdp, bh); - *withdraw = true; + gfs2_withdraw_delayed(sdp); } - list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list); - continue; } + if (gfs2_withdrawn(sdp)) { + gfs2_remove_from_ail(bd); + continue; + } if (!buffer_dirty(bh)) continue; if (gl == bd->bd_gl) @@ -124,16 +129,50 @@ __acquires(&sdp->sd_ail_lock) if (!mapping) continue; spin_unlock(&sdp->sd_ail_lock); - generic_writepages(mapping, wbc); + ret = generic_writepages(mapping, wbc); spin_lock(&sdp->sd_ail_lock); - if (wbc->nr_to_write <= 0) + if (ret || wbc->nr_to_write <= 0) break; - return 1; + return -EBUSY; } - return 0; + return ret; } +static void dump_ail_list(struct gfs2_sbd *sdp) +{ + struct gfs2_trans *tr; + struct gfs2_bufdata *bd; + struct buffer_head *bh; + + fs_err(sdp, "Error: In gfs2_ail1_flush for ten minutes! t=%d\n", + current->journal_info ? 1 : 0); + + list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) { + list_for_each_entry_reverse(bd, &tr->tr_ail1_list, + bd_ail_st_list) { + bh = bd->bd_bh; + fs_err(sdp, "bd %p: blk:0x%llx bh=%p ", bd, + (unsigned long long)bd->bd_blkno, bh); + if (!bh) { + fs_err(sdp, "\n"); + continue; + } + fs_err(sdp, "0x%llx up2:%d dirt:%d lkd:%d req:%d " + "map:%d new:%d ar:%d aw:%d delay:%d " + "io err:%d unwritten:%d dfr:%d pin:%d esc:%d\n", + (unsigned long long)bh->b_blocknr, + buffer_uptodate(bh), buffer_dirty(bh), + buffer_locked(bh), buffer_req(bh), + buffer_mapped(bh), buffer_new(bh), + buffer_async_read(bh), buffer_async_write(bh), + buffer_delay(bh), buffer_write_io_error(bh), + buffer_unwritten(bh), + buffer_defer_completion(bh), + buffer_pinned(bh), buffer_escaped(bh)); + } + } +} /** * gfs2_ail1_flush - start writeback of some ail1 entries @@ -149,23 +188,36 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) struct list_head *head = &sdp->sd_ail1_list; struct gfs2_trans *tr; struct blk_plug plug; - bool withdraw = false; + int ret; + unsigned long flush_start = jiffies; trace_gfs2_ail_flush(sdp, wbc, 1); blk_start_plug(&plug); spin_lock(&sdp->sd_ail_lock); restart: + ret = 0; + if (time_after(jiffies, flush_start + (HZ * 600))) { + dump_ail_list(sdp); + goto out; + } list_for_each_entry_reverse(tr, head, tr_list) { if (wbc->nr_to_write <= 0) break; - if (gfs2_ail1_start_one(sdp, wbc, tr, &withdraw) && - !gfs2_withdrawn(sdp)) - goto restart; + ret = gfs2_ail1_start_one(sdp, wbc, tr); + if (ret) { + if (ret == -EBUSY) + goto restart; + break; + } } +out: spin_unlock(&sdp->sd_ail_lock); blk_finish_plug(&plug); - if (withdraw) - gfs2_lm_withdraw(sdp, NULL); + if (ret) { + gfs2_lm(sdp, "gfs2_ail1_start_one (generic_writepages) " + "returned: %d\n", ret); + gfs2_withdraw(sdp); + } trace_gfs2_ail_flush(sdp, wbc, 0); } @@ -189,12 +241,13 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp) /** * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced * @sdp: the filesystem - * @ai: the AIL entry + * @tr: the transaction + * @max_revokes: If nonzero, issue revokes for the bd items for written buffers * */ static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr, - bool *withdraw) + int *max_revokes) { struct gfs2_bufdata *bd, *s; struct buffer_head *bh; @@ -203,12 +256,32 @@ static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr, bd_ail_st_list) { bh = bd->bd_bh; gfs2_assert(sdp, bd->bd_tr == tr); - if (buffer_busy(bh)) + /* + * If another process flagged an io error, e.g. writing to the + * journal, error all other bhs and move them off the ail1 to + * prevent a tight loop when unmount tries to flush ail1, + * regardless of whether they're still busy. If no outside + * errors were found and the buffer is busy, move to the next. + * If the ail buffer is not busy and caught an error, flag it + * for others. + */ + if (!sdp->sd_log_error && buffer_busy(bh)) continue; if (!buffer_uptodate(bh) && - !test_and_set_bit(SDF_AIL1_IO_ERROR, &sdp->sd_flags)) { + !cmpxchg(&sdp->sd_log_error, 0, -EIO)) { gfs2_io_error_bh(sdp, bh); - *withdraw = true; + gfs2_withdraw_delayed(sdp); + } + /* + * If we have space for revokes and the bd is no longer on any + * buf list, we can just add a revoke for it immediately and + * avoid having to put it on the ail2 list, where it would need + * to be revoked later. + */ + if (*max_revokes && list_empty(&bd->bd_list)) { + gfs2_add_revoke(sdp, bd); + (*max_revokes)--; + continue; } list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list); } @@ -217,20 +290,20 @@ static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr, /** * gfs2_ail1_empty - Try to empty the ail1 lists * @sdp: The superblock + * @max_revokes: If non-zero, add revokes where appropriate * * Tries to empty the ail1 lists, starting with the oldest first */ -static int gfs2_ail1_empty(struct gfs2_sbd *sdp) +static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int max_revokes) { struct gfs2_trans *tr, *s; int oldest_tr = 1; int ret; - bool withdraw = false; spin_lock(&sdp->sd_ail_lock); list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) { - gfs2_ail1_empty_one(sdp, tr, &withdraw); + gfs2_ail1_empty_one(sdp, tr, &max_revokes); if (list_empty(&tr->tr_ail1_list) && oldest_tr) list_move(&tr->tr_list, &sdp->sd_ail2_list); else @@ -239,8 +312,10 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp) ret = list_empty(&sdp->sd_ail1_list); spin_unlock(&sdp->sd_ail_lock); - if (withdraw) - gfs2_lm_withdraw(sdp, "fatal: I/O error(s)\n"); + if (test_bit(SDF_WITHDRAWING, &sdp->sd_flags)) { + gfs2_lm(sdp, "fatal: I/O error(s)\n"); + gfs2_withdraw(sdp); + } return ret; } @@ -268,20 +343,17 @@ static void gfs2_ail1_wait(struct gfs2_sbd *sdp) } /** - * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced - * @sdp: the filesystem - * @ai: the AIL entry - * + * gfs2_ail_empty_tr - empty one of the ail lists for a transaction */ -static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr) +static void gfs2_ail_empty_tr(struct gfs2_sbd *sdp, struct gfs2_trans *tr, + struct list_head *head) { - struct list_head *head = &tr->tr_ail2_list; struct gfs2_bufdata *bd; while (!list_empty(head)) { - bd = list_entry(head->prev, struct gfs2_bufdata, - bd_ail_st_list); + bd = list_first_entry(head, struct gfs2_bufdata, + bd_ail_st_list); gfs2_assert(sdp, bd->bd_tr == tr); gfs2_remove_from_ail(bd); } @@ -303,7 +375,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail) if (!rm) continue; - gfs2_ail2_empty_one(sdp, tr); + gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list); list_del(&tr->tr_list); gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list)); gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list)); @@ -487,7 +559,7 @@ static unsigned int current_tail(struct gfs2_sbd *sdp) if (list_empty(&sdp->sd_ail1_list)) { tail = sdp->sd_log_head; } else { - tr = list_entry(sdp->sd_ail1_list.prev, struct gfs2_trans, + tr = list_last_entry(&sdp->sd_ail1_list, struct gfs2_trans, tr_list); tail = tr->tr_first; } @@ -512,7 +584,7 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail) } -static void log_flush_wait(struct gfs2_sbd *sdp) +void log_flush_wait(struct gfs2_sbd *sdp) { DEFINE_WAIT(wait); @@ -549,7 +621,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp) spin_lock(&sdp->sd_ordered_lock); list_sort(NULL, &sdp->sd_log_ordered, &ip_cmp); while (!list_empty(&sdp->sd_log_ordered)) { - ip = list_entry(sdp->sd_log_ordered.next, struct gfs2_inode, i_ordered); + ip = list_first_entry(&sdp->sd_log_ordered, struct gfs2_inode, i_ordered); if (ip->i_inode.i_mapping->nrpages == 0) { test_and_clear_bit(GIF_ORDERED, &ip->i_flags); list_del(&ip->i_ordered); @@ -570,7 +642,7 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp) spin_lock(&sdp->sd_ordered_lock); while (!list_empty(&sdp->sd_log_ordered)) { - ip = list_entry(sdp->sd_log_ordered.next, struct gfs2_inode, i_ordered); + ip = list_first_entry(&sdp->sd_log_ordered, struct gfs2_inode, i_ordered); list_del(&ip->i_ordered); WARN_ON(!test_and_clear_bit(GIF_ORDERED, &ip->i_flags)); if (ip->i_inode.i_mapping->nrpages == 0) @@ -616,27 +688,24 @@ void gfs2_glock_remove_revoke(struct gfs2_glock *gl) } } +/** + * gfs2_write_revokes - Add as many revokes to the system transaction as we can + * @sdp: The GFS2 superblock + * + * Our usual strategy is to defer writing revokes as much as we can in the hope + * that we'll eventually overwrite the journal, which will make those revokes + * go away. This changes when we flush the log: at that point, there will + * likely be some left-over space in the last revoke block of that transaction. + * We can fill that space with additional revokes for blocks that have already + * been written back. This will basically come at no cost now, and will save + * us from having to keep track of those blocks on the AIL2 list later. + */ void gfs2_write_revokes(struct gfs2_sbd *sdp) { - struct gfs2_trans *tr; - struct gfs2_bufdata *bd, *tmp; - int have_revokes = 0; + /* number of revokes we still have room for */ int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64); - gfs2_ail1_empty(sdp); - spin_lock(&sdp->sd_ail_lock); - list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) { - list_for_each_entry(bd, &tr->tr_ail2_list, bd_ail_st_list) { - if (list_empty(&bd->bd_list)) { - have_revokes = 1; - goto done; - } - } - } -done: - spin_unlock(&sdp->sd_ail_lock); - if (have_revokes == 0) - return; + gfs2_log_lock(sdp); while (sdp->sd_log_num_revoke > max_revokes) max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64); max_revokes -= sdp->sd_log_num_revoke; @@ -647,20 +716,7 @@ done: if (!sdp->sd_log_blks_reserved) atomic_dec(&sdp->sd_log_blks_free); } - gfs2_log_lock(sdp); - spin_lock(&sdp->sd_ail_lock); - list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) { - list_for_each_entry_safe(bd, tmp, &tr->tr_ail2_list, bd_ail_st_list) { - if (max_revokes == 0) - goto out_of_blocks; - if (!list_empty(&bd->bd_list)) - continue; - gfs2_add_revoke(sdp, bd); - max_revokes--; - } - } -out_of_blocks: - spin_unlock(&sdp->sd_ail_lock); + gfs2_ail1_empty(sdp, max_revokes); gfs2_log_unlock(sdp); if (!sdp->sd_log_num_revoke) { @@ -787,6 +843,40 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags) } /** + * ail_drain - drain the ail lists after a withdraw + * @sdp: Pointer to GFS2 superblock + */ +static void ail_drain(struct gfs2_sbd *sdp) +{ + struct gfs2_trans *tr; + + spin_lock(&sdp->sd_ail_lock); + /* + * For transactions on the sd_ail1_list we need to drain both the + * ail1 and ail2 lists. That's because function gfs2_ail1_start_one + * (temporarily) moves items from its tr_ail1 list to tr_ail2 list + * before revokes are sent for that block. Items on the sd_ail2_list + * should have already gotten beyond that point, so no need. + */ + while (!list_empty(&sdp->sd_ail1_list)) { + tr = list_first_entry(&sdp->sd_ail1_list, struct gfs2_trans, + tr_list); + gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail1_list); + gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list); + list_del(&tr->tr_list); + kfree(tr); + } + while (!list_empty(&sdp->sd_ail2_list)) { + tr = list_first_entry(&sdp->sd_ail2_list, struct gfs2_trans, + tr_list); + gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list); + list_del(&tr->tr_list); + kfree(tr); + } + spin_unlock(&sdp->sd_ail_lock); +} + +/** * gfs2_log_flush - flush incore transaction(s) * @sdp: the filesystem * @gl: The glock structure to flush. If NULL, flush the whole incore log @@ -796,11 +886,18 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags) void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) { - struct gfs2_trans *tr; + struct gfs2_trans *tr = NULL; enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); down_write(&sdp->sd_log_flush_lock); + /* + * Do this check while holding the log_flush_lock to prevent new + * buffers from being added to the ail via gfs2_pin() + */ + if (gfs2_withdrawn(sdp)) + goto out; + /* Log might have been flushed while we waited for the flush lock */ if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags)) { up_write(&sdp->sd_log_flush_lock); @@ -819,17 +916,27 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) INIT_LIST_HEAD(&tr->tr_ail2_list); tr->tr_first = sdp->sd_log_flush_head; if (unlikely (state == SFS_FROZEN)) - gfs2_assert_withdraw(sdp, !tr->tr_num_buf_new && !tr->tr_num_databuf_new); + if (gfs2_assert_withdraw_delayed(sdp, + !tr->tr_num_buf_new && !tr->tr_num_databuf_new)) + goto out; } if (unlikely(state == SFS_FROZEN)) - gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); - gfs2_assert_withdraw(sdp, - sdp->sd_log_num_revoke == sdp->sd_log_committed_revoke); + if (gfs2_assert_withdraw_delayed(sdp, !sdp->sd_log_num_revoke)) + goto out; + if (gfs2_assert_withdraw_delayed(sdp, + sdp->sd_log_num_revoke == sdp->sd_log_committed_revoke)) + goto out; gfs2_ordered_write(sdp); + if (gfs2_withdrawn(sdp)) + goto out; lops_before_commit(sdp, tr); + if (gfs2_withdrawn(sdp)) + goto out; gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE); + if (gfs2_withdrawn(sdp)) + goto out; if (sdp->sd_log_head != sdp->sd_log_flush_head) { log_flush_wait(sdp); @@ -839,6 +946,8 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) trace_gfs2_log_blocks(sdp, -1); log_write_header(sdp, flags); } + if (gfs2_withdrawn(sdp)) + goto out; lops_after_commit(sdp, tr); gfs2_log_lock(sdp); @@ -859,9 +968,11 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) for (;;) { gfs2_ail1_start(sdp); gfs2_ail1_wait(sdp); - if (gfs2_ail1_empty(sdp)) + if (gfs2_ail1_empty(sdp, 0)) break; } + if (gfs2_withdrawn(sdp)) + goto out; atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ trace_gfs2_log_blocks(sdp, -1); log_write_header(sdp, flags); @@ -874,6 +985,12 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) atomic_set(&sdp->sd_freeze_state, SFS_FROZEN); } +out: + if (gfs2_withdrawn(sdp)) { + ail_drain(sdp); /* frees all transactions */ + tr = NULL; + } + trace_gfs2_log_flush(sdp, 0, flags); up_write(&sdp->sd_log_flush_lock); @@ -1016,16 +1133,17 @@ int gfs2_logd(void *data) /* Check for errors writing to the journal */ if (sdp->sd_log_error) { - gfs2_lm_withdraw(sdp, - "GFS2: fsid=%s: error %d: " - "withdrawing the file system to " - "prevent further damage.\n", - sdp->sd_fsname, sdp->sd_log_error); + gfs2_lm(sdp, + "GFS2: fsid=%s: error %d: " + "withdrawing the file system to " + "prevent further damage.\n", + sdp->sd_fsname, sdp->sd_log_error); + gfs2_withdraw(sdp); } did_flush = false; if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { - gfs2_ail1_empty(sdp); + gfs2_ail1_empty(sdp, 0); gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_LOGD_JFLUSH_REQD); did_flush = true; @@ -1034,7 +1152,7 @@ int gfs2_logd(void *data) if (gfs2_ail_flush_reqd(sdp)) { gfs2_ail1_start(sdp); gfs2_ail1_wait(sdp); - gfs2_ail1_empty(sdp); + gfs2_ail1_empty(sdp, 0); gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_LOGD_AIL_FLUSH_REQD); did_flush = true; diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index c0a65e5a126b..c1cd6ae17659 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -73,6 +73,7 @@ extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 type); extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc); +extern void log_flush_wait(struct gfs2_sbd *sdp); extern int gfs2_logd(void *data); extern void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index c090d5ad3f22..5ea96757afc4 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -203,8 +203,12 @@ static void gfs2_end_log_write(struct bio *bio) struct bvec_iter_all iter_all; if (bio->bi_status) { - fs_err(sdp, "Error %d writing to journal, jid=%u\n", - bio->bi_status, sdp->sd_jdesc->jd_jid); + if (!cmpxchg(&sdp->sd_log_error, 0, (int)bio->bi_status)) + fs_err(sdp, "Error %d writing to journal, jid=%u\n", + bio->bi_status, sdp->sd_jdesc->jd_jid); + gfs2_withdraw_delayed(sdp); + /* prevent more writes to the journal */ + clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); wake_up(&sdp->sd_logd_waitq); } @@ -730,7 +734,7 @@ static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) head = &tr->tr_buf; while (!list_empty(head)) { - bd = list_entry(head->next, struct gfs2_bufdata, bd_list); + bd = list_first_entry(head, struct gfs2_bufdata, bd_list); list_del_init(&bd->bd_list); gfs2_unpin(sdp, bd->bd_bh, tr); } @@ -900,7 +904,7 @@ static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) struct gfs2_glock *gl; while (!list_empty(head)) { - bd = list_entry(head->next, struct gfs2_bufdata, bd_list); + bd = list_first_entry(head, struct gfs2_bufdata, bd_list); list_del_init(&bd->bd_list); gl = bd->bd_gl; gfs2_glock_remove_revoke(gl); @@ -1079,7 +1083,7 @@ static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) head = &tr->tr_databuf; while (!list_empty(head)) { - bd = list_entry(head->next, struct gfs2_bufdata, bd_list); + bd = list_first_entry(head, struct gfs2_bufdata, bd_list); list_del_init(&bd->bd_list); gfs2_unpin(sdp, bd->bd_bh, tr); } diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 0c3772974030..4b72abcf83b2 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -251,7 +251,8 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, struct buffer_head *bh, *bhs[2]; int num = 0; - if (unlikely(gfs2_withdrawn(sdp))) { + if (unlikely(gfs2_withdrawn(sdp)) && + (!sdp->sd_jdesc || (blkno != sdp->sd_jdesc->jd_no_addr))) { *bhp = NULL; return -EIO; } diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index a1a8ef7ed3fd..e2b69ffcc6a8 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -552,6 +552,8 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) mutex_lock(&sdp->sd_jindex_mutex); for (;;) { + struct gfs2_inode *jip; + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, ji_gh); if (error) break; @@ -591,6 +593,8 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) spin_lock(&sdp->sd_jindex_spin); jd->jd_jid = sdp->sd_journals++; + jip = GFS2_I(jd->jd_inode); + jd->jd_no_addr = jip->i_no_addr; list_add_tail(&jd->jd_list, &sdp->sd_jindex_list); spin_unlock(&sdp->sd_jindex_spin); } @@ -600,48 +604,6 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) return error; } -/** - * check_journal_clean - Make sure a journal is clean for a spectator mount - * @sdp: The GFS2 superblock - * @jd: The journal descriptor - * - * Returns: 0 if the journal is clean or locked, else an error - */ -static int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd) -{ - int error; - struct gfs2_holder j_gh; - struct gfs2_log_header_host head; - struct gfs2_inode *ip; - - ip = GFS2_I(jd->jd_inode); - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP | - GL_EXACT | GL_NOCACHE, &j_gh); - if (error) { - fs_err(sdp, "Error locking journal for spectator mount.\n"); - return -EPERM; - } - error = gfs2_jdesc_check(jd); - if (error) { - fs_err(sdp, "Error checking journal for spectator mount.\n"); - goto out_unlock; - } - error = gfs2_find_jhead(jd, &head, false); - if (error) { - fs_err(sdp, "Error parsing journal for spectator mount.\n"); - goto out_unlock; - } - if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { - error = -EPERM; - fs_err(sdp, "jid=%u: Journal is dirty, so the first mounter " - "must not be a spectator.\n", jd->jd_jid); - } - -out_unlock: - gfs2_glock_dq_uninit(&j_gh); - return error; -} - static int init_journal(struct gfs2_sbd *sdp, int undo) { struct inode *master = d_inode(sdp->sd_master_dir); @@ -694,7 +656,8 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) error = gfs2_glock_nq_num(sdp, sdp->sd_lockstruct.ls_jid, &gfs2_journal_glops, - LM_ST_EXCLUSIVE, LM_FLAG_NOEXP, + LM_ST_EXCLUSIVE, + LM_FLAG_NOEXP | GL_NOCACHE, &sdp->sd_journal_gh); if (error) { fs_err(sdp, "can't acquire journal glock: %d\n", error); @@ -702,6 +665,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) } ip = GFS2_I(sdp->sd_jdesc->jd_inode); + sdp->sd_jinode_gl = ip->i_gl; error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP | GL_EXACT | GL_NOCACHE, &sdp->sd_jinode_gh); @@ -732,7 +696,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) struct gfs2_jdesc *jd = gfs2_jdesc_find(sdp, x); if (sdp->sd_args.ar_spectator) { - error = check_journal_clean(sdp, jd); + error = check_journal_clean(sdp, jd, true); if (error) goto fail_jinode_gh; continue; @@ -762,10 +726,13 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) return 0; fail_jinode_gh: - if (!sdp->sd_args.ar_spectator) + /* A withdraw may have done dq/uninit so now we need to check it */ + if (!sdp->sd_args.ar_spectator && + gfs2_holder_initialized(&sdp->sd_jinode_gh)) gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); fail_journal_gh: - if (!sdp->sd_args.ar_spectator) + if (!sdp->sd_args.ar_spectator && + gfs2_holder_initialized(&sdp->sd_journal_gh)) gfs2_glock_dq_uninit(&sdp->sd_journal_gh); fail_jindex: gfs2_jindex_free(sdp); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index e9f93045eb01..cc0c4b5800be 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -115,7 +115,7 @@ static void gfs2_qd_dispose(struct list_head *list) struct gfs2_sbd *sdp; while (!list_empty(list)) { - qd = list_entry(list->next, struct gfs2_quota_data, qd_lru); + qd = list_first_entry(list, struct gfs2_quota_data, qd_lru); sdp = qd->qd_gl->gl_name.ln_sbd; list_del(&qd->qd_lru); @@ -525,11 +525,11 @@ static void qdsb_put(struct gfs2_quota_data *qd) } /** - * gfs2_qa_alloc - make sure we have a quota allocations data structure, - * if necessary + * gfs2_qa_get - make sure we have a quota allocations data structure, + * if necessary * @ip: the inode for this reservation */ -int gfs2_qa_alloc(struct gfs2_inode *ip) +int gfs2_qa_get(struct gfs2_inode *ip) { int error = 0; struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); @@ -540,17 +540,21 @@ int gfs2_qa_alloc(struct gfs2_inode *ip) down_write(&ip->i_rw_mutex); if (ip->i_qadata == NULL) { ip->i_qadata = kmem_cache_zalloc(gfs2_qadata_cachep, GFP_NOFS); - if (!ip->i_qadata) + if (!ip->i_qadata) { error = -ENOMEM; + goto out; + } } + ip->i_qadata->qa_ref++; +out: up_write(&ip->i_rw_mutex); return error; } -void gfs2_qa_delete(struct gfs2_inode *ip, atomic_t *wcount) +void gfs2_qa_put(struct gfs2_inode *ip) { down_write(&ip->i_rw_mutex); - if (ip->i_qadata && ((wcount == NULL) || (atomic_read(wcount) <= 1))) { + if (ip->i_qadata && --ip->i_qadata->qa_ref == 0) { kmem_cache_free(gfs2_qadata_cachep, ip->i_qadata); ip->i_qadata = NULL; } @@ -566,27 +570,27 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) return 0; - if (ip->i_qadata == NULL) { - error = gfs2_rsqa_alloc(ip); - if (error) - return error; - } + error = gfs2_qa_get(ip); + if (error) + return error; qd = ip->i_qadata->qa_qd; if (gfs2_assert_warn(sdp, !ip->i_qadata->qa_qd_num) || - gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags))) - return -EIO; + gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags))) { + error = -EIO; + goto out; + } error = qdsb_get(sdp, make_kqid_uid(ip->i_inode.i_uid), qd); if (error) - goto out; + goto out_unhold; ip->i_qadata->qa_qd_num++; qd++; error = qdsb_get(sdp, make_kqid_gid(ip->i_inode.i_gid), qd); if (error) - goto out; + goto out_unhold; ip->i_qadata->qa_qd_num++; qd++; @@ -594,7 +598,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) !uid_eq(uid, ip->i_inode.i_uid)) { error = qdsb_get(sdp, make_kqid_uid(uid), qd); if (error) - goto out; + goto out_unhold; ip->i_qadata->qa_qd_num++; qd++; } @@ -603,14 +607,15 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) !gid_eq(gid, ip->i_inode.i_gid)) { error = qdsb_get(sdp, make_kqid_gid(gid), qd); if (error) - goto out; + goto out_unhold; ip->i_qadata->qa_qd_num++; qd++; } -out: +out_unhold: if (error) gfs2_quota_unhold(ip); +out: return error; } @@ -621,6 +626,7 @@ void gfs2_quota_unhold(struct gfs2_inode *ip) if (ip->i_qadata == NULL) return; + gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)); for (x = 0; x < ip->i_qadata->qa_qd_num; x++) { @@ -628,6 +634,7 @@ void gfs2_quota_unhold(struct gfs2_inode *ip) ip->i_qadata->qa_qd[x] = NULL; } ip->i_qadata->qa_qd_num = 0; + gfs2_qa_put(ip); } static int sort_qd(const void *a, const void *b) @@ -876,7 +883,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) unsigned int nalloc = 0, blocks; int error; - error = gfs2_rsqa_alloc(ip); + error = gfs2_qa_get(ip); if (error) return error; @@ -884,8 +891,10 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) &data_blocks, &ind_blocks); ghs = kmalloc_array(num_qd, sizeof(struct gfs2_holder), GFP_NOFS); - if (!ghs) - return -ENOMEM; + if (!ghs) { + error = -ENOMEM; + goto out; + } sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL); inode_lock(&ip->i_inode); @@ -893,12 +902,12 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, &ghs[qx]); if (error) - goto out; + goto out_dq; } error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); if (error) - goto out; + goto out_dq; for (x = 0; x < num_qd; x++) { offset = qd2offset(qda[x]); @@ -950,13 +959,15 @@ out_ipres: gfs2_inplace_release(ip); out_alloc: gfs2_glock_dq_uninit(&i_gh); -out: +out_dq: while (qx--) gfs2_glock_dq_uninit(&ghs[qx]); inode_unlock(&ip->i_inode); kfree(ghs); gfs2_log_flush(ip->i_gl->gl_name.ln_sbd, ip->i_gl, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_DO_SYNC); +out: + gfs2_qa_put(ip); return error; } @@ -1259,6 +1270,7 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change, if (ip->i_diskflags & GFS2_DIF_SYSTEM) return; + BUG_ON(ip->i_qadata->qa_ref <= 0); for (x = 0; x < ip->i_qadata->qa_qd_num; x++) { qd = ip->i_qadata->qa_qd[x]; @@ -1441,7 +1453,7 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp) spin_lock(&qd_lock); while (!list_empty(head)) { - qd = list_entry(head->prev, struct gfs2_quota_data, qd_list); + qd = list_last_entry(head, struct gfs2_quota_data, qd_list); list_del(&qd->qd_list); @@ -1476,8 +1488,8 @@ static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error) if (error == 0 || error == -EROFS) return; if (!gfs2_withdrawn(sdp)) { - fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error); - sdp->sd_log_error = error; + if (!cmpxchg(&sdp->sd_log_error, 0, error)) + fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error); wake_up(&sdp->sd_logd_waitq); } } @@ -1504,7 +1516,7 @@ static void quotad_check_trunc_list(struct gfs2_sbd *sdp) ip = NULL; spin_lock(&sdp->sd_trunc_lock); if (!list_empty(&sdp->sd_trunc_list)) { - ip = list_entry(sdp->sd_trunc_list.next, + ip = list_first_entry(&sdp->sd_trunc_list, struct gfs2_inode, i_trunc_list); list_del_init(&ip->i_trunc_list); } @@ -1541,6 +1553,8 @@ int gfs2_quotad(void *data) while (!kthread_should_stop()) { + if (gfs2_withdrawn(sdp)) + goto bypass; /* Update the master statfs file */ if (sdp->sd_statfs_force_sync) { int error = gfs2_statfs_sync(sdp->sd_vfs, 0); @@ -1561,6 +1575,7 @@ int gfs2_quotad(void *data) try_to_freeze(); +bypass: t = min(quotad_timeo, statfs_timeo); prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_INTERRUPTIBLE); @@ -1674,7 +1689,7 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid, if (error) return error; - error = gfs2_rsqa_alloc(ip); + error = gfs2_qa_get(ip); if (error) goto out_put; @@ -1743,6 +1758,7 @@ out_i: out_q: gfs2_glock_dq_uninit(&q_gh); out_unlockput: + gfs2_qa_put(ip); inode_unlock(&ip->i_inode); out_put: qd_put(qd); diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 765627d9a91e..7f9ca8ef40fc 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -15,8 +15,8 @@ struct gfs2_sbd; #define NO_UID_QUOTA_CHANGE INVALID_UID #define NO_GID_QUOTA_CHANGE INVALID_GID -extern int gfs2_qa_alloc(struct gfs2_inode *ip); -extern void gfs2_qa_delete(struct gfs2_inode *ip, atomic_t *wcount); +extern int gfs2_qa_get(struct gfs2_inode *ip); +extern void gfs2_qa_put(struct gfs2_inode *ip); extern int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); extern void gfs2_quota_unhold(struct gfs2_inode *ip); diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 85f830e56945..96c345f49273 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -111,7 +111,7 @@ void gfs2_revoke_clean(struct gfs2_jdesc *jd) struct gfs2_revoke_replay *rr; while (!list_empty(head)) { - rr = list_entry(head->next, struct gfs2_revoke_replay, rr_list); + rr = list_first_entry(head, struct gfs2_revoke_replay, rr_list); list_del(&rr->rr_list); kfree(rr); } @@ -305,6 +305,11 @@ void gfs2_recover_func(struct work_struct *work) int error = 0; int jlocked = 0; + if (gfs2_withdrawn(sdp)) { + fs_err(sdp, "jid=%u: Recovery not attempted due to withdraw.\n", + jd->jd_jid); + goto fail; + } t_start = ktime_get(); if (sdp->sd_args.ar_spectator) goto fail; @@ -393,6 +398,10 @@ void gfs2_recover_func(struct work_struct *work) fs_info(sdp, "jid=%u: Replaying journal...0x%x to 0x%x\n", jd->jd_jid, head.lh_tail, head.lh_blkno); + /* We take the sd_log_flush_lock here primarily to prevent log + * flushes and simultaneous journal replays from stomping on + * each other wrt sd_log_bio. */ + down_read(&sdp->sd_log_flush_lock); for (pass = 0; pass < 2; pass++) { lops_before_scan(jd, &head, pass); error = foreach_descriptor(jd, head.lh_tail, @@ -403,6 +412,7 @@ void gfs2_recover_func(struct work_struct *work) } clean_journal(jd, &head); + up_read(&sdp->sd_log_flush_lock); gfs2_glock_dq_uninit(&thaw_gh); t_rep = ktime_get(); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index e7bf91ec231c..a321c34e3d6e 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -457,24 +457,24 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd) } if (count[0] != rgd->rd_free) { - if (gfs2_consist_rgrpd(rgd)) - fs_err(sdp, "free data mismatch: %u != %u\n", - count[0], rgd->rd_free); + gfs2_lm(sdp, "free data mismatch: %u != %u\n", + count[0], rgd->rd_free); + gfs2_consist_rgrpd(rgd); return; } tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes; if (count[1] != tmp) { - if (gfs2_consist_rgrpd(rgd)) - fs_err(sdp, "used data mismatch: %u != %u\n", - count[1], tmp); + gfs2_lm(sdp, "used data mismatch: %u != %u\n", + count[1], tmp); + gfs2_consist_rgrpd(rgd); return; } if (count[2] + count[3] != rgd->rd_dinodes) { - if (gfs2_consist_rgrpd(rgd)) - fs_err(sdp, "used metadata mismatch: %u != %u\n", - count[2] + count[3], rgd->rd_dinodes); + gfs2_lm(sdp, "used metadata mismatch: %u != %u\n", + count[2] + count[3], rgd->rd_dinodes); + gfs2_consist_rgrpd(rgd); return; } } @@ -590,16 +590,6 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd) } } -/** - * gfs2_rsqa_alloc - make sure we have a reservation assigned to the inode - * plus a quota allocations data structure, if necessary - * @ip: the inode for this reservation - */ -int gfs2_rsqa_alloc(struct gfs2_inode *ip) -{ - return gfs2_qa_alloc(ip); -} - static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs, const char *fs_id_buf) { @@ -672,18 +662,17 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs) } /** - * gfs2_rsqa_delete - delete a multi-block reservation and quota allocation + * gfs2_rs_delete - delete a multi-block reservation * @ip: The inode for this reservation * @wcount: The inode's write count, or NULL * */ -void gfs2_rsqa_delete(struct gfs2_inode *ip, atomic_t *wcount) +void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount) { down_write(&ip->i_rw_mutex); if ((wcount == NULL) || (atomic_read(wcount) <= 1)) gfs2_rs_deltree(&ip->i_res); up_write(&ip->i_rw_mutex); - gfs2_qa_delete(ip, wcount); } /** @@ -720,8 +709,12 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) rb_erase(n, &sdp->sd_rindex_tree); if (gl) { - glock_clear_object(gl, rgd); + if (gl->gl_state != LM_ST_UNLOCKED) { + gfs2_glock_cb(gl, LM_ST_UNLOCKED); + flush_delayed_work(&gl->gl_work); + } gfs2_rgrp_brelse(rgd); + glock_clear_object(gl, rgd); gfs2_glock_put(gl); } @@ -733,17 +726,6 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) } } -static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd) -{ - struct gfs2_sbd *sdp = rgd->rd_sbd; - - fs_info(sdp, "ri_addr = %llu\n", (unsigned long long)rgd->rd_addr); - fs_info(sdp, "ri_length = %u\n", rgd->rd_length); - fs_info(sdp, "ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0); - fs_info(sdp, "ri_data = %u\n", rgd->rd_data); - fs_info(sdp, "ri_bitbytes = %u\n", rgd->rd_bitbytes); -} - /** * gfs2_compute_bitstructs - Compute the bitmap sizes * @rgd: The resource group descriptor @@ -814,11 +796,20 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd) } bi = rgd->rd_bits + (length - 1); if ((bi->bi_start + bi->bi_bytes) * GFS2_NBBY != rgd->rd_data) { - if (gfs2_consist_rgrpd(rgd)) { - gfs2_rindex_print(rgd); - fs_err(sdp, "start=%u len=%u offset=%u\n", - bi->bi_start, bi->bi_bytes, bi->bi_offset); - } + gfs2_lm(sdp, + "ri_addr = %llu\n" + "ri_length = %u\n" + "ri_data0 = %llu\n" + "ri_data = %u\n" + "ri_bitbytes = %u\n" + "start=%u len=%u offset=%u\n", + (unsigned long long)rgd->rd_addr, + rgd->rd_length, + (unsigned long long)rgd->rd_data0, + rgd->rd_data, + rgd->rd_bitbytes, + bi->bi_start, bi->bi_bytes, bi->bi_offset); + gfs2_consist_rgrpd(rgd); return -EIO; } @@ -1286,23 +1277,6 @@ void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd) bi->bi_bh = NULL; } } - -} - -/** - * gfs2_rgrp_go_unlock - Unlock a rgrp glock - * @gh: The glock holder for the resource group - * - */ - -void gfs2_rgrp_go_unlock(struct gfs2_holder *gh) -{ - struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; - int demote_requested = test_bit(GLF_DEMOTE, &gh->gh_gl->gl_flags) | - test_bit(GLF_PENDING_DEMOTE, &gh->gh_gl->gl_flags); - - if (rgd && demote_requested) - gfs2_rgrp_brelse(rgd); } int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, @@ -1832,10 +1806,8 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 }; while (1) { - down_write(&sdp->sd_log_flush_lock); error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL, true); - up_write(&sdp->sd_log_flush_lock); if (error == -ENOSPC) break; if (WARN_ON_ONCE(error)) diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index c14a673ae36f..a1d7e14fc55b 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -33,7 +33,6 @@ extern int gfs2_rindex_update(struct gfs2_sbd *sdp); extern void gfs2_free_clones(struct gfs2_rgrpd *rgd); extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh); extern void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd); -extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh); extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); @@ -45,9 +44,8 @@ extern void gfs2_inplace_release(struct gfs2_inode *ip); extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, bool dinode, u64 *generation); -extern int gfs2_rsqa_alloc(struct gfs2_inode *ip); extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); -extern void gfs2_rsqa_delete(struct gfs2_inode *ip, atomic_t *wcount); +extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount); extern void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, u64 bstart, u32 blen, int meta); extern void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 68cc7c291a81..37fc41632aa2 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -61,11 +61,13 @@ void gfs2_jindex_free(struct gfs2_sbd *sdp) sdp->sd_journals = 0; spin_unlock(&sdp->sd_jindex_spin); + sdp->sd_jdesc = NULL; while (!list_empty(&list)) { - jd = list_entry(list.next, struct gfs2_jdesc, jd_list); + jd = list_first_entry(&list, struct gfs2_jdesc, jd_list); gfs2_free_journal_extents(jd); list_del(&jd->jd_list); iput(jd->jd_inode); + jd->jd_inode = NULL; kfree(jd); } } @@ -171,9 +173,13 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) goto fail_threads; j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); + if (gfs2_withdrawn(sdp)) { + error = -EIO; + goto fail; + } error = gfs2_find_jhead(sdp->sd_jdesc, &head, false); - if (error) + if (error || gfs2_withdrawn(sdp)) goto fail; if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { @@ -187,7 +193,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) gfs2_log_pointers_init(sdp, head.lh_blkno); error = gfs2_quota_init(sdp); - if (error) + if (error || gfs2_withdrawn(sdp)) goto fail; set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); @@ -446,7 +452,7 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp) out: while (!list_empty(&list)) { - lfcc = list_entry(list.next, struct lfcc, list); + lfcc = list_first_entry(&list, struct lfcc, list); list_del(&lfcc->list); gfs2_glock_dq_uninit(&lfcc->gh); kfree(lfcc); @@ -599,34 +605,63 @@ out: int gfs2_make_fs_ro(struct gfs2_sbd *sdp) { struct gfs2_holder freeze_gh; - int error; - - error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, GL_NOCACHE, - &freeze_gh); - if (error && !gfs2_withdrawn(sdp)) - return error; + int error = 0; + int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); + + gfs2_holder_mark_uninitialized(&freeze_gh); + if (sdp->sd_freeze_gl && + !gfs2_glock_is_locked_by_me(sdp->sd_freeze_gl)) { + if (!log_write_allowed) { + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, + LM_ST_SHARED, GL_NOCACHE | + LM_FLAG_TRY, &freeze_gh); + if (error == GLR_TRYFAILED) + error = 0; + } else { + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, + LM_ST_SHARED, GL_NOCACHE, + &freeze_gh); + if (error && !gfs2_withdrawn(sdp)) + return error; + } + } flush_workqueue(gfs2_delete_workqueue); - if (sdp->sd_quotad_process) + if (!log_write_allowed && current == sdp->sd_quotad_process) + fs_warn(sdp, "The quotad daemon is withdrawing.\n"); + else if (sdp->sd_quotad_process) kthread_stop(sdp->sd_quotad_process); sdp->sd_quotad_process = NULL; - if (sdp->sd_logd_process) + + if (!log_write_allowed && current == sdp->sd_logd_process) + fs_warn(sdp, "The logd daemon is withdrawing.\n"); + else if (sdp->sd_logd_process) kthread_stop(sdp->sd_logd_process); sdp->sd_logd_process = NULL; - gfs2_quota_sync(sdp->sd_vfs, 0); - gfs2_statfs_sync(sdp->sd_vfs, 0); - - gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_SHUTDOWN | - GFS2_LFC_MAKE_FS_RO); - wait_event(sdp->sd_reserving_log_wait, atomic_read(&sdp->sd_reserving_log) == 0); - gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks); + if (log_write_allowed) { + gfs2_quota_sync(sdp->sd_vfs, 0); + gfs2_statfs_sync(sdp->sd_vfs, 0); + gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_SHUTDOWN | + GFS2_LFC_MAKE_FS_RO); + wait_event(sdp->sd_reserving_log_wait, + atomic_read(&sdp->sd_reserving_log) == 0); + gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == + sdp->sd_jdesc->jd_blocks); + } else { + wait_event_timeout(sdp->sd_reserving_log_wait, + atomic_read(&sdp->sd_reserving_log) == 0, + HZ * 5); + } if (gfs2_holder_initialized(&freeze_gh)) gfs2_glock_dq_uninit(&freeze_gh); gfs2_quota_cleanup(sdp); + if (!log_write_allowed) + sdp->sd_vfs->s_flags |= SB_RDONLY; + return error; } @@ -677,8 +712,10 @@ restart: gfs2_glock_put(sdp->sd_freeze_gl); if (!sdp->sd_args.ar_spectator) { - gfs2_glock_dq_uninit(&sdp->sd_journal_gh); - gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); + if (gfs2_holder_initialized(&sdp->sd_journal_gh)) + gfs2_glock_dq_uninit(&sdp->sd_journal_gh); + if (gfs2_holder_initialized(&sdp->sd_jinode_gh)) + gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); gfs2_glock_dq_uninit(&sdp->sd_sc_gh); gfs2_glock_dq_uninit(&sdp->sd_qc_gh); iput(sdp->sd_sc_inode); @@ -1356,14 +1393,6 @@ out_unlock: if (gfs2_rs_active(&ip->i_res)) gfs2_rs_deltree(&ip->i_res); - if (gfs2_holder_initialized(&ip->i_iopen_gh)) { - glock_clear_object(ip->i_iopen_gh.gh_gl, ip); - if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { - ip->i_iopen_gh.gh_flags |= GL_NOCACHE; - gfs2_glock_dq(&ip->i_iopen_gh); - } - gfs2_holder_uninit(&ip->i_iopen_gh); - } if (gfs2_holder_initialized(&gh)) { glock_clear_object(ip->i_gl, ip); gfs2_glock_dq_uninit(&gh); @@ -1372,22 +1401,30 @@ out_unlock: fs_warn(sdp, "gfs2_evict_inode: %d\n", error); out: truncate_inode_pages_final(&inode->i_data); - gfs2_rsqa_delete(ip, NULL); + if (ip->i_qadata) + gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0); + gfs2_rs_delete(ip, NULL); + gfs2_qa_put(ip); gfs2_ordered_del_inode(ip); clear_inode(inode); gfs2_dir_hash_inval(ip); - glock_clear_object(ip->i_gl, ip); - wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE); - gfs2_glock_add_to_lru(ip->i_gl); - gfs2_glock_put_eventually(ip->i_gl); - ip->i_gl = NULL; + if (ip->i_gl) { + glock_clear_object(ip->i_gl, ip); + wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE); + gfs2_glock_add_to_lru(ip->i_gl); + gfs2_glock_put_eventually(ip->i_gl); + ip->i_gl = NULL; + } if (gfs2_holder_initialized(&ip->i_iopen_gh)) { struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; glock_clear_object(gl, ip); - ip->i_iopen_gh.gh_flags |= GL_NOCACHE; + if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { + ip->i_iopen_gh.gh_flags |= GL_NOCACHE; + gfs2_glock_dq(&ip->i_iopen_gh); + } gfs2_glock_hold(gl); - gfs2_glock_dq_uninit(&ip->i_iopen_gh); + gfs2_holder_uninit(&ip->i_iopen_gh); gfs2_glock_put_eventually(gl); } } @@ -1401,6 +1438,7 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb) return NULL; ip->i_flags = 0; ip->i_gl = NULL; + gfs2_holder_mark_uninitialized(&ip->i_iopen_gh); memset(&ip->i_res, 0, sizeof(ip->i_res)); RB_CLEAR_NODE(&ip->i_res.rs_node); ip->i_rahead = 0; diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index b8bf811a1305..51900554ed81 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h @@ -26,7 +26,6 @@ extern void gfs2_jindex_free(struct gfs2_sbd *sdp); extern struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid); extern int gfs2_jdesc_check(struct gfs2_jdesc *jd); - extern int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename, struct gfs2_inode **ipp); diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 8ccb68f4ed16..d28c41bd69b0 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -136,7 +136,8 @@ static ssize_t withdraw_store(struct gfs2_sbd *sdp, const char *buf, size_t len) if (val != 1) return -EINVAL; - gfs2_lm_withdraw(sdp, "withdrawing from cluster at user's request\n"); + gfs2_lm(sdp, "withdrawing from cluster at user's request\n"); + gfs2_withdraw(sdp); return len; } @@ -434,6 +435,8 @@ int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid) * never clear the DFL_BLOCK_LOCKS flag, so all our locks would * permanently stop working. */ + if (!sdp->sd_jdesc) + goto out; if (sdp->sd_jdesc->jd_jid == jid && !sdp->sd_args.ar_spectator) goto out; rv = -ENOENT; diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index a685637a5b55..ffe840505082 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -228,6 +228,10 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) fs_info(sdp, "GFS2:adding buf while frozen\n"); gfs2_assert_withdraw(sdp, 0); } + if (unlikely(gfs2_withdrawn(sdp))) { + fs_info(sdp, "GFS2:adding buf while withdrawn! 0x%llx\n", + (unsigned long long)bd->bd_bh->b_blocknr); + } gfs2_pin(sdp, bd->bd_bh); mh->__pad0 = cpu_to_be64(0); mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index ec600b487498..9b64d40ab379 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -11,12 +11,18 @@ #include <linux/buffer_head.h> #include <linux/crc32.h> #include <linux/gfs2_ondisk.h> +#include <linux/delay.h> #include <linux/uaccess.h> #include "gfs2.h" #include "incore.h" #include "glock.h" +#include "glops.h" +#include "log.h" +#include "lops.h" +#include "recovery.h" #include "rgrp.h" +#include "super.h" #include "util.h" struct kmem_cache *gfs2_glock_cachep __read_mostly; @@ -33,32 +39,257 @@ void gfs2_assert_i(struct gfs2_sbd *sdp) fs_emerg(sdp, "fatal assertion failed\n"); } -int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...) +/** + * check_journal_clean - Make sure a journal is clean for a spectator mount + * @sdp: The GFS2 superblock + * @jd: The journal descriptor + * + * Returns: 0 if the journal is clean or locked, else an error + */ +int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, + bool verbose) +{ + int error; + struct gfs2_holder j_gh; + struct gfs2_log_header_host head; + struct gfs2_inode *ip; + + ip = GFS2_I(jd->jd_inode); + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP | + GL_EXACT | GL_NOCACHE, &j_gh); + if (error) { + if (verbose) + fs_err(sdp, "Error %d locking journal for spectator " + "mount.\n", error); + return -EPERM; + } + error = gfs2_jdesc_check(jd); + if (error) { + if (verbose) + fs_err(sdp, "Error checking journal for spectator " + "mount.\n"); + goto out_unlock; + } + error = gfs2_find_jhead(jd, &head, false); + if (error) { + if (verbose) + fs_err(sdp, "Error parsing journal for spectator " + "mount.\n"); + goto out_unlock; + } + if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { + error = -EPERM; + if (verbose) + fs_err(sdp, "jid=%u: Journal is dirty, so the first " + "mounter must not be a spectator.\n", + jd->jd_jid); + } + +out_unlock: + gfs2_glock_dq_uninit(&j_gh); + return error; +} + +static void signal_our_withdraw(struct gfs2_sbd *sdp) +{ + struct gfs2_glock *gl = sdp->sd_live_gh.gh_gl; + struct inode *inode = sdp->sd_jdesc->jd_inode; + struct gfs2_inode *ip = GFS2_I(inode); + u64 no_formal_ino = ip->i_no_formal_ino; + int ret = 0; + int tries; + + if (test_bit(SDF_NORECOVERY, &sdp->sd_flags)) + return; + + /* Prevent any glock dq until withdraw recovery is complete */ + set_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); + /* + * Don't tell dlm we're bailing until we have no more buffers in the + * wind. If journal had an IO error, the log code should just purge + * the outstanding buffers rather than submitting new IO. Making the + * file system read-only will flush the journal, etc. + * + * During a normal unmount, gfs2_make_fs_ro calls gfs2_log_shutdown + * which clears SDF_JOURNAL_LIVE. In a withdraw, we must not write + * any UNMOUNT log header, so we can't call gfs2_log_shutdown, and + * therefore we need to clear SDF_JOURNAL_LIVE manually. + */ + clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); + if (!sb_rdonly(sdp->sd_vfs)) + ret = gfs2_make_fs_ro(sdp); + + /* + * Drop the glock for our journal so another node can recover it. + */ + if (gfs2_holder_initialized(&sdp->sd_journal_gh)) { + gfs2_glock_dq_wait(&sdp->sd_journal_gh); + gfs2_holder_uninit(&sdp->sd_journal_gh); + } + sdp->sd_jinode_gh.gh_flags |= GL_NOCACHE; + gfs2_glock_dq(&sdp->sd_jinode_gh); + if (test_bit(SDF_FS_FROZEN, &sdp->sd_flags)) { + /* Make sure gfs2_unfreeze works if partially-frozen */ + flush_workqueue(gfs2_freeze_wq); + atomic_set(&sdp->sd_freeze_state, SFS_FROZEN); + thaw_super(sdp->sd_vfs); + } else { + wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE); + } + + /* + * holder_uninit to force glock_put, to force dlm to let go + */ + gfs2_holder_uninit(&sdp->sd_jinode_gh); + + /* + * Note: We need to be careful here: + * Our iput of jd_inode will evict it. The evict will dequeue its + * glock, but the glock dq will wait for the withdraw unless we have + * exception code in glock_dq. + */ + iput(inode); + /* + * Wait until the journal inode's glock is freed. This allows try locks + * on other nodes to be successful, otherwise we remain the owner of + * the glock as far as dlm is concerned. + */ + if (gl->gl_ops->go_free) { + set_bit(GLF_FREEING, &gl->gl_flags); + wait_on_bit(&gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE); + } + + if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */ + clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); + goto skip_recovery; + } + /* + * Dequeue the "live" glock, but keep a reference so it's never freed. + */ + gfs2_glock_hold(gl); + gfs2_glock_dq_wait(&sdp->sd_live_gh); + /* + * We enqueue the "live" glock in EX so that all other nodes + * get a demote request and act on it. We don't really want the + * lock in EX, so we send a "try" lock with 1CB to produce a callback. + */ + fs_warn(sdp, "Requesting recovery of jid %d.\n", + sdp->sd_lockstruct.ls_jid); + gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | LM_FLAG_NOEXP, + &sdp->sd_live_gh); + msleep(GL_GLOCK_MAX_HOLD); + /* + * This will likely fail in a cluster, but succeed standalone: + */ + ret = gfs2_glock_nq(&sdp->sd_live_gh); + + /* + * If we actually got the "live" lock in EX mode, there are no other + * nodes available to replay our journal. So we try to replay it + * ourselves. We hold the "live" glock to prevent other mounters + * during recovery, then just dequeue it and reacquire it in our + * normal SH mode. Just in case the problem that caused us to + * withdraw prevents us from recovering our journal (e.g. io errors + * and such) we still check if the journal is clean before proceeding + * but we may wait forever until another mounter does the recovery. + */ + if (ret == 0) { + fs_warn(sdp, "No other mounters found. Trying to recover our " + "own journal jid %d.\n", sdp->sd_lockstruct.ls_jid); + if (gfs2_recover_journal(sdp->sd_jdesc, 1)) + fs_warn(sdp, "Unable to recover our journal jid %d.\n", + sdp->sd_lockstruct.ls_jid); + gfs2_glock_dq_wait(&sdp->sd_live_gh); + gfs2_holder_reinit(LM_ST_SHARED, LM_FLAG_NOEXP | GL_EXACT, + &sdp->sd_live_gh); + gfs2_glock_nq(&sdp->sd_live_gh); + } + + gfs2_glock_queue_put(gl); /* drop the extra reference we acquired */ + clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); + + /* + * At this point our journal is evicted, so we need to get a new inode + * for it. Once done, we need to call gfs2_find_jhead which + * calls gfs2_map_journal_extents to map it for us again. + * + * Note that we don't really want it to look up a FREE block. The + * GFS2_BLKST_FREE simply overrides a block check in gfs2_inode_lookup + * which would otherwise fail because it requires grabbing an rgrp + * glock, which would fail with -EIO because we're withdrawing. + */ + inode = gfs2_inode_lookup(sdp->sd_vfs, DT_UNKNOWN, + sdp->sd_jdesc->jd_no_addr, no_formal_ino, + GFS2_BLKST_FREE); + if (IS_ERR(inode)) { + fs_warn(sdp, "Reprocessing of jid %d failed with %ld.\n", + sdp->sd_lockstruct.ls_jid, PTR_ERR(inode)); + goto skip_recovery; + } + sdp->sd_jdesc->jd_inode = inode; + + /* + * Now wait until recovery is complete. + */ + for (tries = 0; tries < 10; tries++) { + ret = check_journal_clean(sdp, sdp->sd_jdesc, false); + if (!ret) + break; + msleep(HZ); + fs_warn(sdp, "Waiting for journal recovery jid %d.\n", + sdp->sd_lockstruct.ls_jid); + } +skip_recovery: + if (!ret) + fs_warn(sdp, "Journal recovery complete for jid %d.\n", + sdp->sd_lockstruct.ls_jid); + else + fs_warn(sdp, "Journal recovery skipped for %d until next " + "mount.\n", sdp->sd_lockstruct.ls_jid); + fs_warn(sdp, "Glock dequeues delayed: %lu\n", sdp->sd_glock_dqs_held); + sdp->sd_glock_dqs_held = 0; + wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY); +} + +void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...) { - struct lm_lockstruct *ls = &sdp->sd_lockstruct; - const struct lm_lockops *lm = ls->ls_ops; - va_list args; struct va_format vaf; + va_list args; if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW && - test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags)) - return 0; - - if (fmt) { - va_start(args, fmt); + test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) + return; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + fs_err(sdp, "%pV", &vaf); + va_end(args); +} - vaf.fmt = fmt; - vaf.va = &args; +int gfs2_withdraw(struct gfs2_sbd *sdp) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + const struct lm_lockops *lm = ls->ls_ops; - fs_err(sdp, "%pV", &vaf); + if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW && + test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags)) { + if (!test_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags)) + return -1; - va_end(args); + wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG, + TASK_UNINTERRUPTIBLE); + return -1; } + set_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags); + if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) { fs_err(sdp, "about to withdraw this file system\n"); BUG_ON(sdp->sd_args.ar_debug); + signal_our_withdraw(sdp); + kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE); if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm")) @@ -69,8 +300,11 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...) lm->lm_unmount(sdp); } set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags); - fs_err(sdp, "withdrawn\n"); + fs_err(sdp, "File system withdrawn\n"); dump_stack(); + clear_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags); + smp_mb__after_atomic(); + wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG); } if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC) @@ -81,35 +315,45 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...) /** * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false - * Returns: -1 if this call withdrew the machine, - * -2 if it was already withdrawn */ -int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, - const char *function, char *file, unsigned int line) +void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, + const char *function, char *file, unsigned int line, + bool delayed) { - int me; - me = gfs2_lm_withdraw(sdp, - "fatal: assertion \"%s\" failed\n" - " function = %s, file = %s, line = %u\n", - assertion, function, file, line); + if (gfs2_withdrawn(sdp)) + return; + + fs_err(sdp, + "fatal: assertion \"%s\" failed\n" + " function = %s, file = %s, line = %u\n", + assertion, function, file, line); + + /* + * If errors=panic was specified on mount, it won't help to delay the + * withdraw. + */ + if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC) + delayed = false; + + if (delayed) + gfs2_withdraw_delayed(sdp); + else + gfs2_withdraw(sdp); dump_stack(); - return (me) ? -1 : -2; } /** * gfs2_assert_warn_i - Print a message to the console if @assertion is false - * Returns: -1 if we printed something - * -2 if we didn't */ -int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, - const char *function, char *file, unsigned int line) +void gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, + const char *function, char *file, unsigned int line) { if (time_before(jiffies, sdp->sd_last_warning + gfs2_tune_get(sdp, gt_complain_secs) * HZ)) - return -2; + return; if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) fs_warn(sdp, "warning: assertion \"%s\" failed at function = %s, file = %s, line = %u\n", @@ -127,69 +371,59 @@ int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, sdp->sd_fsname, function, file, line); sdp->sd_last_warning = jiffies; - - return -1; } /** * gfs2_consist_i - Flag a filesystem consistency error and withdraw - * Returns: -1 if this call withdrew the machine, - * 0 if it was already withdrawn */ -int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, const char *function, - char *file, unsigned int line) +void gfs2_consist_i(struct gfs2_sbd *sdp, const char *function, + char *file, unsigned int line) { - int rv; - rv = gfs2_lm_withdraw(sdp, - "fatal: filesystem consistency error - function = %s, file = %s, line = %u\n", - function, file, line); - return rv; + gfs2_lm(sdp, + "fatal: filesystem consistency error - function = %s, file = %s, line = %u\n", + function, file, line); + gfs2_withdraw(sdp); } /** * gfs2_consist_inode_i - Flag an inode consistency error and withdraw - * Returns: -1 if this call withdrew the machine, - * 0 if it was already withdrawn */ -int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide, - const char *function, char *file, unsigned int line) +void gfs2_consist_inode_i(struct gfs2_inode *ip, + const char *function, char *file, unsigned int line) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - int rv; - rv = gfs2_lm_withdraw(sdp, - "fatal: filesystem consistency error\n" - " inode = %llu %llu\n" - " function = %s, file = %s, line = %u\n", - (unsigned long long)ip->i_no_formal_ino, - (unsigned long long)ip->i_no_addr, - function, file, line); - return rv; + + gfs2_lm(sdp, + "fatal: filesystem consistency error\n" + " inode = %llu %llu\n" + " function = %s, file = %s, line = %u\n", + (unsigned long long)ip->i_no_formal_ino, + (unsigned long long)ip->i_no_addr, + function, file, line); + gfs2_withdraw(sdp); } /** * gfs2_consist_rgrpd_i - Flag a RG consistency error and withdraw - * Returns: -1 if this call withdrew the machine, - * 0 if it was already withdrawn */ -int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide, - const char *function, char *file, unsigned int line) +void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, + const char *function, char *file, unsigned int line) { struct gfs2_sbd *sdp = rgd->rd_sbd; char fs_id_buf[sizeof(sdp->sd_fsname) + 7]; - int rv; sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname); gfs2_rgrp_dump(NULL, rgd->rd_gl, fs_id_buf); - rv = gfs2_lm_withdraw(sdp, - "fatal: filesystem consistency error\n" - " RG = %llu\n" - " function = %s, file = %s, line = %u\n", - (unsigned long long)rgd->rd_addr, - function, file, line); - return rv; + gfs2_lm(sdp, + "fatal: filesystem consistency error\n" + " RG = %llu\n" + " function = %s, file = %s, line = %u\n", + (unsigned long long)rgd->rd_addr, + function, file, line); + gfs2_withdraw(sdp); } /** @@ -203,12 +437,14 @@ int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, unsigned int line) { int me; - me = gfs2_lm_withdraw(sdp, - "fatal: invalid metadata block\n" - " bh = %llu (%s)\n" - " function = %s, file = %s, line = %u\n", - (unsigned long long)bh->b_blocknr, type, - function, file, line); + + gfs2_lm(sdp, + "fatal: invalid metadata block\n" + " bh = %llu (%s)\n" + " function = %s, file = %s, line = %u\n", + (unsigned long long)bh->b_blocknr, type, + function, file, line); + me = gfs2_withdraw(sdp); return (me) ? -1 : -2; } @@ -223,12 +459,14 @@ int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, char *file, unsigned int line) { int me; - me = gfs2_lm_withdraw(sdp, - "fatal: invalid metadata block\n" - " bh = %llu (type: exp=%u, found=%u)\n" - " function = %s, file = %s, line = %u\n", - (unsigned long long)bh->b_blocknr, type, t, - function, file, line); + + gfs2_lm(sdp, + "fatal: invalid metadata block\n" + " bh = %llu (type: exp=%u, found=%u)\n" + " function = %s, file = %s, line = %u\n", + (unsigned long long)bh->b_blocknr, type, t, + function, file, line); + me = gfs2_withdraw(sdp); return (me) ? -1 : -2; } @@ -241,12 +479,11 @@ int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file, unsigned int line) { - int rv; - rv = gfs2_lm_withdraw(sdp, - "fatal: I/O error\n" - " function = %s, file = %s, line = %u\n", - function, file, line); - return rv; + gfs2_lm(sdp, + "fatal: I/O error\n" + " function = %s, file = %s, line = %u\n", + function, file, line); + return gfs2_withdraw(sdp); } /** @@ -258,14 +495,14 @@ void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, const char *function, char *file, unsigned int line, bool withdraw) { - if (!gfs2_withdrawn(sdp)) - fs_err(sdp, - "fatal: I/O error\n" - " block = %llu\n" - " function = %s, file = %s, line = %u\n", - (unsigned long long)bh->b_blocknr, - function, file, line); + if (gfs2_withdrawn(sdp)) + return; + + fs_err(sdp, "fatal: I/O error\n" + " block = %llu\n" + " function = %s, file = %s, line = %u\n", + (unsigned long long)bh->b_blocknr, function, file, line); if (withdraw) - gfs2_lm_withdraw(sdp, NULL); + gfs2_withdraw(sdp); } diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index f2702bc9837c..a3542560da6f 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -36,41 +36,59 @@ do { \ } while (0) -int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, - const char *function, char *file, unsigned int line); +void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, + const char *function, char *file, unsigned int line, + bool delayed); #define gfs2_assert_withdraw(sdp, assertion) \ -((likely(assertion)) ? 0 : gfs2_assert_withdraw_i((sdp), #assertion, \ - __func__, __FILE__, __LINE__)) - - -int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, - const char *function, char *file, unsigned int line); + ({ \ + bool _bool = (assertion); \ + if (unlikely(!_bool)) \ + gfs2_assert_withdraw_i((sdp), #assertion, \ + __func__, __FILE__, __LINE__, false); \ + !_bool; \ + }) + +#define gfs2_assert_withdraw_delayed(sdp, assertion) \ + ({ \ + bool _bool = (assertion); \ + if (unlikely(!_bool)) \ + gfs2_assert_withdraw_i((sdp), #assertion, \ + __func__, __FILE__, __LINE__, true); \ + !_bool; \ + }) + +void gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, + const char *function, char *file, unsigned int line); #define gfs2_assert_warn(sdp, assertion) \ -((likely(assertion)) ? 0 : gfs2_assert_warn_i((sdp), #assertion, \ - __func__, __FILE__, __LINE__)) - + ({ \ + bool _bool = (assertion); \ + if (unlikely(!_bool)) \ + gfs2_assert_warn_i((sdp), #assertion, \ + __func__, __FILE__, __LINE__); \ + !_bool; \ + }) -int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, - const char *function, char *file, unsigned int line); +void gfs2_consist_i(struct gfs2_sbd *sdp, + const char *function, char *file, unsigned int line); #define gfs2_consist(sdp) \ -gfs2_consist_i((sdp), 0, __func__, __FILE__, __LINE__) +gfs2_consist_i((sdp), __func__, __FILE__, __LINE__) -int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide, - const char *function, char *file, unsigned int line); +void gfs2_consist_inode_i(struct gfs2_inode *ip, + const char *function, char *file, unsigned int line); #define gfs2_consist_inode(ip) \ -gfs2_consist_inode_i((ip), 0, __func__, __FILE__, __LINE__) +gfs2_consist_inode_i((ip), __func__, __FILE__, __LINE__) -int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide, - const char *function, char *file, unsigned int line); +void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, + const char *function, char *file, unsigned int line); #define gfs2_consist_rgrpd(rgd) \ -gfs2_consist_rgrpd_i((rgd), 0, __func__, __FILE__, __LINE__) +gfs2_consist_rgrpd_i((rgd), __func__, __FILE__, __LINE__) int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, @@ -129,6 +147,9 @@ static inline void gfs2_metatype_set(struct buffer_head *bh, u16 type, int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file, unsigned int line); +extern int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, + bool verbose); + #define gfs2_io_error(sdp) \ gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__); @@ -165,18 +186,29 @@ static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, } /** + * gfs2_withdraw_delayed - withdraw as soon as possible without deadlocks + * @sdp: the superblock + */ +static inline void gfs2_withdraw_delayed(struct gfs2_sbd *sdp) +{ + set_bit(SDF_WITHDRAWING, &sdp->sd_flags); +} + +/** * gfs2_withdrawn - test whether the file system is withdrawing or withdrawn * @sdp: the superblock */ static inline bool gfs2_withdrawn(struct gfs2_sbd *sdp) { - return test_bit(SDF_WITHDRAWN, &sdp->sd_flags); + return test_bit(SDF_WITHDRAWN, &sdp->sd_flags) || + test_bit(SDF_WITHDRAWING, &sdp->sd_flags); } #define gfs2_tune_get(sdp, field) \ gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field) __printf(2, 3) -int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...); +void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...); +int gfs2_withdraw(struct gfs2_sbd *sdp); #endif /* __UTIL_DOT_H__ */ diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index bbe593d16bea..9d7667bc4292 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -1222,7 +1222,7 @@ static int gfs2_xattr_set(const struct xattr_handler *handler, struct gfs2_holder gh; int ret; - ret = gfs2_rsqa_alloc(ip); + ret = gfs2_qa_get(ip); if (ret) return ret; @@ -1231,15 +1231,19 @@ static int gfs2_xattr_set(const struct xattr_handler *handler, if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); if (ret) - return ret; + goto out; } else { - if (WARN_ON_ONCE(ip->i_gl->gl_state != LM_ST_EXCLUSIVE)) - return -EIO; + if (WARN_ON_ONCE(ip->i_gl->gl_state != LM_ST_EXCLUSIVE)) { + ret = -EIO; + goto out; + } gfs2_holder_mark_uninitialized(&gh); } ret = __gfs2_xattr_set(inode, name, value, size, flags, handler->flags); if (gfs2_holder_initialized(&gh)) gfs2_glock_dq_uninit(&gh); +out: + gfs2_qa_put(ip); return ret; } |