summaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_buf_item.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_buf_item.c')
-rw-r--r--fs/xfs/xfs_buf_item.c422
1 files changed, 212 insertions, 210 deletions
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 9e75e8d6042e..5bb6f22cc11a 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -12,8 +12,13 @@
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_trans.h"
-#include "xfs_buf_item.h"
#include "xfs_trans_priv.h"
+#include "xfs_buf_item.h"
+#include "xfs_inode.h"
+#include "xfs_inode_item.h"
+#include "xfs_quota.h"
+#include "xfs_dquot_item.h"
+#include "xfs_dquot.h"
#include "xfs_trace.h"
#include "xfs_log.h"
@@ -25,7 +30,7 @@ static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip)
return container_of(lip, struct xfs_buf_log_item, bli_item);
}
-STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp);
+static void xfs_buf_item_done(struct xfs_buf *bp);
/* Is this log iovec plausibly large enough to contain the buffer log format? */
bool
@@ -457,10 +462,9 @@ xfs_buf_item_unpin(
* the AIL lock.
*/
if (bip->bli_flags & XFS_BLI_STALE_INODE) {
- xfs_buf_do_callbacks(bp);
- bp->b_log_item = NULL;
- list_del_init(&bp->b_li_list);
- bp->b_iodone = NULL;
+ xfs_buf_item_done(bp);
+ xfs_iflush_done(bp);
+ ASSERT(list_empty(&bp->b_li_list));
} else {
xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR);
xfs_buf_item_relse(bp);
@@ -734,7 +738,7 @@ xfs_buf_item_init(
return 0;
}
- bip = kmem_zone_zalloc(xfs_buf_item_zone, 0);
+ bip = kmem_cache_zalloc(xfs_buf_item_zone, GFP_KERNEL | __GFP_NOFAIL);
xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops);
bip->bli_buf = bp;
@@ -936,11 +940,7 @@ xfs_buf_item_free(
}
/*
- * This is called when the buf log item is no longer needed. It should
- * free the buf log item associated with the given buffer and clear
- * the buffer's pointer to the buf log item. If there are no more
- * items in the list, clear the b_iodone field of the buffer (see
- * xfs_buf_attach_iodone() below).
+ * xfs_buf_item_relse() is called when the buf log item is no longer needed.
*/
void
xfs_buf_item_relse(
@@ -952,137 +952,28 @@ xfs_buf_item_relse(
ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags));
bp->b_log_item = NULL;
- if (list_empty(&bp->b_li_list))
- bp->b_iodone = NULL;
-
xfs_buf_rele(bp);
xfs_buf_item_free(bip);
}
-
/*
- * Add the given log item with its callback to the list of callbacks
- * to be called when the buffer's I/O completes. If it is not set
- * already, set the buffer's b_iodone() routine to be
- * xfs_buf_iodone_callbacks() and link the log item into the list of
- * items rooted at b_li_list.
+ * Decide if we're going to retry the write after a failure, and prepare
+ * the buffer for retrying the write.
*/
-void
-xfs_buf_attach_iodone(
- struct xfs_buf *bp,
- void (*cb)(struct xfs_buf *, struct xfs_log_item *),
- struct xfs_log_item *lip)
-{
- ASSERT(xfs_buf_islocked(bp));
-
- lip->li_cb = cb;
- list_add_tail(&lip->li_bio_list, &bp->b_li_list);
-
- ASSERT(bp->b_iodone == NULL ||
- bp->b_iodone == xfs_buf_iodone_callbacks);
- bp->b_iodone = xfs_buf_iodone_callbacks;
-}
-
-/*
- * We can have many callbacks on a buffer. Running the callbacks individually
- * can cause a lot of contention on the AIL lock, so we allow for a single
- * callback to be able to scan the remaining items in bp->b_li_list for other
- * items of the same type and callback to be processed in the first call.
- *
- * As a result, the loop walking the callback list below will also modify the
- * list. it removes the first item from the list and then runs the callback.
- * The loop then restarts from the new first item int the list. This allows the
- * callback to scan and modify the list attached to the buffer and we don't
- * have to care about maintaining a next item pointer.
- */
-STATIC void
-xfs_buf_do_callbacks(
- struct xfs_buf *bp)
-{
- struct xfs_buf_log_item *blip = bp->b_log_item;
- struct xfs_log_item *lip;
-
- /* If there is a buf_log_item attached, run its callback */
- if (blip) {
- lip = &blip->bli_item;
- lip->li_cb(bp, lip);
- }
-
- while (!list_empty(&bp->b_li_list)) {
- lip = list_first_entry(&bp->b_li_list, struct xfs_log_item,
- li_bio_list);
-
- /*
- * Remove the item from the list, so we don't have any
- * confusion if the item is added to another buf.
- * Don't touch the log item after calling its
- * callback, because it could have freed itself.
- */
- list_del_init(&lip->li_bio_list);
- lip->li_cb(bp, lip);
- }
-}
-
-/*
- * Invoke the error state callback for each log item affected by the failed I/O.
- *
- * If a metadata buffer write fails with a non-permanent error, the buffer is
- * eventually resubmitted and so the completion callbacks are not run. The error
- * state may need to be propagated to the log items attached to the buffer,
- * however, so the next AIL push of the item knows hot to handle it correctly.
- */
-STATIC void
-xfs_buf_do_callbacks_fail(
- struct xfs_buf *bp)
-{
- struct xfs_log_item *lip;
- struct xfs_ail *ailp;
-
- /*
- * Buffer log item errors are handled directly by xfs_buf_item_push()
- * and xfs_buf_iodone_callback_error, and they have no IO error
- * callbacks. Check only for items in b_li_list.
- */
- if (list_empty(&bp->b_li_list))
- return;
-
- lip = list_first_entry(&bp->b_li_list, struct xfs_log_item,
- li_bio_list);
- ailp = lip->li_ailp;
- spin_lock(&ailp->ail_lock);
- list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
- if (lip->li_ops->iop_error)
- lip->li_ops->iop_error(lip, bp);
- }
- spin_unlock(&ailp->ail_lock);
-}
-
static bool
-xfs_buf_iodone_callback_error(
+xfs_buf_ioerror_fail_without_retry(
struct xfs_buf *bp)
{
- struct xfs_buf_log_item *bip = bp->b_log_item;
- struct xfs_log_item *lip;
- struct xfs_mount *mp;
+ struct xfs_mount *mp = bp->b_mount;
static ulong lasttime;
static xfs_buftarg_t *lasttarg;
- struct xfs_error_cfg *cfg;
-
- /*
- * The failed buffer might not have a buf_log_item attached or the
- * log_item list might be empty. Get the mp from the available
- * xfs_log_item
- */
- lip = list_first_entry_or_null(&bp->b_li_list, struct xfs_log_item,
- li_bio_list);
- mp = lip ? lip->li_mountp : bip->bli_item.li_mountp;
/*
* If we've already decided to shutdown the filesystem because of
* I/O errors, there's no point in giving this a retry.
*/
if (XFS_FORCED_SHUTDOWN(mp))
- goto out_stale;
+ return true;
if (bp->b_target != lasttarg ||
time_after(jiffies, (lasttime + 5*HZ))) {
@@ -1093,129 +984,240 @@ xfs_buf_iodone_callback_error(
/* synchronous writes will have callers process the error */
if (!(bp->b_flags & XBF_ASYNC))
- goto out_stale;
-
- trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
- ASSERT(bp->b_iodone != NULL);
-
- cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error);
+ return true;
+ return false;
+}
- /*
- * If the write was asynchronous then no one will be looking for the
- * error. If this is the first failure of this type, clear the error
- * state and write the buffer out again. This means we always retry an
- * async write failure at least once, but we also need to set the buffer
- * up to behave correctly now for repeated failures.
- */
- if (!(bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL)) ||
- bp->b_last_error != bp->b_error) {
- bp->b_flags |= (XBF_WRITE | XBF_DONE | XBF_WRITE_FAIL);
- bp->b_last_error = bp->b_error;
- if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
- !bp->b_first_retry_time)
- bp->b_first_retry_time = jiffies;
+static bool
+xfs_buf_ioerror_retry(
+ struct xfs_buf *bp,
+ struct xfs_error_cfg *cfg)
+{
+ if ((bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL)) &&
+ bp->b_last_error == bp->b_error)
+ return false;
- xfs_buf_ioerror(bp, 0);
- xfs_buf_submit(bp);
- return true;
- }
+ bp->b_flags |= (XBF_WRITE | XBF_DONE | XBF_WRITE_FAIL);
+ bp->b_last_error = bp->b_error;
+ if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
+ !bp->b_first_retry_time)
+ bp->b_first_retry_time = jiffies;
+ return true;
+}
- /*
- * Repeated failure on an async write. Take action according to the
- * error configuration we have been set up to use.
- */
+/*
+ * Account for this latest trip around the retry handler, and decide if
+ * we've failed enough times to constitute a permanent failure.
+ */
+static bool
+xfs_buf_ioerror_permanent(
+ struct xfs_buf *bp,
+ struct xfs_error_cfg *cfg)
+{
+ struct xfs_mount *mp = bp->b_mount;
if (cfg->max_retries != XFS_ERR_RETRY_FOREVER &&
++bp->b_retries > cfg->max_retries)
- goto permanent_error;
+ return true;
if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time))
- goto permanent_error;
+ return true;
/* At unmount we may treat errors differently */
if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount)
- goto permanent_error;
+ return true;
- /*
- * Still a transient error, run IO completion failure callbacks and let
- * the higher layers retry the buffer.
- */
- xfs_buf_do_callbacks_fail(bp);
- xfs_buf_ioerror(bp, 0);
- xfs_buf_relse(bp);
- return true;
+ return false;
+}
+
+/*
+ * On a sync write or shutdown we just want to stale the buffer and let the
+ * caller handle the error in bp->b_error appropriately.
+ *
+ * If the write was asynchronous then no one will be looking for the error. If
+ * this is the first failure of this type, clear the error state and write the
+ * buffer out again. This means we always retry an async write failure at least
+ * once, but we also need to set the buffer up to behave correctly now for
+ * repeated failures.
+ *
+ * If we get repeated async write failures, then we take action according to the
+ * error configuration we have been set up to use.
+ *
+ * Multi-state return value:
+ *
+ * XBF_IOERROR_FINISH: clear IO error retry state and run callback completions
+ * XBF_IOERROR_DONE: resubmitted immediately, do not run any completions
+ * XBF_IOERROR_FAIL: transient error, run failure callback completions and then
+ * release the buffer
+ */
+enum {
+ XBF_IOERROR_FINISH,
+ XBF_IOERROR_DONE,
+ XBF_IOERROR_FAIL,
+};
+
+static int
+xfs_buf_iodone_error(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_mount;
+ struct xfs_error_cfg *cfg;
+
+ if (xfs_buf_ioerror_fail_without_retry(bp))
+ goto out_stale;
+
+ trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
+
+ cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error);
+ if (xfs_buf_ioerror_retry(bp, cfg)) {
+ xfs_buf_ioerror(bp, 0);
+ xfs_buf_submit(bp);
+ return XBF_IOERROR_DONE;
+ }
/*
* Permanent error - we need to trigger a shutdown if we haven't already
* to indicate that inconsistency will result from this action.
*/
-permanent_error:
- xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
+ if (xfs_buf_ioerror_permanent(bp, cfg)) {
+ xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
+ goto out_stale;
+ }
+
+ /* Still considered a transient error. Caller will schedule retries. */
+ return XBF_IOERROR_FAIL;
+
out_stale:
xfs_buf_stale(bp);
bp->b_flags |= XBF_DONE;
trace_xfs_buf_error_relse(bp, _RET_IP_);
- return false;
+ return XBF_IOERROR_FINISH;
}
-/*
- * This is the iodone() function for buffers which have had callbacks attached
- * to them by xfs_buf_attach_iodone(). We need to iterate the items on the
- * callback list, mark the buffer as having no more callbacks and then push the
- * buffer through IO completion processing.
- */
-void
-xfs_buf_iodone_callbacks(
+static void
+xfs_buf_item_done(
struct xfs_buf *bp)
{
- /*
- * If there is an error, process it. Some errors require us
- * to run callbacks after failure processing is done so we
- * detect that and take appropriate action.
- */
- if (bp->b_error && xfs_buf_iodone_callback_error(bp))
+ struct xfs_buf_log_item *bip = bp->b_log_item;
+
+ if (!bip)
return;
/*
- * Successful IO or permanent error. Either way, we can clear the
- * retry state here in preparation for the next error that may occur.
+ * If we are forcibly shutting down, this may well be off the AIL
+ * already. That's because we simulate the log-committed callbacks to
+ * unpin these buffers. Or we may never have put this item on AIL
+ * because of the transaction was aborted forcibly.
+ * xfs_trans_ail_delete() takes care of these.
+ *
+ * Either way, AIL is useless if we're forcing a shutdown.
*/
+ xfs_trans_ail_delete(&bip->bli_item, SHUTDOWN_CORRUPT_INCORE);
+ bp->b_log_item = NULL;
+ xfs_buf_item_free(bip);
+ xfs_buf_rele(bp);
+}
+
+static inline void
+xfs_buf_clear_ioerror_retry_state(
+ struct xfs_buf *bp)
+{
bp->b_last_error = 0;
bp->b_retries = 0;
bp->b_first_retry_time = 0;
+}
- xfs_buf_do_callbacks(bp);
- bp->b_log_item = NULL;
- list_del_init(&bp->b_li_list);
- bp->b_iodone = NULL;
- xfs_buf_ioend(bp);
+/*
+ * Inode buffer iodone callback function.
+ */
+void
+xfs_buf_inode_iodone(
+ struct xfs_buf *bp)
+{
+ if (bp->b_error) {
+ struct xfs_log_item *lip;
+ int ret = xfs_buf_iodone_error(bp);
+
+ if (ret == XBF_IOERROR_FINISH)
+ goto finish_iodone;
+ if (ret == XBF_IOERROR_DONE)
+ return;
+ ASSERT(ret == XBF_IOERROR_FAIL);
+ list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
+ set_bit(XFS_LI_FAILED, &lip->li_flags);
+ }
+ xfs_buf_ioerror(bp, 0);
+ xfs_buf_relse(bp);
+ return;
+ }
+
+finish_iodone:
+ xfs_buf_clear_ioerror_retry_state(bp);
+ xfs_buf_item_done(bp);
+ xfs_iflush_done(bp);
+ xfs_buf_ioend_finish(bp);
}
/*
- * This is the iodone() function for buffers which have been
- * logged. It is called when they are eventually flushed out.
- * It should remove the buf item from the AIL, and free the buf item.
- * It is called by xfs_buf_iodone_callbacks() above which will take
- * care of cleaning up the buffer itself.
+ * Dquot buffer iodone callback function.
*/
void
-xfs_buf_iodone(
- struct xfs_buf *bp,
- struct xfs_log_item *lip)
+xfs_buf_dquot_iodone(
+ struct xfs_buf *bp)
{
- ASSERT(BUF_ITEM(lip)->bli_buf == bp);
+ if (bp->b_error) {
+ struct xfs_log_item *lip;
+ int ret = xfs_buf_iodone_error(bp);
+
+ if (ret == XBF_IOERROR_FINISH)
+ goto finish_iodone;
+ if (ret == XBF_IOERROR_DONE)
+ return;
+ ASSERT(ret == XBF_IOERROR_FAIL);
+ spin_lock(&bp->b_mount->m_ail->ail_lock);
+ list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
+ xfs_set_li_failed(lip, bp);
+ }
+ spin_unlock(&bp->b_mount->m_ail->ail_lock);
+ xfs_buf_ioerror(bp, 0);
+ xfs_buf_relse(bp);
+ return;
+ }
- xfs_buf_rele(bp);
+finish_iodone:
+ xfs_buf_clear_ioerror_retry_state(bp);
+ /* a newly allocated dquot buffer might have a log item attached */
+ xfs_buf_item_done(bp);
+ xfs_dquot_done(bp);
+ xfs_buf_ioend_finish(bp);
+}
- /*
- * If we are forcibly shutting down, this may well be off the AIL
- * already. That's because we simulate the log-committed callbacks to
- * unpin these buffers. Or we may never have put this item on AIL
- * because of the transaction was aborted forcibly.
- * xfs_trans_ail_delete() takes care of these.
- *
- * Either way, AIL is useless if we're forcing a shutdown.
- */
- xfs_trans_ail_delete(lip, SHUTDOWN_CORRUPT_INCORE);
- xfs_buf_item_free(BUF_ITEM(lip));
+/*
+ * Dirty buffer iodone callback function.
+ *
+ * Note that for things like remote attribute buffers, there may not be a buffer
+ * log item here, so processing the buffer log item must remain be optional.
+ */
+void
+xfs_buf_iodone(
+ struct xfs_buf *bp)
+{
+ if (bp->b_error) {
+ int ret = xfs_buf_iodone_error(bp);
+
+ if (ret == XBF_IOERROR_FINISH)
+ goto finish_iodone;
+ if (ret == XBF_IOERROR_DONE)
+ return;
+ ASSERT(ret == XBF_IOERROR_FAIL);
+ ASSERT(list_empty(&bp->b_li_list));
+ xfs_buf_ioerror(bp, 0);
+ xfs_buf_relse(bp);
+ return;
+ }
+
+finish_iodone:
+ xfs_buf_clear_ioerror_retry_state(bp);
+ xfs_buf_item_done(bp);
+ xfs_buf_ioend_finish(bp);
}