From 43ff2122e6492bcc88b065c433453dce88223b30 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Mon, 23 Apr 2012 15:58:39 +1000
Subject: xfs: on-stack delayed write buffer lists

Queue delwri buffers on a local on-stack list instead of a per-buftarg one,
and write back the buffers per-process instead of by waking up xfsbufd.

This is now easily doable given that we have very few places left that write
delwri buffers:

 - log recovery:
	Only done at mount time, and already forcing out the buffers
	synchronously using xfs_flush_buftarg

 - quotacheck:
	Same story.

 - dquot reclaim:
	Writes out dirty dquots on the LRU under memory pressure.  We might
	want to look into doing more of this via xfsaild, but it's already
	more optimal than the synchronous inode reclaim that writes each
	buffer synchronously.

 - xfsaild:
	This is the main beneficiary of the change.  By keeping a local list
	of buffers to write we reduce latency of writing out buffers, and
	more importantly we can remove all the delwri list promotions which
	were hitting the buffer cache hard under sustained metadata loads.

The implementation is very straightforward: xfs_buf_delwri_queue now gets a
new list_head pointer that it adds the delwri buffers to, and all callers
need to eventually submit the list using xfs_buf_delwri_submit or
xfs_buf_delwri_submit_nowait.  Buffers that are already on a delwri list
are skipped by xfs_buf_delwri_queue, since they are assumed to already be
queued on another delwri list.

The biggest change needed to pass down the buffer list was to the AIL
pushing.  Now that we operate on buffers, the trylock, push and pushbuf log
item methods are merged into a single push routine, which tries to lock the
item and, if possible, adds the buffer that needs writeback to the buffer
list.  This leads to much simpler code than the previous split, but requires
the individual IOP_PUSH instances to unlock and reacquire the AIL lock
around calls to blocking routines.

Given that the xfsailds now also handle writing out buffers, the conditions
for log forcing and the sleep times needed some small changes.  The most
important one is that we consider an AIL busy as long as we still have
buffers to push; the other is that we do increment the pushed LSN for
buffers that are currently being flushed, but still count them towards the
stuck items for restart purposes.  Without this we could hammer on stuck
items without ever forcing the log and fail to make progress under heavy
random delete workloads on fast flash storage devices.

[ Dave Chinner:
	- rebase on previous patches.
	- improved comments for XBF_DELWRI_Q handling
	- fix XBF_ASYNC handling in queue submission (test 106 failure)
	- rename delwri submit function buffer list parameters for clarity
	- xfs_efd_item_push() should return XFS_ITEM_PINNED ]

Signed-off-by: Christoph Hellwig
Reviewed-by: Dave Chinner
Reviewed-by: Mark Tinguely
Signed-off-by: Ben Myers
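[ Editor's note: a minimal sketch of the calling convention this patch
  introduces.  The two xfs_buf_delwri_* calls are the ones added by the
  patch; the surrounding caller is hypothetical:

	LIST_HEAD(buffer_list);		/* local on-stack delwri list */

	/* queue each dirty buffer we want written back */
	xfs_buf_delwri_queue(bp, &buffer_list);

	/* write the queued buffers back from this context */
	error = xfs_buf_delwri_submit(&buffer_list);

  xfs_buf_delwri_submit() waits for the I/O to complete, while
  xfs_buf_delwri_submit_nowait() issues the I/O and returns without
  waiting. ]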
---
 fs/xfs/xfs_trace.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'fs/xfs/xfs_trace.h')

diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 06838c42b2a0..2e41756e263a 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -328,7 +328,7 @@ DEFINE_BUF_EVENT(xfs_buf_unlock);
 DEFINE_BUF_EVENT(xfs_buf_iowait);
 DEFINE_BUF_EVENT(xfs_buf_iowait_done);
 DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
-DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue);
+DEFINE_BUF_EVENT(xfs_buf_delwri_queued);
 DEFINE_BUF_EVENT(xfs_buf_delwri_split);
 DEFINE_BUF_EVENT(xfs_buf_get_uncached);
 DEFINE_BUF_EVENT(xfs_bdstrat_shut);
@@ -486,12 +486,10 @@ DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale);
 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin);
 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin);
 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_trylock);
 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock);
 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale);
 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed);
 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pushbuf);
 DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf);
 DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur);
 DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb);
@@ -881,10 +879,9 @@ DEFINE_EVENT(xfs_log_item_class, name, \
 	TP_PROTO(struct xfs_log_item *lip), \
 	TP_ARGS(lip))
 DEFINE_LOG_ITEM_EVENT(xfs_ail_push);
-DEFINE_LOG_ITEM_EVENT(xfs_ail_pushbuf);
-DEFINE_LOG_ITEM_EVENT(xfs_ail_pushbuf_pinned);
 DEFINE_LOG_ITEM_EVENT(xfs_ail_pinned);
 DEFINE_LOG_ITEM_EVENT(xfs_ail_locked);
+DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing);
 
 DECLARE_EVENT_CLASS(xfs_file_class,
--
cgit v1.2.3


From 4e94b71b7068b4bd9c615301197e09dbf0c3b770 Mon Sep 17 00:00:00 2001
From: Dave Chinner
Date: Mon, 23 Apr 2012 15:58:51 +1000
Subject: xfs: use blocks for counting length of buffers

Now that we pass block counts everywhere, and index buffers by block
number, track the length of the buffer in units of blocks rather than
bytes.  Convert the code to use block counts, and the places that need
byte counts convert them at the point of use.

Also, remove the XFS_BUF_{SET_}SIZE() macros that are just wrappers
around the buffer length.  They only serve to make the code shouty and
don't actually add any real value.

Signed-off-by: Dave Chinner
Reviewed-by: Christoph Hellwig
Reviewed-by: Mark Tinguely
Signed-off-by: Ben Myers
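[ Editor's note: for reference, a "basic block" (BB) here is 512 bytes,
  and the conversions this patch leans on are the pre-existing
  BBTOB()/BTOBB() helpers, so byte counts are derived at the point of
  use.  An illustrative snippet, not part of the patch:

	int	nbytes = BBTOB(bp->b_length);	/* basic blocks -> bytes */
	int	nblocks = BTOBB(nbytes);	/* bytes -> basic blocks, rounding up */
]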
---
 fs/xfs/xfs_attr.c        | 15 +++++++++------
 fs/xfs/xfs_buf.c         | 22 ++++++++++++----------
 fs/xfs/xfs_buf.h         |  4 +---
 fs/xfs/xfs_log.c         |  5 +----
 fs/xfs/xfs_log_recover.c |  8 ++++----
 fs/xfs/xfs_trace.h       | 14 +++++++-------
 6 files changed, 34 insertions(+), 34 deletions(-)

(limited to 'fs/xfs/xfs_trace.h')

diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 65d61b948ead..6e9bd7e46982 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -1993,8 +1993,7 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
 		if (error)
 			return(error);
 
-		tmp = (valuelen < XFS_BUF_SIZE(bp))
-			? valuelen : XFS_BUF_SIZE(bp);
+		tmp = min_t(int, valuelen, BBTOB(bp->b_length));
 		xfs_buf_iomove(bp, 0, tmp, dst, XBRW_READ);
 		xfs_buf_relse(bp);
 		dst += tmp;
@@ -2097,6 +2096,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
 	lblkno = args->rmtblkno;
 	valuelen = args->valuelen;
 	while (valuelen > 0) {
+		int buflen;
+
 		/*
 		 * Try to remember where we decided to put the value.
 		 */
@@ -2118,11 +2119,13 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
 			       XBF_LOCK | XBF_DONT_BLOCK);
 		if (!bp)
 			return ENOMEM;
-		tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen :
-				XFS_BUF_SIZE(bp);
+
+		buflen = BBTOB(bp->b_length);
+		tmp = min_t(int, valuelen, buflen);
 		xfs_buf_iomove(bp, 0, tmp, src, XBRW_WRITE);
-		if (tmp < XFS_BUF_SIZE(bp))
-			xfs_buf_zero(bp, tmp, XFS_BUF_SIZE(bp) - tmp);
+		if (tmp < buflen)
+			xfs_buf_zero(bp, tmp, buflen - tmp);
+
 		error = xfs_bwrite(bp);	/* GROT: NOTE: synchronous write */
 		xfs_buf_relse(bp);
 		if (error)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 854b27a8e776..382c49a42ac2 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -198,11 +198,12 @@ xfs_buf_alloc(
 	bp->b_target = target;
 
 	/*
-	 * Set buffer_length and count_desired to the same value initially.
+	 * Set length and count_desired to the same value initially.
 	 * I/O routines should use count_desired, which will be the same in
 	 * most cases but may be reset (e.g. XFS recovery).
 	 */
-	bp->b_buffer_length = bp->b_count_desired = numblks << BBSHIFT;
+	bp->b_length = numblks;
+	bp->b_count_desired = numblks << BBSHIFT;
 	bp->b_flags = flags;
 
 	/*
@@ -313,14 +314,14 @@ xfs_buf_allocate_memory(
 	 * the memory from the heap - there's no need for the complexity of
 	 * page arrays to keep allocation down to order 0.
 	 */
-	if (bp->b_buffer_length < PAGE_SIZE) {
-		bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags));
+	if (bp->b_length < BTOBB(PAGE_SIZE)) {
+		bp->b_addr = kmem_alloc(BBTOB(bp->b_length), xb_to_km(flags));
 		if (!bp->b_addr) {
 			/* low memory - use alloc_page loop instead */
 			goto use_alloc_page;
 		}
 
-		if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) &
+		if (((unsigned long)(bp->b_addr + BBTOB(bp->b_length) - 1) &
 								PAGE_MASK) !=
 		    ((unsigned long)bp->b_addr & PAGE_MASK)) {
 			/* b_addr spans two pages - use alloc_page instead */
@@ -337,7 +338,7 @@ xfs_buf_allocate_memory(
 	}
 
 use_alloc_page:
-	end = BBTOB(bp->b_bn) + bp->b_buffer_length;
+	end = BBTOB(bp->b_bn + bp->b_length);
 	page_count = xfs_buf_btoc(end) - xfs_buf_btoct(BBTOB(bp->b_bn));
 	error = _xfs_buf_get_pages(bp, page_count, flags);
 	if (unlikely(error))
@@ -477,7 +478,7 @@ _xfs_buf_find(
 		 * reallocating a busy extent. Skip this buffer and
 		 * continue searching to the right for an exact match.
 		 */
-		if (bp->b_buffer_length != numbytes) {
+		if (bp->b_length != numblks) {
 			ASSERT(bp->b_flags & XBF_STALE);
 			rbp = &(*rbp)->rb_right;
 			continue;
@@ -574,7 +575,7 @@ xfs_buf_get(
 	 * that we can do IO on it.
 	 */
 	bp->b_bn = blkno;
-	bp->b_count_desired = bp->b_buffer_length;
+	bp->b_count_desired = BBTOB(bp->b_length);
 
 found:
 	if (!(bp->b_flags & XBF_MAPPED)) {
@@ -716,7 +717,8 @@ xfs_buf_set_empty(
 	bp->b_pages = NULL;
 	bp->b_page_count = 0;
 	bp->b_addr = NULL;
-	bp->b_buffer_length = bp->b_count_desired = numblks << BBSHIFT;
+	bp->b_length = numblks;
+	bp->b_count_desired = numblks << BBSHIFT;
 	bp->b_bn = XFS_BUF_DADDR_NULL;
 	bp->b_flags &= ~XBF_MAPPED;
 }
@@ -769,7 +771,7 @@ xfs_buf_associate_memory(
 	}
 
 	bp->b_count_desired = len;
-	bp->b_buffer_length = buflen;
+	bp->b_length = BTOBB(buflen);
 	bp->b_flags |= XBF_MAPPED;
 
 	return 0;
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 4d472e5ded7a..3dab208686a4 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -117,7 +117,7 @@ typedef struct xfs_buf {
 	 */
 	struct rb_node		b_rbnode;	/* rbtree node */
 	xfs_daddr_t		b_bn;		/* block number for I/O */
-	size_t			b_buffer_length;/* size of buffer in bytes */
+	int			b_length;	/* size of buffer in BBs */
 	atomic_t		b_hold;		/* reference count */
 	atomic_t		b_lru_ref;	/* lru reclaim ref count */
 	xfs_buf_flags_t		b_flags;	/* status flags */
@@ -246,8 +246,6 @@ void xfs_buf_stale(struct xfs_buf *bp);
 #define XFS_BUF_SET_ADDR(bp, bno)	((bp)->b_bn = (xfs_daddr_t)(bno))
 #define XFS_BUF_COUNT(bp)		((bp)->b_count_desired)
 #define XFS_BUF_SET_COUNT(bp, cnt)	((bp)->b_count_desired = (cnt))
-#define XFS_BUF_SIZE(bp)		((bp)->b_buffer_length)
-#define XFS_BUF_SET_SIZE(bp, cnt)	((bp)->b_buffer_length = (cnt))
 
 static inline void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
 {
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 89900129a4a3..f9d8355ffae2 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1197,9 +1197,6 @@ xlog_alloc_log(xfs_mount_t *mp,
 	spin_lock_init(&log->l_icloglock);
 	init_waitqueue_head(&log->l_flush_wait);
 
-	/* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
-	ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
-
 	iclogp = &log->l_iclog;
 	/*
 	 * The amount of memory to allocate for the iclog structure is
@@ -1239,7 +1236,7 @@ xlog_alloc_log(xfs_mount_t *mp,
 		head->h_fmt = cpu_to_be32(XLOG_FMT);
 		memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));
 
-		iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
+		iclog->ic_size = BBTOB(bp->b_length) - log->l_iclog_hsize;
 		iclog->ic_state = XLOG_STATE_ACTIVE;
 		iclog->ic_log = log;
 		atomic_set(&iclog->ic_refcnt, 0);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 8a2165c56108..24f59a25ecdd 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -146,7 +146,7 @@ xlog_align(
 {
 	xfs_daddr_t	offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1);
 
-	ASSERT(BBTOB(offset + nbblks) <= XFS_BUF_SIZE(bp));
+	ASSERT(offset + nbblks <= bp->b_length);
 	return bp->b_addr + BBTOB(offset);
 }
 
@@ -174,7 +174,7 @@ xlog_bread_noalign(
 	nbblks = round_up(nbblks, log->l_sectBBsize);
 
 	ASSERT(nbblks > 0);
-	ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp));
+	ASSERT(nbblks <= bp->b_length);
 
 	XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
 	XFS_BUF_READ(bp);
@@ -219,7 +219,7 @@ xlog_bread_offset(
 	xfs_caddr_t	offset)
 {
 	xfs_caddr_t	orig_offset = bp->b_addr;
-	int		orig_len = bp->b_buffer_length;
+	int		orig_len = BBTOB(bp->b_length);
 	int		error, error2;
 
 	error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks));
@@ -260,7 +260,7 @@ xlog_bwrite(
 	nbblks = round_up(nbblks, log->l_sectBBsize);
 
 	ASSERT(nbblks > 0);
-	ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp));
+	ASSERT(nbblks <= bp->b_length);
 
 	XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
 	XFS_BUF_ZEROFLAGS(bp);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 2e41756e263a..900764c450a8 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -281,7 +281,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
 		__field(xfs_daddr_t, bno)
-		__field(size_t, buffer_length)
+		__field(int, nblks)
 		__field(int, hold)
 		__field(int, pincount)
 		__field(unsigned, lockval)
@@ -291,18 +291,18 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
 	TP_fast_assign(
 		__entry->dev = bp->b_target->bt_dev;
 		__entry->bno = bp->b_bn;
-		__entry->buffer_length = bp->b_buffer_length;
+		__entry->nblks = bp->b_length;
 		__entry->hold = atomic_read(&bp->b_hold);
 		__entry->pincount = atomic_read(&bp->b_pin_count);
 		__entry->lockval = bp->b_sema.count;
 		__entry->flags = bp->b_flags;
 		__entry->caller_ip = caller_ip;
 	),
-	TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
+	TP_printk("dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d "
 		  "lock %d flags %s caller %pf",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long long)__entry->bno,
-		  __entry->buffer_length,
+		  __entry->nblks,
 		  __entry->hold,
 		  __entry->pincount,
 		  __entry->lockval,
@@ -362,7 +362,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
 	TP_fast_assign(
 		__entry->dev = bp->b_target->bt_dev;
 		__entry->bno = bp->b_bn;
-		__entry->buffer_length = bp->b_buffer_length;
+		__entry->buffer_length = BBTOB(bp->b_length);
 		__entry->flags = flags;
 		__entry->hold = atomic_read(&bp->b_hold);
 		__entry->pincount = atomic_read(&bp->b_pin_count);
@@ -406,7 +406,7 @@ TRACE_EVENT(xfs_buf_ioerror,
 	TP_fast_assign(
 		__entry->dev = bp->b_target->bt_dev;
 		__entry->bno = bp->b_bn;
-		__entry->buffer_length = bp->b_buffer_length;
+		__entry->buffer_length = BBTOB(bp->b_length);
 		__entry->hold = atomic_read(&bp->b_hold);
 		__entry->pincount = atomic_read(&bp->b_pin_count);
 		__entry->lockval = bp->b_sema.count;
@@ -450,7 +450,7 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class,
 		__entry->bli_recur = bip->bli_recur;
 		__entry->bli_refcount = atomic_read(&bip->bli_refcount);
 		__entry->buf_bno = bip->bli_buf->b_bn;
-		__entry->buf_len = bip->bli_buf->b_buffer_length;
+		__entry->buf_len = BBTOB(bip->bli_buf->b_length);
 		__entry->buf_flags = bip->bli_buf->b_flags;
 		__entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
 		__entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
--
cgit v1.2.3


From 4ecbfe637cbcc0f093d1f295ef483f4e31e3987b Mon Sep 17 00:00:00 2001
From: Dave Chinner
Date: Sun, 29 Apr 2012 10:41:10 +0000
Subject: xfs: clean up busy extent naming

Now that the busy extent tracking has been moved out of the allocation
files, clean up the namespace it uses to "xfs_extent_busy" rather than
a mix of "xfs_busy" and "xfs_alloc_busy".

Signed-off-by: Dave Chinner
Reviewed-by: Christoph Hellwig
Reviewed-by: Mark Tinguely
Signed-off-by: Ben Myers
---
 fs/xfs/xfs_alloc.c       | 12 ++++----
 fs/xfs/xfs_alloc_btree.c |  6 ++--
 fs/xfs/xfs_discard.c     |  4 +--
 fs/xfs/xfs_extent_busy.c | 78 ++++++++++++++++++++++++------------------------
 fs/xfs/xfs_extent_busy.h | 20 ++++++-------
 fs/xfs/xfs_log_cil.c     |  6 ++--
 fs/xfs/xfs_trace.h       | 16 +++++-----
 fs/xfs/xfs_trans.c       |  4 +--
 8 files changed, 73 insertions(+), 73 deletions(-)

(limited to 'fs/xfs/xfs_trace.h')

diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index ae6df2585895..588496de0f93 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -47,7 +47,7 @@ STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
 STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
 STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
 		xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
-STATIC void xfs_alloc_busy_trim(struct xfs_alloc_arg *,
+STATIC void xfs_extent_busy_trim(struct xfs_alloc_arg *,
 		xfs_agblock_t, xfs_extlen_t, xfs_agblock_t *, xfs_extlen_t *);
 
 /*
@@ -152,7 +152,7 @@ xfs_alloc_compute_aligned(
 	xfs_extlen_t	len;
 
 	/* Trim busy sections out of found extent */
-	xfs_alloc_busy_trim(args, foundbno, foundlen, &bno, &len);
+	xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
 
 	if (args->alignment > 1 && len >= args->minlen) {
 		xfs_agblock_t	aligned_bno = roundup(bno, args->alignment);
@@ -536,7 +536,7 @@ xfs_alloc_ag_vextent(
 		if (error)
 			return error;
 
-		ASSERT(!xfs_alloc_busy_search(args->mp, args->agno,
+		ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
 					      args->agbno, args->len));
 	}
 
@@ -603,7 +603,7 @@ xfs_alloc_ag_vextent_exact(
 	/*
 	 * Check for overlapping busy extents.
 	 */
-	xfs_alloc_busy_trim(args, fbno, flen, &tbno, &tlen);
+	xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);
 
 	/*
 	 * Give up if the start of the extent is busy, or the freespace isn't
@@ -1391,7 +1391,7 @@ xfs_alloc_ag_vextent_small(
 		if (error)
 			goto error0;
 		if (fbno != NULLAGBLOCK) {
-			xfs_alloc_busy_reuse(args->mp, args->agno, fbno, 1,
+			xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
 					     args->userdata);
 
 			if (args->userdata) {
@@ -2496,7 +2496,7 @@ xfs_free_extent(
 	error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
 	if (!error)
-		xfs_alloc_busy_insert(tp, args.agno, args.agbno, len, 0);
+		xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0);
 error0:
 	xfs_perag_put(args.pag);
 	return error;
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 3f665487521a..68ebabc388c6 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -94,7 +94,7 @@ xfs_allocbt_alloc_block(
 		return 0;
 	}
 
-	xfs_alloc_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false);
+	xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false);
 
 	xfs_trans_agbtree_delta(cur->bc_tp, 1);
 	new->s = cpu_to_be32(bno);
@@ -119,8 +119,8 @@ xfs_allocbt_free_block(
 	if (error)
 		return error;
 
-	xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
-			      XFS_ALLOC_BUSY_SKIP_DISCARD);
+	xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
+			      XFS_EXTENT_BUSY_SKIP_DISCARD);
 	xfs_trans_agbtree_delta(cur->bc_tp, -1);
 	return 0;
 }
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index e3f1abe774f6..f9c3fe304a17 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -118,7 +118,7 @@ xfs_trim_extents(
 		 * If any blocks in the range are still busy, skip the
 		 * discard and try again the next time.
 		 */
-		if (xfs_alloc_busy_search(mp, agno, fbno, flen)) {
+		if (xfs_extent_busy_search(mp, agno, fbno, flen)) {
 			trace_xfs_discard_busy(mp, agno, fbno, flen);
 			goto next_extent;
 		}
@@ -212,7 +212,7 @@ xfs_discard_extents(
 	struct xfs_mount	*mp,
 	struct list_head	*list)
 {
-	struct xfs_busy_extent	*busyp;
+	struct xfs_extent_busy	*busyp;
 	int			error = 0;
 
 	list_for_each_entry(busyp, list, list) {
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c
index 4b5a4fa869af..9475bd989379 100644
--- a/fs/xfs/xfs_extent_busy.c
+++ b/fs/xfs/xfs_extent_busy.c
@@ -32,27 +32,27 @@
 #include "xfs_trace.h"
 
 void
-xfs_alloc_busy_insert(
+xfs_extent_busy_insert(
 	struct xfs_trans	*tp,
 	xfs_agnumber_t		agno,
 	xfs_agblock_t		bno,
 	xfs_extlen_t		len,
 	unsigned int		flags)
 {
-	struct xfs_busy_extent	*new;
-	struct xfs_busy_extent	*busyp;
+	struct xfs_extent_busy	*new;
+	struct xfs_extent_busy	*busyp;
 	struct xfs_perag	*pag;
 	struct rb_node		**rbp;
 	struct rb_node		*parent = NULL;
 
-	new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL);
+	new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_MAYFAIL);
 	if (!new) {
 		/*
 		 * No Memory!  Since it is now not possible to track the free
 		 * block, make this a synchronous transaction to insure that
 		 * the block is not reused before this transaction commits.
 		 */
-		trace_xfs_alloc_busy_enomem(tp->t_mountp, agno, bno, len);
+		trace_xfs_extent_busy_enomem(tp->t_mountp, agno, bno, len);
 		xfs_trans_set_sync(tp);
 		return;
 	}
@@ -64,14 +64,14 @@ xfs_alloc_busy_insert(
 	new->flags = flags;
 
 	/* trace before insert to be able to see failed inserts */
-	trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len);
+	trace_xfs_extent_busy(tp->t_mountp, agno, bno, len);
 
 	pag = xfs_perag_get(tp->t_mountp, new->agno);
 	spin_lock(&pag->pagb_lock);
 	rbp = &pag->pagb_tree.rb_node;
 	while (*rbp) {
 		parent = *rbp;
-		busyp = rb_entry(parent, struct xfs_busy_extent, rb_node);
+		busyp = rb_entry(parent, struct xfs_extent_busy, rb_node);
 
 		if (new->bno < busyp->bno) {
 			rbp = &(*rbp)->rb_left;
@@ -95,14 +95,14 @@
 /*
  * Search for a busy extent within the range of the extent we are about to
  * allocate.  You need to be holding the busy extent tree lock when calling
- * xfs_alloc_busy_search(). This function returns 0 for no overlapping busy
+ * xfs_extent_busy_search(). This function returns 0 for no overlapping busy
  * extent, -1 for an overlapping but not exact busy extent, and 1 for an exact
  * match. This is done so that a non-zero return indicates an overlap that
  * will require a synchronous transaction, but it can still be
  * used to distinguish between a partial or exact match.
  */
 int
-xfs_alloc_busy_search(
+xfs_extent_busy_search(
 	struct xfs_mount	*mp,
 	xfs_agnumber_t		agno,
 	xfs_agblock_t		bno,
@@ -110,7 +110,7 @@ xfs_alloc_busy_search(
 {
 	struct xfs_perag	*pag;
 	struct rb_node		*rbp;
-	struct xfs_busy_extent	*busyp;
+	struct xfs_extent_busy	*busyp;
 	int			match = 0;
 
 	pag = xfs_perag_get(mp, agno);
@@ -120,7 +120,7 @@
 
 	/* find closest start bno overlap */
 	while (rbp) {
-		busyp = rb_entry(rbp, struct xfs_busy_extent, rb_node);
+		busyp = rb_entry(rbp, struct xfs_extent_busy, rb_node);
 		if (bno < busyp->bno) {
 			/* may overlap, but exact start block is lower */
 			if (bno + len > busyp->bno)
@@ -154,10 +154,10 @@
  * needs to be restarted.
  */
 STATIC bool
-xfs_alloc_busy_update_extent(
+xfs_extent_busy_update_extent(
 	struct xfs_mount	*mp,
 	struct xfs_perag	*pag,
-	struct xfs_busy_extent	*busyp,
+	struct xfs_extent_busy	*busyp,
 	xfs_agblock_t		fbno,
 	xfs_extlen_t		flen,
 	bool			userdata)
@@ -171,7 +171,7 @@
 	 * performing the discard a chance to mark the extent unbusy
 	 * and retry.
 	 */
-	if (busyp->flags & XFS_ALLOC_BUSY_DISCARDED) {
+	if (busyp->flags & XFS_EXTENT_BUSY_DISCARDED) {
 		spin_unlock(&pag->pagb_lock);
 		delay(1);
 		spin_lock(&pag->pagb_lock);
@@ -285,13 +285,13 @@
 		ASSERT(0);
 	}
 
-	trace_xfs_alloc_busy_reuse(mp, pag->pag_agno, fbno, flen);
+	trace_xfs_extent_busy_reuse(mp, pag->pag_agno, fbno, flen);
 	return true;
 
 out_force_log:
 	spin_unlock(&pag->pagb_lock);
 	xfs_log_force(mp, XFS_LOG_SYNC);
-	trace_xfs_alloc_busy_force(mp, pag->pag_agno, fbno, flen);
+	trace_xfs_extent_busy_force(mp, pag->pag_agno, fbno, flen);
 	spin_lock(&pag->pagb_lock);
 	return false;
 }
@@ -301,7 +301,7 @@ out_force_log:
  * For a given extent [fbno, flen], make sure we can reuse it safely.
  */
 void
-xfs_alloc_busy_reuse(
+xfs_extent_busy_reuse(
 	struct xfs_mount	*mp,
 	xfs_agnumber_t		agno,
 	xfs_agblock_t		fbno,
@@ -318,8 +318,8 @@ xfs_alloc_busy_reuse(
 restart:
 	rbp = pag->pagb_tree.rb_node;
 	while (rbp) {
-		struct xfs_busy_extent *busyp =
-			rb_entry(rbp, struct xfs_busy_extent, rb_node);
+		struct xfs_extent_busy *busyp =
+			rb_entry(rbp, struct xfs_extent_busy, rb_node);
 		xfs_agblock_t	bbno = busyp->bno;
 		xfs_agblock_t	bend = bbno + busyp->length;
 
@@ -331,7 +331,7 @@ restart:
 			continue;
 		}
 
-		if (!xfs_alloc_busy_update_extent(mp, pag, busyp, fbno, flen,
+		if (!xfs_extent_busy_update_extent(mp, pag, busyp, fbno, flen,
 						  userdata))
 			goto restart;
 	}
@@ -346,7 +346,7 @@
  * code needs to force out the log and retry the allocation.
 */
 STATIC void
-xfs_alloc_busy_trim(
+xfs_extent_busy_trim(
 	struct xfs_alloc_arg	*args,
 	xfs_agblock_t		bno,
 	xfs_extlen_t		len,
@@ -365,8 +365,8 @@ restart:
 	flen = len;
 	rbp = args->pag->pagb_tree.rb_node;
 	while (rbp && flen >= args->minlen) {
-		struct xfs_busy_extent *busyp =
-			rb_entry(rbp, struct xfs_busy_extent, rb_node);
+		struct xfs_extent_busy *busyp =
+			rb_entry(rbp, struct xfs_extent_busy, rb_node);
 		xfs_agblock_t	fend = fbno + flen;
 		xfs_agblock_t	bbno = busyp->bno;
 		xfs_agblock_t	bend = bbno + busyp->length;
@@ -384,8 +384,8 @@ restart:
 		 * extent instead of trimming the allocation.
 		 */
 		if (!args->userdata &&
-		    !(busyp->flags & XFS_ALLOC_BUSY_DISCARDED)) {
-			if (!xfs_alloc_busy_update_extent(args->mp, args->pag,
+		    !(busyp->flags & XFS_EXTENT_BUSY_DISCARDED)) {
+			if (!xfs_extent_busy_update_extent(args->mp, args->pag,
 							  busyp, fbno, flen, false))
 				goto restart;
@@ -517,7 +517,7 @@ restart:
 	spin_unlock(&args->pag->pagb_lock);
 
 	if (fbno != bno || flen != len) {
-		trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len,
+		trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len,
 					  fbno, flen);
 	}
 	*rbno = fbno;
@@ -529,19 +529,19 @@ fail:
 	 * re-check if the trimmed extent satisfies the minlen requirement.
 	 */
 	spin_unlock(&args->pag->pagb_lock);
-	trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len, fbno, 0);
+	trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len, fbno, 0);
 	*rbno = fbno;
 	*rlen = 0;
 }
 
-static void
-xfs_alloc_busy_clear_one(
+STATIC void
+xfs_extent_busy_clear_one(
 	struct xfs_mount	*mp,
 	struct xfs_perag	*pag,
-	struct xfs_busy_extent	*busyp)
+	struct xfs_extent_busy	*busyp)
 {
 	if (busyp->length) {
-		trace_xfs_alloc_busy_clear(mp, busyp->agno, busyp->bno,
+		trace_xfs_extent_busy_clear(mp, busyp->agno, busyp->bno,
 						busyp->length);
 		rb_erase(&busyp->rb_node, &pag->pagb_tree);
 	}
@@ -556,12 +556,12 @@
  * these as undergoing a discard operation instead.
 */
 void
-xfs_alloc_busy_clear(
+xfs_extent_busy_clear(
 	struct xfs_mount	*mp,
 	struct list_head	*list,
 	bool			do_discard)
 {
-	struct xfs_busy_extent	*busyp, *n;
+	struct xfs_extent_busy	*busyp, *n;
 	struct xfs_perag	*pag = NULL;
 	xfs_agnumber_t		agno = NULLAGNUMBER;
 
@@ -577,10 +577,10 @@ xfs_alloc_busy_clear(
 		}
 
 		if (do_discard && busyp->length &&
-		    !(busyp->flags & XFS_ALLOC_BUSY_SKIP_DISCARD))
-			busyp->flags = XFS_ALLOC_BUSY_DISCARDED;
+		    !(busyp->flags & XFS_EXTENT_BUSY_SKIP_DISCARD))
+			busyp->flags = XFS_EXTENT_BUSY_DISCARDED;
 		else
-			xfs_alloc_busy_clear_one(mp, pag, busyp);
+			xfs_extent_busy_clear_one(mp, pag, busyp);
 	}
 
 	if (pag) {
@@ -593,11 +593,11 @@
  * Callback for list_sort to sort busy extents by the AG they reside in.
 */
 int
-xfs_alloc_busy_ag_cmp(
+xfs_extent_busy_ag_cmp(
 	void			*priv,
 	struct list_head	*a,
 	struct list_head	*b)
 {
-	return container_of(a, struct xfs_busy_extent, list)->agno -
-	       container_of(b, struct xfs_busy_extent, list)->agno;
+	return container_of(a, struct xfs_extent_busy, list)->agno -
+	       container_of(b, struct xfs_extent_busy, list)->agno;
 }
diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h
index 671b501f13e5..91f2fcbb2001 100644
--- a/fs/xfs/xfs_extent_busy.h
+++ b/fs/xfs/xfs_extent_busy.h
@@ -27,39 +27,39 @@
  * Note that we use the transaction ID to record the transaction, not the
  * transaction structure itself. See xfs_extent_busy_insert() for details.
  */
-struct xfs_busy_extent {
+struct xfs_extent_busy {
 	struct rb_node	rb_node;	/* ag by-bno indexed search tree */
 	struct list_head list;		/* transaction busy extent list */
 	xfs_agnumber_t	agno;
 	xfs_agblock_t	bno;
 	xfs_extlen_t	length;
 	unsigned int	flags;
-#define XFS_ALLOC_BUSY_DISCARDED	0x01	/* undergoing a discard op. */
-#define XFS_ALLOC_BUSY_SKIP_DISCARD	0x02	/* do not discard */
+#define XFS_EXTENT_BUSY_DISCARDED	0x01	/* undergoing a discard op. */
+#define XFS_EXTENT_BUSY_SKIP_DISCARD	0x02	/* do not discard */
 };
 
 void
-xfs_alloc_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno,
+xfs_extent_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno,
 	xfs_agblock_t bno, xfs_extlen_t len, unsigned int flags);
 
 void
-xfs_alloc_busy_clear(struct xfs_mount *mp, struct list_head *list,
+xfs_extent_busy_clear(struct xfs_mount *mp, struct list_head *list,
 	bool do_discard);
 
 int
-xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno,
+xfs_extent_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno,
 	xfs_agblock_t bno, xfs_extlen_t len);
 
 void
-xfs_alloc_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno,
+xfs_extent_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno,
 	xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata);
 
 int
-xfs_alloc_busy_ag_cmp(void *priv, struct list_head *a, struct list_head *b);
+xfs_extent_busy_ag_cmp(void *priv, struct list_head *a, struct list_head *b);
 
-static inline void xfs_alloc_busy_sort(struct list_head *list)
+static inline void xfs_extent_busy_sort(struct list_head *list)
 {
-	list_sort(NULL, list, xfs_alloc_busy_ag_cmp);
+	list_sort(NULL, list, xfs_extent_busy_ag_cmp);
 }
 
 #endif	/* __XFS_EXTENT_BUSY_H__ */
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index a6e3e71e3f88..601ccf02618a 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -338,8 +338,8 @@ xlog_cil_committed(
 	xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
 					ctx->start_lsn, abort);
 
-	xfs_alloc_busy_sort(&ctx->busy_extents);
-	xfs_alloc_busy_clear(mp, &ctx->busy_extents,
+	xfs_extent_busy_sort(&ctx->busy_extents);
+	xfs_extent_busy_clear(mp, &ctx->busy_extents,
 			     (mp->m_flags & XFS_MOUNT_DISCARD) && !abort);
 
 	spin_lock(&ctx->cil->xc_cil_lock);
@@ -352,7 +352,7 @@ xlog_cil_committed(
 		ASSERT(mp->m_flags & XFS_MOUNT_DISCARD);
 
 		xfs_discard_extents(mp, &ctx->busy_extents);
-		xfs_alloc_busy_clear(mp, &ctx->busy_extents, false);
+		xfs_extent_busy_clear(mp, &ctx->busy_extents, false);
 	}
 
 	kmem_free(ctx);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 900764c450a8..febff43176f1 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1142,7 +1142,7 @@ TRACE_EVENT(xfs_bunmap,
 
 );
 
-DECLARE_EVENT_CLASS(xfs_busy_class,
+DECLARE_EVENT_CLASS(xfs_extent_busy_class,
 	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
 		 xfs_agblock_t agbno, xfs_extlen_t len),
 	TP_ARGS(mp, agno, agbno, len),
@@ -1165,17 +1165,17 @@ DECLARE_EVENT_CLASS(xfs_busy_class,
 		  __entry->len)
 );
 #define DEFINE_BUSY_EVENT(name) \
-DEFINE_EVENT(xfs_busy_class, name, \
+DEFINE_EVENT(xfs_extent_busy_class, name, \
 	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
 		 xfs_agblock_t agbno, xfs_extlen_t len), \
 	TP_ARGS(mp, agno, agbno, len))
-DEFINE_BUSY_EVENT(xfs_alloc_busy);
-DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem);
-DEFINE_BUSY_EVENT(xfs_alloc_busy_force);
-DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse);
-DEFINE_BUSY_EVENT(xfs_alloc_busy_clear);
+DEFINE_BUSY_EVENT(xfs_extent_busy);
+DEFINE_BUSY_EVENT(xfs_extent_busy_enomem);
+DEFINE_BUSY_EVENT(xfs_extent_busy_force);
+DEFINE_BUSY_EVENT(xfs_extent_busy_reuse);
+DEFINE_BUSY_EVENT(xfs_extent_busy_clear);
 
-TRACE_EVENT(xfs_alloc_busy_trim,
+TRACE_EVENT(xfs_extent_busy_trim,
 	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
 		 xfs_agblock_t agbno, xfs_extlen_t len,
 		 xfs_agblock_t tbno, xfs_extlen_t tlen),
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index d8bdb618ec19..f674855c8dc9 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -608,8 +608,8 @@ STATIC void
 xfs_trans_free(
 	struct xfs_trans	*tp)
 {
-	xfs_alloc_busy_sort(&tp->t_busy);
-	xfs_alloc_busy_clear(tp->t_mountp, &tp->t_busy, false);
+	xfs_extent_busy_sort(&tp->t_busy);
+	xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);
 
 	atomic_dec(&tp->t_mountp->m_active_trans);
 	xfs_trans_free_dqinfo(tp);
--
cgit v1.2.3


From 14c26c6a05de138a4fd9a0c05ff8e7435a618324 Mon Sep 17 00:00:00 2001
From: Dave Chinner
Date: Tue, 24 Apr 2012 16:33:31 +1000
Subject: xfs: add trace points for log forces

To make it easy to trace the location and frequency of log forces via
perf, add a pair of trace points to the log force functions.  This
helps debug where excessive log forces are being issued from, using
simple perf commands like:

# ~/perf/perf top -e xfs:xfs_log_force -G -U

which gives this sort of output:

Events: 141  xfs:xfs_log_force
-  100.00%  [kernel]  [k] xfs_log_force
   - xfs_log_force
        87.04% xfsaild
           kthread
           kernel_thread_helper
      - 12.87% xfs_buf_lock
           _xfs_buf_find
           xfs_buf_get
           xfs_trans_get_buf
           xfs_da_do_buf
           xfs_da_get_buf
           xfs_dir2_data_init
           xfs_dir2_leaf_addname
           xfs_dir_createname
           xfs_create
           xfs_vn_mknod
           xfs_vn_create
           vfs_create
           do_last.isra.41
           path_openat
           do_filp_open
           do_sys_open
           sys_open
           system_call_fastpath

Signed-off-by: Dave Chinner
Reviewed-by: Mark Tinguely
Signed-off-by: Ben Myers
---
 fs/xfs/xfs_log.c   |  2 ++
 fs/xfs/xfs_trace.h | 16 ++++++++++++++++
 2 files changed, 18 insertions(+)

(limited to 'fs/xfs/xfs_trace.h')

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 9b10a15295a2..6b965bf450e4 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2941,6 +2941,7 @@ xfs_log_force(
 {
 	int	error;
 
+	trace_xfs_log_force(mp, 0);
 	error = _xfs_log_force(mp, flags, NULL);
 	if (error)
 		xfs_warn(mp, "%s: error %d returned.", __func__, error);
@@ -3089,6 +3090,7 @@ xfs_log_force_lsn(
 {
 	int	error;
 
+	trace_xfs_log_force(mp, lsn);
 	error = _xfs_log_force_lsn(mp, lsn, flags, NULL);
 	if (error)
 		xfs_warn(mp, "%s: error %d returned.", __func__, error);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index febff43176f1..7cf9d3529e51 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -874,6 +874,22 @@ DECLARE_EVENT_CLASS(xfs_log_item_class,
 		  __print_flags(__entry->flags, "|", XFS_LI_FLAGS))
 )
 
+TRACE_EVENT(xfs_log_force,
+	TP_PROTO(struct xfs_mount *mp, xfs_lsn_t lsn),
+	TP_ARGS(mp, lsn),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_lsn_t, lsn)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->lsn = lsn;
+	),
+	TP_printk("dev %d:%d lsn 0x%llx",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->lsn)
+)
+
 #define DEFINE_LOG_ITEM_EVENT(name) \
 DEFINE_EVENT(xfs_log_item_class, name, \
 	TP_PROTO(struct xfs_log_item *lip), \
--
cgit v1.2.3
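[ Editor's note: besides perf, a TRACE_EVENT like this can also be
  sampled through the standard tracefs interface; assuming debugfs is
  mounted in the usual place:

	# echo 1 > /sys/kernel/debug/tracing/events/xfs/xfs_log_force/enable
	# cat /sys/kernel/debug/tracing/trace_pipe

  Each hit prints the "dev %d:%d lsn 0x%llx" line from TP_printk above,
  with lsn 0 for plain xfs_log_force() calls. ]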