Diffstat (limited to 'fs/xfs/xfs_log_recover.c')
-rw-r--r--  fs/xfs/xfs_log_recover.c  |  50
1 file changed, 32 insertions, 18 deletions
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 13d1d3e95b88..508319039dce 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -97,6 +97,8 @@ xlog_alloc_buffer(
 	struct xlog	*log,
 	int		nbblks)
 {
+	int align_mask = xfs_buftarg_dma_alignment(log->l_targ);
+
 	/*
 	 * Pass log block 0 since we don't have an addr yet, buffer will be
 	 * verified on read.
 	 */
@@ -125,7 +127,7 @@ xlog_alloc_buffer(
 	if (nbblks > 1 && log->l_sectBBsize > 1)
 		nbblks += log->l_sectBBsize;
 	nbblks = round_up(nbblks, log->l_sectBBsize);
-	return kmem_alloc_large(BBTOB(nbblks), KM_MAYFAIL);
+	return kmem_alloc_io(BBTOB(nbblks), align_mask, KM_MAYFAIL);
 }
 
 /*
@@ -1960,7 +1962,7 @@ xlog_recover_buffer_pass1(
 		}
 	}
 
-	bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), KM_SLEEP);
+	bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), 0);
 	bcp->bc_blkno = buf_f->blf_blkno;
 	bcp->bc_len = buf_f->blf_len;
 	bcp->bc_refcount = 1;
@@ -2930,7 +2932,7 @@ xlog_recover_inode_pass2(
 	if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
 		in_f = item->ri_buf[0].i_addr;
 	} else {
-		in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), KM_SLEEP);
+		in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), 0);
 		need_free = 1;
 		error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
 		if (error)
@@ -4161,7 +4163,7 @@ xlog_recover_add_item(
 {
 	xlog_recover_item_t	*item;
 
-	item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP);
+	item = kmem_zalloc(sizeof(xlog_recover_item_t), 0);
 	INIT_LIST_HEAD(&item->ri_list);
 	list_add_tail(&item->ri_list, head);
 }
@@ -4201,7 +4203,7 @@ xlog_recover_add_to_cont_trans(
 	old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
 	old_len = item->ri_buf[item->ri_cnt-1].i_len;
 
-	ptr = kmem_realloc(old_ptr, len + old_len, KM_SLEEP);
+	ptr = kmem_realloc(old_ptr, len + old_len, 0);
 	memcpy(&ptr[old_len], dp, len);
 	item->ri_buf[item->ri_cnt-1].i_len += len;
 	item->ri_buf[item->ri_cnt-1].i_addr = ptr;
@@ -4261,7 +4263,7 @@ xlog_recover_add_to_trans(
 		return 0;
 	}
 
-	ptr = kmem_alloc(len, KM_SLEEP);
+	ptr = kmem_alloc(len, 0);
 	memcpy(ptr, dp, len);
 	in_f = (struct xfs_inode_log_format *)ptr;
 
@@ -4289,7 +4291,7 @@ xlog_recover_add_to_trans(
 		item->ri_total = in_f->ilf_size;
 		item->ri_buf =
 			kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t),
-				    KM_SLEEP);
+				    0);
 	}
 	ASSERT(item->ri_total > item->ri_cnt);
 	/* Description region is ri_buf[0] */
@@ -4423,7 +4425,7 @@ xlog_recover_ophdr_to_trans(
 	 * This is a new transaction so allocate a new recovery container to
 	 * hold the recovery ops that will follow.
 	 */
-	trans = kmem_zalloc(sizeof(struct xlog_recover), KM_SLEEP);
+	trans = kmem_zalloc(sizeof(struct xlog_recover), 0);
 	trans->r_log_tid = tid;
 	trans->r_lsn = be64_to_cpu(rhead->h_lsn);
 	INIT_LIST_HEAD(&trans->r_itemq);
@@ -5022,16 +5024,27 @@ xlog_recover_process_one_iunlink(
 }
 
 /*
- * xlog_iunlink_recover
+ * Recover AGI unlinked lists
+ *
+ * This is called during recovery to process any inodes which we unlinked but
+ * not freed when the system crashed. These inodes will be on the lists in the
+ * AGI blocks. What we do here is scan all the AGIs and fully truncate and free
+ * any inodes found on the lists. Each inode is removed from the lists when it
+ * has been fully truncated and is freed. The freeing of the inode and its
+ * removal from the list must be atomic.
+ *
+ * If everything we touch in the agi processing loop is already in memory, this
+ * loop can hold the cpu for a long time. It runs without lock contention,
+ * memory allocation contention, the need to wait for IO, etc, and so will run
+ * until we either run out of inodes to process, run low on memory or we run out
+ * of log space.
  *
- * This is called during recovery to process any inodes which
- * we unlinked but not freed when the system crashed. These
- * inodes will be on the lists in the AGI blocks. What we do
- * here is scan all the AGIs and fully truncate and free any
- * inodes found on the lists. Each inode is removed from the
- * lists when it has been fully truncated and is freed. The
- * freeing of the inode and its removal from the list must be
- * atomic.
+ * This behaviour is bad for latency on single CPU and non-preemptible kernels,
+ * and can prevent other filesystem work (such as CIL pushes) from running. This
+ * can lead to deadlocks if the recovery process runs out of log reservation
+ * space. Hence we need to yield the CPU when there is other kernel work
+ * scheduled on this CPU to ensure other scheduled work can run without undue
+ * latency.
  */
 STATIC void
 xlog_recover_process_iunlinks(
@@ -5078,6 +5091,7 @@ xlog_recover_process_iunlinks(
 			while (agino != NULLAGINO) {
 				agino = xlog_recover_process_one_iunlink(mp,
 							agno, agino, bucket);
+				cond_resched();
 			}
 		}
 		xfs_buf_rele(agibp);
@@ -5527,7 +5541,7 @@ xlog_do_log_recovery(
 	 */
 	log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE *
 						sizeof(struct list_head),
-						KM_SLEEP);
+						0);
 	for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
 		INIT_LIST_HEAD(&log->l_buf_cancel_table[i]);
 
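Note on the xlog_alloc_buffer() hunks: the switch from kmem_alloc_large() to kmem_alloc_io() makes the log buffer allocation respect the log device's DMA alignment (queried via xfs_buftarg_dma_alignment()). The following minimal userspace C sketch mirrors only the sizing-and-alignment logic; posix_memalign() stands in for the kernel-only kmem_alloc_io(), and the sector size and alignment values in main() are assumptions for illustration, not values taken from the patch.

#define _POSIX_C_SOURCE 200112L
#include <stdio.h>
#include <stdlib.h>

#define BBSHIFT		9			/* XFS basic blocks are 512 bytes */
#define BBTOB(bbs)	((size_t)(bbs) << BBSHIFT)

/* round x up to the next multiple of mult */
static int round_up_mult(int x, int mult)
{
	return ((x + mult - 1) / mult) * mult;
}

/*
 * Mirror of the sizing logic in xlog_alloc_buffer(): pad multi-block reads
 * by one sector so reads that start mid-sector still fit, round the size up
 * to whole sectors, then allocate memory aligned for direct I/O.
 */
static void *alloc_log_buffer(int nbblks, int sect_bbsize, size_t dma_align)
{
	void *buf;

	if (nbblks > 1 && sect_bbsize > 1)
		nbblks += sect_bbsize;
	nbblks = round_up_mult(nbblks, sect_bbsize);

	/* posix_memalign() plays the role of kmem_alloc_io() here */
	if (posix_memalign(&buf, dma_align, BBTOB(nbblks)))
		return NULL;
	return buf;
}

int main(void)
{
	/* assumed geometry: 4096-byte sectors (8 BBs), 512-byte DMA alignment */
	void *buf = alloc_log_buffer(7, 8, 512);

	printf("log buffer at %p\n", buf);
	free(buf);
	return 0;
}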
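Note on the KM_SLEEP -> 0 substitutions: they rely on a blocking allocation being the allocator's default, so a flags value of 0 simply means "ordinary sleeping allocation" and the explicit KM_SLEEP bit carries no information. The sketch below illustrates that idea only; the flag bits and retry loop are hypothetical stand-ins, not the real definitions from fs/xfs/kmem.h.

#include <stdlib.h>

/* Hypothetical modifier bits, for illustration only. */
#define KM_NOSLEEP	0x1	/* caller cannot block */
#define KM_MAYFAIL	0x2	/* caller handles a NULL return */

/*
 * With no modifier flags set (flags == 0), behave like a blocking,
 * must-succeed allocation; only a modifier permits returning NULL.
 */
static void *sketch_kmem_alloc(size_t size, unsigned int flags)
{
	for (;;) {
		void *p = malloc(size);

		if (p || (flags & (KM_NOSLEEP | KM_MAYFAIL)))
			return p;	/* NULL allowed only with modifiers */
		/* default path: keep retrying, as a KM_SLEEP caller expects */
	}
}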
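Note on the cond_resched() hunk: this is the latency fix the rewritten comment describes. A tight loop that never blocks must yield voluntarily or, on a single CPU or non-preemptible kernel, it starves other runnable work. The userspace analogue below shows the shape of the pattern; sched_yield() stands in for cond_resched(), and process_one_item() is a placeholder for the per-inode work done by xlog_recover_process_one_iunlink().

#define _POSIX_C_SOURCE 199309L
#include <sched.h>
#include <stdio.h>

/* placeholder for per-item work such as truncating and freeing one inode */
static long process_one_item(long i)
{
	return (i * 2654435761UL) % 97;
}

int main(void)
{
	long sum = 0;

	for (long i = 0; i < 1000000; i++) {
		sum += process_one_item(i);
		/*
		 * Yield once per item, as the added cond_resched() does in
		 * the AGI bucket walk, so other runnable work (the kernel
		 * analogue is e.g. a CIL push) can make progress instead of
		 * being starved by this loop.
		 */
		sched_yield();
	}
	printf("sum = %ld\n", sum);
	return 0;
}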