diff options
Diffstat (limited to 'fs/xfs/xfs_reflink.c')
-rw-r--r-- | fs/xfs/xfs_reflink.c | 265 |
1 files changed, 185 insertions, 80 deletions
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 07593a362cd0..da6d08fb359c 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -82,11 +82,22 @@ * mappings are a reservation against the free space in the filesystem; * adjacent mappings can also be combined into fewer larger mappings. * + * As an optimization, the CoW extent size hint (cowextsz) creates + * outsized aligned delalloc reservations in the hope of landing out of + * order nearby CoW writes in a single extent on disk, thereby reducing + * fragmentation and improving future performance. + * + * D: --RRRRRRSSSRRRRRRRR--- (data fork) + * C: ------DDDDDDD--------- (CoW fork) + * * When dirty pages are being written out (typically in writepage), the - * delalloc reservations are converted into real mappings by allocating - * blocks and replacing the delalloc mapping with real ones. A delalloc - * mapping can be replaced by several real ones if the free space is - * fragmented. + * delalloc reservations are converted into unwritten mappings by + * allocating blocks and replacing the delalloc mapping with real ones. + * A delalloc mapping can be replaced by several unwritten ones if the + * free space is fragmented. + * + * D: --RRRRRRSSSRRRRRRRR--- + * C: ------UUUUUUU--------- * * We want to adapt the delalloc mechanism for copy-on-write, since the * write paths are similar. The first two steps (creating the reservation @@ -101,13 +112,29 @@ * Block-aligned directio writes will use the same mechanism as buffered * writes. * + * Just prior to submitting the actual disk write requests, we convert + * the extents representing the range of the file actually being written + * (as opposed to extra pieces created for the cowextsize hint) to real + * extents. This will become important in the next step: + * + * D: --RRRRRRSSSRRRRRRRR--- + * C: ------UUrrUUU--------- + * * CoW remapping must be done after the data block write completes, * because we don't want to destroy the old data fork map until we're sure * the new block has been written. Since the new mappings are kept in a * separate fork, we can simply iterate these mappings to find the ones * that cover the file blocks that we just CoW'd. For each extent, simply * unmap the corresponding range in the data fork, map the new range into - * the data fork, and remove the extent from the CoW fork. + * the data fork, and remove the extent from the CoW fork. Because of + * the presence of the cowextsize hint, however, we must be careful + * only to remap the blocks that we've actually written out -- we must + * never remap delalloc reservations nor CoW staging blocks that have + * yet to be written. This corresponds exactly to the real extents in + * the CoW fork: + * + * D: --RRRRRRrrSRRRRRRRR--- + * C: ------UU--UUU--------- * * Since the remapping operation can be applied to an arbitrary file * range, we record the need for the remap step as a flag in the ioend @@ -296,103 +323,165 @@ xfs_reflink_reserve_cow( return 0; } -/* Allocate all CoW reservations covering a range of blocks in a file. */ -static int -__xfs_reflink_allocate_cow( - struct xfs_inode *ip, - xfs_fileoff_t *offset_fsb, - xfs_fileoff_t end_fsb) +/* Convert part of an unwritten CoW extent to a real one. */ +STATIC int +xfs_reflink_convert_cow_extent( + struct xfs_inode *ip, + struct xfs_bmbt_irec *imap, + xfs_fileoff_t offset_fsb, + xfs_filblks_t count_fsb, + struct xfs_defer_ops *dfops) { - struct xfs_mount *mp = ip->i_mount; - struct xfs_bmbt_irec imap; - struct xfs_defer_ops dfops; - struct xfs_trans *tp; - xfs_fsblock_t first_block; - int nimaps = 1, error; - bool shared; - - xfs_defer_init(&dfops, &first_block); + xfs_fsblock_t first_block; + int nimaps = 1; - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, - XFS_TRANS_RESERVE, &tp); - if (error) - return error; + if (imap->br_state == XFS_EXT_NORM) + return 0; - xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trim_extent(imap, offset_fsb, count_fsb); + trace_xfs_reflink_convert_cow(ip, imap); + if (imap->br_blockcount == 0) + return 0; + return xfs_bmapi_write(NULL, ip, imap->br_startoff, imap->br_blockcount, + XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT, &first_block, + 0, imap, &nimaps, dfops); +} - /* Read extent from the source file. */ - nimaps = 1; - error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb, - &imap, &nimaps, 0); - if (error) - goto out_unlock; - ASSERT(nimaps == 1); +/* Convert all of the unwritten CoW extents in a file's range to real ones. */ +int +xfs_reflink_convert_cow( + struct xfs_inode *ip, + xfs_off_t offset, + xfs_off_t count) +{ + struct xfs_bmbt_irec got; + struct xfs_defer_ops dfops; + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); + xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); + xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count); + xfs_extnum_t idx; + bool found; + int error = 0; - error = xfs_reflink_reserve_cow(ip, &imap, &shared); - if (error) - goto out_trans_cancel; + xfs_ilock(ip, XFS_ILOCK_EXCL); - if (!shared) { - *offset_fsb = imap.br_startoff + imap.br_blockcount; - goto out_trans_cancel; + /* Convert all the extents to real from unwritten. */ + for (found = xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got); + found && got.br_startoff < end_fsb; + found = xfs_iext_get_extent(ifp, ++idx, &got)) { + error = xfs_reflink_convert_cow_extent(ip, &got, offset_fsb, + end_fsb - offset_fsb, &dfops); + if (error) + break; } - xfs_trans_ijoin(tp, ip, 0); - error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount, - XFS_BMAPI_COWFORK, &first_block, - XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), - &imap, &nimaps, &dfops); - if (error) - goto out_trans_cancel; - - error = xfs_defer_finish(&tp, &dfops, NULL); - if (error) - goto out_trans_cancel; - - error = xfs_trans_commit(tp); - - *offset_fsb = imap.br_startoff + imap.br_blockcount; -out_unlock: + /* Finish up. */ xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; -out_trans_cancel: - xfs_defer_cancel(&dfops); - xfs_trans_cancel(tp); - goto out_unlock; } -/* Allocate all CoW reservations covering a part of a file. */ +/* Allocate all CoW reservations covering a range of blocks in a file. */ int -xfs_reflink_allocate_cow_range( +xfs_reflink_allocate_cow( struct xfs_inode *ip, - xfs_off_t offset, - xfs_off_t count) + struct xfs_bmbt_irec *imap, + bool *shared, + uint *lockmode) { struct xfs_mount *mp = ip->i_mount; - xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); - xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count); - int error; + xfs_fileoff_t offset_fsb = imap->br_startoff; + xfs_filblks_t count_fsb = imap->br_blockcount; + struct xfs_bmbt_irec got; + struct xfs_defer_ops dfops; + struct xfs_trans *tp = NULL; + xfs_fsblock_t first_block; + int nimaps, error = 0; + bool trimmed; + xfs_filblks_t resaligned; + xfs_extlen_t resblks = 0; + xfs_extnum_t idx; +retry: ASSERT(xfs_is_reflink_inode(ip)); - - trace_xfs_reflink_allocate_cow_range(ip, offset, count); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)); /* - * Make sure that the dquots are there. + * Even if the extent is not shared we might have a preallocation for + * it in the COW fork. If so use it. */ - error = xfs_qm_dqattach(ip, 0); - if (error) - return error; + if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &idx, &got) && + got.br_startoff <= offset_fsb) { + *shared = true; - while (offset_fsb < end_fsb) { - error = __xfs_reflink_allocate_cow(ip, &offset_fsb, end_fsb); - if (error) { - trace_xfs_reflink_allocate_cow_range_error(ip, error, - _RET_IP_); - break; + /* If we have a real allocation in the COW fork we're done. */ + if (!isnullstartblock(got.br_startblock)) { + xfs_trim_extent(&got, offset_fsb, count_fsb); + *imap = got; + goto convert; } + + xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); + } else { + error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed); + if (error || !*shared) + goto out; + } + + if (!tp) { + resaligned = xfs_aligned_fsb_count(imap->br_startoff, + imap->br_blockcount, xfs_get_cowextsz_hint(ip)); + resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned); + + xfs_iunlock(ip, *lockmode); + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); + *lockmode = XFS_ILOCK_EXCL; + xfs_ilock(ip, *lockmode); + + if (error) + return error; + + error = xfs_qm_dqattach_locked(ip, 0); + if (error) + goto out; + goto retry; } + error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0, + XFS_QMOPT_RES_REGBLKS); + if (error) + goto out; + + xfs_trans_ijoin(tp, ip, 0); + + xfs_defer_init(&dfops, &first_block); + nimaps = 1; + + /* Allocate the entire reservation as unwritten blocks. */ + error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount, + XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, &first_block, + resblks, imap, &nimaps, &dfops); + if (error) + goto out_bmap_cancel; + + /* Finish up. */ + error = xfs_defer_finish(&tp, &dfops, NULL); + if (error) + goto out_bmap_cancel; + + error = xfs_trans_commit(tp); + if (error) + return error; +convert: + return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb, + &dfops); +out_bmap_cancel: + xfs_defer_cancel(&dfops); + xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0, + XFS_QMOPT_RES_REGBLKS); +out: + if (tp) + xfs_trans_cancel(tp); return error; } @@ -641,6 +730,16 @@ xfs_reflink_end_cow( ASSERT(!isnullstartblock(got.br_startblock)); + /* + * Don't remap unwritten extents; these are + * speculatively preallocated CoW extents that have been + * allocated but have not yet been involved in a write. + */ + if (got.br_state == XFS_EXT_UNWRITTEN) { + idx--; + goto next_extent; + } + /* Unmap the old blocks in the data fork. */ xfs_defer_init(&dfops, &firstfsb); rlen = del.br_blockcount; @@ -855,13 +954,14 @@ STATIC int xfs_reflink_update_dest( struct xfs_inode *dest, xfs_off_t newlen, - xfs_extlen_t cowextsize) + xfs_extlen_t cowextsize, + bool is_dedupe) { struct xfs_mount *mp = dest->i_mount; struct xfs_trans *tp; int error; - if (newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) + if (is_dedupe && newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) return 0; error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); @@ -882,6 +982,10 @@ xfs_reflink_update_dest( dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; } + if (!is_dedupe) { + xfs_trans_ichgtime(tp, dest, + XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + } xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE); error = xfs_trans_commit(tp); @@ -1195,7 +1299,8 @@ xfs_reflink_remap_range( !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) cowextsize = src->i_d.di_cowextsize; - ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize); + ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize, + is_dedupe); out_unlock: xfs_iunlock(src, XFS_MMAPLOCK_EXCL); |