diff options
Diffstat (limited to 'fs/xfs')
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap.c | 371 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap.h | 7 | ||||
-rw-r--r-- | fs/xfs/xfs_aops.c | 77 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap_util.c | 48 | ||||
-rw-r--r-- | fs/xfs/xfs_file.c | 27 |
5 files changed, 384 insertions, 146 deletions
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index de2d26d32844..79c981984dca 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5404,39 +5404,237 @@ error0: } /* + * Determine whether an extent shift can be accomplished by a merge with the + * extent that precedes the target hole of the shift. + */ +STATIC bool +xfs_bmse_can_merge( + struct xfs_bmbt_irec *left, /* preceding extent */ + struct xfs_bmbt_irec *got, /* current extent to shift */ + xfs_fileoff_t shift) /* shift fsb */ +{ + xfs_fileoff_t startoff; + + startoff = got->br_startoff - shift; + + /* + * The extent, once shifted, must be adjacent in-file and on-disk with + * the preceding extent. + */ + if ((left->br_startoff + left->br_blockcount != startoff) || + (left->br_startblock + left->br_blockcount != got->br_startblock) || + (left->br_state != got->br_state) || + (left->br_blockcount + got->br_blockcount > MAXEXTLEN)) + return false; + + return true; +} + +/* + * A bmap extent shift adjusts the file offset of an extent to fill a preceding + * hole in the file. If an extent shift would result in the extent being fully + * adjacent to the extent that currently precedes the hole, we can merge with + * the preceding extent rather than do the shift. + * + * This function assumes the caller has verified a shift-by-merge is possible + * with the provided extents via xfs_bmse_can_merge(). + */ +STATIC int +xfs_bmse_merge( + struct xfs_inode *ip, + int whichfork, + xfs_fileoff_t shift, /* shift fsb */ + int current_ext, /* idx of gotp */ + struct xfs_bmbt_rec_host *gotp, /* extent to shift */ + struct xfs_bmbt_rec_host *leftp, /* preceding extent */ + struct xfs_btree_cur *cur, + int *logflags) /* output */ +{ + struct xfs_ifork *ifp; + struct xfs_bmbt_irec got; + struct xfs_bmbt_irec left; + xfs_filblks_t blockcount; + int error, i; + + ifp = XFS_IFORK_PTR(ip, whichfork); + xfs_bmbt_get_all(gotp, &got); + xfs_bmbt_get_all(leftp, &left); + blockcount = left.br_blockcount + got.br_blockcount; + + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(xfs_bmse_can_merge(&left, &got, shift)); + + /* + * Merge the in-core extents. Note that the host record pointers and + * current_ext index are invalid once the extent has been removed via + * xfs_iext_remove(). + */ + xfs_bmbt_set_blockcount(leftp, blockcount); + xfs_iext_remove(ip, current_ext, 1, 0); + + /* + * Update the on-disk extent count, the btree if necessary and log the + * inode. + */ + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) - 1); + *logflags |= XFS_ILOG_CORE; + if (!cur) { + *logflags |= XFS_ILOG_DEXT; + return 0; + } + + /* lookup and remove the extent to merge */ + error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock, + got.br_blockcount, &i); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(i == 1, out_error); + + error = xfs_btree_delete(cur, &i); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(i == 1, out_error); + + /* lookup and update size of the previous extent */ + error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock, + left.br_blockcount, &i); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(i == 1, out_error); + + left.br_blockcount = blockcount; + + error = xfs_bmbt_update(cur, left.br_startoff, left.br_startblock, + left.br_blockcount, left.br_state); + if (error) + goto out_error; + + return 0; + +out_error: + return error; +} + +/* + * Shift a single extent. + */ +STATIC int +xfs_bmse_shift_one( + struct xfs_inode *ip, + int whichfork, + xfs_fileoff_t offset_shift_fsb, + int *current_ext, + struct xfs_bmbt_rec_host *gotp, + struct xfs_btree_cur *cur, + int *logflags) +{ + struct xfs_ifork *ifp; + xfs_fileoff_t startoff; + struct xfs_bmbt_rec_host *leftp; + struct xfs_bmbt_irec got; + struct xfs_bmbt_irec left; + int error; + int i; + + ifp = XFS_IFORK_PTR(ip, whichfork); + + xfs_bmbt_get_all(gotp, &got); + startoff = got.br_startoff - offset_shift_fsb; + + /* delalloc extents should be prevented by caller */ + XFS_WANT_CORRUPTED_GOTO(!isnullstartblock(got.br_startblock), + out_error); + + /* + * If this is the first extent in the file, make sure there's enough + * room at the start of the file and jump right to the shift as there's + * no left extent to merge. + */ + if (*current_ext == 0) { + if (got.br_startoff < offset_shift_fsb) + return -EINVAL; + goto shift_extent; + } + + /* grab the left extent and check for a large enough hole */ + leftp = xfs_iext_get_ext(ifp, *current_ext - 1); + xfs_bmbt_get_all(leftp, &left); + + if (startoff < left.br_startoff + left.br_blockcount) + return -EINVAL; + + /* check whether to merge the extent or shift it down */ + if (!xfs_bmse_can_merge(&left, &got, offset_shift_fsb)) + goto shift_extent; + + return xfs_bmse_merge(ip, whichfork, offset_shift_fsb, *current_ext, + gotp, leftp, cur, logflags); + +shift_extent: + /* + * Increment the extent index for the next iteration, update the start + * offset of the in-core extent and update the btree if applicable. + */ + (*current_ext)++; + xfs_bmbt_set_startoff(gotp, startoff); + *logflags |= XFS_ILOG_CORE; + if (!cur) { + *logflags |= XFS_ILOG_DEXT; + return 0; + } + + error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock, + got.br_blockcount, &i); + if (error) + return error; + XFS_WANT_CORRUPTED_GOTO(i == 1, out_error); + + got.br_startoff = startoff; + error = xfs_bmbt_update(cur, got.br_startoff, got.br_startblock, + got.br_blockcount, got.br_state); + if (error) + return error; + + return 0; + +out_error: + return error; +} + +/* * Shift extent records to the left to cover a hole. * - * The maximum number of extents to be shifted in a single operation - * is @num_exts, and @current_ext keeps track of the current extent - * index we have shifted. @offset_shift_fsb is the length by which each - * extent is shifted. If there is no hole to shift the extents - * into, this will be considered invalid operation and we abort immediately. + * The maximum number of extents to be shifted in a single operation is + * @num_exts. @start_fsb specifies the file offset to start the shift and the + * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb + * is the length by which each extent is shifted. If there is no hole to shift + * the extents into, this will be considered invalid operation and we abort + * immediately. */ int xfs_bmap_shift_extents( struct xfs_trans *tp, struct xfs_inode *ip, - int *done, xfs_fileoff_t start_fsb, xfs_fileoff_t offset_shift_fsb, - xfs_extnum_t *current_ext, + int *done, + xfs_fileoff_t *next_fsb, xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist, int num_exts) { - struct xfs_btree_cur *cur; + struct xfs_btree_cur *cur = NULL; struct xfs_bmbt_rec_host *gotp; struct xfs_bmbt_irec got; - struct xfs_bmbt_irec left; struct xfs_mount *mp = ip->i_mount; struct xfs_ifork *ifp; xfs_extnum_t nexts = 0; - xfs_fileoff_t startoff; + xfs_extnum_t current_ext; int error = 0; - int i; int whichfork = XFS_DATA_FORK; - int logflags; - xfs_filblks_t blockcount = 0; + int logflags = 0; int total_extents; if (unlikely(XFS_TEST_ERROR( @@ -5451,7 +5649,8 @@ xfs_bmap_shift_extents( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - ASSERT(current_ext != NULL); + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); ifp = XFS_IFORK_PTR(ip, whichfork); if (!(ifp->if_flags & XFS_IFEXTENTS)) { @@ -5461,142 +5660,62 @@ xfs_bmap_shift_extents( return error; } - /* - * If *current_ext is 0, we would need to lookup the extent - * from where we would start shifting and store it in gotp. - */ - if (!*current_ext) { - gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext); - /* - * gotp can be null in 2 cases: 1) if there are no extents - * or 2) start_fsb lies in a hole beyond which there are - * no extents. Either way, we are done. - */ - if (!gotp) { - *done = 1; - return 0; - } - } - - /* We are going to change core inode */ - logflags = XFS_ILOG_CORE; if (ifp->if_flags & XFS_IFBROOT) { cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); cur->bc_private.b.firstblock = *firstblock; cur->bc_private.b.flist = flist; cur->bc_private.b.flags = 0; - } else { - cur = NULL; - logflags |= XFS_ILOG_DEXT; + } + + /* + * Look up the extent index for the fsb where we start shifting. We can + * henceforth iterate with current_ext as extent list changes are locked + * out via ilock. + * + * gotp can be null in 2 cases: 1) if there are no extents or 2) + * start_fsb lies in a hole beyond which there are no extents. Either + * way, we are done. + */ + gotp = xfs_iext_bno_to_ext(ifp, start_fsb, ¤t_ext); + if (!gotp) { + *done = 1; + goto del_cursor; } /* * There may be delalloc extents in the data fork before the range we - * are collapsing out, so we cannot - * use the count of real extents here. Instead we have to calculate it - * from the incore fork. + * are collapsing out, so we cannot use the count of real extents here. + * Instead we have to calculate it from the incore fork. */ total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); - while (nexts++ < num_exts && *current_ext < total_extents) { - - gotp = xfs_iext_get_ext(ifp, *current_ext); - xfs_bmbt_get_all(gotp, &got); - startoff = got.br_startoff - offset_shift_fsb; - - /* - * Before shifting extent into hole, make sure that the hole - * is large enough to accomodate the shift. - */ - if (*current_ext) { - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, - *current_ext - 1), &left); - - if (startoff < left.br_startoff + left.br_blockcount) - error = -EINVAL; - } else if (offset_shift_fsb > got.br_startoff) { - /* - * When first extent is shifted, offset_shift_fsb - * should be less than the stating offset of - * the first extent. - */ - error = -EINVAL; - } - + while (nexts++ < num_exts && current_ext < total_extents) { + error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb, + ¤t_ext, gotp, cur, &logflags); if (error) goto del_cursor; - if (cur) { - error = xfs_bmbt_lookup_eq(cur, got.br_startoff, - got.br_startblock, - got.br_blockcount, - &i); - if (error) - goto del_cursor; - XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); - } - - /* Check if we can merge 2 adjacent extents */ - if (*current_ext && - left.br_startoff + left.br_blockcount == startoff && - left.br_startblock + left.br_blockcount == - got.br_startblock && - left.br_state == got.br_state && - left.br_blockcount + got.br_blockcount <= MAXEXTLEN) { - blockcount = left.br_blockcount + - got.br_blockcount; - xfs_iext_remove(ip, *current_ext, 1, 0); - if (cur) { - error = xfs_btree_delete(cur, &i); - if (error) - goto del_cursor; - XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); - } - XFS_IFORK_NEXT_SET(ip, whichfork, - XFS_IFORK_NEXTENTS(ip, whichfork) - 1); - gotp = xfs_iext_get_ext(ifp, --*current_ext); - xfs_bmbt_get_all(gotp, &got); - - /* Make cursor point to the extent we will update */ - if (cur) { - error = xfs_bmbt_lookup_eq(cur, got.br_startoff, - got.br_startblock, - got.br_blockcount, - &i); - if (error) - goto del_cursor; - XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); - } - - xfs_bmbt_set_blockcount(gotp, blockcount); - got.br_blockcount = blockcount; - } else { - /* We have to update the startoff */ - xfs_bmbt_set_startoff(gotp, startoff); - got.br_startoff = startoff; - } - - if (cur) { - error = xfs_bmbt_update(cur, got.br_startoff, - got.br_startblock, - got.br_blockcount, - got.br_state); - if (error) - goto del_cursor; - } - - (*current_ext)++; + /* update total extent count and grab the next record */ total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); + if (current_ext >= total_extents) + break; + gotp = xfs_iext_get_ext(ifp, current_ext); } /* Check if we are done */ - if (*current_ext == total_extents) + if (current_ext == total_extents) { *done = 1; + } else if (next_fsb) { + xfs_bmbt_get_all(gotp, &got); + *next_fsb = got.br_startoff; + } del_cursor: if (cur) xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); - xfs_trans_log_inode(tp, ip, logflags); + if (logflags) + xfs_trans_log_inode(tp, ip, logflags); + return error; } diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index b879ca56a64c..44db6db86402 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -178,9 +178,8 @@ int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, xfs_extnum_t num); uint xfs_default_attroffset(struct xfs_inode *ip); int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, - int *done, xfs_fileoff_t start_fsb, - xfs_fileoff_t offset_shift_fsb, xfs_extnum_t *current_ext, - xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist, - int num_exts); + xfs_fileoff_t start_fsb, xfs_fileoff_t offset_shift_fsb, + int *done, xfs_fileoff_t *next_fsb, xfs_fsblock_t *firstblock, + struct xfs_bmap_free *flist, int num_exts); #endif /* __XFS_BMAP_H__ */ diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 11e9b4caa54f..2f502537a39c 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -434,10 +434,22 @@ xfs_start_page_writeback( { ASSERT(PageLocked(page)); ASSERT(!PageWriteback(page)); - if (clear_dirty) + + /* + * if the page was not fully cleaned, we need to ensure that the higher + * layers come back to it correctly. That means we need to keep the page + * dirty, and for WB_SYNC_ALL writeback we need to ensure the + * PAGECACHE_TAG_TOWRITE index mark is not removed so another attempt to + * write this page in this writeback sweep will be made. + */ + if (clear_dirty) { clear_page_dirty_for_io(page); - set_page_writeback(page); + set_page_writeback(page); + } else + set_page_writeback_keepwrite(page); + unlock_page(page); + /* If no buffers on the page are to be written, finish it here */ if (!buffers) end_page_writeback(page); @@ -1753,11 +1765,72 @@ xfs_vm_readpages( return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); } +/* + * This is basically a copy of __set_page_dirty_buffers() with one + * small tweak: buffers beyond EOF do not get marked dirty. If we mark them + * dirty, we'll never be able to clean them because we don't write buffers + * beyond EOF, and that means we can't invalidate pages that span EOF + * that have been marked dirty. Further, the dirty state can leak into + * the file interior if the file is extended, resulting in all sorts of + * bad things happening as the state does not match the underlying data. + * + * XXX: this really indicates that bufferheads in XFS need to die. Warts like + * this only exist because of bufferheads and how the generic code manages them. + */ +STATIC int +xfs_vm_set_page_dirty( + struct page *page) +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + loff_t end_offset; + loff_t offset; + int newly_dirty; + + if (unlikely(!mapping)) + return !TestSetPageDirty(page); + + end_offset = i_size_read(inode); + offset = page_offset(page); + + spin_lock(&mapping->private_lock); + if (page_has_buffers(page)) { + struct buffer_head *head = page_buffers(page); + struct buffer_head *bh = head; + + do { + if (offset < end_offset) + set_buffer_dirty(bh); + bh = bh->b_this_page; + offset += 1 << inode->i_blkbits; + } while (bh != head); + } + newly_dirty = !TestSetPageDirty(page); + spin_unlock(&mapping->private_lock); + + if (newly_dirty) { + /* sigh - __set_page_dirty() is static, so copy it here, too */ + unsigned long flags; + + spin_lock_irqsave(&mapping->tree_lock, flags); + if (page->mapping) { /* Race with truncate? */ + WARN_ON_ONCE(!PageUptodate(page)); + account_page_dirtied(page, mapping); + radix_tree_tag_set(&mapping->page_tree, + page_index(page), PAGECACHE_TAG_DIRTY); + } + spin_unlock_irqrestore(&mapping->tree_lock, flags); + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); + } + return newly_dirty; +} + const struct address_space_operations xfs_address_space_operations = { .readpage = xfs_vm_readpage, .readpages = xfs_vm_readpages, .writepage = xfs_vm_writepage, .writepages = xfs_vm_writepages, + .set_page_dirty = xfs_vm_set_page_dirty, .releasepage = xfs_vm_releasepage, .invalidatepage = xfs_vm_invalidatepage, .write_begin = xfs_vm_write_begin, diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 1cb345e7c181..d8b77b5bf4d9 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1205,6 +1205,7 @@ xfs_free_file_space( xfs_bmap_free_t free_list; xfs_bmbt_irec_t imap; xfs_off_t ioffset; + xfs_off_t iendoffset; xfs_extlen_t mod=0; xfs_mount_t *mp; int nimap; @@ -1233,12 +1234,13 @@ xfs_free_file_space( inode_dio_wait(VFS_I(ip)); rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); - ioffset = offset & ~(rounding - 1); - error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, - ioffset, -1); + ioffset = round_down(offset, rounding); + iendoffset = round_up(offset + len, rounding) - 1; + error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ioffset, + iendoffset); if (error) goto out; - truncate_pagecache_range(VFS_I(ip), ioffset, -1); + truncate_pagecache_range(VFS_I(ip), ioffset, iendoffset); /* * Need to zero the stuff we're not freeing, on disk. @@ -1456,24 +1458,50 @@ xfs_collapse_file_space( struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; int error; - xfs_extnum_t current_ext = 0; struct xfs_bmap_free free_list; xfs_fsblock_t first_block; int committed; xfs_fileoff_t start_fsb; + xfs_fileoff_t next_fsb; xfs_fileoff_t shift_fsb; ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); trace_xfs_collapse_file_space(ip); - start_fsb = XFS_B_TO_FSB(mp, offset + len); + next_fsb = XFS_B_TO_FSB(mp, offset + len); shift_fsb = XFS_B_TO_FSB(mp, len); error = xfs_free_file_space(ip, offset, len); if (error) return error; + /* + * Trim eofblocks to avoid shifting uninitialized post-eof preallocation + * into the accessible region of the file. + */ + if (xfs_can_free_eofblocks(ip, true)) { + error = xfs_free_eofblocks(mp, ip, false); + if (error) + return error; + } + + /* + * Writeback and invalidate cache for the remainder of the file as we're + * about to shift down every extent from the collapse range to EOF. The + * free of the collapse range above might have already done some of + * this, but we shouldn't rely on it to do anything outside of the range + * that was freed. + */ + error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, + offset + len, -1); + if (error) + return error; + error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, + (offset + len) >> PAGE_CACHE_SHIFT, -1); + if (error) + return error; + while (!error && !done) { tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); /* @@ -1505,10 +1533,10 @@ xfs_collapse_file_space( * We are using the write transaction in which max 2 bmbt * updates are allowed */ - error = xfs_bmap_shift_extents(tp, ip, &done, start_fsb, - shift_fsb, ¤t_ext, - &first_block, &free_list, - XFS_BMAP_MAX_SHIFT_EXTENTS); + start_fsb = next_fsb; + error = xfs_bmap_shift_extents(tp, ip, start_fsb, shift_fsb, + &done, &next_fsb, &first_block, &free_list, + XFS_BMAP_MAX_SHIFT_EXTENTS); if (error) goto out; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 0fe36e4d5cef..eb596b419942 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -291,12 +291,22 @@ xfs_file_read_iter( if (inode->i_mapping->nrpages) { ret = filemap_write_and_wait_range( VFS_I(ip)->i_mapping, - pos, -1); + pos, pos + size - 1); if (ret) { xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); return ret; } - truncate_pagecache_range(VFS_I(ip), pos, -1); + + /* + * Invalidate whole pages. This can return an error if + * we fail to invalidate a page, but this should never + * happen on XFS. Warn if it does fail. + */ + ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, + pos >> PAGE_CACHE_SHIFT, + (pos + size - 1) >> PAGE_CACHE_SHIFT); + WARN_ON_ONCE(ret); + ret = 0; } xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); } @@ -632,10 +642,19 @@ xfs_file_dio_aio_write( if (mapping->nrpages) { ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, - pos, -1); + pos, pos + count - 1); if (ret) goto out; - truncate_pagecache_range(VFS_I(ip), pos, -1); + /* + * Invalidate whole pages. This can return an error if + * we fail to invalidate a page, but this should never + * happen on XFS. Warn if it does fail. + */ + ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, + pos >> PAGE_CACHE_SHIFT, + (pos + count - 1) >> PAGE_CACHE_SHIFT); + WARN_ON_ONCE(ret); + ret = 0; } /* |