From 9cfb9b47479e237d217dbcfafe034cbf98f45909 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 8 Jan 2018 10:51:06 -0800 Subject: xfs: provide a centralized method for verifying inline fork data Replace the current haphazard dir2 shortform verifier callsites with a centralized verifier function that can be called either with the default verifier functions or with a custom set. This helps us strengthen integrity checking while providing us with flexibility for repair tools. xfs_repair wants this to be able to supply its own verifier functions when trying to fix possibly corrupt metadata. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_icache.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/xfs/xfs_icache.c') diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 3861d61fb265..c9c7c02bc2bb 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -473,6 +473,11 @@ xfs_iget_cache_miss( if (error) goto out_destroy; + if (!xfs_inode_verify_forks(ip)) { + error = -EFSCORRUPTED; + goto out_destroy; + } + trace_xfs_iget_miss(ip); if ((VFS_I(ip)->i_mode == 0) && !(flags & XFS_IGET_CREATE)) { -- cgit v1.2.3 From be78ff0e72778eb4df4aac66edb9e97462bfe00d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 16 Jan 2018 19:03:59 -0800 Subject: xfs: recheck reflink / dirty page status before freeing CoW reservations Eryu Guan reported seeing occasional hangs when running generic/269 with a new fsstress that supports clonerange/deduperange. The cause of this hang is an infinite loop when we convert the CoW fork extents from unwritten to real just prior to writing the pages out; the infinite loop happens because there's nothing in the CoW fork to convert, and so it spins forever. The fundamental issue here is that when we go to perform these CoW fork conversions, we're supposed to have an extent waiting for us, but the low space CoW reaper has snuck in and blown them away! 
There are four conditions that can dissuade the reaper from touching our file -- no reflink iflag; dirty page cache; writeback in progress; or directio in progress. We check the four conditions prior to taking the locks, but we neglect to recheck them once we have the locks, which is how we end up whacking the writeback that's in progress. Therefore, refactor the four checks into a helper function and call it once again once we have the locks to make sure we really want to reap the inode. While we're at it, add an ASSERT for this weird condition so that we'll fail noisily if we ever screw this up again. Reported-by: Eryu Guan Signed-off-by: Darrick J. Wong Tested-by: Eryu Guan Reviewed-by: Brian Foster --- fs/xfs/libxfs/xfs_bmap.c | 10 +++++++- fs/xfs/xfs_icache.c | 63 ++++++++++++++++++++++++++++++++---------------- 2 files changed, 51 insertions(+), 22 deletions(-) (limited to 'fs/xfs/xfs_icache.c') diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 140744700b07..6e6f3cb88cc2 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4304,8 +4304,16 @@ xfs_bmapi_write( while (bno < end && n < *nmap) { bool need_alloc = false, wasdelay = false; - /* in hole or beyoned EOF? */ + /* in hole or beyond EOF? */ if (eof || bma.got.br_startoff > bno) { + /* + * CoW fork conversions should /never/ hit EOF or + * holes. There should always be something for us + * to work on. 
+ */ + ASSERT(!((flags & XFS_BMAPI_CONVERT) && + (flags & XFS_BMAPI_COWFORK))); + if (flags & XFS_BMAPI_DELALLOC) { /* * For the COW fork we can reasonably get a diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index c9c7c02bc2bb..2da7a2ee34bc 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1655,28 +1655,15 @@ xfs_inode_clear_eofblocks_tag( } /* - * Automatic CoW Reservation Freeing - * - * These functions automatically garbage collect leftover CoW reservations - * that were made on behalf of a cowextsize hint when we start to run out - * of quota or when the reservations sit around for too long. If the file - * has dirty pages or is undergoing writeback, its CoW reservations will - * be retained. - * - * The actual garbage collection piggybacks off the same code that runs - * the speculative EOF preallocation garbage collector. + * Set ourselves up to free CoW blocks from this file. If it's already clean + * then we can bail out quickly, but otherwise we must back off if the file + * is undergoing some kind of write. */ -STATIC int -xfs_inode_free_cowblocks( +static bool +xfs_prep_free_cowblocks( struct xfs_inode *ip, - int flags, - void *args) + struct xfs_ifork *ifp) { - int ret; - struct xfs_eofblocks *eofb = args; - int match; - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); - /* * Just clear the tag if we have an empty cow fork or none at all. It's * possible the inode was fully unshared since it was originally tagged. 
@@ -1684,7 +1671,7 @@ xfs_inode_free_cowblocks( if (!xfs_is_reflink_inode(ip) || !ifp->if_bytes) { trace_xfs_inode_free_cowblocks_invalid(ip); xfs_inode_clear_cowblocks_tag(ip); - return 0; + return false; } /* @@ -1695,6 +1682,35 @@ xfs_inode_free_cowblocks( mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) || mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) || atomic_read(&VFS_I(ip)->i_dio_count)) + return false; + + return true; +} + +/* + * Automatic CoW Reservation Freeing + * + * These functions automatically garbage collect leftover CoW reservations + * that were made on behalf of a cowextsize hint when we start to run out + * of quota or when the reservations sit around for too long. If the file + * has dirty pages or is undergoing writeback, its CoW reservations will + * be retained. + * + * The actual garbage collection piggybacks off the same code that runs + * the speculative EOF preallocation garbage collector. + */ +STATIC int +xfs_inode_free_cowblocks( + struct xfs_inode *ip, + int flags, + void *args) +{ + struct xfs_eofblocks *eofb = args; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); + int match; + int ret = 0; + + if (!xfs_prep_free_cowblocks(ip, ifp)) return 0; if (eofb) { @@ -1715,7 +1731,12 @@ xfs_inode_free_cowblocks( xfs_ilock(ip, XFS_IOLOCK_EXCL); xfs_ilock(ip, XFS_MMAPLOCK_EXCL); - ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false); + /* + * Check again, nobody else should be able to dirty blocks or change + * the reflink iflag now that we have the first two locks held. 
+ */ + if (xfs_prep_free_cowblocks(ip, ifp)) + ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false); xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); xfs_iunlock(ip, XFS_IOLOCK_EXCL); -- cgit v1.2.3 From acd1d71598f7654b6d7718bcbe979992295c672a Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 26 Jan 2018 11:24:40 -0800 Subject: xfs: preserve i_rdev when recycling a reclaimable inode Commit 66f364649d870 ("xfs: remove if_rdev") moved storing of rdev value for special inodes to VFS inodes, but forgot to preserve the value of i_rdev when recycling a reclaimable xfs_inode. This was detected by xfstest overlay/017 with index=on mount option and xfs base fs. The test does a lookup of overlay chardev and blockdev right after drop caches. Overlayfs inodes hold a reference on underlying xfs inodes when mount option index=on is configured. If drop caches reclaim xfs inodes, before it reclaims overlayfs inodes, that can sometimes leave a reclaimable xfs inode and that test hits that case quite often. When that happens, the xfs inode cache remains broken (zero i_rdev) until the next cycle mount or drop caches. Fixes: 66f364649d870 ("xfs: remove if_rdev") Signed-off-by: Amir Goldstein Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_icache.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/xfs/xfs_icache.c') diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 2da7a2ee34bc..73bbeac739ed 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -295,6 +295,7 @@ xfs_reinit_inode( uint32_t generation = inode->i_generation; uint64_t version = inode->i_version; umode_t mode = inode->i_mode; + dev_t dev = inode->i_rdev; error = inode_init_always(mp->m_super, inode); @@ -302,6 +303,7 @@ xfs_reinit_inode( inode->i_generation = generation; inode->i_version = version; inode->i_mode = mode; + inode->i_rdev = dev; return error; } -- cgit v1.2.3