From 105664df51648d6376bda2895a890e786d0426ee Mon Sep 17 00:00:00 2001
From: Calvin Owens <calvinowens@fb.com>
Date: Fri, 31 Mar 2017 07:57:55 -0700
Subject: xfs: Honor FALLOC_FL_KEEP_SIZE when punching ends of files

When punching past EOF on XFS, fallocate(mode=PUNCH_HOLE|KEEP_SIZE) will
round the file size up to the nearest multiple of PAGE_SIZE:

  calvinow@vm-disks/generic-xfs-1 ~$ dd if=/dev/urandom of=test bs=2048 count=1
  calvinow@vm-disks/generic-xfs-1 ~$ stat test
    Size: 2048            Blocks: 8          IO Block: 4096   regular file
  calvinow@vm-disks/generic-xfs-1 ~$ fallocate -n -l 2048 -o 2048 -p test
  calvinow@vm-disks/generic-xfs-1 ~$ stat test
    Size: 4096            Blocks: 8          IO Block: 4096   regular file

Commit 3c2bdc912a1cc050 ("xfs: kill xfs_zero_remaining_bytes") replaced
xfs_zero_remaining_bytes() with calls to iomap helpers. The new helpers
don't enforce that [pos,offset) lies strictly on [0,i_size) when being
called from xfs_free_file_space(), so by "leaking" these ranges into
xfs_zero_range() we get this buggy behavior.

Fix this by reintroducing the checks xfs_zero_remaining_bytes() did
against i_size at the bottom of xfs_free_file_space().

Reported-by: Aaron Gao <gzh@fb.com>
Fixes: 3c2bdc912a1cc050 ("xfs: kill xfs_zero_remaining_bytes")
Cc: Christoph Hellwig <hch@lst.de>
Cc: Brian Foster <bfoster@redhat.com>
Cc: <stable@vger.kernel.org> # 4.8+
Signed-off-by: Calvin Owens <calvinowens@fb.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_bmap_util.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'fs/xfs/xfs_bmap_util.c')

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 8b75dcea5966..828532ce0adc 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1311,8 +1311,16 @@ xfs_free_file_space(
 	/*
 	 * Now that we've unmap all full blocks we'll have to zero out any
 	 * partial block at the beginning and/or end.  xfs_zero_range is
-	 * smart enough to skip any holes, including those we just created.
+	 * smart enough to skip any holes, including those we just created,
+	 * but we must take care not to zero beyond EOF and enlarge i_size.
 	 */
+
+	if (offset >= XFS_ISIZE(ip))
+		return 0;
+
+	if (offset + len > XFS_ISIZE(ip))
+		len = XFS_ISIZE(ip) - offset;
+
 	return xfs_zero_range(ip, offset, len, NULL);
 }
 
-- 
cgit v1.2.3


From 9c4f29d39168bb9363189f0d54ee1202372e7d9b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 28 Mar 2017 14:53:35 -0700
Subject: xfs: factor out a xfs_bmap_is_real_extent helper

This checks for all the non-normal extent types, including handling both
encodings of delayed allocations.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.h | 12 ++++++++++++
 fs/xfs/xfs_aops.c        |  4 +---
 fs/xfs/xfs_bmap_util.c   |  7 +++----
 fs/xfs/xfs_reflink.c     | 21 ++++-----------------
 4 files changed, 20 insertions(+), 24 deletions(-)

(limited to 'fs/xfs/xfs_bmap_util.c')

diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index cdef87db5262..a6e612cabe15 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -171,6 +171,18 @@ static inline int xfs_bmapi_whichfork(int bmapi_flags)
 	{ BMAP_COWFORK,		"COW" }
 
 
+/*
+ * Return true if the extent is a real, allocated extent, or false if it is  a
+ * delayed allocation, and unwritten extent or a hole.
+ */
+static inline bool xfs_bmap_is_real_extent(struct xfs_bmbt_irec *irec)
+{
+	return irec->br_state != XFS_EXT_UNWRITTEN &&
+		irec->br_startblock != HOLESTARTBLOCK &&
+		irec->br_startblock != DELAYSTARTBLOCK &&
+		!isnullstartblock(irec->br_startblock);
+}
+
 /*
  * This macro is used to determine how many extents will be shifted
  * in one write transaction. We could require two splits,
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 61494295d92f..8567b385fff4 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1276,9 +1276,7 @@ xfs_get_blocks(
 	 * For unwritten extents do not report a disk address in the buffered
 	 * read case (treat as if we're reading into a hole).
 	 */
-	if (imap.br_startblock != HOLESTARTBLOCK &&
-	    imap.br_startblock != DELAYSTARTBLOCK &&
-	    !ISUNWRITTEN(&imap))
+	if (xfs_bmap_is_real_extent(&imap))
 		xfs_map_buffer(inode, bh_result, &imap, offset);
 
 	/*
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 828532ce0adc..4d1920e594b0 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -448,10 +448,9 @@ xfs_getbmap_adjust_shared(
 	next_map->br_blockcount = 0;
 
 	/* Only written data blocks can be shared. */
-	if (!xfs_is_reflink_inode(ip) || whichfork != XFS_DATA_FORK ||
-	    map->br_startblock == DELAYSTARTBLOCK ||
-	    map->br_startblock == HOLESTARTBLOCK ||
-	    ISUNWRITTEN(map))
+	if (!xfs_is_reflink_inode(ip) ||
+	    whichfork != XFS_DATA_FORK ||
+	    !xfs_bmap_is_real_extent(map))
 		return 0;
 
 	agno = XFS_FSB_TO_AGNO(mp, map->br_startblock);
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 4a84c5ea266d..c0f3754caca2 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -206,11 +206,7 @@ xfs_reflink_trim_around_shared(
 	int			error = 0;
 
 	/* Holes, unwritten, and delalloc extents cannot be shared */
-	if (!xfs_is_reflink_inode(ip) ||
-	    ISUNWRITTEN(irec) ||
-	    irec->br_startblock == HOLESTARTBLOCK ||
-	    irec->br_startblock == DELAYSTARTBLOCK ||
-	    isnullstartblock(irec->br_startblock)) {
+	if (!xfs_is_reflink_inode(ip) || !xfs_bmap_is_real_extent(irec)) {
 		*shared = false;
 		return 0;
 	}
@@ -1045,12 +1041,12 @@ xfs_reflink_remap_extent(
 	xfs_off_t		new_isize)
 {
 	struct xfs_mount	*mp = ip->i_mount;
+	bool			real_extent = xfs_bmap_is_real_extent(irec);
 	struct xfs_trans	*tp;
 	xfs_fsblock_t		firstfsb;
 	unsigned int		resblks;
 	struct xfs_defer_ops	dfops;
 	struct xfs_bmbt_irec	uirec;
-	bool			real_extent;
 	xfs_filblks_t		rlen;
 	xfs_filblks_t		unmap_len;
 	xfs_off_t		newlen;
@@ -1059,11 +1055,6 @@ xfs_reflink_remap_extent(
 	unmap_len = irec->br_startoff + irec->br_blockcount - destoff;
 	trace_xfs_reflink_punch_range(ip, destoff, unmap_len);
 
-	/* Only remap normal extents. */
-	real_extent =  (irec->br_startblock != HOLESTARTBLOCK &&
-			irec->br_startblock != DELAYSTARTBLOCK &&
-			!ISUNWRITTEN(irec));
-
 	/* No reflinking if we're low on space */
 	if (real_extent) {
 		error = xfs_reflink_ag_has_free_space(mp,
@@ -1359,9 +1350,7 @@ xfs_reflink_dirty_extents(
 			goto out;
 		if (nmaps == 0)
 			break;
-		if (map[0].br_startblock == HOLESTARTBLOCK ||
-		    map[0].br_startblock == DELAYSTARTBLOCK ||
-		    ISUNWRITTEN(&map[0]))
+		if (!xfs_bmap_is_real_extent(&map[0]))
 			goto next;
 
 		map[1] = map[0];
@@ -1435,9 +1424,7 @@ xfs_reflink_clear_inode_flag(
 			return error;
 		if (nmaps == 0)
 			break;
-		if (map.br_startblock == HOLESTARTBLOCK ||
-		    map.br_startblock == DELAYSTARTBLOCK ||
-		    ISUNWRITTEN(&map))
+		if (!xfs_bmap_is_real_extent(&map))
 			goto next;
 
 		agno = XFS_FSB_TO_AGNO(mp, map.br_startblock);
-- 
cgit v1.2.3


From 3b4683c294095b5f777c03307ef8c60f47320e12 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Tue, 11 Apr 2017 10:50:05 -0700
Subject: xfs: drop iolock from reclaim context to appease lockdep

Lockdep complains about use of the iolock in inode reclaim context
because it doesn't understand that reclaim has the last reference to
the inode, and thus an iolock->reclaim->iolock deadlock is not
possible.

The iolock is technically not necessary in xfs_inactive() and was
only added to appease an assert in xfs_free_eofblocks(), which can
be called from other non-reclaim contexts. Therefore, just kill the
assert and drop the use of the iolock from reclaim context to quiet
lockdep.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_bmap_util.c | 8 +++-----
 fs/xfs/xfs_inode.c     | 9 +++++----
 2 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'fs/xfs/xfs_bmap_util.c')

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 4d1920e594b0..de94798f1c1b 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -903,9 +903,9 @@ xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
 }
 
 /*
- * This is called by xfs_inactive to free any blocks beyond eof
- * when the link count isn't zero and by xfs_dm_punch_hole() when
- * punching a hole to EOF.
+ * This is called to free any blocks beyond eof. The caller must hold
+ * IOLOCK_EXCL unless we are in the inode reclaim path and have the only
+ * reference to the inode.
  */
 int
 xfs_free_eofblocks(
@@ -920,8 +920,6 @@ xfs_free_eofblocks(
 	struct xfs_bmbt_irec	imap;
 	struct xfs_mount	*mp = ip->i_mount;
 
-	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
-
 	/*
 	 * Figure out if there are any blocks beyond the end
 	 * of the file.  If not, then there is nothing to do.
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 7605d8396596..ec9826c56500 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1906,12 +1906,13 @@ xfs_inactive(
 		 * force is true because we are evicting an inode from the
 		 * cache. Post-eof blocks must be freed, lest we end up with
 		 * broken free space accounting.
+		 *
+		 * Note: don't bother with iolock here since lockdep complains
+		 * about acquiring it in reclaim context. We have the only
+		 * reference to the inode at this point anyways.
 		 */
-		if (xfs_can_free_eofblocks(ip, true)) {
-			xfs_ilock(ip, XFS_IOLOCK_EXCL);
+		if (xfs_can_free_eofblocks(ip, true))
 			xfs_free_eofblocks(ip);
-			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-		}
 
 		return;
 	}
-- 
cgit v1.2.3


From 4f1adf3373f072246c14119b2aa6dfb4d6510a43 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Wed, 19 Apr 2017 15:19:32 -0700
Subject: xfs: more do_div cleanups

On some architectures do_div does the pointer compare
trick to make sure that we've sent it an unsigned 64-bit
number.  (Why unsigned?  I don't know.)

Fix up the few places that squawk about this; in
xfs_bmap_wants_extents() we just used a bare int64_t so change
that to unsigned.

In xfs_adjust_extent_unmap_boundaries() all we wanted was the
mod, and we have an xfs-specific function to handle that w/o
side effects, which includes proper casting for do_div.

In xfs_daddr_to_ag[b]no, we were using the wrong type anyway;
XFS_BB_TO_FSBT returns a block in the filesystem, so use
xfs_rfsblock_t not xfs_daddr_t, and gain the unsignedness
from that type as a bonus.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 2 +-
 fs/xfs/xfs_bmap_util.c   | 5 +----
 fs/xfs/xfs_mount.h       | 4 ++--
 3 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'fs/xfs/xfs_bmap_util.c')

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 7f42f6067eb5..0fdff08145c1 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4887,7 +4887,7 @@ xfs_bmap_del_extent_delay(
 	ASSERT(got_endoff >= del_endoff);
 
 	if (isrt) {
-		int64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount);
+		uint64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount);
 
 		do_div(rtexts, mp->m_sb.sb_rextsize);
 		xfs_mod_frextents(mp, rtexts);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index de94798f1c1b..7ac80a1facf2 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1206,11 +1206,8 @@ xfs_adjust_extent_unmap_boundaries(
 		return error;
 
 	if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
-		xfs_daddr_t	block;
-
 		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
-		block = imap.br_startblock;
-		mod = do_div(block, mp->m_sb.sb_rextsize);
+		mod = do_mod(imap.br_startblock, mp->m_sb.sb_rextsize);
 		if (mod)
 			*startoffset_fsb += mp->m_sb.sb_rextsize - mod;
 	}
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 22b2185e93a0..9fa312a41c93 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -313,7 +313,7 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
 static inline xfs_agnumber_t
 xfs_daddr_to_agno(struct xfs_mount *mp, xfs_daddr_t d)
 {
-	xfs_daddr_t ld = XFS_BB_TO_FSBT(mp, d);
+	xfs_rfsblock_t ld = XFS_BB_TO_FSBT(mp, d);
 	do_div(ld, mp->m_sb.sb_agblocks);
 	return (xfs_agnumber_t) ld;
 }
@@ -321,7 +321,7 @@ xfs_daddr_to_agno(struct xfs_mount *mp, xfs_daddr_t d)
 static inline xfs_agblock_t
 xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
 {
-	xfs_daddr_t ld = XFS_BB_TO_FSBT(mp, d);
+	xfs_rfsblock_t ld = XFS_BB_TO_FSBT(mp, d);
 	return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
 }
 
-- 
cgit v1.2.3