summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/xfs/libxfs/xfs_ag.c8
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c55
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.h3
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c28
-rw-r--r--fs/xfs/libxfs/xfs_trans_inode.c10
-rw-r--r--fs/xfs/scrub/inode.c18
-rw-r--r--fs/xfs/xfs_inode.c13
-rw-r--r--fs/xfs/xfs_ioctl.c27
-rw-r--r--fs/xfs/xfs_rtalloc.c49
9 files changed, 174 insertions, 37 deletions
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 778ec52cce70..ee9ec0c50bec 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -804,6 +804,14 @@ xfs_ag_shrink_space(
args.fsbno = XFS_AGB_TO_FSB(mp, agno, aglen - delta);
/*
+ * Make sure that the last inode cluster cannot overlap with the new
+ * end of the AG, even if it's sparse.
+ */
+ error = xfs_ialloc_check_shrink(*tpp, agno, agibp, aglen - delta);
+ if (error)
+ return error;
+
+ /*
* Disable perag reservations so it doesn't cause the allocation request
* to fail. We'll reestablish reservation before we return.
*/
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 57d9cb632983..aaf8805a82df 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -2928,3 +2928,58 @@ xfs_ialloc_calc_rootino(
return XFS_AGINO_TO_INO(mp, 0, XFS_AGB_TO_AGINO(mp, first_bno));
}
+
+/*
+ * Ensure there are not sparse inode clusters that cross the new EOAG.
+ *
+ * This is a no-op for non-spinode filesystems since clusters are always fully
+ * allocated and checking the bnobt suffices. However, a spinode filesystem
+ * could have a record where the upper inodes are free blocks. If those blocks
+ * were removed from the filesystem, the inode record would extend beyond EOAG,
+ * which will be flagged as corruption.
+ */
+int
+xfs_ialloc_check_shrink(
+ struct xfs_trans *tp,
+ xfs_agnumber_t agno,
+ struct xfs_buf *agibp,
+ xfs_agblock_t new_length)
+{
+ struct xfs_inobt_rec_incore rec;
+ struct xfs_btree_cur *cur;
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_perag *pag;
+ xfs_agino_t agino = XFS_AGB_TO_AGINO(mp, new_length);
+ int has;
+ int error;
+
+ if (!xfs_sb_version_hassparseinodes(&mp->m_sb))
+ return 0;
+
+ pag = xfs_perag_get(mp, agno);
+ cur = xfs_inobt_init_cursor(mp, tp, agibp, pag, XFS_BTNUM_INO);
+
+ /* Look up the inobt record that would correspond to the new EOFS. */
+ error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has);
+ if (error || !has)
+ goto out;
+
+ error = xfs_inobt_get_rec(cur, &rec, &has);
+ if (error)
+ goto out;
+
+ if (!has) {
+ error = -EFSCORRUPTED;
+ goto out;
+ }
+
+ /* If the record covers inodes that would be beyond EOFS, bail out. */
+ if (rec.ir_startino + XFS_INODES_PER_CHUNK > agino) {
+ error = -ENOSPC;
+ goto out;
+ }
+out:
+ xfs_btree_del_cursor(cur, error);
+ xfs_perag_put(pag);
+ return error;
+}
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index 9df7c80408ff..9a2112b4ad5e 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -122,4 +122,7 @@ int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
void xfs_ialloc_setup_geometry(struct xfs_mount *mp);
xfs_ino_t xfs_ialloc_calc_rootino(struct xfs_mount *mp, int sunit);
+int xfs_ialloc_check_shrink(struct xfs_trans *tp, xfs_agnumber_t agno,
+ struct xfs_buf *agibp, xfs_agblock_t new_length);
+
#endif /* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 04ce361688f7..84ea2e0af9f0 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -592,23 +592,27 @@ xfs_inode_validate_extsize(
/*
* This comment describes a historic gap in this verifier function.
*
- * On older kernels, the extent size hint verifier doesn't check that
- * the extent size hint is an integer multiple of the realtime extent
- * size on a directory with both RTINHERIT and EXTSZINHERIT flags set.
- * The verifier has always enforced the alignment rule for regular
- * files with the REALTIME flag set.
+ * For a directory with both RTINHERIT and EXTSZINHERIT flags set, this
+ * function has never checked that the extent size hint is an integer
+ * multiple of the realtime extent size. Since we allow users to set
+ * this combination on non-rt filesystems /and/ to change the rt
+ * extent size when adding a rt device to a filesystem, the net effect
+ * is that users can configure a filesystem anticipating one rt
+ * geometry and change their minds later. Directories do not use the
+ * extent size hint, so this is harmless for them.
*
* If a directory with a misaligned extent size hint is allowed to
* propagate that hint into a new regular realtime file, the result
* is that the inode cluster buffer verifier will trigger a corruption
- * shutdown the next time it is run.
+ * shutdown the next time it is run, because the verifier has always
+ * enforced the alignment rule for regular files.
*
- * Unfortunately, there could be filesystems with these misconfigured
- * directories in the wild, so we cannot add a check to this verifier
- * at this time because that will result a new source of directory
- * corruption errors when reading an existing filesystem. Instead, we
- * permit the misconfiguration to pass through the verifiers so that
- * callers of this function can correct and mitigate externally.
+ * Because we allow administrators to set a new rt extent size when
+ * adding a rt section, we cannot add a check to this verifier because
+ * that will result a new source of directory corruption errors when
+ * reading an existing filesystem. Instead, we rely on callers to
+ * decide when alignment checks are appropriate, and fix things up as
+ * needed.
*/
if (rt_flag)
diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
index 8d595a5c4abd..16f723ebe8dd 100644
--- a/fs/xfs/libxfs/xfs_trans_inode.c
+++ b/fs/xfs/libxfs/xfs_trans_inode.c
@@ -143,16 +143,14 @@ xfs_trans_log_inode(
}
/*
- * Inode verifiers on older kernels don't check that the extent size
- * hint is an integer multiple of the rt extent size on a directory
- * with both rtinherit and extszinherit flags set. If we're logging a
- * directory that is misconfigured in this way, clear the hint.
+ * Inode verifiers do not check that the extent size hint is an integer
+ * multiple of the rt extent size on a directory with both rtinherit
+ * and extszinherit flags set. If we're logging a directory that is
+ * misconfigured in this way, clear the hint.
*/
if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
(ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
(ip->i_extsize % ip->i_mount->m_sb.sb_rextsize) > 0) {
- xfs_info_once(ip->i_mount,
- "Correcting misaligned extent size hint in inode 0x%llx.", ip->i_ino);
ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
XFS_DIFLAG_EXTSZINHERIT);
ip->i_extsize = 0;
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index 61f90b2c9430..76fbc7ca4cec 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -73,11 +73,25 @@ xchk_inode_extsize(
uint16_t flags)
{
xfs_failaddr_t fa;
+ uint32_t value = be32_to_cpu(dip->di_extsize);
- fa = xfs_inode_validate_extsize(sc->mp, be32_to_cpu(dip->di_extsize),
- mode, flags);
+ fa = xfs_inode_validate_extsize(sc->mp, value, mode, flags);
if (fa)
xchk_ino_set_corrupt(sc, ino);
+
+ /*
+ * XFS allows a sysadmin to change the rt extent size when adding a rt
+ * section to a filesystem after formatting. If there are any
+ * directories with extszinherit and rtinherit set, the hint could
+ * become misaligned with the new rextsize. The verifier doesn't check
+ * this, because we allow rtinherit directories even without an rt
+ * device. Flag this as an administrative warning since we will clean
+ * this up eventually.
+ */
+ if ((flags & XFS_DIFLAG_RTINHERIT) &&
+ (flags & XFS_DIFLAG_EXTSZINHERIT) &&
+ value % sc->mp->m_sb.sb_rextsize > 0)
+ xchk_ino_set_warning(sc, ino);
}
/*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a835ceb79ba5..990b72ae3635 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2763,6 +2763,19 @@ xfs_remove(
error = xfs_droplink(tp, ip);
if (error)
goto out_trans_cancel;
+
+ /*
+ * Point the unlinked child directory's ".." entry to the root
+ * directory to eliminate back-references to inodes that may
+ * get freed before the child directory is closed. If the fs
+ * gets shrunk, this can lead to dirent inode validation errors.
+ */
+ if (dp->i_ino != tp->t_mountp->m_sb.sb_rootino) {
+ error = xfs_dir_replace(tp, ip, &xfs_name_dotdot,
+ tp->t_mountp->m_sb.sb_rootino, 0);
+ if (error)
+ return error;
+ }
} else {
/*
* When removing a non-directory we need to log the parent
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 65270e63c032..16039ea10ac9 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1065,7 +1065,24 @@ xfs_fill_fsxattr(
fileattr_fill_xflags(fa, xfs_ip2xflags(ip));
- fa->fsx_extsize = XFS_FSB_TO_B(mp, ip->i_extsize);
+ if (ip->i_diflags & XFS_DIFLAG_EXTSIZE) {
+ fa->fsx_extsize = XFS_FSB_TO_B(mp, ip->i_extsize);
+ } else if (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
+ /*
+ * Don't let a misaligned extent size hint on a directory
+ * escape to userspace if it won't pass the setattr checks
+ * later.
+ */
+ if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
+ ip->i_extsize % mp->m_sb.sb_rextsize > 0) {
+ fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE |
+ FS_XFLAG_EXTSZINHERIT);
+ fa->fsx_extsize = 0;
+ } else {
+ fa->fsx_extsize = XFS_FSB_TO_B(mp, ip->i_extsize);
+ }
+ }
+
if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
fa->fsx_cowextsize = XFS_FSB_TO_B(mp, ip->i_cowextsize);
fa->fsx_projid = ip->i_projid;
@@ -1292,10 +1309,10 @@ xfs_ioctl_setattr_check_extsize(
new_diflags = xfs_flags2diflags(ip, fa->fsx_xflags);
/*
- * Inode verifiers on older kernels don't check that the extent size
- * hint is an integer multiple of the rt extent size on a directory
- * with both rtinherit and extszinherit flags set. Don't let sysadmins
- * misconfigure directories.
+ * Inode verifiers do not check that the extent size hint is an integer
+ * multiple of the rt extent size on a directory with both rtinherit
+ * and extszinherit flags set. Don't let sysadmins misconfigure
+ * directories.
*/
if ((new_diflags & XFS_DIFLAG_RTINHERIT) &&
(new_diflags & XFS_DIFLAG_EXTSZINHERIT)) {
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 4e7be6b4ca8e..699066fb9052 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -923,16 +923,41 @@ xfs_growfs_rt(
uint8_t *rsum_cache; /* old summary cache */
sbp = &mp->m_sb;
- /*
- * Initial error checking.
- */
+
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (mp->m_rtdev_targp == NULL || mp->m_rbmip == NULL ||
- (nrblocks = in->newblocks) <= sbp->sb_rblocks ||
- (sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize)))
+
+ /* Needs to have been mounted with an rt device. */
+ if (!XFS_IS_REALTIME_MOUNT(mp))
+ return -EINVAL;
+ /*
+ * Mount should fail if the rt bitmap/summary files don't load, but
+ * we'll check anyway.
+ */
+ if (!mp->m_rbmip || !mp->m_rsumip)
+ return -EINVAL;
+
+ /* Shrink not supported. */
+ if (in->newblocks <= sbp->sb_rblocks)
+ return -EINVAL;
+
+ /* Can only change rt extent size when adding rt volume. */
+ if (sbp->sb_rblocks > 0 && in->extsize != sbp->sb_rextsize)
+ return -EINVAL;
+
+ /* Range check the extent size. */
+ if (XFS_FSB_TO_B(mp, in->extsize) > XFS_MAX_RTEXTSIZE ||
+ XFS_FSB_TO_B(mp, in->extsize) < XFS_MIN_RTEXTSIZE)
return -EINVAL;
- if ((error = xfs_sb_validate_fsb_count(sbp, nrblocks)))
+
+ /* Unsupported realtime features. */
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb) ||
+ xfs_sb_version_hasreflink(&mp->m_sb))
+ return -EOPNOTSUPP;
+
+ nrblocks = in->newblocks;
+ error = xfs_sb_validate_fsb_count(sbp, nrblocks);
+ if (error)
return error;
/*
* Read in the last block of the device, make sure it exists.
@@ -996,7 +1021,8 @@ xfs_growfs_rt(
((sbp->sb_rextents & ((1 << mp->m_blkbit_log) - 1)) != 0);
bmbno < nrbmblocks;
bmbno++) {
- xfs_trans_t *tp;
+ struct xfs_trans *tp;
+ xfs_rfsblock_t nrblocks_step;
*nmp = *mp;
nsbp = &nmp->m_sb;
@@ -1005,10 +1031,9 @@ xfs_growfs_rt(
*/
nsbp->sb_rextsize = in->extsize;
nsbp->sb_rbmblocks = bmbno + 1;
- nsbp->sb_rblocks =
- XFS_RTMIN(nrblocks,
- nsbp->sb_rbmblocks * NBBY *
- nsbp->sb_blocksize * nsbp->sb_rextsize);
+ nrblocks_step = (bmbno + 1) * NBBY * nsbp->sb_blocksize *
+ nsbp->sb_rextsize;
+ nsbp->sb_rblocks = min(nrblocks, nrblocks_step);
nsbp->sb_rextents = nsbp->sb_rblocks;
do_div(nsbp->sb_rextents, nsbp->sb_rextsize);
ASSERT(nsbp->sb_rextents != 0);