From f0bc9985fe8bf4377d5557cd7957d9be43ec8861 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Tue, 21 Jan 2014 16:44:57 -0600 Subject: xfs: clean up xfs_buftarg Clean up the xfs_buftarg structure a bit: - remove bt_bsize which is never used - replace bt_sshift with bt_ssize; we only ever shift it back Signed-off-by: Eric Sandeen Reviewed-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_buf.c | 5 ++--- fs/xfs/xfs_buf.h | 3 +-- fs/xfs/xfs_ioctl.c | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 9fccfb594291..b664bce57bfc 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -445,7 +445,7 @@ _xfs_buf_find( numbytes = BBTOB(numblks); /* Check for IOs smaller than the sector size / not sector aligned */ - ASSERT(!(numbytes < (1 << btp->bt_sshift))); + ASSERT(!(numbytes < btp->bt_ssize)); ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_smask)); /* @@ -1599,8 +1599,7 @@ xfs_setsize_buftarg( unsigned int blocksize, unsigned int sectorsize) { - btp->bt_bsize = blocksize; - btp->bt_sshift = ffs(sectorsize) - 1; + btp->bt_ssize = sectorsize; btp->bt_smask = sectorsize - 1; if (set_blocksize(btp->bt_bdev, sectorsize)) { diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 1cf21a4a9f22..4ef949aebb04 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -93,8 +93,7 @@ typedef struct xfs_buftarg { struct block_device *bt_bdev; struct backing_dev_info *bt_bdi; struct xfs_mount *bt_mount; - unsigned int bt_bsize; - unsigned int bt_sshift; + unsigned int bt_ssize; size_t bt_smask; /* LRU control structures */ diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 518aa56b8f2e..584e092415d0 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1583,7 +1583,7 @@ xfs_file_ioctl( XFS_IS_REALTIME_INODE(ip) ? 
mp->m_rtdev_targp : mp->m_ddev_targp; - da.d_mem = da.d_miniosz = 1 << target->bt_sshift; + da.d_mem = da.d_miniosz = target->bt_ssize; da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1); if (copy_to_user(arg, &da, sizeof(da))) -- cgit v1.2.3 From 6da54179b3f1bb6a302fd5f3b38fae32ee463ed1 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Tue, 21 Jan 2014 16:45:52 -0600 Subject: xfs: rename xfs_buftarg structure members In preparation for adding new members to the structure, give these old ones more descriptive names: bt_ssize -> bt_meta_sectorsize bt_smask -> bt_meta_sectormask Signed-off-by: Eric Sandeen Reviewed-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_buf.c | 8 ++++---- fs/xfs/xfs_buf.h | 4 ++-- fs/xfs/xfs_file.c | 4 ++-- fs/xfs/xfs_ioctl.c | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index b664bce57bfc..a526f8d2dc6f 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -445,8 +445,8 @@ _xfs_buf_find( numbytes = BBTOB(numblks); /* Check for IOs smaller than the sector size / not sector aligned */ - ASSERT(!(numbytes < btp->bt_ssize)); - ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_smask)); + ASSERT(!(numbytes < btp->bt_meta_sectorsize)); + ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_meta_sectormask)); /* * Corrupted block numbers can get through to here, unfortunately, so we @@ -1599,8 +1599,8 @@ xfs_setsize_buftarg( unsigned int blocksize, unsigned int sectorsize) { - btp->bt_ssize = sectorsize; - btp->bt_smask = sectorsize - 1; + btp->bt_meta_sectorsize = sectorsize; + btp->bt_meta_sectormask = sectorsize - 1; if (set_blocksize(btp->bt_bdev, sectorsize)) { char name[BDEVNAME_SIZE]; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 4ef949aebb04..d5d88dda4d31 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -93,8 +93,8 @@ typedef struct xfs_buftarg { struct block_device *bt_bdev; struct backing_dev_info *bt_bdi; struct xfs_mount *bt_mount; 
- unsigned int bt_ssize; - size_t bt_smask; + unsigned int bt_meta_sectorsize; + size_t bt_meta_sectormask; /* LRU control structures */ struct shrinker bt_shrinker; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index e00121592632..d01745f748ac 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -261,7 +261,7 @@ xfs_file_aio_read( xfs_buftarg_t *target = XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; - if ((pos & target->bt_smask) || (size & target->bt_smask)) { + if ((pos | size) & target->bt_meta_sectormask) { if (pos == i_size_read(inode)) return 0; return -XFS_ERROR(EINVAL); @@ -641,7 +641,7 @@ xfs_file_dio_aio_write( struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; - if ((pos & target->bt_smask) || (count & target->bt_smask)) + if ((pos | count) & target->bt_meta_sectormask) return -XFS_ERROR(EINVAL); if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask)) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 584e092415d0..3dc60ed9572a 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1583,7 +1583,7 @@ xfs_file_ioctl( XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; - da.d_mem = da.d_miniosz = target->bt_ssize; + da.d_mem = da.d_miniosz = target->bt_meta_sectorsize; da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1); if (copy_to_user(arg, &da, sizeof(da))) -- cgit v1.2.3 From 7c71ee78031c248dca13fc94dea9a4cc217db6cf Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Tue, 21 Jan 2014 16:46:23 -0600 Subject: xfs: allow logical-sector sized O_DIRECT Some time ago, mkfs.xfs started picking the storage physical sector size as the default filesystem "sector size" in order to avoid RMW costs incurred by doing IOs at logical sector size alignments. However, this means that for a filesystem made with e.g. a 4k sector size on an "advanced format" 4k/512 disk, 512-byte direct IOs are no longer allowed. 
This means that XFS has essentially turned this AF drive into a hard 4K device, from the filesystem on up. XFS's mkfs-specified "sector size" is really just controlling the minimum size & alignment of filesystem metadata. There is no real need to tightly couple XFS's minimal metadata size to the minimum allowed direct IO size; XFS can continue doing metadata in optimal sizes, but still allow smaller DIOs for apps which issue them, for whatever reason. This patch adds new fields to the xfs_buftarg, so that we now track 2 sizes: 1) The metadata sector size, which is the minimum unit and alignment of IO which will be performed by metadata operations. 2) The device logical sector size. The first is used internally by the file system for metadata alignment and IOs. The second is used for the minimum allowed direct IO alignment. This has passed xfstests on filesystems made with 4k sectors, including when run under the patch I sent to ignore XFS_IOC_DIOINFO, and issue 512-byte DIOs anyway. I also directly tested end-of-block behavior on preallocated, sparse, and existing files when we do a 512-byte IO into a 4k file on a 4k-sector filesystem, to be sure there were no unexpected behaviors. 
Signed-off-by: Eric Sandeen Reviewed-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_buf.c | 5 +++++ fs/xfs/xfs_buf.h | 15 +++++++++++++++ fs/xfs/xfs_file.c | 7 +++++-- fs/xfs/xfs_ioctl.c | 2 +- 4 files changed, 26 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index a526f8d2dc6f..51757113a822 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1599,6 +1599,7 @@ xfs_setsize_buftarg( unsigned int blocksize, unsigned int sectorsize) { + /* Set up metadata sector size info */ btp->bt_meta_sectorsize = sectorsize; btp->bt_meta_sectormask = sectorsize - 1; @@ -1613,6 +1614,10 @@ xfs_setsize_buftarg( return EINVAL; } + /* Set up device logical sector size mask */ + btp->bt_logical_sectorsize = bdev_logical_block_size(btp->bt_bdev); + btp->bt_logical_sectormask = bdev_logical_block_size(btp->bt_bdev) - 1; + return 0; } diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index d5d88dda4d31..995339534db6 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -88,6 +88,19 @@ typedef unsigned int xfs_buf_flags_t; */ #define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */ +/* + * The xfs_buftarg contains 2 notions of "sector size" - + * + * 1) The metadata sector size, which is the minimum unit and + * alignment of IO which will be performed by metadata operations. + * 2) The device logical sector size + * + * The first is specified at mkfs time, and is stored on-disk in the + * superblock's sb_sectsize. + * + * The latter is derived from the underlying device, and controls direct IO + * alignment constraints. 
+ */ typedef struct xfs_buftarg { dev_t bt_dev; struct block_device *bt_bdev; @@ -95,6 +108,8 @@ typedef struct xfs_buftarg { struct xfs_mount *bt_mount; unsigned int bt_meta_sectorsize; size_t bt_meta_sectormask; + size_t bt_logical_sectorsize; + size_t bt_logical_sectormask; /* LRU control structures */ struct shrinker bt_shrinker; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index d01745f748ac..2e7989e3a2d6 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -261,7 +261,8 @@ xfs_file_aio_read( xfs_buftarg_t *target = XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; - if ((pos | size) & target->bt_meta_sectormask) { + /* DIO must be aligned to device logical sector size */ + if ((pos | size) & target->bt_logical_sectormask) { if (pos == i_size_read(inode)) return 0; return -XFS_ERROR(EINVAL); @@ -641,9 +642,11 @@ xfs_file_dio_aio_write( struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; - if ((pos | count) & target->bt_meta_sectormask) + /* DIO must be aligned to device logical sector size */ + if ((pos | count) & target->bt_logical_sectormask) return -XFS_ERROR(EINVAL); + /* "unaligned" here means not aligned to a filesystem block */ if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask)) unaligned_io = 1; diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 3dc60ed9572a..bcfe61202115 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1583,7 +1583,7 @@ xfs_file_ioctl( XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; - da.d_mem = da.d_miniosz = target->bt_meta_sectorsize; + da.d_mem = da.d_miniosz = target->bt_logical_sectorsize; da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1); if (copy_to_user(arg, &da, sizeof(da))) -- cgit v1.2.3