diff options
37 files changed, 1023 insertions, 509 deletions
diff --git a/Documentation/admin-guide/xfs.rst b/Documentation/admin-guide/xfs.rst index f461d6c33534..86de8a1ad91c 100644 --- a/Documentation/admin-guide/xfs.rst +++ b/Documentation/admin-guide/xfs.rst @@ -210,6 +210,28 @@ When mounting an XFS filesystem, the following options are accepted. inconsistent namespace presentation during or after a failover event. +Deprecation of V4 Format +======================== + +The V4 filesystem format lacks certain features that are supported by +the V5 format, such as metadata checksumming, strengthened metadata +verification, and the ability to store timestamps past the year 2038. +Because of this, the V4 format is deprecated. All users should upgrade +by backing up their files, reformatting, and restoring from the backup. + +Administrators and users can detect a V4 filesystem by running xfs_info +against a filesystem mountpoint and checking for a string containing +"crc=". If no such string is found, please upgrade xfsprogs to the +latest version and try again. + +The deprecation will take place in two parts. Support for mounting V4 +filesystems can now be disabled at kernel build time via Kconfig option. +The option will default to yes until September 2025, at which time it +will be changed to default to no. In September 2030, support will be +removed from the codebase entirely. + +Note: Distributors may choose to withdraw V4 format support earlier than +the dates listed above. Deprecated Mount Options ======================== @@ -217,6 +239,9 @@ Deprecated Mount Options =========================== ================ Name Removal Schedule =========================== ================ +Mounting with V4 filesystem September 2030 +ikeep/noikeep September 2025 +attr2/noattr2 September 2025 =========================== ================ @@ -331,7 +356,12 @@ The following sysctls are available for the XFS filesystem: Deprecated Sysctls ================== -None at present. +=========================== ================ + Name Removal Schedule +=========================== ================ +fs.xfs.irix_sgid_inherit September 2025 +fs.xfs.irix_symlink_mode September 2025 +=========================== ================ Removed Sysctls diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index e685299eb3d2..9fac5ea8d0e4 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -22,6 +22,31 @@ config XFS_FS system of your root partition is compiled as a module, you'll need to use an initial ramdisk (initrd) to boot. +config XFS_SUPPORT_V4 + bool "Support deprecated V4 (crc=0) format" + depends on XFS_FS + default y + help + The V4 filesystem format lacks certain features that are supported + by the V5 format, such as metadata checksumming, strengthened + metadata verification, and the ability to store timestamps past the + year 2038. Because of this, the V4 format is deprecated. All users + should upgrade by backing up their files, reformatting, and restoring + from the backup. + + Administrators and users can detect a V4 filesystem by running + xfs_info against a filesystem mountpoint and checking for a string + beginning with "crc=". If the string "crc=0" is found, the + filesystem is a V4 filesystem. If no such string is found, please + upgrade xfsprogs to the latest version and try again. + + This option will become default N in September 2025. Support for the + V4 format will be removed entirely in September 2030. Distributors + can say N here to withdraw support earlier. + + To continue supporting the old V4 format (crc=0), say Y. + To close off an attack surface, say N. + config XFS_QUOTA bool "XFS Quota support" depends on XFS_FS diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index 3f80cede7406..48d8e9caf86f 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -96,8 +96,6 @@ xfs_attr3_rmt_verify( { struct xfs_attr3_rmt_hdr *rmt = ptr; - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return __this_address; if (!xfs_verify_magic(bp, rmt->rm_magic)) return __this_address; if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid)) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 1b0a01b06a05..d9a692484eae 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5046,20 +5046,25 @@ xfs_bmap_del_extent_real( flags = XFS_ILOG_CORE; if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) { - xfs_fsblock_t bno; xfs_filblks_t len; xfs_extlen_t mod; - bno = div_u64_rem(del->br_startblock, mp->m_sb.sb_rextsize, - &mod); - ASSERT(mod == 0); len = div_u64_rem(del->br_blockcount, mp->m_sb.sb_rextsize, &mod); ASSERT(mod == 0); - error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len); - if (error) - goto done; + if (!(bflags & XFS_BMAPI_REMAP)) { + xfs_fsblock_t bno; + + bno = div_u64_rem(del->br_startblock, + mp->m_sb.sb_rextsize, &mod); + ASSERT(mod == 0); + + error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len); + if (error) + goto done; + } + do_fx = 0; nblks = len * mp->m_sb.sb_rextsize; qfield = XFS_TRANS_DQ_RTBCOUNT; diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index b40a4e80f5ee..b876b44c0204 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -15,8 +15,8 @@ */ #define XFS_DA_NODE_MAGIC 0xfebe /* magic number: non-leaf blocks */ #define XFS_ATTR_LEAF_MAGIC 0xfbee /* magic number: attribute leaf blks */ -#define XFS_DIR2_LEAF1_MAGIC 0xd2f1 /* magic number: v2 dirlf single blks */ -#define XFS_DIR2_LEAFN_MAGIC 0xd2ff /* magic number: v2 dirlf multi blks */ +#define XFS_DIR2_LEAF1_MAGIC 0xd2f1 /* magic number: v2 dirlf single blks */ +#define XFS_DIR2_LEAFN_MAGIC 0xd2ff /* magic number: v2 dirlf multi blks */ typedef struct xfs_da_blkinfo { __be32 forw; /* previous block in list */ @@ -35,8 +35,8 @@ typedef struct xfs_da_blkinfo { */ #define XFS_DA3_NODE_MAGIC 0x3ebe /* magic number: non-leaf blocks */ #define XFS_ATTR3_LEAF_MAGIC 0x3bee /* magic number: attribute leaf blks */ -#define XFS_DIR3_LEAF1_MAGIC 0x3df1 /* magic number: v2 dirlf single blks */ -#define XFS_DIR3_LEAFN_MAGIC 0x3dff /* magic number: v2 dirlf multi blks */ +#define XFS_DIR3_LEAF1_MAGIC 0x3df1 /* magic number: v3 dirlf single blks */ +#define XFS_DIR3_LEAFN_MAGIC 0x3dff /* magic number: v3 dirlf multi blks */ struct xfs_da3_blkinfo { /* @@ -61,7 +61,7 @@ struct xfs_da3_blkinfo { * Since we have duplicate keys, use a binary search but always follow * all match in the block, not just the first match found. */ -#define XFS_DA_NODE_MAXDEPTH 5 /* max depth of Btree */ +#define XFS_DA_NODE_MAXDEPTH 5 /* max depth of Btree */ typedef struct xfs_da_node_hdr { struct xfs_da_blkinfo info; /* block type, links, etc. */ @@ -746,14 +746,14 @@ xfs_attr3_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx) */ static inline int xfs_attr_leaf_entsize_remote(int nlen) { - return ((uint)sizeof(xfs_attr_leaf_name_remote_t) - 1 + (nlen) + \ - XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1); + return round_up(sizeof(struct xfs_attr_leaf_name_remote) - 1 + + nlen, XFS_ATTR_LEAF_NAME_ALIGN); } static inline int xfs_attr_leaf_entsize_local(int nlen, int vlen) { - return ((uint)sizeof(xfs_attr_leaf_name_local_t) - 1 + (nlen) + (vlen) + - XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1); + return round_up(sizeof(struct xfs_attr_leaf_name_local) - 1 + + nlen + vlen, XFS_ATTR_LEAF_NAME_ALIGN); } static inline int xfs_attr_leaf_entsize_local_max(int bsize) diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index d8f586256add..eff4a127188e 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -16,6 +16,8 @@ #include "xfs_inode.h" #include "xfs_inode_item.h" #include "xfs_trace.h" +#include "xfs_icache.h" +#include "xfs_log.h" /* * Deferred Operations in XFS @@ -186,8 +188,9 @@ xfs_defer_create_intent( { const struct xfs_defer_op_type *ops = defer_op_types[dfp->dfp_type]; - dfp->dfp_intent = ops->create_intent(tp, &dfp->dfp_work, - dfp->dfp_count, sort); + if (!dfp->dfp_intent) + dfp->dfp_intent = ops->create_intent(tp, &dfp->dfp_work, + dfp->dfp_count, sort); } /* @@ -312,22 +315,6 @@ xfs_defer_trans_roll( } /* - * Reset an already used dfops after finish. - */ -static void -xfs_defer_reset( - struct xfs_trans *tp) -{ - ASSERT(list_empty(&tp->t_dfops)); - - /* - * Low mode state transfers across transaction rolls to mirror dfops - * lifetime. Clear it now that dfops is reset. - */ - tp->t_flags &= ~XFS_TRANS_LOWMODE; -} - -/* * Free up any items left in the list. */ static void @@ -360,6 +347,58 @@ xfs_defer_cancel_list( } /* + * Prevent a log intent item from pinning the tail of the log by logging a + * done item to release the intent item; and then log a new intent item. + * The caller should provide a fresh transaction and roll it after we're done. + */ +static int +xfs_defer_relog( + struct xfs_trans **tpp, + struct list_head *dfops) +{ + struct xlog *log = (*tpp)->t_mountp->m_log; + struct xfs_defer_pending *dfp; + xfs_lsn_t threshold_lsn = NULLCOMMITLSN; + + + ASSERT((*tpp)->t_flags & XFS_TRANS_PERM_LOG_RES); + + list_for_each_entry(dfp, dfops, dfp_list) { + /* + * If the log intent item for this deferred op is not a part of + * the current log checkpoint, relog the intent item to keep + * the log tail moving forward. We're ok with this being racy + * because an incorrect decision means we'll be a little slower + * at pushing the tail. + */ + if (dfp->dfp_intent == NULL || + xfs_log_item_in_current_chkpt(dfp->dfp_intent)) + continue; + + /* + * Figure out where we need the tail to be in order to maintain + * the minimum required free space in the log. Only sample + * the log threshold once per call. + */ + if (threshold_lsn == NULLCOMMITLSN) { + threshold_lsn = xlog_grant_push_threshold(log, 0); + if (threshold_lsn == NULLCOMMITLSN) + break; + } + if (XFS_LSN_CMP(dfp->dfp_intent->li_lsn, threshold_lsn) >= 0) + continue; + + trace_xfs_defer_relog_intent((*tpp)->t_mountp, dfp); + XFS_STATS_INC((*tpp)->t_mountp, defer_relog); + dfp->dfp_intent = xfs_trans_item_relog(dfp->dfp_intent, *tpp); + } + + if ((*tpp)->t_flags & XFS_TRANS_DIRTY) + return xfs_defer_trans_roll(tpp); + return 0; +} + +/* * Log an intent-done item for the first pending intent, and finish the work * items. */ @@ -390,6 +429,7 @@ xfs_defer_finish_one( list_add(li, &dfp->dfp_work); dfp->dfp_count++; dfp->dfp_done = NULL; + dfp->dfp_intent = NULL; xfs_defer_create_intent(tp, dfp, false); } @@ -428,13 +468,27 @@ xfs_defer_finish_noroll( /* Until we run out of pending work to finish... */ while (!list_empty(&dop_pending) || !list_empty(&(*tp)->t_dfops)) { + /* + * Deferred items that are created in the process of finishing + * other deferred work items should be queued at the head of + * the pending list, which puts them ahead of the deferred work + * that was created by the caller. This keeps the number of + * pending work items to a minimum, which decreases the amount + * of time that any one intent item can stick around in memory, + * pinning the log tail. + */ xfs_defer_create_intents(*tp); - list_splice_tail_init(&(*tp)->t_dfops, &dop_pending); + list_splice_init(&(*tp)->t_dfops, &dop_pending); error = xfs_defer_trans_roll(tp); if (error) goto out_shutdown; + /* Possibly relog intent items to keep the log moving. */ + error = xfs_defer_relog(tp, &dop_pending); + if (error) + goto out_shutdown; + dfp = list_first_entry(&dop_pending, struct xfs_defer_pending, dfp_list); error = xfs_defer_finish_one(*tp, dfp); @@ -475,7 +529,10 @@ xfs_defer_finish( return error; } } - xfs_defer_reset(*tp); + + /* Reset LOWMODE now that we've finished all the dfops. */ + ASSERT(list_empty(&(*tp)->t_dfops)); + (*tp)->t_flags &= ~XFS_TRANS_LOWMODE; return 0; } @@ -549,6 +606,139 @@ xfs_defer_move( * that behavior. */ dtp->t_flags |= (stp->t_flags & XFS_TRANS_LOWMODE); + stp->t_flags &= ~XFS_TRANS_LOWMODE; +} + +/* + * Prepare a chain of fresh deferred ops work items to be completed later. Log + * recovery requires the ability to put off until later the actual finishing + * work so that it can process unfinished items recovered from the log in + * correct order. + * + * Create and log intent items for all the work that we're capturing so that we + * can be assured that the items will get replayed if the system goes down + * before log recovery gets a chance to finish the work it put off. The entire + * deferred ops state is transferred to the capture structure and the + * transaction is then ready for the caller to commit it. If there are no + * intent items to capture, this function returns NULL. + * + * If capture_ip is not NULL, the capture structure will obtain an extra + * reference to the inode. + */ +static struct xfs_defer_capture * +xfs_defer_ops_capture( + struct xfs_trans *tp, + struct xfs_inode *capture_ip) +{ + struct xfs_defer_capture *dfc; + + if (list_empty(&tp->t_dfops)) + return NULL; + + /* Create an object to capture the defer ops. */ + dfc = kmem_zalloc(sizeof(*dfc), KM_NOFS); + INIT_LIST_HEAD(&dfc->dfc_list); + INIT_LIST_HEAD(&dfc->dfc_dfops); + + xfs_defer_create_intents(tp); + + /* Move the dfops chain and transaction state to the capture struct. */ + list_splice_init(&tp->t_dfops, &dfc->dfc_dfops); + dfc->dfc_tpflags = tp->t_flags & XFS_TRANS_LOWMODE; + tp->t_flags &= ~XFS_TRANS_LOWMODE; + + /* Capture the remaining block reservations along with the dfops. */ + dfc->dfc_blkres = tp->t_blk_res - tp->t_blk_res_used; + dfc->dfc_rtxres = tp->t_rtx_res - tp->t_rtx_res_used; + + /* Preserve the log reservation size. */ + dfc->dfc_logres = tp->t_log_res; + + /* + * Grab an extra reference to this inode and attach it to the capture + * structure. + */ + if (capture_ip) { + ihold(VFS_I(capture_ip)); + dfc->dfc_capture_ip = capture_ip; + } + + return dfc; +} + +/* Release all resources that we used to capture deferred ops. */ +void +xfs_defer_ops_release( + struct xfs_mount *mp, + struct xfs_defer_capture *dfc) +{ + xfs_defer_cancel_list(mp, &dfc->dfc_dfops); + if (dfc->dfc_capture_ip) + xfs_irele(dfc->dfc_capture_ip); + kmem_free(dfc); +} + +/* + * Capture any deferred ops and commit the transaction. This is the last step + * needed to finish a log intent item that we recovered from the log. If any + * of the deferred ops operate on an inode, the caller must pass in that inode + * so that the reference can be transferred to the capture structure. The + * caller must hold ILOCK_EXCL on the inode, and must unlock it before calling + * xfs_defer_ops_continue. + */ +int +xfs_defer_ops_capture_and_commit( + struct xfs_trans *tp, + struct xfs_inode *capture_ip, + struct list_head *capture_list) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_defer_capture *dfc; + int error; + + ASSERT(!capture_ip || xfs_isilocked(capture_ip, XFS_ILOCK_EXCL)); + + /* If we don't capture anything, commit transaction and exit. */ + dfc = xfs_defer_ops_capture(tp, capture_ip); + if (!dfc) + return xfs_trans_commit(tp); + + /* Commit the transaction and add the capture structure to the list. */ + error = xfs_trans_commit(tp); + if (error) { + xfs_defer_ops_release(mp, dfc); + return error; + } + + list_add_tail(&dfc->dfc_list, capture_list); + return 0; +} + +/* + * Attach a chain of captured deferred ops to a new transaction and free the + * capture structure. If an inode was captured, it will be passed back to the + * caller with ILOCK_EXCL held and joined to the transaction with lockflags==0. + * The caller now owns the inode reference. + */ +void +xfs_defer_ops_continue( + struct xfs_defer_capture *dfc, + struct xfs_trans *tp, + struct xfs_inode **captured_ipp) +{ + ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); + ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY)); + + /* Lock and join the captured inode to the new transaction. */ + if (dfc->dfc_capture_ip) { + xfs_ilock(dfc->dfc_capture_ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, dfc->dfc_capture_ip, 0); + } + *captured_ipp = dfc->dfc_capture_ip; + + /* Move captured dfops chain and state to the transaction. */ + list_splice_init(&dfc->dfc_dfops, &tp->t_dfops); + tp->t_flags |= dfc->dfc_tpflags; - xfs_defer_reset(stp); + kmem_free(dfc); } diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h index 6b2ca580f2b0..05472f71fffe 100644 --- a/fs/xfs/libxfs/xfs_defer.h +++ b/fs/xfs/libxfs/xfs_defer.h @@ -8,6 +8,7 @@ struct xfs_btree_cur; struct xfs_defer_op_type; +struct xfs_defer_capture; /* * Header for deferred operation list. @@ -63,4 +64,40 @@ extern const struct xfs_defer_op_type xfs_rmap_update_defer_type; extern const struct xfs_defer_op_type xfs_extent_free_defer_type; extern const struct xfs_defer_op_type xfs_agfl_free_defer_type; +/* + * This structure enables a dfops user to detach the chain of deferred + * operations from a transaction so that they can be continued later. + */ +struct xfs_defer_capture { + /* List of other capture structures. */ + struct list_head dfc_list; + + /* Deferred ops state saved from the transaction. */ + struct list_head dfc_dfops; + unsigned int dfc_tpflags; + + /* Block reservations for the data and rt devices. */ + unsigned int dfc_blkres; + unsigned int dfc_rtxres; + + /* Log reservation saved from the transaction. */ + unsigned int dfc_logres; + + /* + * An inode reference that must be maintained to complete the deferred + * work. + */ + struct xfs_inode *dfc_capture_ip; +}; + +/* + * Functions to capture a chain of deferred operations and continue them later. + * This doesn't normally happen except log recovery. + */ +int xfs_defer_ops_capture_and_commit(struct xfs_trans *tp, + struct xfs_inode *capture_ip, struct list_head *capture_list); +void xfs_defer_ops_continue(struct xfs_defer_capture *d, struct xfs_trans *tp, + struct xfs_inode **captured_ipp); +void xfs_defer_ops_release(struct xfs_mount *mp, struct xfs_defer_capture *d); + #endif /* __XFS_DEFER_H__ */ diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h index 536666143fe7..ef5eaf33d146 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.h +++ b/fs/xfs/libxfs/xfs_inode_buf.h @@ -17,7 +17,7 @@ struct xfs_dinode; */ struct xfs_icdinode { uint16_t di_flushiter; /* incremented on flush */ - uint32_t di_projid; /* owner's project id */ + prid_t di_projid; /* owner's project id */ xfs_fsize_t di_size; /* number of bytes in file */ xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */ xfs_extlen_t di_extsize; /* basic/minimum extent size for file */ diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 27c39268c31f..340c83f76c80 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -2505,12 +2505,15 @@ xfs_rmap_map_extent( int whichfork, struct xfs_bmbt_irec *PREV) { + enum xfs_rmap_intent_type type = XFS_RMAP_MAP; + if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork)) return; - __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ? - XFS_RMAP_MAP_SHARED : XFS_RMAP_MAP, ip->i_ino, - whichfork, PREV); + if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip)) + type = XFS_RMAP_MAP_SHARED; + + __xfs_rmap_add(tp, type, ip->i_ino, whichfork, PREV); } /* Unmap an extent out of a file. */ @@ -2521,12 +2524,15 @@ xfs_rmap_unmap_extent( int whichfork, struct xfs_bmbt_irec *PREV) { + enum xfs_rmap_intent_type type = XFS_RMAP_UNMAP; + if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork)) return; - __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ? - XFS_RMAP_UNMAP_SHARED : XFS_RMAP_UNMAP, ip->i_ino, - whichfork, PREV); + if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip)) + type = XFS_RMAP_UNMAP_SHARED; + + __xfs_rmap_add(tp, type, ip->i_ino, whichfork, PREV); } /* @@ -2543,12 +2549,15 @@ xfs_rmap_convert_extent( int whichfork, struct xfs_bmbt_irec *PREV) { + enum xfs_rmap_intent_type type = XFS_RMAP_CONVERT; + if (!xfs_rmap_update_is_needed(mp, whichfork)) return; - __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ? - XFS_RMAP_CONVERT_SHARED : XFS_RMAP_CONVERT, ip->i_ino, - whichfork, PREV); + if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip)) + type = XFS_RMAP_CONVERT_SHARED; + + __xfs_rmap_add(tp, type, ip->i_ino, whichfork, PREV); } /* Schedule the creation of an rmap for non-file data. */ diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 1d9fa8a300f1..6c1aba16113c 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -1018,7 +1018,6 @@ xfs_rtalloc_query_range( struct xfs_mount *mp = tp->t_mountp; xfs_rtblock_t rtstart; xfs_rtblock_t rtend; - xfs_rtblock_t rem; int is_free; int error = 0; @@ -1027,13 +1026,12 @@ xfs_rtalloc_query_range( if (low_rec->ar_startext >= mp->m_sb.sb_rextents || low_rec->ar_startext == high_rec->ar_startext) return 0; - if (high_rec->ar_startext > mp->m_sb.sb_rextents) - high_rec->ar_startext = mp->m_sb.sb_rextents; + high_rec->ar_startext = min(high_rec->ar_startext, + mp->m_sb.sb_rextents - 1); /* Iterate the bitmap, looking for discrepancies. */ rtstart = low_rec->ar_startext; - rem = high_rec->ar_startext - rtstart; - while (rem) { + while (rtstart <= high_rec->ar_startext) { /* Is the first block free? */ error = xfs_rtcheck_range(mp, tp, rtstart, 1, 1, &rtend, &is_free); @@ -1042,7 +1040,7 @@ xfs_rtalloc_query_range( /* How long does the extent go for? */ error = xfs_rtfind_forw(mp, tp, rtstart, - high_rec->ar_startext - 1, &rtend); + high_rec->ar_startext, &rtend); if (error) break; @@ -1055,7 +1053,6 @@ xfs_rtalloc_query_range( break; } - rem -= rtend - rtstart + 1; rtstart = rtend + 1; } diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c index e56786f0a13c..653f3280e1c1 100644 --- a/fs/xfs/scrub/dabtree.c +++ b/fs/xfs/scrub/dabtree.c @@ -441,6 +441,20 @@ xchk_da_btree_block( goto out_freebp; } + /* + * If we've been handed a block that is below the dabtree root, does + * its hashval match what the parent block expected to see? + */ + if (level > 0) { + struct xfs_da_node_entry *key; + + key = xchk_da_btree_node_entry(ds, level - 1); + if (be32_to_cpu(key->hashval) != blk->hashval) { + xchk_da_set_corrupt(ds, level); + goto out_freebp; + } + } + out: return error; out_freebp: diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index ec3691372e7c..9e16a4d0f97c 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -24,6 +24,7 @@ #include "xfs_error.h" #include "xfs_log_priv.h" #include "xfs_log_recover.h" +#include "xfs_quota.h" kmem_zone_t *xfs_bui_zone; kmem_zone_t *xfs_bud_zone; @@ -423,30 +424,26 @@ const struct xfs_defer_op_type xfs_bmap_update_defer_type = { STATIC int xfs_bui_item_recover( struct xfs_log_item *lip, - struct xfs_trans *parent_tp) + struct list_head *capture_list) { struct xfs_bmbt_irec irec; struct xfs_bui_log_item *buip = BUI_ITEM(lip); struct xfs_trans *tp; struct xfs_inode *ip = NULL; - struct xfs_mount *mp = parent_tp->t_mountp; + struct xfs_mount *mp = lip->li_mountp; struct xfs_map_extent *bmap; struct xfs_bud_log_item *budp; xfs_fsblock_t startblock_fsb; xfs_fsblock_t inode_fsb; xfs_filblks_t count; xfs_exntst_t state; - enum xfs_bmap_intent_type type; - bool op_ok; unsigned int bui_type; int whichfork; int error = 0; /* Only one mapping operation per BUI... */ - if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) { - xfs_bui_release(buip); + if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) return -EFSCORRUPTED; - } /* * First check the validity of the extent described by the @@ -457,76 +454,58 @@ xfs_bui_item_recover( XFS_FSB_TO_DADDR(mp, bmap->me_startblock)); inode_fsb = XFS_BB_TO_FSB(mp, XFS_FSB_TO_DADDR(mp, XFS_INO_TO_FSB(mp, bmap->me_owner))); - switch (bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK) { + state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ? + XFS_EXT_UNWRITTEN : XFS_EXT_NORM; + whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ? + XFS_ATTR_FORK : XFS_DATA_FORK; + bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK; + switch (bui_type) { case XFS_BMAP_MAP: case XFS_BMAP_UNMAP: - op_ok = true; break; default: - op_ok = false; - break; + return -EFSCORRUPTED; } - if (!op_ok || startblock_fsb == 0 || + if (startblock_fsb == 0 || bmap->me_len == 0 || inode_fsb == 0 || startblock_fsb >= mp->m_sb.sb_dblocks || bmap->me_len >= mp->m_sb.sb_agblocks || inode_fsb >= mp->m_sb.sb_dblocks || - (bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS)) { - /* - * This will pull the BUI from the AIL and - * free the memory associated with it. - */ - xfs_bui_release(buip); + (bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS)) return -EFSCORRUPTED; - } - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, - XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp); + /* Grab the inode. */ + error = xfs_iget(mp, NULL, bmap->me_owner, 0, 0, &ip); if (error) return error; - /* - * Recovery stashes all deferred ops during intent processing and - * finishes them on completion. Transfer current dfops state to this - * transaction and transfer the result back before we return. - */ - xfs_defer_move(tp, parent_tp); - budp = xfs_trans_get_bud(tp, buip); - /* Grab the inode. */ - error = xfs_iget(mp, tp, bmap->me_owner, 0, XFS_ILOCK_EXCL, &ip); + error = xfs_qm_dqattach(ip); if (error) - goto err_inode; + goto err_rele; if (VFS_I(ip)->i_nlink == 0) xfs_iflags_set(ip, XFS_IRECOVERY); - /* Process deferred bmap item. */ - state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ? - XFS_EXT_UNWRITTEN : XFS_EXT_NORM; - whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ? - XFS_ATTR_FORK : XFS_DATA_FORK; - bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK; - switch (bui_type) { - case XFS_BMAP_MAP: - case XFS_BMAP_UNMAP: - type = bui_type; - break; - default: - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); - error = -EFSCORRUPTED; - goto err_inode; - } + /* Allocate transaction and do the work. */ + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp); + if (error) + goto err_rele; + + budp = xfs_trans_get_bud(tp, buip); + xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); count = bmap->me_len; - error = xfs_trans_log_finish_bmap_update(tp, budp, type, ip, whichfork, - bmap->me_startoff, bmap->me_startblock, &count, state); + error = xfs_trans_log_finish_bmap_update(tp, budp, bui_type, ip, + whichfork, bmap->me_startoff, bmap->me_startblock, + &count, state); if (error) - goto err_inode; + goto err_cancel; if (count > 0) { - ASSERT(type == XFS_BMAP_UNMAP); + ASSERT(bui_type == XFS_BMAP_UNMAP); irec.br_startblock = bmap->me_startblock; irec.br_blockcount = count; irec.br_startoff = bmap->me_startoff; @@ -534,20 +513,24 @@ xfs_bui_item_recover( xfs_bmap_unmap_extent(tp, ip, &irec); } - xfs_defer_move(parent_tp, tp); - error = xfs_trans_commit(tp); + /* + * Commit transaction, which frees the transaction and saves the inode + * for later replay activities. + */ + error = xfs_defer_ops_capture_and_commit(tp, ip, capture_list); + if (error) + goto err_unlock; + xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_irele(ip); + return 0; - return error; - -err_inode: - xfs_defer_move(parent_tp, tp); +err_cancel: xfs_trans_cancel(tp); - if (ip) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_irele(ip); - } +err_unlock: + xfs_iunlock(ip, XFS_ILOCK_EXCL); +err_rele: + xfs_irele(ip); return error; } @@ -559,6 +542,32 @@ xfs_bui_item_match( return BUI_ITEM(lip)->bui_format.bui_id == intent_id; } +/* Relog an intent item to push the log tail forward. */ +static struct xfs_log_item * +xfs_bui_item_relog( + struct xfs_log_item *intent, + struct xfs_trans *tp) +{ + struct xfs_bud_log_item *budp; + struct xfs_bui_log_item *buip; + struct xfs_map_extent *extp; + unsigned int count; + + count = BUI_ITEM(intent)->bui_format.bui_nextents; + extp = BUI_ITEM(intent)->bui_format.bui_extents; + + tp->t_flags |= XFS_TRANS_DIRTY; + budp = xfs_trans_get_bud(tp, BUI_ITEM(intent)); + set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags); + + buip = xfs_bui_init(tp->t_mountp); + memcpy(buip->bui_format.bui_extents, extp, count * sizeof(*extp)); + atomic_set(&buip->bui_next_extent, count); + xfs_trans_add_item(tp, &buip->bui_item); + set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags); + return &buip->bui_item; +} + static const struct xfs_item_ops xfs_bui_item_ops = { .iop_size = xfs_bui_item_size, .iop_format = xfs_bui_item_format, @@ -566,6 +575,7 @@ static const struct xfs_item_ops xfs_bui_item_ops = { .iop_release = xfs_bui_item_release, .iop_recover = xfs_bui_item_recover, .iop_match = xfs_bui_item_match, + .iop_relog = xfs_bui_item_relog, }; /* diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c index 24c7a8d11e1a..d44e8b4a3391 100644 --- a/fs/xfs/xfs_buf_item_recover.c +++ b/fs/xfs/xfs_buf_item_recover.c @@ -719,6 +719,8 @@ xlog_recover_get_buf_lsn( case XFS_ABTC_MAGIC: case XFS_RMAP_CRC_MAGIC: case XFS_REFC_CRC_MAGIC: + case XFS_FIBT_CRC_MAGIC: + case XFS_FIBT_MAGIC: case XFS_IBT_CRC_MAGIC: case XFS_IBT_MAGIC: { struct xfs_btree_block *btb = blk; diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 3072814e407d..1d95ed387d66 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -831,8 +831,8 @@ xfs_qm_dqget_checks( } /* - * Given the file system, id, and type (UDQUOT/GDQUOT), return a locked - * dquot, doing an allocation (if requested) as needed. + * Given the file system, id, and type (UDQUOT/GDQUOT/PDQUOT), return a + * locked dquot, doing an allocation (if requested) as needed. */ int xfs_qm_dqget( diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 6cb8cd11072a..6c11bfc3d452 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -585,10 +585,10 @@ const struct xfs_defer_op_type xfs_agfl_free_defer_type = { STATIC int xfs_efi_item_recover( struct xfs_log_item *lip, - struct xfs_trans *parent_tp) + struct list_head *capture_list) { struct xfs_efi_log_item *efip = EFI_ITEM(lip); - struct xfs_mount *mp = parent_tp->t_mountp; + struct xfs_mount *mp = lip->li_mountp; struct xfs_efd_log_item *efdp; struct xfs_trans *tp; struct xfs_extent *extp; @@ -608,14 +608,8 @@ xfs_efi_item_recover( if (startblock_fsb == 0 || extp->ext_len == 0 || startblock_fsb >= mp->m_sb.sb_dblocks || - extp->ext_len >= mp->m_sb.sb_agblocks) { - /* - * This will pull the EFI from the AIL and - * free the memory associated with it. - */ - xfs_efi_release(efip); + extp->ext_len >= mp->m_sb.sb_agblocks) return -EFSCORRUPTED; - } } error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); @@ -633,8 +627,7 @@ xfs_efi_item_recover( } - error = xfs_trans_commit(tp); - return error; + return xfs_defer_ops_capture_and_commit(tp, NULL, capture_list); abort_error: xfs_trans_cancel(tp); @@ -649,6 +642,34 @@ xfs_efi_item_match( return EFI_ITEM(lip)->efi_format.efi_id == intent_id; } +/* Relog an intent item to push the log tail forward. */ +static struct xfs_log_item * +xfs_efi_item_relog( + struct xfs_log_item *intent, + struct xfs_trans *tp) +{ + struct xfs_efd_log_item *efdp; + struct xfs_efi_log_item *efip; + struct xfs_extent *extp; + unsigned int count; + + count = EFI_ITEM(intent)->efi_format.efi_nextents; + extp = EFI_ITEM(intent)->efi_format.efi_extents; + + tp->t_flags |= XFS_TRANS_DIRTY; + efdp = xfs_trans_get_efd(tp, EFI_ITEM(intent), count); + efdp->efd_next_extent = count; + memcpy(efdp->efd_format.efd_extents, extp, count * sizeof(*extp)); + set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags); + + efip = xfs_efi_init(tp->t_mountp, count); + memcpy(efip->efi_format.efi_extents, extp, count * sizeof(*extp)); + atomic_set(&efip->efi_next_extent, count); + xfs_trans_add_item(tp, &efip->efi_item); + set_bit(XFS_LI_DIRTY, &efip->efi_item.li_flags); + return &efip->efi_item; +} + static const struct xfs_item_ops xfs_efi_item_ops = { .iop_size = xfs_efi_item_size, .iop_format = xfs_efi_item_format, @@ -656,6 +677,7 @@ static const struct xfs_item_ops xfs_efi_item_ops = { .iop_release = xfs_efi_item_release, .iop_recover = xfs_efi_item_recover, .iop_match = xfs_efi_item_match, + .iop_relog = xfs_efi_item_relog, }; /* diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 1a88025e68a3..db23e455eb91 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -33,39 +33,7 @@ enum xfs_fstrm_alloc { /* * Allocation group filestream associations are tracked with per-ag atomic * counters. These counters allow xfs_filestream_pick_ag() to tell whether a - * particular AG already has active filestreams associated with it. The mount - * point's m_peraglock is used to protect these counters from per-ag array - * re-allocation during a growfs operation. When xfs_growfs_data_private() is - * about to reallocate the array, it calls xfs_filestream_flush() with the - * m_peraglock held in write mode. - * - * Since xfs_mru_cache_flush() guarantees that all the free functions for all - * the cache elements have finished executing before it returns, it's safe for - * the free functions to use the atomic counters without m_peraglock protection. - * This allows the implementation of xfs_fstrm_free_func() to be agnostic about - * whether it was called with the m_peraglock held in read mode, write mode or - * not held at all. The race condition this addresses is the following: - * - * - The work queue scheduler fires and pulls a filestream directory cache - * element off the LRU end of the cache for deletion, then gets pre-empted. - * - A growfs operation grabs the m_peraglock in write mode, flushes all the - * remaining items from the cache and reallocates the mount point's per-ag - * array, resetting all the counters to zero. - * - The work queue thread resumes and calls the free function for the element - * it started cleaning up earlier. In the process it decrements the - * filestreams counter for an AG that now has no references. - * - * With a shrinkfs feature, the above scenario could panic the system. - * - * All other uses of the following macros should be protected by either the - * m_peraglock held in read mode, or the cache's internal locking exposed by the - * interval between a call to xfs_mru_cache_lookup() and a call to - * xfs_mru_cache_done(). In addition, the m_peraglock must be held in read mode - * when new elements are added to the cache. - * - * Combined, these locking rules ensure that no associations will ever exist in - * the cache that reference per-ag array elements that have since been - * reallocated. + * particular AG already has active filestreams associated with it. */ int xfs_filestream_peek_ag( diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 4eebcec4aae6..9ce5e7d5bf8f 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -26,7 +26,7 @@ #include "xfs_rtalloc.h" /* Convert an xfs_fsmap to an fsmap. */ -void +static void xfs_fsmap_from_internal( struct fsmap *dest, struct xfs_fsmap *src) @@ -155,8 +155,7 @@ xfs_fsmap_owner_from_rmap( /* getfsmap query state */ struct xfs_getfsmap_info { struct xfs_fsmap_head *head; - xfs_fsmap_format_t formatter; /* formatting fn */ - void *format_arg; /* format buffer */ + struct fsmap *fsmap_recs; /* mapping records */ struct xfs_buf *agf_bp; /* AGF, for refcount queries */ xfs_daddr_t next_daddr; /* next daddr we expect */ u64 missing_owner; /* owner of holes */ @@ -224,6 +223,20 @@ xfs_getfsmap_is_shared( return 0; } +static inline void +xfs_getfsmap_format( + struct xfs_mount *mp, + struct xfs_fsmap *xfm, + struct xfs_getfsmap_info *info) +{ + struct fsmap *rec; + + trace_xfs_getfsmap_mapping(mp, xfm); + + rec = &info->fsmap_recs[info->head->fmh_entries++]; + xfs_fsmap_from_internal(rec, xfm); +} + /* * Format a reverse mapping for getfsmap, having translated rm_startblock * into the appropriate daddr units. @@ -256,6 +269,9 @@ xfs_getfsmap_helper( /* Are we just counting mappings? */ if (info->head->fmh_count == 0) { + if (info->head->fmh_entries == UINT_MAX) + return -ECANCELED; + if (rec_daddr > info->next_daddr) info->head->fmh_entries++; @@ -285,10 +301,7 @@ xfs_getfsmap_helper( fmr.fmr_offset = 0; fmr.fmr_length = rec_daddr - info->next_daddr; fmr.fmr_flags = FMR_OF_SPECIAL_OWNER; - error = info->formatter(&fmr, info->format_arg); - if (error) - return error; - info->head->fmh_entries++; + xfs_getfsmap_format(mp, &fmr, info); } if (info->last) @@ -320,11 +333,8 @@ xfs_getfsmap_helper( if (shared) fmr.fmr_flags |= FMR_OF_SHARED; } - error = info->formatter(&fmr, info->format_arg); - if (error) - return error; - info->head->fmh_entries++; + xfs_getfsmap_format(mp, &fmr, info); out: rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount); if (info->next_daddr < rec_daddr) @@ -792,11 +802,11 @@ xfs_getfsmap_check_keys( #endif /* CONFIG_XFS_RT */ /* - * Get filesystem's extents as described in head, and format for - * output. Calls formatter to fill the user's buffer until all - * extents are mapped, until the passed-in head->fmh_count slots have - * been filled, or until the formatter short-circuits the loop, if it - * is tracking filled-in extents on its own. + * Get filesystem's extents as described in head, and format for output. Fills + * in the supplied records array until there are no more reverse mappings to + * return or head.fmh_entries == head.fmh_count. In the second case, this + * function returns -ECANCELED to indicate that more records would have been + * returned. * * Key to Confusion * ---------------- @@ -816,8 +826,7 @@ int xfs_getfsmap( struct xfs_mount *mp, struct xfs_fsmap_head *head, - xfs_fsmap_format_t formatter, - void *arg) + struct fsmap *fsmap_recs) { struct xfs_trans *tp = NULL; struct xfs_fsmap dkeys[2]; /* per-dev keys */ @@ -892,8 +901,7 @@ xfs_getfsmap( info.next_daddr = head->fmh_keys[0].fmr_physical + head->fmh_keys[0].fmr_length; - info.formatter = formatter; - info.format_arg = arg; + info.fsmap_recs = fsmap_recs; info.head = head; /* diff --git a/fs/xfs/xfs_fsmap.h b/fs/xfs/xfs_fsmap.h index c6c57739b862..a0775788e7b1 100644 --- a/fs/xfs/xfs_fsmap.h +++ b/fs/xfs/xfs_fsmap.h @@ -27,13 +27,9 @@ struct xfs_fsmap_head { struct xfs_fsmap fmh_keys[2]; /* low and high keys */ }; -void xfs_fsmap_from_internal(struct fsmap *dest, struct xfs_fsmap *src); void xfs_fsmap_to_internal(struct xfs_fsmap *dest, struct fsmap *src); -/* fsmap to userspace formatter - copy to user & advance pointer */ -typedef int (*xfs_fsmap_format_t)(struct xfs_fsmap *, void *); - int xfs_getfsmap(struct xfs_mount *mp, struct xfs_fsmap_head *head, - xfs_fsmap_format_t formatter, void *arg); + struct fsmap *out_recs); #endif /* __XFS_FSMAP_H__ */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 49624973eecc..2bfbcf28b1bd 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -698,6 +698,68 @@ out_unlock: return error; } +/* Propagate di_flags from a parent inode to a child inode. */ +static void +xfs_inode_inherit_flags( + struct xfs_inode *ip, + const struct xfs_inode *pip) +{ + unsigned int di_flags = 0; + umode_t mode = VFS_I(ip)->i_mode; + + if (S_ISDIR(mode)) { + if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) + di_flags |= XFS_DIFLAG_RTINHERIT; + if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { + di_flags |= XFS_DIFLAG_EXTSZINHERIT; + ip->i_d.di_extsize = pip->i_d.di_extsize; + } + if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) + di_flags |= XFS_DIFLAG_PROJINHERIT; + } else if (S_ISREG(mode)) { + if ((pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) && + xfs_sb_version_hasrealtime(&ip->i_mount->m_sb)) + di_flags |= XFS_DIFLAG_REALTIME; + if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { + di_flags |= XFS_DIFLAG_EXTSIZE; + ip->i_d.di_extsize = pip->i_d.di_extsize; + } + } + if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) && + xfs_inherit_noatime) + di_flags |= XFS_DIFLAG_NOATIME; + if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) && + xfs_inherit_nodump) + di_flags |= XFS_DIFLAG_NODUMP; + if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) && + xfs_inherit_sync) + di_flags |= XFS_DIFLAG_SYNC; + if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) && + xfs_inherit_nosymlinks) + di_flags |= XFS_DIFLAG_NOSYMLINKS; + if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) && + xfs_inherit_nodefrag) + di_flags |= XFS_DIFLAG_NODEFRAG; + if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM) + di_flags |= XFS_DIFLAG_FILESTREAM; + + ip->i_d.di_flags |= di_flags; +} + +/* Propagate di_flags2 from a parent inode to a child inode. */ +static void +xfs_inode_inherit_flags2( + struct xfs_inode *ip, + const struct xfs_inode *pip) +{ + if (pip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) { + ip->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; + ip->i_d.di_cowextsize = pip->i_d.di_cowextsize; + } + if (pip->i_d.di_flags2 & XFS_DIFLAG2_DAX) + ip->i_d.di_flags2 |= XFS_DIFLAG2_DAX; +} + /* * Allocate an inode on disk and return a copy of its in-core version. * The in-core inode is locked exclusively. Set mode, nlink, and rdev @@ -841,54 +903,10 @@ xfs_ialloc( break; case S_IFREG: case S_IFDIR: - if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) { - uint di_flags = 0; - - if (S_ISDIR(mode)) { - if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) - di_flags |= XFS_DIFLAG_RTINHERIT; - if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { - di_flags |= XFS_DIFLAG_EXTSZINHERIT; - ip->i_d.di_extsize = pip->i_d.di_extsize; - } - if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) - di_flags |= XFS_DIFLAG_PROJINHERIT; - } else if (S_ISREG(mode)) { - if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) - di_flags |= XFS_DIFLAG_REALTIME; - if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { - di_flags |= XFS_DIFLAG_EXTSIZE; - ip->i_d.di_extsize = pip->i_d.di_extsize; - } - } - if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) && - xfs_inherit_noatime) - di_flags |= XFS_DIFLAG_NOATIME; - if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) && - xfs_inherit_nodump) - di_flags |= XFS_DIFLAG_NODUMP; - if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) && - xfs_inherit_sync) - di_flags |= XFS_DIFLAG_SYNC; - if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) && - xfs_inherit_nosymlinks) - di_flags |= XFS_DIFLAG_NOSYMLINKS; - if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) && - xfs_inherit_nodefrag) - di_flags |= XFS_DIFLAG_NODEFRAG; - if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM) - di_flags |= XFS_DIFLAG_FILESTREAM; - - ip->i_d.di_flags |= di_flags; - } - if (pip && (pip->i_d.di_flags2 & XFS_DIFLAG2_ANY)) { - if (pip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) { - ip->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; - ip->i_d.di_cowextsize = pip->i_d.di_cowextsize; - } - if (pip->i_d.di_flags2 & XFS_DIFLAG2_DAX) - ip->i_d.di_flags2 |= XFS_DIFLAG2_DAX; - } + if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) + xfs_inode_inherit_flags(ip, pip); + if (pip && (pip->i_d.di_flags2 & XFS_DIFLAG2_ANY)) + xfs_inode_inherit_flags2(ip, pip); /* FALLTHROUGH */ case S_IFLNK: ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS; @@ -1516,17 +1534,10 @@ xfs_itruncate_extents_flags( if (error) goto out; - /* - * Duplicate the transaction that has the permanent - * reservation and commit the old transaction. - */ + /* free the just unmapped extents */ error = xfs_defer_finish(&tp); if (error) goto out; - - error = xfs_trans_roll_inode(&tp, ip); - if (error) - goto out; } if (whichfork == XFS_DATA_FORK) { diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index bca7659fb5c6..3fbd98f61ea5 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1716,39 +1716,17 @@ out_free_buf: return error; } -struct getfsmap_info { - struct xfs_mount *mp; - struct fsmap_head __user *data; - unsigned int idx; - __u32 last_flags; -}; - -STATIC int -xfs_getfsmap_format(struct xfs_fsmap *xfm, void *priv) -{ - struct getfsmap_info *info = priv; - struct fsmap fm; - - trace_xfs_getfsmap_mapping(info->mp, xfm); - - info->last_flags = xfm->fmr_flags; - xfs_fsmap_from_internal(&fm, xfm); - if (copy_to_user(&info->data->fmh_recs[info->idx++], &fm, - sizeof(struct fsmap))) - return -EFAULT; - - return 0; -} - STATIC int xfs_ioc_getfsmap( struct xfs_inode *ip, struct fsmap_head __user *arg) { - struct getfsmap_info info = { NULL }; struct xfs_fsmap_head xhead = {0}; struct fsmap_head head; - bool aborted = false; + struct fsmap *recs; + unsigned int count; + __u32 last_flags = 0; + bool done = false; int error; if (copy_from_user(&head, arg, sizeof(struct fsmap_head))) @@ -1760,38 +1738,112 @@ xfs_ioc_getfsmap( sizeof(head.fmh_keys[1].fmr_reserved))) return -EINVAL; + /* + * Use an internal memory buffer so that we don't have to copy fsmap + * data to userspace while holding locks. Start by trying to allocate + * up to 128k for the buffer, but fall back to a single page if needed. + */ + count = min_t(unsigned int, head.fmh_count, + 131072 / sizeof(struct fsmap)); + recs = kvzalloc(count * sizeof(struct fsmap), GFP_KERNEL); + if (!recs) { + count = min_t(unsigned int, head.fmh_count, + PAGE_SIZE / sizeof(struct fsmap)); + recs = kvzalloc(count * sizeof(struct fsmap), GFP_KERNEL); + if (!recs) + return -ENOMEM; + } + xhead.fmh_iflags = head.fmh_iflags; - xhead.fmh_count = head.fmh_count; xfs_fsmap_to_internal(&xhead.fmh_keys[0], &head.fmh_keys[0]); xfs_fsmap_to_internal(&xhead.fmh_keys[1], &head.fmh_keys[1]); trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]); trace_xfs_getfsmap_high_key(ip->i_mount, &xhead.fmh_keys[1]); - info.mp = ip->i_mount; - info.data = arg; - error = xfs_getfsmap(ip->i_mount, &xhead, xfs_getfsmap_format, &info); - if (error == -ECANCELED) { - error = 0; - aborted = true; - } else if (error) - return error; + head.fmh_entries = 0; + do { + struct fsmap __user *user_recs; + struct fsmap *last_rec; + + user_recs = &arg->fmh_recs[head.fmh_entries]; + xhead.fmh_entries = 0; + xhead.fmh_count = min_t(unsigned int, count, + head.fmh_count - head.fmh_entries); + + /* Run query, record how many entries we got. */ + error = xfs_getfsmap(ip->i_mount, &xhead, recs); + switch (error) { + case 0: + /* + * There are no more records in the result set. Copy + * whatever we got to userspace and break out. + */ + done = true; + break; + case -ECANCELED: + /* + * The internal memory buffer is full. Copy whatever + * records we got to userspace and go again if we have + * not yet filled the userspace buffer. + */ + error = 0; + break; + default: + goto out_free; + } + head.fmh_entries += xhead.fmh_entries; + head.fmh_oflags = xhead.fmh_oflags; - /* If we didn't abort, set the "last" flag in the last fmx */ - if (!aborted && info.idx) { - info.last_flags |= FMR_OF_LAST; - if (copy_to_user(&info.data->fmh_recs[info.idx - 1].fmr_flags, - &info.last_flags, sizeof(info.last_flags))) - return -EFAULT; + /* + * If the caller wanted a record count or there aren't any + * new records to return, we're done. + */ + if (head.fmh_count == 0 || xhead.fmh_entries == 0) + break; + + /* Copy all the records we got out to userspace. */ + if (copy_to_user(user_recs, recs, + xhead.fmh_entries * sizeof(struct fsmap))) { + error = -EFAULT; + goto out_free; + } + + /* Remember the last record flags we copied to userspace. */ + last_rec = &recs[xhead.fmh_entries - 1]; + last_flags = last_rec->fmr_flags; + + /* Set up the low key for the next iteration. */ + xfs_fsmap_to_internal(&xhead.fmh_keys[0], last_rec); + trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]); + } while (!done && head.fmh_entries < head.fmh_count); + + /* + * If there are no more records in the query result set and we're not + * in counting mode, mark the last record returned with the LAST flag. + */ + if (done && head.fmh_count > 0 && head.fmh_entries > 0) { + struct fsmap __user *user_rec; + + last_flags |= FMR_OF_LAST; + user_rec = &arg->fmh_recs[head.fmh_entries - 1]; + + if (copy_to_user(&user_rec->fmr_flags, &last_flags, + sizeof(last_flags))) { + error = -EFAULT; + goto out_free; + } } /* copy back header */ - head.fmh_entries = xhead.fmh_entries; - head.fmh_oflags = xhead.fmh_oflags; - if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) - return -EFAULT; + if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) { + error = -EFAULT; + goto out_free; + } - return 0; +out_free: + kmem_free(recs); + return error; } STATIC int diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 80a13c8561d8..5e165456da68 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -237,7 +237,7 @@ xfs_vn_create( umode_t mode, bool flags) { - return xfs_vn_mknod(dir, dentry, mode, 0); + return xfs_generic_create(dir, dentry, mode, 0, false); } STATIC int @@ -246,7 +246,7 @@ xfs_vn_mkdir( struct dentry *dentry, umode_t mode) { - return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0); + return xfs_generic_create(dir, dentry, mode | S_IFDIR, 0, false); } STATIC struct dentry * diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index ab737fed7b12..ad1009778d33 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -123,7 +123,6 @@ typedef __u32 xfs_nlink_t; #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #define EFSBADCRC EBADMSG /* Bad CRC detected */ -#define SYNCHRONIZE() barrier() #define __return_address __builtin_return_address(0) /* diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index ad0c69ee8947..fa2d05e65ff1 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1475,14 +1475,14 @@ xlog_commit_record( } /* - * Push on the buffer cache code if we ever use more than 75% of the on-disk - * log space. This code pushes on the lsn which would supposedly free up - * the 25% which we want to leave free. We may need to adopt a policy which - * pushes on an lsn which is further along in the log once we reach the high - * water mark. In this manner, we would be creating a low water mark. + * Compute the LSN that we'd need to push the log tail towards in order to have + * (a) enough on-disk log space to log the number of bytes specified, (b) at + * least 25% of the log space free, and (c) at least 256 blocks free. If the + * log free space already meets all three thresholds, this function returns + * NULLCOMMITLSN. */ -STATIC void -xlog_grant_push_ail( +xfs_lsn_t +xlog_grant_push_threshold( struct xlog *log, int need_bytes) { @@ -1508,7 +1508,7 @@ xlog_grant_push_ail( free_threshold = max(free_threshold, (log->l_logBBsize >> 2)); free_threshold = max(free_threshold, 256); if (free_blocks >= free_threshold) - return; + return NULLCOMMITLSN; xlog_crack_atomic_lsn(&log->l_tail_lsn, &threshold_cycle, &threshold_block); @@ -1528,13 +1528,33 @@ xlog_grant_push_ail( if (XFS_LSN_CMP(threshold_lsn, last_sync_lsn) > 0) threshold_lsn = last_sync_lsn; + return threshold_lsn; +} + +/* + * Push the tail of the log if we need to do so to maintain the free log space + * thresholds set out by xlog_grant_push_threshold. We may need to adopt a + * policy which pushes on an lsn which is further along in the log once we + * reach the high water mark. In this manner, we would be creating a low water + * mark. + */ +STATIC void +xlog_grant_push_ail( + struct xlog *log, + int need_bytes) +{ + xfs_lsn_t threshold_lsn; + + threshold_lsn = xlog_grant_push_threshold(log, need_bytes); + if (threshold_lsn == NULLCOMMITLSN || XLOG_FORCED_SHUTDOWN(log)) + return; + /* * Get the transaction layer to kick the dirty buffers out to * disk asynchronously. No point in trying to do this if * the filesystem is shutting down. */ - if (!XLOG_FORCED_SHUTDOWN(log)) - xfs_ail_push(log->l_ailp, threshold_lsn); + xfs_ail_push(log->l_ailp, threshold_lsn); } /* @@ -1604,9 +1624,7 @@ xlog_cksum( int i; int xheads; - xheads = size / XLOG_HEADER_CYCLE_SIZE; - if (size % XLOG_HEADER_CYCLE_SIZE) - xheads++; + xheads = DIV_ROUND_UP(size, XLOG_HEADER_CYCLE_SIZE); for (i = 1; i < xheads; i++) { crc = crc32c(crc, &xhdr[i].hic_xheader, diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 1412d6993f1e..58c3fcbec94a 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -141,4 +141,6 @@ void xfs_log_quiesce(struct xfs_mount *mp); bool xfs_log_check_lsn(struct xfs_mount *, xfs_lsn_t); bool xfs_log_in_recovery(struct xfs_mount *); +xfs_lsn_t xlog_grant_push_threshold(struct xlog *log, int need_bytes); + #endif /* __XFS_LOG_H__ */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index a17d788921d6..a8289adc1b29 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -371,6 +371,19 @@ out: return error; } +static inline int +xlog_logrec_hblks(struct xlog *log, struct xlog_rec_header *rh) +{ + if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { + int h_size = be32_to_cpu(rh->h_size); + + if ((be32_to_cpu(rh->h_version) & XLOG_VERSION_2) && + h_size > XLOG_HEADER_CYCLE_SIZE) + return DIV_ROUND_UP(h_size, XLOG_HEADER_CYCLE_SIZE); + } + return 1; +} + /* * Potentially backup over partial log record write. * @@ -463,15 +476,7 @@ xlog_find_verify_log_record( * reset last_blk. Only when last_blk points in the middle of a log * record do we update last_blk. */ - if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { - uint h_size = be32_to_cpu(head->h_size); - - xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE; - if (h_size % XLOG_HEADER_CYCLE_SIZE) - xhdrs++; - } else { - xhdrs = 1; - } + xhdrs = xlog_logrec_hblks(log, head); if (*last_blk - i + extra_bblks != BTOBB(be32_to_cpu(head->h_len)) + xhdrs) @@ -1158,22 +1163,7 @@ xlog_check_unmount_rec( * below. We won't want to clear the unmount record if there is one, so * we pass the lsn of the unmount record rather than the block after it. */ - if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { - int h_size = be32_to_cpu(rhead->h_size); - int h_version = be32_to_cpu(rhead->h_version); - - if ((h_version & XLOG_VERSION_2) && - (h_size > XLOG_HEADER_CYCLE_SIZE)) { - hblks = h_size / XLOG_HEADER_CYCLE_SIZE; - if (h_size % XLOG_HEADER_CYCLE_SIZE) - hblks++; - } else { - hblks = 1; - } - } else { - hblks = 1; - } - + hblks = xlog_logrec_hblks(log, rhead); after_umount_blk = xlog_wrap_logbno(log, rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len))); @@ -2444,44 +2434,66 @@ xlog_recover_process_data( /* Take all the collected deferred ops and finish them in order. */ static int xlog_finish_defer_ops( - struct xfs_trans *parent_tp) + struct xfs_mount *mp, + struct list_head *capture_list) { - struct xfs_mount *mp = parent_tp->t_mountp; + struct xfs_defer_capture *dfc, *next; struct xfs_trans *tp; - int64_t freeblks; - uint resblks; - int error; + struct xfs_inode *ip; + int error = 0; - /* - * We're finishing the defer_ops that accumulated as a result of - * recovering unfinished intent items during log recovery. We - * reserve an itruncate transaction because it is the largest - * permanent transaction type. Since we're the only user of the fs - * right now, take 93% (15/16) of the available free blocks. Use - * weird math to avoid a 64-bit division. - */ - freeblks = percpu_counter_sum(&mp->m_fdblocks); - if (freeblks <= 0) - return -ENOSPC; - resblks = min_t(int64_t, UINT_MAX, freeblks); - resblks = (resblks * 15) >> 4; - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, resblks, - 0, XFS_TRANS_RESERVE, &tp); - if (error) - return error; - /* transfer all collected dfops to this transaction */ - xfs_defer_move(tp, parent_tp); + list_for_each_entry_safe(dfc, next, capture_list, dfc_list) { + struct xfs_trans_res resv; + + /* + * Create a new transaction reservation from the captured + * information. Set logcount to 1 to force the new transaction + * to regrant every roll so that we can make forward progress + * in recovery no matter how full the log might be. + */ + resv.tr_logres = dfc->dfc_logres; + resv.tr_logcount = 1; + resv.tr_logflags = XFS_TRANS_PERM_LOG_RES; + + error = xfs_trans_alloc(mp, &resv, dfc->dfc_blkres, + dfc->dfc_rtxres, XFS_TRANS_RESERVE, &tp); + if (error) + return error; - return xfs_trans_commit(tp); + /* + * Transfer to this new transaction all the dfops we captured + * from recovering a single intent item. + */ + list_del_init(&dfc->dfc_list); + xfs_defer_ops_continue(dfc, tp, &ip); + + error = xfs_trans_commit(tp); + if (ip) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_irele(ip); + } + if (error) + return error; + } + + ASSERT(list_empty(capture_list)); + return 0; } -/* Is this log item a deferred action intent? */ -static inline bool xlog_item_is_intent(struct xfs_log_item *lip) +/* Release all the captured defer ops and capture structures in this list. */ +static void +xlog_abort_defer_ops( + struct xfs_mount *mp, + struct list_head *capture_list) { - return lip->li_ops->iop_recover != NULL && - lip->li_ops->iop_match != NULL; -} + struct xfs_defer_capture *dfc; + struct xfs_defer_capture *next; + list_for_each_entry_safe(dfc, next, capture_list, dfc_list) { + list_del_init(&dfc->dfc_list); + xfs_defer_ops_release(mp, dfc); + } +} /* * When this is called, all of the log intent items which did not have * corresponding log done items should be in the AIL. What we do now @@ -2502,35 +2514,23 @@ STATIC int xlog_recover_process_intents( struct xlog *log) { - struct xfs_trans *parent_tp; + LIST_HEAD(capture_list); struct xfs_ail_cursor cur; struct xfs_log_item *lip; struct xfs_ail *ailp; - int error; + int error = 0; #if defined(DEBUG) || defined(XFS_WARN) xfs_lsn_t last_lsn; #endif - /* - * The intent recovery handlers commit transactions to complete recovery - * for individual intents, but any new deferred operations that are - * queued during that process are held off until the very end. The - * purpose of this transaction is to serve as a container for deferred - * operations. Each intent recovery handler must transfer dfops here - * before its local transaction commits, and we'll finish the entire - * list below. - */ - error = xfs_trans_alloc_empty(log->l_mp, &parent_tp); - if (error) - return error; - ailp = log->l_ailp; spin_lock(&ailp->ail_lock); - lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); #if defined(DEBUG) || defined(XFS_WARN) last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block); #endif - while (lip != NULL) { + for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); + lip != NULL; + lip = xfs_trans_ail_cursor_next(ailp, &cur)) { /* * We're done when we see something other than an intent. * There should be no intents left in the AIL now. @@ -2552,26 +2552,29 @@ xlog_recover_process_intents( /* * NOTE: If your intent processing routine can create more - * deferred ops, you /must/ attach them to the transaction in - * this routine or else those subsequent intents will get + * deferred ops, you /must/ attach them to the capture list in + * the recover routine or else those subsequent intents will be * replayed in the wrong order! */ - if (!test_and_set_bit(XFS_LI_RECOVERED, &lip->li_flags)) { - spin_unlock(&ailp->ail_lock); - error = lip->li_ops->iop_recover(lip, parent_tp); - spin_lock(&ailp->ail_lock); - } + spin_unlock(&ailp->ail_lock); + error = lip->li_ops->iop_recover(lip, &capture_list); + spin_lock(&ailp->ail_lock); if (error) - goto out; - lip = xfs_trans_ail_cursor_next(ailp, &cur); + break; } -out: + xfs_trans_ail_cursor_done(&cur); spin_unlock(&ailp->ail_lock); - if (!error) - error = xlog_finish_defer_ops(parent_tp); - xfs_trans_cancel(parent_tp); + if (error) + goto err; + + error = xlog_finish_defer_ops(log->l_mp, &capture_list); + if (error) + goto err; + return 0; +err: + xlog_abort_defer_ops(log->l_mp, &capture_list); return error; } @@ -2878,7 +2881,8 @@ STATIC int xlog_valid_rec_header( struct xlog *log, struct xlog_rec_header *rhead, - xfs_daddr_t blkno) + xfs_daddr_t blkno, + int bufsize) { int hlen; @@ -2894,10 +2898,14 @@ xlog_valid_rec_header( return -EFSCORRUPTED; } - /* LR body must have data or it wouldn't have been written */ + /* + * LR body must have data (or it wouldn't have been written) + * and h_len must not be greater than LR buffer size. + */ hlen = be32_to_cpu(rhead->h_len); - if (XFS_IS_CORRUPT(log->l_mp, hlen <= 0 || hlen > INT_MAX)) + if (XFS_IS_CORRUPT(log->l_mp, hlen <= 0 || hlen > bufsize)) return -EFSCORRUPTED; + if (XFS_IS_CORRUPT(log->l_mp, blkno > log->l_logBBsize || blkno > INT_MAX)) return -EFSCORRUPTED; @@ -2958,9 +2966,6 @@ xlog_do_recovery_pass( goto bread_err1; rhead = (xlog_rec_header_t *)offset; - error = xlog_valid_rec_header(log, rhead, tail_blk); - if (error) - goto bread_err1; /* * xfsprogs has a bug where record length is based on lsunit but @@ -2975,30 +2980,22 @@ xlog_do_recovery_pass( */ h_size = be32_to_cpu(rhead->h_size); h_len = be32_to_cpu(rhead->h_len); - if (h_len > h_size) { - if (h_len <= log->l_mp->m_logbsize && - be32_to_cpu(rhead->h_num_logops) == 1) { - xfs_warn(log->l_mp, + if (h_len > h_size && h_len <= log->l_mp->m_logbsize && + rhead->h_num_logops == cpu_to_be32(1)) { + xfs_warn(log->l_mp, "invalid iclog size (%d bytes), using lsunit (%d bytes)", - h_size, log->l_mp->m_logbsize); - h_size = log->l_mp->m_logbsize; - } else { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, - log->l_mp); - error = -EFSCORRUPTED; - goto bread_err1; - } + h_size, log->l_mp->m_logbsize); + h_size = log->l_mp->m_logbsize; } - if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) && - (h_size > XLOG_HEADER_CYCLE_SIZE)) { - hblks = h_size / XLOG_HEADER_CYCLE_SIZE; - if (h_size % XLOG_HEADER_CYCLE_SIZE) - hblks++; + error = xlog_valid_rec_header(log, rhead, tail_blk, h_size); + if (error) + goto bread_err1; + + hblks = xlog_logrec_hblks(log, rhead); + if (hblks != 1) { kmem_free(hbp); hbp = xlog_alloc_buffer(log, hblks); - } else { - hblks = 1; } } else { ASSERT(log->l_sectBBsize == 1); @@ -3070,7 +3067,7 @@ xlog_do_recovery_pass( } rhead = (xlog_rec_header_t *)offset; error = xlog_valid_rec_header(log, rhead, - split_hblks ? blk_no : 0); + split_hblks ? blk_no : 0, h_size); if (error) goto bread_err2; @@ -3151,7 +3148,7 @@ xlog_do_recovery_pass( goto bread_err2; rhead = (xlog_rec_header_t *)offset; - error = xlog_valid_rec_header(log, rhead, blk_no); + error = xlog_valid_rec_header(log, rhead, blk_no, h_size); if (error) goto bread_err2; diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 3f82e0c92c2d..b2a9abee8b2b 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -249,7 +249,6 @@ xfs_qm_unmount_quotas( STATIC int xfs_qm_dqattach_one( struct xfs_inode *ip, - xfs_dqid_t id, xfs_dqtype_t type, bool doalloc, struct xfs_dquot **IO_idqpp) @@ -330,23 +329,23 @@ xfs_qm_dqattach_locked( ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); if (XFS_IS_UQUOTA_ON(mp) && !ip->i_udquot) { - error = xfs_qm_dqattach_one(ip, i_uid_read(VFS_I(ip)), - XFS_DQTYPE_USER, doalloc, &ip->i_udquot); + error = xfs_qm_dqattach_one(ip, XFS_DQTYPE_USER, + doalloc, &ip->i_udquot); if (error) goto done; ASSERT(ip->i_udquot); } if (XFS_IS_GQUOTA_ON(mp) && !ip->i_gdquot) { - error = xfs_qm_dqattach_one(ip, i_gid_read(VFS_I(ip)), - XFS_DQTYPE_GROUP, doalloc, &ip->i_gdquot); + error = xfs_qm_dqattach_one(ip, XFS_DQTYPE_GROUP, + doalloc, &ip->i_gdquot); if (error) goto done; ASSERT(ip->i_gdquot); } if (XFS_IS_PQUOTA_ON(mp) && !ip->i_pdquot) { - error = xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQTYPE_PROJ, + error = xfs_qm_dqattach_one(ip, XFS_DQTYPE_PROJ, doalloc, &ip->i_pdquot); if (error) goto done; @@ -1663,6 +1662,7 @@ xfs_qm_vop_dqalloc( } if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) { + ASSERT(O_udqpp); if (!uid_eq(inode->i_uid, uid)) { /* * What we need is the dquot that has this uid, and @@ -1696,6 +1696,7 @@ xfs_qm_vop_dqalloc( } } if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) { + ASSERT(O_gdqpp); if (!gid_eq(inode->i_gid, gid)) { xfs_iunlock(ip, lockflags); error = xfs_qm_dqget(mp, from_kgid(user_ns, gid), @@ -1713,9 +1714,10 @@ xfs_qm_vop_dqalloc( } } if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { + ASSERT(O_pdqpp); if (ip->i_d.di_projid != prid) { xfs_iunlock(ip, lockflags); - error = xfs_qm_dqget(mp, (xfs_dqid_t)prid, + error = xfs_qm_dqget(mp, prid, XFS_DQTYPE_PROJ, true, &pq); if (error) { ASSERT(error != -ENOENT); diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index ca93b6488377..7529eb63ce94 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -424,7 +424,7 @@ const struct xfs_defer_op_type xfs_refcount_update_defer_type = { STATIC int xfs_cui_item_recover( struct xfs_log_item *lip, - struct xfs_trans *parent_tp) + struct list_head *capture_list) { struct xfs_bmbt_irec irec; struct xfs_cui_log_item *cuip = CUI_ITEM(lip); @@ -432,7 +432,7 @@ xfs_cui_item_recover( struct xfs_cud_log_item *cudp; struct xfs_trans *tp; struct xfs_btree_cur *rcur = NULL; - struct xfs_mount *mp = parent_tp->t_mountp; + struct xfs_mount *mp = lip->li_mountp; xfs_fsblock_t startblock_fsb; xfs_fsblock_t new_fsb; xfs_extlen_t new_len; @@ -467,14 +467,8 @@ xfs_cui_item_recover( refc->pe_len == 0 || startblock_fsb >= mp->m_sb.sb_dblocks || refc->pe_len >= mp->m_sb.sb_agblocks || - (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS)) { - /* - * This will pull the CUI from the AIL and - * free the memory associated with it. - */ - xfs_cui_release(cuip); + (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS)) return -EFSCORRUPTED; - } } /* @@ -493,12 +487,7 @@ xfs_cui_item_recover( mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp); if (error) return error; - /* - * Recovery stashes all deferred ops during intent processing and - * finishes them on completion. Transfer current dfops state to this - * transaction and transfer the result back before we return. - */ - xfs_defer_move(tp, parent_tp); + cudp = xfs_trans_get_cud(tp, cuip); for (i = 0; i < cuip->cui_format.cui_nextents; i++) { @@ -555,13 +544,10 @@ xfs_cui_item_recover( } xfs_refcount_finish_one_cleanup(tp, rcur, error); - xfs_defer_move(parent_tp, tp); - error = xfs_trans_commit(tp); - return error; + return xfs_defer_ops_capture_and_commit(tp, NULL, capture_list); abort_error: xfs_refcount_finish_one_cleanup(tp, rcur, error); - xfs_defer_move(parent_tp, tp); xfs_trans_cancel(tp); return error; } @@ -574,6 +560,32 @@ xfs_cui_item_match( return CUI_ITEM(lip)->cui_format.cui_id == intent_id; } +/* Relog an intent item to push the log tail forward. */ +static struct xfs_log_item * +xfs_cui_item_relog( + struct xfs_log_item *intent, + struct xfs_trans *tp) +{ + struct xfs_cud_log_item *cudp; + struct xfs_cui_log_item *cuip; + struct xfs_phys_extent *extp; + unsigned int count; + + count = CUI_ITEM(intent)->cui_format.cui_nextents; + extp = CUI_ITEM(intent)->cui_format.cui_extents; + + tp->t_flags |= XFS_TRANS_DIRTY; + cudp = xfs_trans_get_cud(tp, CUI_ITEM(intent)); + set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags); + + cuip = xfs_cui_init(tp->t_mountp, count); + memcpy(cuip->cui_format.cui_extents, extp, count * sizeof(*extp)); + atomic_set(&cuip->cui_next_extent, count); + xfs_trans_add_item(tp, &cuip->cui_item); + set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags); + return &cuip->cui_item; +} + static const struct xfs_item_ops xfs_cui_item_ops = { .iop_size = xfs_cui_item_size, .iop_format = xfs_cui_item_format, @@ -581,6 +593,7 @@ static const struct xfs_item_ops xfs_cui_item_ops = { .iop_release = xfs_cui_item_release, .iop_recover = xfs_cui_item_recover, .iop_match = xfs_cui_item_match, + .iop_relog = xfs_cui_item_relog, }; /* diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index dc5b0753cd51..7adc996ca6e3 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -467,14 +467,14 @@ const struct xfs_defer_op_type xfs_rmap_update_defer_type = { STATIC int xfs_rui_item_recover( struct xfs_log_item *lip, - struct xfs_trans *parent_tp) + struct list_head *capture_list) { struct xfs_rui_log_item *ruip = RUI_ITEM(lip); struct xfs_map_extent *rmap; struct xfs_rud_log_item *rudp; struct xfs_trans *tp; struct xfs_btree_cur *rcur = NULL; - struct xfs_mount *mp = parent_tp->t_mountp; + struct xfs_mount *mp = lip->li_mountp; xfs_fsblock_t startblock_fsb; enum xfs_rmap_intent_type type; xfs_exntst_t state; @@ -511,14 +511,8 @@ xfs_rui_item_recover( rmap->me_len == 0 || startblock_fsb >= mp->m_sb.sb_dblocks || rmap->me_len >= mp->m_sb.sb_agblocks || - (rmap->me_flags & ~XFS_RMAP_EXTENT_FLAGS)) { - /* - * This will pull the RUI from the AIL and - * free the memory associated with it. - */ - xfs_rui_release(ruip); + (rmap->me_flags & ~XFS_RMAP_EXTENT_FLAGS)) return -EFSCORRUPTED; - } } error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, @@ -573,8 +567,7 @@ xfs_rui_item_recover( } xfs_rmap_finish_one_cleanup(tp, rcur, error); - error = xfs_trans_commit(tp); - return error; + return xfs_defer_ops_capture_and_commit(tp, NULL, capture_list); abort_error: xfs_rmap_finish_one_cleanup(tp, rcur, error); @@ -590,6 +583,32 @@ xfs_rui_item_match( return RUI_ITEM(lip)->rui_format.rui_id == intent_id; } +/* Relog an intent item to push the log tail forward. */ +static struct xfs_log_item * +xfs_rui_item_relog( + struct xfs_log_item *intent, + struct xfs_trans *tp) +{ + struct xfs_rud_log_item *rudp; + struct xfs_rui_log_item *ruip; + struct xfs_map_extent *extp; + unsigned int count; + + count = RUI_ITEM(intent)->rui_format.rui_nextents; + extp = RUI_ITEM(intent)->rui_format.rui_extents; + + tp->t_flags |= XFS_TRANS_DIRTY; + rudp = xfs_trans_get_rud(tp, RUI_ITEM(intent)); + set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags); + + ruip = xfs_rui_init(tp->t_mountp, count); + memcpy(ruip->rui_format.rui_extents, extp, count * sizeof(*extp)); + atomic_set(&ruip->rui_next_extent, count); + xfs_trans_add_item(tp, &ruip->rui_item); + set_bit(XFS_LI_DIRTY, &ruip->rui_item.li_flags); + return &ruip->rui_item; +} + static const struct xfs_item_ops xfs_rui_item_ops = { .iop_size = xfs_rui_item_size, .iop_format = xfs_rui_item_format, @@ -597,6 +616,7 @@ static const struct xfs_item_ops xfs_rui_item_ops = { .iop_release = xfs_rui_item_release, .iop_recover = xfs_rui_item_recover, .iop_match = xfs_rui_item_match, + .iop_relog = xfs_rui_item_relog, }; /* diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 5b89c12f1566..ede1baf31413 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -18,7 +18,7 @@ #include "xfs_trans_space.h" #include "xfs_icache.h" #include "xfs_rtalloc.h" - +#include "xfs_sb.h" /* * Read and return the summary information for a given extent size, @@ -778,8 +778,14 @@ xfs_growfs_rt_alloc( struct xfs_bmbt_irec map; /* block map output */ int nmap; /* number of block maps */ int resblks; /* space reservation */ + enum xfs_blft buf_type; struct xfs_trans *tp; + if (ip == mp->m_rsumip) + buf_type = XFS_BLFT_RTSUMMARY_BUF; + else + buf_type = XFS_BLFT_RTBITMAP_BUF; + /* * Allocate space to the file, as necessary. */ @@ -841,6 +847,9 @@ xfs_growfs_rt_alloc( mp->m_bsize, 0, &bp); if (error) goto out_trans_cancel; + + xfs_trans_buf_set_type(tp, bp, buf_type); + bp->b_ops = &xfs_rtbuf_ops; memset(bp->b_addr, 0, mp->m_sb.sb_blocksize); xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1); /* @@ -1015,23 +1024,29 @@ xfs_growfs_rt( /* * Lock out other callers by grabbing the bitmap inode lock. */ - xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); + xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP); xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL); /* - * Update the bitmap inode's size. + * Update the bitmap inode's size ondisk and incore. We need + * to update the incore size so that inode inactivation won't + * punch what it thinks are "posteof" blocks. */ mp->m_rbmip->i_d.di_size = nsbp->sb_rbmblocks * nsbp->sb_blocksize; + i_size_write(VFS_I(mp->m_rbmip), mp->m_rbmip->i_d.di_size); xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); /* * Get the summary inode into the transaction. */ - xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL); + xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL | XFS_ILOCK_RTSUM); xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL); /* - * Update the summary inode's size. + * Update the summary inode's size. We need to update the + * incore size so that inode inactivation won't punch what it + * thinks are "posteof" blocks. */ mp->m_rsumip->i_d.di_size = nmp->m_rsumsize; + i_size_write(VFS_I(mp->m_rsumip), mp->m_rsumip->i_d.di_size); xfs_trans_log_inode(tp, mp->m_rsumip, XFS_ILOG_CORE); /* * Copy summary data from old to new sizes. @@ -1087,7 +1102,13 @@ error_cancel: if (error) break; } + if (error) + goto out_free; + + /* Update secondary superblocks now the physical grow has completed */ + error = xfs_update_secondary_sbs(mp); +out_free: /* * Free the fake mp structure. */ diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c index f70f1255220b..20e0534a772c 100644 --- a/fs/xfs/xfs_stats.c +++ b/fs/xfs/xfs_stats.c @@ -23,6 +23,7 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf) uint64_t xs_xstrat_bytes = 0; uint64_t xs_write_bytes = 0; uint64_t xs_read_bytes = 0; + uint64_t defer_relog = 0; static const struct xstats_entry { char *desc; @@ -70,10 +71,13 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf) xs_xstrat_bytes += per_cpu_ptr(stats, i)->s.xs_xstrat_bytes; xs_write_bytes += per_cpu_ptr(stats, i)->s.xs_write_bytes; xs_read_bytes += per_cpu_ptr(stats, i)->s.xs_read_bytes; + defer_relog += per_cpu_ptr(stats, i)->s.defer_relog; } len += scnprintf(buf + len, PATH_MAX-len, "xpc %Lu %Lu %Lu\n", xs_xstrat_bytes, xs_write_bytes, xs_read_bytes); + len += scnprintf(buf + len, PATH_MAX-len, "defer_relog %llu\n", + defer_relog); len += scnprintf(buf + len, PATH_MAX-len, "debug %u\n", #if defined(DEBUG) 1); diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h index 34d704f703d2..43ffba74f045 100644 --- a/fs/xfs/xfs_stats.h +++ b/fs/xfs/xfs_stats.h @@ -137,6 +137,7 @@ struct __xfsstats { uint64_t xs_xstrat_bytes; uint64_t xs_write_bytes; uint64_t xs_read_bytes; + uint64_t defer_relog; }; #define xfsstats_offset(f) (offsetof(struct __xfsstats, f)/sizeof(uint32_t)) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index baf5de30eebb..d1b5f2d2a245 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1234,25 +1234,12 @@ xfs_fc_parse_param( case Opt_nouuid: mp->m_flags |= XFS_MOUNT_NOUUID; return 0; - case Opt_ikeep: - mp->m_flags |= XFS_MOUNT_IKEEP; - return 0; - case Opt_noikeep: - mp->m_flags &= ~XFS_MOUNT_IKEEP; - return 0; case Opt_largeio: mp->m_flags |= XFS_MOUNT_LARGEIO; return 0; case Opt_nolargeio: mp->m_flags &= ~XFS_MOUNT_LARGEIO; return 0; - case Opt_attr2: - mp->m_flags |= XFS_MOUNT_ATTR2; - return 0; - case Opt_noattr2: - mp->m_flags &= ~XFS_MOUNT_ATTR2; - mp->m_flags |= XFS_MOUNT_NOATTR2; - return 0; case Opt_filestreams: mp->m_flags |= XFS_MOUNT_FILESTREAMS; return 0; @@ -1304,6 +1291,24 @@ xfs_fc_parse_param( xfs_mount_set_dax_mode(mp, result.uint_32); return 0; #endif + /* Following mount options will be removed in September 2025 */ + case Opt_ikeep: + xfs_warn(mp, "%s mount option is deprecated.", param->key); + mp->m_flags |= XFS_MOUNT_IKEEP; + return 0; + case Opt_noikeep: + xfs_warn(mp, "%s mount option is deprecated.", param->key); + mp->m_flags &= ~XFS_MOUNT_IKEEP; + return 0; + case Opt_attr2: + xfs_warn(mp, "%s mount option is deprecated.", param->key); + mp->m_flags |= XFS_MOUNT_ATTR2; + return 0; + case Opt_noattr2: + xfs_warn(mp, "%s mount option is deprecated.", param->key); + mp->m_flags &= ~XFS_MOUNT_ATTR2; + mp->m_flags |= XFS_MOUNT_NOATTR2; + return 0; default: xfs_warn(mp, "unknown mount option [%s].", param->key); return -EINVAL; @@ -1450,6 +1455,19 @@ xfs_fc_fill_super( if (error) goto out_free_sb; + /* V4 support is undergoing deprecation. */ + if (!xfs_sb_version_hascrc(&mp->m_sb)) { +#ifdef CONFIG_XFS_SUPPORT_V4 + xfs_warn_once(mp, + "Deprecated V4 format (crc=0) will not be supported after September 2030."); +#else + xfs_warn(mp, + "Deprecated V4 format (crc=0) not supported by kernel."); + error = -EINVAL; + goto out_free_sb; +#endif + } + /* * XFS block mappings use 54 bits to store the logical block offset. * This should suffice to handle the maximum file size that the VFS diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c index 021ef96d0542..fac9de7ee6d0 100644 --- a/fs/xfs/xfs_sysctl.c +++ b/fs/xfs/xfs_sysctl.c @@ -50,13 +50,45 @@ xfs_panic_mask_proc_handler( } #endif /* CONFIG_PROC_FS */ +STATIC int +xfs_deprecate_irix_sgid_inherit_proc_handler( + struct ctl_table *ctl, + int write, + void *buffer, + size_t *lenp, + loff_t *ppos) +{ + if (write) { + printk_once(KERN_WARNING + "XFS: " "%s sysctl option is deprecated.\n", + ctl->procname); + } + return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); +} + +STATIC int +xfs_deprecate_irix_symlink_mode_proc_handler( + struct ctl_table *ctl, + int write, + void *buffer, + size_t *lenp, + loff_t *ppos) +{ + if (write) { + printk_once(KERN_WARNING + "XFS: " "%s sysctl option is deprecated.\n", + ctl->procname); + } + return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); +} + static struct ctl_table xfs_table[] = { { .procname = "irix_sgid_inherit", .data = &xfs_params.sgid_inherit.val, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = xfs_deprecate_irix_sgid_inherit_proc_handler, .extra1 = &xfs_params.sgid_inherit.min, .extra2 = &xfs_params.sgid_inherit.max }, @@ -65,7 +97,7 @@ static struct ctl_table xfs_table[] = { .data = &xfs_params.symlink_mode.val, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = xfs_deprecate_irix_symlink_mode_proc_handler, .extra1 = &xfs_params.symlink_mode.min, .extra2 = &xfs_params.symlink_mode.max }, diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index dcdcf99cfa5d..86951652d3ed 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -2533,6 +2533,7 @@ DEFINE_DEFER_PENDING_EVENT(xfs_defer_create_intent); DEFINE_DEFER_PENDING_EVENT(xfs_defer_cancel_list); DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_finish); DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_abort); +DEFINE_DEFER_PENDING_EVENT(xfs_defer_relog_intent); #define DEFINE_BMAP_FREE_DEFERRED_EVENT DEFINE_PHYS_EXTENT_DEFERRED_EVENT DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_bmap_free_defer); diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index ca18a040336a..c94e71f741b6 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -959,7 +959,7 @@ xfs_trans_cancel( struct xfs_log_item *lip; list_for_each_entry(lip, &tp->t_items, li_trans) - ASSERT(!(lip->li_type == XFS_LI_EFD)); + ASSERT(!xlog_item_is_intent_done(lip)); } #endif xfs_trans_unreserve_and_mod_sb(tp); diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index f46534b75236..084658946cc8 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -55,14 +55,12 @@ struct xfs_log_item { #define XFS_LI_ABORTED 1 #define XFS_LI_FAILED 2 #define XFS_LI_DIRTY 3 /* log item dirty in transaction */ -#define XFS_LI_RECOVERED 4 /* log intent item has been recovered */ #define XFS_LI_FLAGS \ { (1 << XFS_LI_IN_AIL), "IN_AIL" }, \ { (1 << XFS_LI_ABORTED), "ABORTED" }, \ { (1 << XFS_LI_FAILED), "FAILED" }, \ - { (1 << XFS_LI_DIRTY), "DIRTY" }, \ - { (1 << XFS_LI_RECOVERED), "RECOVERED" } + { (1 << XFS_LI_DIRTY), "DIRTY" } struct xfs_item_ops { unsigned flags; @@ -74,10 +72,29 @@ struct xfs_item_ops { void (*iop_committing)(struct xfs_log_item *, xfs_lsn_t commit_lsn); void (*iop_release)(struct xfs_log_item *); xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t); - int (*iop_recover)(struct xfs_log_item *lip, struct xfs_trans *tp); + int (*iop_recover)(struct xfs_log_item *lip, + struct list_head *capture_list); bool (*iop_match)(struct xfs_log_item *item, uint64_t id); + struct xfs_log_item *(*iop_relog)(struct xfs_log_item *intent, + struct xfs_trans *tp); }; +/* Is this log item a deferred action intent? */ +static inline bool +xlog_item_is_intent(struct xfs_log_item *lip) +{ + return lip->li_ops->iop_recover != NULL && + lip->li_ops->iop_match != NULL; +} + +/* Is this a log intent-done item? */ +static inline bool +xlog_item_is_intent_done(struct xfs_log_item *lip) +{ + return lip->li_ops->iop_unpin == NULL && + lip->li_ops->iop_push == NULL; +} + /* * Release the log item as soon as committed. This is for items just logging * intents that never need to be written back in place. @@ -243,4 +260,12 @@ void xfs_trans_buf_copy_type(struct xfs_buf *dst_bp, extern kmem_zone_t *xfs_trans_zone; +static inline struct xfs_log_item * +xfs_trans_item_relog( + struct xfs_log_item *lip, + struct xfs_trans *tp) +{ + return lip->li_ops->iop_relog(lip, tp); +} + #endif /* __XFS_TRANS_H__ */ diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index 133fc6fc3edd..fe45b0c3970c 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -221,36 +221,27 @@ xfs_trans_mod_dquot( } switch (field) { - - /* - * regular disk blk reservation - */ - case XFS_TRANS_DQ_RES_BLKS: + /* regular disk blk reservation */ + case XFS_TRANS_DQ_RES_BLKS: qtrx->qt_blk_res += delta; break; - /* - * inode reservation - */ - case XFS_TRANS_DQ_RES_INOS: + /* inode reservation */ + case XFS_TRANS_DQ_RES_INOS: qtrx->qt_ino_res += delta; break; - /* - * disk blocks used. - */ - case XFS_TRANS_DQ_BCOUNT: + /* disk blocks used. */ + case XFS_TRANS_DQ_BCOUNT: qtrx->qt_bcount_delta += delta; break; - case XFS_TRANS_DQ_DELBCOUNT: + case XFS_TRANS_DQ_DELBCOUNT: qtrx->qt_delbcnt_delta += delta; break; - /* - * Inode Count - */ - case XFS_TRANS_DQ_ICOUNT: + /* Inode Count */ + case XFS_TRANS_DQ_ICOUNT: if (qtrx->qt_ino_res && delta > 0) { qtrx->qt_ino_res_used += delta; ASSERT(qtrx->qt_ino_res >= qtrx->qt_ino_res_used); @@ -258,17 +249,13 @@ xfs_trans_mod_dquot( qtrx->qt_icount_delta += delta; break; - /* - * rtblk reservation - */ - case XFS_TRANS_DQ_RES_RTBLKS: + /* rtblk reservation */ + case XFS_TRANS_DQ_RES_RTBLKS: qtrx->qt_rtblk_res += delta; break; - /* - * rtblk count - */ - case XFS_TRANS_DQ_RTBCOUNT: + /* rtblk count */ + case XFS_TRANS_DQ_RTBCOUNT: if (qtrx->qt_rtblk_res && delta > 0) { qtrx->qt_rtblk_res_used += delta; ASSERT(qtrx->qt_rtblk_res >= qtrx->qt_rtblk_res_used); @@ -276,11 +263,11 @@ xfs_trans_mod_dquot( qtrx->qt_rtbcount_delta += delta; break; - case XFS_TRANS_DQ_DELRTBCOUNT: + case XFS_TRANS_DQ_DELRTBCOUNT: qtrx->qt_delrtb_delta += delta; break; - default: + default: ASSERT(0); } |